nf_conntrack_expect.c revision 83731671d9e6878c0a05d309c68fb71c16d3235a
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
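/*
 * Callers in this file take nf_conntrack_lock around nf_ct_unlink_expect()
 * and stop the expectation's timeout timer first (typically via a
 * successful del_timer()); the second assertion below checks that the
 * timer is indeed no longer pending.
 */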
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	hlist_del(&exp->lnode);
	master_help->expecting[exp->class]--;
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);

static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	spin_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_put(exp);
}

static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
		get_random_bytes(&nf_ct_expect_hash_rnd,
				 sizeof(nf_ct_expect_hash_rnd));
		nf_ct_expect_hash_rnd_initted = 1;
	}

	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
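	/* Scale the 32-bit jhash value onto [0, nf_ct_expect_hsize) with a
	 * multiply-and-shift instead of a modulo. */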
	return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	rcu_read_lock();
	i = __nf_ct_expect_find(net, tuple);
	if (i && !atomic_inc_not_zero(&i->use))
		i = NULL;
	rcu_read_unlock();

	return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
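/*
 * Illustrative use of the _get variant (a sketch, not taken from an
 * in-tree caller; "net" and "tuple" are assumed to be a struct net and a
 * filled-in nf_conntrack_tuple): a non-NULL result carries a reference
 * that must be dropped with nf_ct_expect_put() when the caller is done.
 *
 *	struct nf_conntrack_expect *exp;
 *
 *	exp = nf_ct_expect_find_get(net, tuple);
 *	if (exp != NULL) {
 *		... inspect exp->master, exp->flags, etc. ...
 *		nf_ct_expect_put(exp);
 *	}
 */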

/* If an expectation for this connection is found, it is deleted from the
 * global list and returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}

	return NULL;
}
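/*
 * Note on the function above: when it returns a non-NULL expectation the
 * caller owns a reference, either the one taken explicitly for
 * NF_CT_EXPECT_PERMANENT entries or, in the normal case, the reference
 * that belonged to the (now cancelled) timeout timer after the entry has
 * been unlinked.
 */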

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *n, *next;

	/* Optimization: most connections never expect any others. */
	if (!help)
		return;

	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master && a->class == b->class
		&& nf_ct_tuple_equal(&a->tuple, &b->tuple)
		&& nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}
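/*
 * The difference between the two predicates above, on a made-up example:
 * an expectation for "any source -> 10.0.0.1:20/tcp" and one for
 * "192.168.0.2 -> 10.0.0.1:20/tcp" are not equal (expect_matches() is
 * false: tuples and masks differ), but they do clash (expect_clash() is
 * true), because the intersection of the two source masks ignores the
 * source entirely and the destinations are identical.
 */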

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);
	INIT_RCU_HEAD(&new->rcu);
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
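/*
 * Putting nf_ct_expect_alloc()/nf_ct_expect_init() together: a rough
 * sketch of how a connection-tracking helper sets up an expectation for a
 * data channel. "ct" (the master connection being helped) and "port"
 * (a __be16 parsed from the control channel) are stand-ins here, not code
 * from an actual helper.
 *
 *	struct nf_conntrack_expect *exp;
 *	int ret;
 *
 *	exp = nf_ct_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *			  &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
 *			  &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3,
 *			  IPPROTO_TCP, NULL, &port);
 *	ret = nf_ct_expect_related(exp);
 *	nf_ct_expect_put(exp);
 *
 * A negative ret means the expectation could not be registered (clash,
 * per-class limit, table full, ...).
 */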

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
	struct nf_conntrack_expect *exp;

	exp = container_of(head, struct nf_conntrack_expect, rcu);
	kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

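/*
 * nf_ct_expect_insert() below takes two additional references on the
 * expectation: one for its presence on the master's expectation list and
 * in the global hash (dropped again by nf_ct_unlink_expect()), and one on
 * behalf of the pending timeout timer (dropped when the timer is stopped
 * or fires; nf_ct_find_expectation() instead hands it to its caller).
 */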
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);
	const struct nf_conntrack_expect_policy *p;
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	atomic_inc(&exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	p = &master_help->helper->expect_policy[exp->class];
	exp->timeout.expires = jiffies + p->timeout * HZ;
	add_timer(&exp->timeout);

	atomic_inc(&exp->use);
	NF_CT_STAT_INC(net, expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
				struct nf_conntrack_expect *new)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp, *last = NULL;
	struct hlist_node *n;

	hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
		if (exp->class == new->class)
			last = exp;
	}

	if (last && del_timer(&last->timeout)) {
		nf_ct_unlink_expect(last);
		nf_ct_expect_put(last);
	}
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
	struct nf_conn_help *master_help = nfct_help(i->master);
	const struct nf_conntrack_expect_policy *p;

	if (!del_timer(&i->timeout))
		return 0;

	p = &master_help->helper->expect_policy[i->class];
	i->timeout.expires = jiffies + p->timeout * HZ;
	add_timer(&i->timeout);
	return 1;
}

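/*
 * __nf_ct_expect_check() below returns a positive value when the new
 * expectation may be inserted, 0 when an identical expectation already
 * existed and merely had its timer refreshed, and a negative errno on
 * failure; nf_ct_expect_related_report() relies on this three-way result
 * with its "ret <= 0" test.
 */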
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *n;
	unsigned int h;
	int ret = 1;

	if (!master_help->helper) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will we be over the limit? */
	p = &master_help->helper->expect_policy[expect->class];
	if (p->max_expected &&
	    master_help->expecting[expect->class] >= p->max_expected) {
		evict_oldest_expect(master, expect);
		if (master_help->expecting[expect->class] >= p->max_expected) {
			ret = -EMFILE;
			goto out;
		}
	}

	if (net->ct.expect_count >= nf_ct_expect_max) {
		if (net_ratelimit())
			printk(KERN_WARNING
			       "nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 pid, int report)
{
	int ret;

	spin_lock_bh(&nf_conntrack_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret <= 0)
		goto out;

	ret = 0;
	nf_ct_expect_insert(expect);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
	return ret;
out:
	spin_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
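/*
 * Most in-kernel helpers do not call nf_ct_expect_related_report()
 * directly but go through the nf_ct_expect_related() wrapper declared in
 * <net/netfilter/nf_conntrack_expect.h>, which passes pid = 0 and
 * report = 0; ctnetlink is the main user of the _report variant.
 */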

#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;
	struct hlist_node *n;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
					     struct hlist_node *head)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;

	head = rcu_dereference(head->next);
	while (head == NULL) {
		if (++st->bucket >= nf_ct_expect_hsize)
			return NULL;
		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
	}
	return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *head = ct_expect_get_first(seq);

	if (head)
		while (pos && (head = ct_expect_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
				       expect->tuple.dst.protonum));

	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE)
		seq_printf(s, "%sINACTIVE", delim);

	return seq_putc(s, '\n');
}
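/*
 * Roughly what one /proc/net/nf_conntrack_expect line looks like (the
 * exact tuple text comes from the l3/l4 protocols' print_tuple handlers;
 * the values below are made up):
 *
 *	296 l3proto = 2 proto=6 src=192.168.0.2 dst=10.0.0.1 sport=0 dport=20 PERMANENT
 */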

static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc;

	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
	if (!proc)
		return -ENOMEM;
#endif /* CONFIG_PROC_FS */
	return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);

int nf_conntrack_expect_init(struct net *net)
{
	int err = -ENOMEM;

	if (net_eq(net, &init_net)) {
		if (!nf_ct_expect_hsize) {
			nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
			if (!nf_ct_expect_hsize)
				nf_ct_expect_hsize = 1;
		}
		nf_ct_expect_max = nf_ct_expect_hsize * 4;
	}

	net->ct.expect_count = 0;
	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
						  &net->ct.expect_vmalloc, 0);
	if (net->ct.expect_hash == NULL)
		goto err1;

	if (net_eq(net, &init_net)) {
		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
					sizeof(struct nf_conntrack_expect),
					0, 0, NULL);
		if (!nf_ct_expect_cachep)
			goto err2;
	}

	err = exp_proc_init(net);
	if (err < 0)
		goto err3;

	return 0;

err3:
	if (net_eq(net, &init_net))
		kmem_cache_destroy(nf_ct_expect_cachep);
err2:
	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
			     nf_ct_expect_hsize);
err1:
	return err;
}
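/*
 * Sizing recap for the defaults chosen above: the expectation hash gets
 * nf_conntrack_htable_size / 256 buckets (at least one) unless overridden
 * with the "expect_hashsize" module parameter, and nf_ct_expect_max, the
 * cap checked in __nf_ct_expect_check(), defaults to four times the
 * number of buckets.
 */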

void nf_conntrack_expect_fini(struct net *net)
{
	exp_proc_remove(net);
	if (net_eq(net, &init_net))
		kmem_cache_destroy(nf_ct_expect_cachep);
	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
			     nf_ct_expect_hsize);
}