nf_conntrack_expect.c revision f682cefa5ad204d3bfaa54a58046c66d2d035ac1
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

unsigned int nf_ct_expect_max __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

static HLIST_HEAD(nf_ct_userspace_expect_list);

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
				u32 pid, int report)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	hlist_del(&exp->lnode);
	if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
		master_help->expecting[exp->class]--;

	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);

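/* Expectation timer callback: the expectation expired without being matched.
 * Unlink it from the tables (dropping the hash table's reference) and then
 * drop the timer's own reference.
 */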
static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	spin_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_put(exp);
}

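/* Hash an expectation by the destination part of its tuple (address, l3/l4
 * protocol and port), scaled into the range [0, nf_ct_expect_hsize).
 */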
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	if (unlikely(!nf_conntrack_hash_rnd))
		init_nf_conntrack_hash_rnd();

	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
	return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
		    const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone)
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
		      const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	rcu_read_lock();
	i = __nf_ct_expect_find(net, zone, tuple);
	if (i && !atomic_inc_not_zero(&i->use))
		i = NULL;
	rcu_read_unlock();

	return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it gets deleted from
 * the global list and returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
		       const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}

	return NULL;
}

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *n, *next;

	/* Optimization: most connections never expect any others. */
	if (!help)
		return;

	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* The parts covered by the intersection of the two masks must
	   differ, otherwise the expectations clash. */
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master && a->class == b->class &&
		nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
		nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
		nf_ct_zone(a->master) == nf_ct_zone(b->master);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

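/* Fill in the expected tuple and mask: the destination is always matched
 * exactly, while the source address and port are wildcarded when saddr or
 * src are NULL.
 */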
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);

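/* RCU callback that frees the expectation once a grace period has elapsed. */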
static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
	struct nf_conntrack_expect *exp;

	exp = container_of(head, struct nf_conntrack_expect, rcu);
	kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

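/* Caller must hold nf_conntrack_lock. Link the expectation into the master's
 * per-conntrack list (or the global userspace list), add it to the
 * expectation hash table and arm its timeout. The two atomic_inc calls
 * account for the references held by the hash table and by the timer.
 */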
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);
	const struct nf_conntrack_expect_policy *p;
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	atomic_inc(&exp->use);

	if (master_help) {
		hlist_add_head(&exp->lnode, &master_help->expectations);
		master_help->expecting[exp->class]++;
	} else if (exp->flags & NF_CT_EXPECT_USERSPACE)
		hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);

	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	if (master_help) {
		p = &master_help->helper->expect_policy[exp->class];
		exp->timeout.expires = jiffies + p->timeout * HZ;
	}
	add_timer(&exp->timeout);

	atomic_inc(&exp->use);
	NF_CT_STAT_INC(net, expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
				struct nf_conntrack_expect *new)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp, *last = NULL;
	struct hlist_node *n;

	hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
		if (exp->class == new->class)
			last = exp;
	}

	if (last && del_timer(&last->timeout)) {
		nf_ct_unlink_expect(last);
		nf_ct_expect_put(last);
	}
}

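/* Restart the expectation's timer with the full policy timeout. Returns 0 if
 * the timer had already expired (the expectation is dying), 1 otherwise.
 */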
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
	struct nf_conn_help *master_help = nfct_help(i->master);
	const struct nf_conntrack_expect_policy *p;

	if (!del_timer(&i->timeout))
		return 0;

	p = &master_help->helper->expect_policy[i->class];
	i->timeout.expires = jiffies + p->timeout * HZ;
	add_timer(&i->timeout);
	return 1;
}

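/* Called with nf_conntrack_lock held. Returns a positive value if the
 * expectation may be inserted, 0 if an identical expectation already existed
 * and its timer was refreshed, or a negative errno if it must be rejected
 * (no helper, clash with an existing expectation, or a per-helper/global
 * limit was hit).
 */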
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *n;
	unsigned int h;
	int ret = 1;

	/* Don't allow expectations created from kernel-space with no helper */
	if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
	    (!master_help || !master_help->helper)) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore it. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will we be over the per-helper limit? */
	if (master_help) {
		p = &master_help->helper->expect_policy[expect->class];
		if (p->max_expected &&
		    master_help->expecting[expect->class] >= p->max_expected) {
			evict_oldest_expect(master, expect);
			if (master_help->expecting[expect->class]
						>= p->max_expected) {
				ret = -EMFILE;
				goto out;
			}
		}
	}

	if (net->ct.expect_count >= nf_ct_expect_max) {
		if (net_ratelimit())
			printk(KERN_WARNING
			       "nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 pid, int report)
{
	int ret;

	spin_lock_bh(&nf_conntrack_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret <= 0)
		goto out;

	ret = 0;
	nf_ct_expect_insert(expect);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
	return ret;
out:
	spin_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

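/* Flush the expectations that were registered from userspace and are not
 * attached to a master helper (they sit on nf_ct_userspace_expect_list).
 */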
void nf_ct_remove_userspace_expectations(void)
{
	struct nf_conntrack_expect *exp;
	struct hlist_node *n, *next;

	hlist_for_each_entry_safe(exp, n, next,
				  &nf_ct_userspace_expect_list, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);

#ifdef CONFIG_PROC_FS
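/* seq_file interface for dumping the expectation table via
 * /proc/net/nf_conntrack_expect.
 */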
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;
	struct hlist_node *n;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
					     struct hlist_node *head)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;

	head = rcu_dereference(head->next);
	while (head == NULL) {
		if (++st->bucket >= nf_ct_expect_hsize)
			return NULL;
		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
	}
	return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *head = ct_expect_get_first(seq);

	if (head)
		while (pos && (head = ct_expect_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

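/* Show one expectation: remaining timeout, protocol numbers, the expected
 * tuple, flags and the master conntrack's helper.
 */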
static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct nf_conntrack_helper *helper;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
				       expect->tuple.dst.protonum));

	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
		seq_printf(s, "%sINACTIVE", delim);
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_USERSPACE)
		seq_printf(s, "%sUSERSPACE", delim);

	helper = rcu_dereference(nfct_help(expect->master)->helper);
	if (helper) {
		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
		if (helper->expect_policy[expect->class].name)
			seq_printf(s, "/%s",
				   helper->expect_policy[expect->class].name);
	}

	return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc;

	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
	if (!proc)
		return -ENOMEM;
#endif /* CONFIG_PROC_FS */
	return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

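/* Allow the expectation hash size to be set at module load time (or on the
 * kernel command line when built in); 0400 keeps it read-only at runtime.
 */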
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);

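/* Per-netns setup: size the hash table and the global expectation limit on
 * the init_net call, then allocate this netns' expectation hash, the slab
 * cache (init_net only) and the proc entry.
 */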
int nf_conntrack_expect_init(struct net *net)
{
	int err = -ENOMEM;

	if (net_eq(net, &init_net)) {
		if (!nf_ct_expect_hsize) {
			nf_ct_expect_hsize = net->ct.htable_size / 256;
			if (!nf_ct_expect_hsize)
				nf_ct_expect_hsize = 1;
		}
		nf_ct_expect_max = nf_ct_expect_hsize * 4;
	}

	net->ct.expect_count = 0;
	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
						  &net->ct.expect_vmalloc, 0);
	if (net->ct.expect_hash == NULL)
		goto err1;

	if (net_eq(net, &init_net)) {
		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
					sizeof(struct nf_conntrack_expect),
					0, 0, NULL);
		if (!nf_ct_expect_cachep)
			goto err2;
	}

	err = exp_proc_init(net);
	if (err < 0)
		goto err3;

	return 0;

err3:
	if (net_eq(net, &init_net))
		kmem_cache_destroy(nf_ct_expect_cachep);
err2:
	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
			     nf_ct_expect_hsize);
err1:
	return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
	exp_proc_remove(net);
	if (net_eq(net, &init_net)) {
		rcu_barrier(); /* Wait for call_rcu() before destroy */
		kmem_cache_destroy(nf_ct_expect_cachep);
	}
	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
			     nf_ct_expect_hsize);
}