nf_conntrack_expect.c revision 54b07dca68557b0952585b5f4834cd0dd86eba35
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

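/* Size of the expectation hash table and global cap on the number of
 * expectations; both are derived from the conntrack table size at init
 * time unless overridden (see nf_conntrack_expect_init() and the
 * expect_hashsize module parameter). */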
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

unsigned int nf_ct_expect_max __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
				u32 pid, int report)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	hlist_del(&exp->lnode);
	master_help->expecting[exp->class]--;

	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);

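/* Timer callback: the expectation was never fulfilled in time.  Unlink it
 * and drop the reference the timer was holding. */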
static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	spin_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_put(exp);
}

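/* Hash an expectation into the expectation table by its destination tuple. */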
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	if (unlikely(!nf_conntrack_hash_rnd))
		init_nf_conntrack_hash_rnd();

	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
	return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

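/* Find an expectation matching the tuple without taking a reference; the
 * caller must hold rcu_read_lock(). */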
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
		    const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone)
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
		      const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	rcu_read_lock();
	i = __nf_ct_expect_find(net, zone, tuple);
	if (i && !atomic_inc_not_zero(&i->use))
		i = NULL;
	rcu_read_unlock();

	return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it is removed from the
 * global list and returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
		       const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If the master is not in the hash table yet (ie. the packet hasn't
	   left this machine yet), how can the other end know about the
	   expected connection?  Hence these are not the droids you are
	   looking for (if the master ct never got confirmed, we'd hold a
	   reference to it and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}

	return NULL;
}

/* Delete all expectations for this conntrack. */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *n, *next;

	/* Optimization: most connections never expect any others. */
	if (!help)
		return;

	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master && a->class == b->class &&
		nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
		nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
		nf_ct_zone(a->master) == nf_ct_zone(b->master);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations.  During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

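/* Fill in the tuple and mask of an expectation.  A NULL saddr or src
 * wildcards the corresponding source address or port; daddr and dst must
 * always be supplied. */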
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
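
/* Typical usage (a sketch, modelled on how existing helpers such as the
 * FTP helper drive this API; "ct" is the master conntrack, and "daddr"
 * and "port" stand for values parsed from the control channel):
 *
 *	exp = nf_ct_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *			  NULL, daddr, IPPROTO_TCP, NULL, &port);
 *	ret = nf_ct_expect_related(exp);
 *	nf_ct_expect_put(exp);
 */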

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
	struct nf_conntrack_expect *exp;

	exp = container_of(head, struct nf_conntrack_expect, rcu);
	kmem_cache_free(nf_ct_expect_cachep, exp);
}

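/* Drop a reference; once the last one is gone the expectation is freed
 * via RCU. */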
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

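/* Link the expectation into the master's list and the global hash and arm
 * its timeout.  The caller must hold nf_conntrack_lock. */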
static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(exp);
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	/* Two references: one for the hash insert, one for the timer */
	atomic_add(2, &exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		exp->timeout.expires = jiffies +
			helper->expect_policy[exp->class].timeout * HZ;
	}
	add_timer(&exp->timeout);

	NF_CT_STAT_INC(net, expect_create);
	return 0;
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
				struct nf_conntrack_expect *new)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp, *last = NULL;
	struct hlist_node *n;

	hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
		if (exp->class == new->class)
			last = exp;
	}

	if (last && del_timer(&last->timeout)) {
		nf_ct_unlink_expect(last);
		nf_ct_expect_put(last);
	}
}

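/* Restart an expectation's timer from its helper's policy.  Returns 0 if
 * the timer was already dying, 1 otherwise.  Caller must hold
 * nf_conntrack_lock. */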
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
	struct nf_conn_help *master_help = nfct_help(i->master);
	const struct nf_conntrack_expect_policy *p;

	if (!del_timer(&i->timeout))
		return 0;

	p = &rcu_dereference_protected(
		master_help->helper,
		lockdep_is_held(&nf_conntrack_lock)
		)->expect_policy[i->class];
	i->timeout.expires = jiffies + p->timeout * HZ;
	add_timer(&i->timeout);
	return 1;
}

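/* Check whether an expectation may be inserted.  Returns 1 if it may, 0 if
 * an identical expectation already existed and its timer was refreshed
 * instead, or a negative errno on a clash or when a limit is hit.  Caller
 * must hold nf_conntrack_lock. */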
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *n;
	unsigned int h;
	int ret = 1;

	if (!master_help) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore it. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will we go over the limit? */
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		p = &helper->expect_policy[expect->class];
		if (p->max_expected &&
		    master_help->expecting[expect->class] >= p->max_expected) {
			evict_oldest_expect(master, expect);
			if (master_help->expecting[expect->class]
						>= p->max_expected) {
				ret = -EMFILE;
				goto out;
			}
		}
	}

	if (net->ct.expect_count >= nf_ct_expect_max) {
		if (net_ratelimit())
			printk(KERN_WARNING
			       "nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 pid, int report)
{
	int ret;

	spin_lock_bh(&nf_conntrack_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret <= 0)
		goto out;

	ret = nf_ct_expect_insert(expect);
	if (ret < 0)
		goto out;
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
	return ret;
out:
	spin_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};

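/* seq_file iteration over the expectation hash, one bucket at a time,
 * under RCU. */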
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;
	struct hlist_node *n;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
					     struct hlist_node *head)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;

	head = rcu_dereference(hlist_next_rcu(head));
	while (head == NULL) {
		if (++st->bucket >= nf_ct_expect_hsize)
			return NULL;
		head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
	}
	return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *head = ct_expect_get_first(seq);

	if (head)
		while (pos && (head = ct_expect_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

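/* Emit one line per expectation in /proc/net/nf_conntrack_expect: the
 * remaining timeout, the l3proto/proto numbers, the expected tuple, the
 * flags and the helper name. */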
static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct nf_conntrack_helper *helper;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
				       expect->tuple.dst.protonum));

	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
		seq_printf(s, "%sINACTIVE", delim);
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_USERSPACE)
		seq_printf(s, "%sUSERSPACE", delim);

	helper = rcu_dereference(nfct_help(expect->master)->helper);
	if (helper) {
		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
		if (helper->expect_policy[expect->class].name)
			seq_printf(s, "/%s",
				   helper->expect_policy[expect->class].name);
	}

	return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif /* CONFIG_NF_CONNTRACK_PROCFS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	struct proc_dir_entry *proc;

	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
	if (!proc)
		return -ENOMEM;
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);

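/* Per-netns initialisation.  The hash size and the global maximum are
 * computed once, when the initial namespace comes up: the hash size
 * defaults to the conntrack table size / 256 and the maximum to four
 * times the hash size. */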
int nf_conntrack_expect_init(struct net *net)
{
	int err = -ENOMEM;

	if (net_eq(net, &init_net)) {
		if (!nf_ct_expect_hsize) {
			nf_ct_expect_hsize = net->ct.htable_size / 256;
			if (!nf_ct_expect_hsize)
				nf_ct_expect_hsize = 1;
		}
		nf_ct_expect_max = nf_ct_expect_hsize * 4;
	}

	net->ct.expect_count = 0;
	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
	if (net->ct.expect_hash == NULL)
		goto err1;

	if (net_eq(net, &init_net)) {
		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
					sizeof(struct nf_conntrack_expect),
					0, 0, NULL);
		if (!nf_ct_expect_cachep)
			goto err2;
	}

	err = exp_proc_init(net);
	if (err < 0)
		goto err3;

	return 0;

err3:
	if (net_eq(net, &init_net))
		kmem_cache_destroy(nf_ct_expect_cachep);
err2:
	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
err1:
	return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
	exp_proc_remove(net);
	if (net_eq(net, &init_net)) {
		rcu_barrier(); /* Wait for call_rcu() before destroy */
		kmem_cache_destroy(nf_ct_expect_cachep);
	}
	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}