/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

unsigned int nf_ct_expect_max __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
				u32 portid, int report)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	hlist_del(&exp->lnode);
	master_help->expecting[exp->class]--;

	nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);

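/* Timer callback: the expectation expired without being matched, so
 * unlink it under nf_conntrack_lock and drop the timer's reference.
 */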
static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	spin_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_put(exp);
}

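/* Expectations are hashed on the destination part of the tuple (address,
 * port, l3/l4 protocol numbers); only the source side of an expectation
 * can be wildcarded by the mask, so the destination is a stable lookup key.
 */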
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	if (unlikely(!nf_conntrack_hash_rnd))
		init_nf_conntrack_hash_rnd();

	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
	return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

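/* Lockless lookup: returns a matching expectation without taking a
 * reference. The caller must hold rcu_read_lock(); use
 * nf_ct_expect_find_get() for a refcounted lookup.
 */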
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
		    const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone)
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
		      const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	rcu_read_lock();
	i = __nf_ct_expect_find(net, zone, tuple);
	if (i && !atomic_inc_not_zero(&i->use))
		i = NULL;
	rcu_read_unlock();

	return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it is deleted from the
 * global list and then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
		       const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If the master is not in the hash table yet (ie. the packet hasn't
	   left this machine yet), how can the other end know about the
	   expectation?  Hence these are not the droids you are looking for
	   (if the master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}

	return NULL;
}

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *next;

	/* Optimization: most connections never expect any others. */
	if (!help)
		return;

	hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

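/* Two expectations are identical when master, class, tuple, mask and
 * conntrack zone all match.
 */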
static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master && a->class == b->class &&
		nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
		nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
		nf_ct_zone(a->master) == nf_ct_zone(b->master);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

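/* Fill in the tuple and mask of an expectation allocated by
 * nf_ct_expect_alloc(). A NULL saddr or src wildcards the corresponding
 * source field; the destination address and port must always be given.
 */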
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
	struct nf_conntrack_expect *exp;

	exp = container_of(head, struct nf_conntrack_expect, rcu);
	kmem_cache_free(nf_ct_expect_cachep, exp);
}

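/* Drop a reference; the final put frees the expectation after an RCU
 * grace period so lockless readers can still walk the hash safely.
 */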
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

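/* Insert into the per-master list and the global hash and start the
 * expiry timer. Called with nf_conntrack_lock held (see the
 * lockdep_is_held() annotation below).
 */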
static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(exp);
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	/* two references: one for the hash insert, one for the timer */
	atomic_add(2, &exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		exp->timeout.expires = jiffies +
			helper->expect_policy[exp->class].timeout * HZ;
	}
	add_timer(&exp->timeout);

	NF_CT_STAT_INC(net, expect_create);
	return 0;
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
				struct nf_conntrack_expect *new)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp, *last = NULL;

	hlist_for_each_entry(exp, &master_help->expectations, lnode) {
		if (exp->class == new->class)
			last = exp;
	}

	if (last && del_timer(&last->timeout)) {
		nf_ct_unlink_expect(last);
		nf_ct_expect_put(last);
	}
}

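/* Check whether the new expectation may be inserted: an identical existing
 * entry is removed and replaced, a clashing one or an over-limit helper
 * refuses it. Returns a value > 0 if insertion may proceed, a negative
 * errno otherwise. Called with nf_conntrack_lock held.
 */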
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *next;
	unsigned int h;
	int ret = 1;

	if (!master_help) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				nf_ct_expect_put(i);
				break;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will we be over the limit? */
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		p = &helper->expect_policy[expect->class];
		if (p->max_expected &&
		    master_help->expecting[expect->class] >= p->max_expected) {
			evict_oldest_expect(master, expect);
			if (master_help->expecting[expect->class]
						>= p->max_expected) {
				ret = -EMFILE;
				goto out;
			}
		}
	}

	if (net->ct.expect_count >= nf_ct_expect_max) {
		net_warn_ratelimited("nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}

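/* Validate and insert an expectation under nf_conntrack_lock, then emit
 * an IPEXP_NEW event to the listener identified by portid/report.
 */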
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 portid, int report)
{
	int ret;

	spin_lock_bh(&nf_conntrack_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret <= 0)
		goto out;

	ret = nf_ct_expect_insert(expect);
	if (ret < 0)
		goto out;
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
	return ret;
out:
	spin_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};

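/* /proc/net/nf_conntrack_expect: walk the expectation hash under RCU,
 * one bucket at a time.
 */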
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;
	struct hlist_node *n;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
					     struct hlist_node *head)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;

	head = rcu_dereference(hlist_next_rcu(head));
	while (head == NULL) {
		if (++st->bucket >= nf_ct_expect_hsize)
			return NULL;
		head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
	}
	return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *head = ct_expect_get_first(seq);

	if (head)
		while (pos && (head = ct_expect_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct nf_conntrack_helper *helper;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
				       expect->tuple.dst.protonum));

	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
		seq_printf(s, "%sINACTIVE", delim);
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_USERSPACE)
		seq_printf(s, "%sUSERSPACE", delim);

	helper = rcu_dereference(nfct_help(expect->master)->helper);
	if (helper) {
		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
		if (helper->expect_policy[expect->class].name)
			seq_printf(s, "/%s",
				   helper->expect_policy[expect->class].name);
	}

	return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif /* CONFIG_NF_CONNTRACK_PROCFS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	struct proc_dir_entry *proc;

	proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
			   &exp_file_ops);
	if (!proc)
		return -ENOMEM;
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);

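/* Per-netns setup: allocate the expectation hash and register the proc
 * file; torn down again in nf_conntrack_expect_pernet_fini().
 */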
int nf_conntrack_expect_pernet_init(struct net *net)
{
	int err = -ENOMEM;

	net->ct.expect_count = 0;
	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
	if (net->ct.expect_hash == NULL)
		goto err1;

	err = exp_proc_init(net);
	if (err < 0)
		goto err2;

	return 0;
err2:
	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
err1:
	return err;
}

void nf_conntrack_expect_pernet_fini(struct net *net)
{
	exp_proc_remove(net);
	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}

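/* Module-global setup: size the expectation hash (1/256th of the conntrack
 * hash by default, at least one bucket), derive nf_ct_expect_max from it
 * and create the slab cache.
 */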
int nf_conntrack_expect_init(void)
{
	if (!nf_ct_expect_hsize) {
		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
		if (!nf_ct_expect_hsize)
			nf_ct_expect_hsize = 1;
	}
	nf_ct_expect_max = nf_ct_expect_hsize * 4;
	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
				sizeof(struct nf_conntrack_expect),
				0, 0, NULL);
	if (!nf_ct_expect_cachep)
		return -ENOMEM;
	return 0;
}

void nf_conntrack_expect_fini(void)
{
	rcu_barrier(); /* Wait for call_rcu() before destroy */
	kmem_cache_destroy(nf_ct_expect_cachep);
}