xfrm_state.c revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
/*
 * xfrm_state.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	YOSHIFUJI Hideaki @USAGI
 * 		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */

#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <asm/uaccess.h>

/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];

DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

static struct work_struct xfrm_state_gc_work;
static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

static int xfrm_state_gc_flush_bundles;

static void __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
static void km_state_expired(struct xfrm_state *x, int hard);

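/* Final teardown of a state that has already been unlinked and queued
 * for garbage collection: the timer must no longer be pending (hence
 * the BUG() below), the algorithm and encapsulation data are freed,
 * and the protocol-specific destructor is invoked before the state
 * itself is released.
 */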
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	if (del_timer(&x->timer))
		BUG();
	if (x->aalg)
		kfree(x->aalg);
	if (x->ealg)
		kfree(x->ealg);
	if (x->calg)
		kfree(x->calg);
	if (x->encap)
		kfree(x->encap);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	kfree(x);
}

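/* Deferred garbage collection, run from the shared workqueue.  If a
 * deletion noticed cached bundles still holding references, the bundles
 * are flushed first.  Pending states are then moved off
 * xfrm_state_gc_list under the GC lock and destroyed outside of it, and
 * km_waitq sleepers are woken so they can re-check.
 */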
static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct list_head *entry, *tmp;
	struct list_head gc_list = LIST_HEAD_INIT(gc_list);

	if (xfrm_state_gc_flush_bundles) {
		xfrm_state_gc_flush_bundles = 0;
		xfrm_flush_bundles();
	}

	spin_lock_bh(&xfrm_state_gc_lock);
	list_splice_init(&xfrm_state_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	list_for_each_safe(entry, tmp, &gc_list) {
		x = list_entry(entry, struct xfrm_state, bydst);
		xfrm_state_gc_destroy(x);
	}
	wake_up(&km_waitq);
}

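/* Convert a lifetime expressed in seconds into a timer delay in
 * jiffies, clamping the result so mod_timer() is never asked for more
 * than MAX_SCHEDULE_TIMEOUT-1.  For example, with HZ=100 a 30 second
 * timeout becomes 3000 jiffies.
 */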
static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

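/* Per-state lifetime timer.  Under x->lock, the hard add/use expiry
 * times are checked first (expiry deletes the state), then the soft
 * ones (expiry only warns the key managers via km_state_expired()).
 * Whichever limit is due next determines when the timer is re-armed.
 * An ACQ state that never received an SPI is merely marked EXPIRED,
 * km_waitq waiters are woken, and the timer is re-armed briefly so the
 * state is deleted on the next pass.
 */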
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;
	int warn = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_state_expired(x, 0);
resched:
	if (next != LONG_MAX &&
	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
		xfrm_state_hold(x);
	goto out;

expired:
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	if (x->id.spi != 0)
		km_state_expired(x, 1);
	__xfrm_state_delete(x);

out:
	spin_unlock(&x->lock);
	xfrm_state_put(x);
}

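/* Allocate a new state: zeroed, with one reference held by the caller,
 * the per-state timer initialized, and all byte/packet lifetime limits
 * set to XFRM_INF (no limit) until a key manager fills them in.
 */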
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		memset(x, 0, sizeof(struct xfrm_state));
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_LIST_HEAD(&x->bydst);
		INIT_LIST_HEAD(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data	  = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		spin_lock_init(&x->lock);
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);

void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	list_add(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);

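/* Unlink a state from both hash tables under xfrm_state_lock, dropping
 * the reference each table held, the one held by a pending timer, and
 * finally the one taken by xfrm_state_alloc().  The caller must hold
 * x->lock and its own reference; any references beyond those indicate
 * cached bundles, so the GC worker is asked to flush them.
 */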
static void __xfrm_state_delete(struct xfrm_state *x)
{
	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		list_del(&x->bydst);
		atomic_dec(&x->refcnt);
		if (x->id.spi) {
			list_del(&x->byspi);
			atomic_dec(&x->refcnt);
		}
		spin_unlock(&xfrm_state_lock);
		if (del_timer(&x->timer))
			atomic_dec(&x->refcnt);

		/* The number two in this test is the reference
		 * mentioned in the comment below plus the reference
		 * our caller holds.  A larger value means that
		 * there are DSTs attached to this xfrm_state.
		 */
		if (atomic_read(&x->refcnt) > 2) {
			xfrm_state_gc_flush_bundles = 1;
			schedule_work(&xfrm_state_gc_work);
		}

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		atomic_dec(&x->refcnt);
	}
}

void xfrm_state_delete(struct xfrm_state *x)
{
	spin_lock_bh(&x->lock);
	__xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);
}
EXPORT_SYMBOL(xfrm_state_delete);

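/* Delete every SA of the given protocol (or all SAs for
 * IPSEC_PROTO_ANY) that is not owned by the kernel itself.  The state
 * lock is dropped around each deletion, so the bucket walk restarts
 * from the beginning whenever an entry has been removed.
 */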
void xfrm_state_flush(u8 proto)
{
	int i;
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
restart:
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);

static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -1;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
	return 0;
}

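/* Main SA lookup on output: walk the by-destination hash bucket for a
 * VALID state matching the policy template and flow, preferring the
 * youngest non-dying candidate.  If nothing usable exists yet, an ACQ
 * placeholder is created and the key managers are asked (km_query) to
 * negotiate a real SA; subsequent lookups then report -EAGAIN until a
 * usable SA is installed.
 *
 * Illustrative caller sketch (not taken from this file):
 *
 *	x = xfrm_state_find(daddr, saddr, fl, tmpl, policy, &err, family);
 *	if (!x)
 *		... handle *err, e.g. -EAGAIN while an acquire is pending ...
 */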
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned h = xfrm_dst_hash(daddr, family);
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;
	struct xfrm_state_afinfo *afinfo;

	afinfo = xfrm_state_get_afinfo(family);
	if (afinfo == NULL) {
		*err = -EAFNOSUPPORT;
		return NULL;
	}

	spin_lock_bh(&xfrm_state_lock);
	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with a matching selector.
			      Done.
			   2. Valid state with an inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If the state is not valid, its selector is
			      temporary and matches only the session which
			      triggered the previous resolution.  The key
			      manager will install a state with the proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family))
					continue;
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		x0 = afinfo->state_lookup(&tmpl->id.daddr, tmpl->id.spi, tmpl->id.proto);
		if (x0 != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		if (km_query(x, tmpl, pol) == 0) {
			x->km.state = XFRM_STATE_ACQ;
			list_add_tail(&x->bydst, xfrm_state_bydst+h);
			xfrm_state_hold(x);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				list_add(&x->byspi, xfrm_state_byspi+h);
				xfrm_state_hold(x);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			xfrm_state_hold(x);
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}

static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);

	list_add(&x->bydst, xfrm_state_bydst+h);
	xfrm_state_hold(x);

	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);

	list_add(&x->byspi, xfrm_state_byspi+h);
	xfrm_state_hold(x);

	if (!mod_timer(&x->timer, jiffies + HZ))
		xfrm_state_hold(x);

	wake_up(&km_waitq);
}

void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);

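/* Add a fully specified SA supplied by a key manager.  Fails with
 * -EEXIST if an SA with the same (daddr, spi, proto) is already
 * installed.  Any ACQ placeholder this SA resolves (matched by
 * sequence number or by find_acq) is deleted once the new state has
 * been inserted.
 */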
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;
	int family;
	int err;

	family = x->props.family;
	afinfo = xfrm_state_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);

	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (!x1)
		x1 = afinfo->find_acq(
			x->props.mode, x->props.reqid, x->id.proto,
			&x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);

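/* Update an existing SA in place.  If only an ACQ placeholder exists,
 * the new state is simply inserted in its stead; otherwise the old
 * state's lifetime and encapsulation parameters are overwritten while
 * it stays VALID.  Kernel-owned states (xfrm_state_kern) are never
 * replaced; -EEXIST is returned instead.
 */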
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;
	int err;

	afinfo = xfrm_state_get_afinfo(x->props.family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	if (err)
		return err;

	if (!x) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);

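/* Enforce byte and packet lifetime limits on the data path.  Crossing
 * a hard limit expires the SA immediately (the timer is re-armed so it
 * gets cleaned up) and the packet is refused; crossing only a soft
 * limit just notifies the key managers so rekeying can start while
 * traffic keeps flowing.
 */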
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		km_state_expired(x, 1);
		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit))
		km_state_expired(x, 0);
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);

static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
		- skb_headroom(skb);

	if (nhead > 0)
		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

	/* Check tail too... */
	return 0;
}

int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);
	if (err < 0)
		goto err;
	err = xfrm_state_check_space(x, skb);
err:
	return err;
}
EXPORT_SYMBOL(xfrm_state_check);

struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
		  unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return NULL;

	spin_lock_bh(&xfrm_state_lock);
	x = afinfo->state_lookup(daddr, spi, proto);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);

struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
	      xfrm_address_t *daddr, xfrm_address_t *saddr,
	      int create, unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return NULL;

	spin_lock_bh(&xfrm_state_lock);
	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq);

/* Silly enough, but I'm too lazy to build a resolution list */

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;
	struct xfrm_state *x;

	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}

struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_find_acq_byseq(seq);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);

u32 xfrm_get_acqseq(void)
{
	u32 res;
	static u32 acqseq;
	static DEFINE_SPINLOCK(acqseq_lock);

	spin_lock_bh(&acqseq_lock);
	res = (++acqseq ? : ++acqseq);
	spin_unlock_bh(&acqseq_lock);
	return res;
}
EXPORT_SYMBOL(xfrm_get_acqseq);

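/* Assign an SPI to a state that does not have one yet.  With
 * minspi == maxspi the single requested value is used if free;
 * otherwise random SPIs in [minspi, maxspi] are probed (at most
 * maxspi-minspi+1 attempts) until an unused one is found.  On success
 * the state is linked into the by-SPI hash and km_waitq waiters are
 * woken.
 */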
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	u32 h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		list_add(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);

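/* Iterate over every SA of the given protocol, calling func() with a
 * countdown of how many matching entries remain (0 marks the last
 * one).  A first pass only counts the matches; -ENOENT is returned if
 * there are none.
 */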
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);

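/* Anti-replay protection: x->replay.seq is the highest sequence number
 * seen so far and x->replay.bitmap records which of the previous
 * replay_window sequence numbers have already been accepted (bit 0 is
 * the highest one).  For example, with a 32-packet window and
 * replay.seq == 100, an arriving seq of 101 is accepted as new, 95 is
 * accepted only if bit 5 is still clear, and 60 is rejected as falling
 * outside the window.  xfrm_replay_check() validates an incoming
 * sequence number; xfrm_replay_advance() updates the window once
 * authentication has succeeded.
 */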
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (unlikely(seq == 0))
		return -EINVAL;

	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		x->stats.replay_window++;
		return -EINVAL;
	}

	if (x->replay.bitmap & (1U << diff)) {
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);

void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (seq > x->replay.seq) {
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}
}
EXPORT_SYMBOL(xfrm_replay_advance);

static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);

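/* Tell every registered key manager that a state has hit a lifetime
 * limit.  A soft expiry (hard == 0) only marks the state as dying so
 * rekeying can begin; a hard expiry marks it EXPIRED and wakes
 * km_waitq so waiters notice the state going away.
 */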
static void km_state_expired(struct xfrm_state *x, int hard)
{
	struct xfrm_mgr *km;

	if (hard)
		x->km.state = XFRM_STATE_EXPIRED;
	else
		x->km.dying = 1;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		km->notify(x, hard);
	read_unlock(&xfrm_km_lock);

	if (hard)
		wake_up(&km_waitq);
}

static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}

int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_new_mapping);

void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
{
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify_policy)
			km->notify_policy(pol, dir, hard);
	read_unlock(&xfrm_km_lock);

	if (hard)
		wake_up(&km_waitq);
}

int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk->sk_family, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);

int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);

int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);

int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		afinfo->state_bydst = xfrm_state_bydst;
		afinfo->state_byspi = xfrm_state_byspi;
		xfrm_state_afinfo[afinfo->family] = afinfo;
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);

int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			xfrm_state_afinfo[afinfo->family] = NULL;
			afinfo->state_byspi = NULL;
			afinfo->state_bydst = NULL;
		}
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (likely(afinfo != NULL))
		read_lock(&afinfo->lock);
	read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}

static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	if (unlikely(afinfo == NULL))
		return;
	read_unlock(&afinfo->lock);
}

/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);

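/* Work out how much payload fits into @mtu once this transform's
 * headers (and, for protocols that provide get_max_size, padding and
 * trailer overhead) are accounted for.  The estimate is refined
 * iteratively until the transformed packet no longer exceeds the MTU;
 * the result never drops below the IPv4 minimum of 68 bytes.
 */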
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		res -= (m - mtu);
	}

	return res;
}

EXPORT_SYMBOL(xfrm_state_mtu);

void __init xfrm_state_init(void)
{
	int i;

	for (i=0; i<XFRM_DST_HSIZE; i++) {
		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
	}
	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}