xfrm_state.c revision 6c5c8ca7ff20523e427b955aa84cef407934710f
1/*
2 * xfrm_state.c
3 *
4 * Changes:
5 *	Mitsuru KANDA @USAGI
6 * 	Kazunori MIYAZAWA @USAGI
7 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * 		IPv6 support
9 * 	YOSHIFUJI Hideaki @USAGI
10 * 		Split up af-specific functions
11 *	Derek Atkins <derek@ihtfp.com>
12 *		Add UDP Encapsulation
13 *
14 */
15
16#include <linux/workqueue.h>
17#include <net/xfrm.h>
18#include <linux/pfkeyv2.h>
19#include <linux/ipsec.h>
20#include <linux/module.h>
21#include <asm/uaccess.h>
22
23u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
24u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
25/* Each xfrm_state may be linked to two tables:
26
27   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
28   2. Hash table by daddr to find what SAs exist for given
29      destination/tunnel endpoint. (output)
30 */
31
32static DEFINE_SPINLOCK(xfrm_state_lock);
33
34/* Hash table to find appropriate SA towards given target (endpoint
35 * of tunnel or destination of transport mode) allowed by selector.
36 *
37 * Main use is finding SA after policy selected tunnel or transport mode.
38 * Also, it can be used by ah/esp icmp error handler to find offending SA.
39 */
40static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
41static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
42
43DECLARE_WAIT_QUEUE_HEAD(km_waitq);
44EXPORT_SYMBOL(km_waitq);
45
46static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
47static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
48
49static struct work_struct xfrm_state_gc_work;
50static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
51static DEFINE_SPINLOCK(xfrm_state_gc_lock);
52
53static int xfrm_state_gc_flush_bundles;
54
55int __xfrm_state_delete(struct xfrm_state *x);
56
57static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
58static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
59
60int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
61void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
62
63static void xfrm_state_gc_destroy(struct xfrm_state *x)
64{
65	if (del_timer(&x->timer))
66		BUG();
67	if (del_timer(&x->rtimer))
68		BUG();
69	kfree(x->aalg);
70	kfree(x->ealg);
71	kfree(x->calg);
72	kfree(x->encap);
73	if (x->type) {
74		x->type->destructor(x);
75		xfrm_put_type(x->type);
76	}
77	security_xfrm_state_free(x);
78	kfree(x);
79}
80
81static void xfrm_state_gc_task(void *data)
82{
83	struct xfrm_state *x;
84	struct list_head *entry, *tmp;
85	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
86
87	if (xfrm_state_gc_flush_bundles) {
88		xfrm_state_gc_flush_bundles = 0;
89		xfrm_flush_bundles();
90	}
91
92	spin_lock_bh(&xfrm_state_gc_lock);
93	list_splice_init(&xfrm_state_gc_list, &gc_list);
94	spin_unlock_bh(&xfrm_state_gc_lock);
95
96	list_for_each_safe(entry, tmp, &gc_list) {
97		x = list_entry(entry, struct xfrm_state, bydst);
98		xfrm_state_gc_destroy(x);
99	}
100	wake_up(&km_waitq);
101}
102
103static inline unsigned long make_jiffies(long secs)
104{
105	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
106		return MAX_SCHEDULE_TIMEOUT-1;
107	else
108	        return secs*HZ;
109}
110
111static void xfrm_timer_handler(unsigned long data)
112{
113	struct xfrm_state *x = (struct xfrm_state*)data;
114	unsigned long now = (unsigned long)xtime.tv_sec;
115	long next = LONG_MAX;
116	int warn = 0;
117
118	spin_lock(&x->lock);
119	if (x->km.state == XFRM_STATE_DEAD)
120		goto out;
121	if (x->km.state == XFRM_STATE_EXPIRED)
122		goto expired;
123	if (x->lft.hard_add_expires_seconds) {
124		long tmo = x->lft.hard_add_expires_seconds +
125			x->curlft.add_time - now;
126		if (tmo <= 0)
127			goto expired;
128		if (tmo < next)
129			next = tmo;
130	}
131	if (x->lft.hard_use_expires_seconds) {
132		long tmo = x->lft.hard_use_expires_seconds +
133			(x->curlft.use_time ? : now) - now;
134		if (tmo <= 0)
135			goto expired;
136		if (tmo < next)
137			next = tmo;
138	}
139	if (x->km.dying)
140		goto resched;
141	if (x->lft.soft_add_expires_seconds) {
142		long tmo = x->lft.soft_add_expires_seconds +
143			x->curlft.add_time - now;
144		if (tmo <= 0)
145			warn = 1;
146		else if (tmo < next)
147			next = tmo;
148	}
149	if (x->lft.soft_use_expires_seconds) {
150		long tmo = x->lft.soft_use_expires_seconds +
151			(x->curlft.use_time ? : now) - now;
152		if (tmo <= 0)
153			warn = 1;
154		else if (tmo < next)
155			next = tmo;
156	}
157
158	x->km.dying = warn;
159	if (warn)
160		km_state_expired(x, 0, 0);
161resched:
162	if (next != LONG_MAX &&
163	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
164		xfrm_state_hold(x);
165	goto out;
166
167expired:
168	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
169		x->km.state = XFRM_STATE_EXPIRED;
170		wake_up(&km_waitq);
171		next = 2;
172		goto resched;
173	}
174	if (!__xfrm_state_delete(x) && x->id.spi)
175		km_state_expired(x, 1, 0);
176
177out:
178	spin_unlock(&x->lock);
179	xfrm_state_put(x);
180}
181
182struct xfrm_state *xfrm_state_alloc(void)
183{
184	struct xfrm_state *x;
185
186	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
187
188	if (x) {
189		memset(x, 0, sizeof(struct xfrm_state));
190		atomic_set(&x->refcnt, 1);
191		atomic_set(&x->tunnel_users, 0);
192		INIT_LIST_HEAD(&x->bydst);
193		INIT_LIST_HEAD(&x->byspi);
194		init_timer(&x->timer);
195		x->timer.function = xfrm_timer_handler;
196		x->timer.data	  = (unsigned long)x;
197		init_timer(&x->rtimer);
198		x->rtimer.function = xfrm_replay_timer_handler;
199		x->rtimer.data     = (unsigned long)x;
200		x->curlft.add_time = (unsigned long)xtime.tv_sec;
201		x->lft.soft_byte_limit = XFRM_INF;
202		x->lft.soft_packet_limit = XFRM_INF;
203		x->lft.hard_byte_limit = XFRM_INF;
204		x->lft.hard_packet_limit = XFRM_INF;
205		x->replay_maxage = 0;
206		x->replay_maxdiff = 0;
207		spin_lock_init(&x->lock);
208	}
209	return x;
210}
211EXPORT_SYMBOL(xfrm_state_alloc);
212
213void __xfrm_state_destroy(struct xfrm_state *x)
214{
215	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
216
217	spin_lock_bh(&xfrm_state_gc_lock);
218	list_add(&x->bydst, &xfrm_state_gc_list);
219	spin_unlock_bh(&xfrm_state_gc_lock);
220	schedule_work(&xfrm_state_gc_work);
221}
222EXPORT_SYMBOL(__xfrm_state_destroy);
223
224int __xfrm_state_delete(struct xfrm_state *x)
225{
226	int err = -ESRCH;
227
228	if (x->km.state != XFRM_STATE_DEAD) {
229		x->km.state = XFRM_STATE_DEAD;
230		spin_lock(&xfrm_state_lock);
231		list_del(&x->bydst);
232		__xfrm_state_put(x);
233		if (x->id.spi) {
234			list_del(&x->byspi);
235			__xfrm_state_put(x);
236		}
237		spin_unlock(&xfrm_state_lock);
238		if (del_timer(&x->timer))
239			__xfrm_state_put(x);
240		if (del_timer(&x->rtimer))
241			__xfrm_state_put(x);
242
243		/* The number two in this test is the reference
244		 * mentioned in the comment below plus the reference
245		 * our caller holds.  A larger value means that
246		 * there are DSTs attached to this xfrm_state.
247		 */
248		if (atomic_read(&x->refcnt) > 2) {
249			xfrm_state_gc_flush_bundles = 1;
250			schedule_work(&xfrm_state_gc_work);
251		}
252
253		/* All xfrm_state objects are created by xfrm_state_alloc.
254		 * The xfrm_state_alloc call gives a reference, and that
255		 * is what we are dropping here.
256		 */
257		__xfrm_state_put(x);
258		err = 0;
259	}
260
261	return err;
262}
263EXPORT_SYMBOL(__xfrm_state_delete);
264
265int xfrm_state_delete(struct xfrm_state *x)
266{
267	int err;
268
269	spin_lock_bh(&x->lock);
270	err = __xfrm_state_delete(x);
271	spin_unlock_bh(&x->lock);
272
273	return err;
274}
275EXPORT_SYMBOL(xfrm_state_delete);
276
277void xfrm_state_flush(u8 proto)
278{
279	int i;
280	struct xfrm_state *x;
281
282	spin_lock_bh(&xfrm_state_lock);
283	for (i = 0; i < XFRM_DST_HSIZE; i++) {
284restart:
285		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
286			if (!xfrm_state_kern(x) &&
287			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
288				xfrm_state_hold(x);
289				spin_unlock_bh(&xfrm_state_lock);
290
291				xfrm_state_delete(x);
292				xfrm_state_put(x);
293
294				spin_lock_bh(&xfrm_state_lock);
295				goto restart;
296			}
297		}
298	}
299	spin_unlock_bh(&xfrm_state_lock);
300	wake_up(&km_waitq);
301}
302EXPORT_SYMBOL(xfrm_state_flush);
303
304static int
305xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
306		  struct xfrm_tmpl *tmpl,
307		  xfrm_address_t *daddr, xfrm_address_t *saddr,
308		  unsigned short family)
309{
310	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
311	if (!afinfo)
312		return -1;
313	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
314	xfrm_state_put_afinfo(afinfo);
315	return 0;
316}
317
318struct xfrm_state *
319xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
320		struct flowi *fl, struct xfrm_tmpl *tmpl,
321		struct xfrm_policy *pol, int *err,
322		unsigned short family)
323{
324	unsigned h = xfrm_dst_hash(daddr, family);
325	struct xfrm_state *x, *x0;
326	int acquire_in_progress = 0;
327	int error = 0;
328	struct xfrm_state *best = NULL;
329	struct xfrm_state_afinfo *afinfo;
330
331	afinfo = xfrm_state_get_afinfo(family);
332	if (afinfo == NULL) {
333		*err = -EAFNOSUPPORT;
334		return NULL;
335	}
336
337	spin_lock_bh(&xfrm_state_lock);
338	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
339		if (x->props.family == family &&
340		    x->props.reqid == tmpl->reqid &&
341		    xfrm_state_addr_check(x, daddr, saddr, family) &&
342		    tmpl->mode == x->props.mode &&
343		    tmpl->id.proto == x->id.proto &&
344		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
345			/* Resolution logic:
346			   1. There is a valid state with matching selector.
347			      Done.
348			   2. Valid state with inappropriate selector. Skip.
349
350			   Entering area of "sysdeps".
351
352			   3. If state is not valid, selector is temporary,
353			      it selects only session which triggered
354			      previous resolution. Key manager will do
355			      something to install a state with proper
356			      selector.
357			 */
358			if (x->km.state == XFRM_STATE_VALID) {
359				if (!xfrm_selector_match(&x->sel, fl, family) ||
360				    !xfrm_sec_ctx_match(pol->security, x->security))
361					continue;
362				if (!best ||
363				    best->km.dying > x->km.dying ||
364				    (best->km.dying == x->km.dying &&
365				     best->curlft.add_time < x->curlft.add_time))
366					best = x;
367			} else if (x->km.state == XFRM_STATE_ACQ) {
368				acquire_in_progress = 1;
369			} else if (x->km.state == XFRM_STATE_ERROR ||
370				   x->km.state == XFRM_STATE_EXPIRED) {
371 				if (xfrm_selector_match(&x->sel, fl, family) &&
372				    xfrm_sec_ctx_match(pol->security, x->security))
373					error = -ESRCH;
374			}
375		}
376	}
377
378	x = best;
379	if (!x && !error && !acquire_in_progress) {
380		if (tmpl->id.spi &&
381		    (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
382		                               tmpl->id.proto)) != NULL) {
383			xfrm_state_put(x0);
384			error = -EEXIST;
385			goto out;
386		}
387		x = xfrm_state_alloc();
388		if (x == NULL) {
389			error = -ENOMEM;
390			goto out;
391		}
392		/* Initialize temporary selector matching only
393		 * to current session. */
394		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
395
396		if (km_query(x, tmpl, pol) == 0) {
397			x->km.state = XFRM_STATE_ACQ;
398			list_add_tail(&x->bydst, xfrm_state_bydst+h);
399			xfrm_state_hold(x);
400			if (x->id.spi) {
401				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
402				list_add(&x->byspi, xfrm_state_byspi+h);
403				xfrm_state_hold(x);
404			}
405			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
406			xfrm_state_hold(x);
407			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
408			add_timer(&x->timer);
409		} else {
410			x->km.state = XFRM_STATE_DEAD;
411			xfrm_state_put(x);
412			x = NULL;
413			error = -ESRCH;
414		}
415	}
416out:
417	if (x)
418		xfrm_state_hold(x);
419	else
420		*err = acquire_in_progress ? -EAGAIN : error;
421	spin_unlock_bh(&xfrm_state_lock);
422	xfrm_state_put_afinfo(afinfo);
423	return x;
424}
425
426static void __xfrm_state_insert(struct xfrm_state *x)
427{
428	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
429
430	list_add(&x->bydst, xfrm_state_bydst+h);
431	xfrm_state_hold(x);
432
433	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
434
435	list_add(&x->byspi, xfrm_state_byspi+h);
436	xfrm_state_hold(x);
437
438	if (!mod_timer(&x->timer, jiffies + HZ))
439		xfrm_state_hold(x);
440
441	if (x->replay_maxage &&
442	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
443		xfrm_state_hold(x);
444
445	wake_up(&km_waitq);
446}
447
448void xfrm_state_insert(struct xfrm_state *x)
449{
450	spin_lock_bh(&xfrm_state_lock);
451	__xfrm_state_insert(x);
452	spin_unlock_bh(&xfrm_state_lock);
453
454	xfrm_flush_all_bundles();
455}
456EXPORT_SYMBOL(xfrm_state_insert);
457
458static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
459
460int xfrm_state_add(struct xfrm_state *x)
461{
462	struct xfrm_state_afinfo *afinfo;
463	struct xfrm_state *x1;
464	int family;
465	int err;
466
467	family = x->props.family;
468	afinfo = xfrm_state_get_afinfo(family);
469	if (unlikely(afinfo == NULL))
470		return -EAFNOSUPPORT;
471
472	spin_lock_bh(&xfrm_state_lock);
473
474	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
475	if (x1) {
476		xfrm_state_put(x1);
477		x1 = NULL;
478		err = -EEXIST;
479		goto out;
480	}
481
482	if (x->km.seq) {
483		x1 = __xfrm_find_acq_byseq(x->km.seq);
484		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
485			xfrm_state_put(x1);
486			x1 = NULL;
487		}
488	}
489
490	if (!x1)
491		x1 = afinfo->find_acq(
492			x->props.mode, x->props.reqid, x->id.proto,
493			&x->id.daddr, &x->props.saddr, 0);
494
495	__xfrm_state_insert(x);
496	err = 0;
497
498out:
499	spin_unlock_bh(&xfrm_state_lock);
500	xfrm_state_put_afinfo(afinfo);
501
502	if (!err)
503		xfrm_flush_all_bundles();
504
505	if (x1) {
506		xfrm_state_delete(x1);
507		xfrm_state_put(x1);
508	}
509
510	return err;
511}
512EXPORT_SYMBOL(xfrm_state_add);
513
514int xfrm_state_update(struct xfrm_state *x)
515{
516	struct xfrm_state_afinfo *afinfo;
517	struct xfrm_state *x1;
518	int err;
519
520	afinfo = xfrm_state_get_afinfo(x->props.family);
521	if (unlikely(afinfo == NULL))
522		return -EAFNOSUPPORT;
523
524	spin_lock_bh(&xfrm_state_lock);
525	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
526
527	err = -ESRCH;
528	if (!x1)
529		goto out;
530
531	if (xfrm_state_kern(x1)) {
532		xfrm_state_put(x1);
533		err = -EEXIST;
534		goto out;
535	}
536
537	if (x1->km.state == XFRM_STATE_ACQ) {
538		__xfrm_state_insert(x);
539		x = NULL;
540	}
541	err = 0;
542
543out:
544	spin_unlock_bh(&xfrm_state_lock);
545	xfrm_state_put_afinfo(afinfo);
546
547	if (err)
548		return err;
549
550	if (!x) {
551		xfrm_state_delete(x1);
552		xfrm_state_put(x1);
553		return 0;
554	}
555
556	err = -EINVAL;
557	spin_lock_bh(&x1->lock);
558	if (likely(x1->km.state == XFRM_STATE_VALID)) {
559		if (x->encap && x1->encap)
560			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
561		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
562		x1->km.dying = 0;
563
564		if (!mod_timer(&x1->timer, jiffies + HZ))
565			xfrm_state_hold(x1);
566		if (x1->curlft.use_time)
567			xfrm_state_check_expire(x1);
568
569		err = 0;
570	}
571	spin_unlock_bh(&x1->lock);
572
573	xfrm_state_put(x1);
574
575	return err;
576}
577EXPORT_SYMBOL(xfrm_state_update);
578
579int xfrm_state_check_expire(struct xfrm_state *x)
580{
581	if (!x->curlft.use_time)
582		x->curlft.use_time = (unsigned long)xtime.tv_sec;
583
584	if (x->km.state != XFRM_STATE_VALID)
585		return -EINVAL;
586
587	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
588	    x->curlft.packets >= x->lft.hard_packet_limit) {
589		x->km.state = XFRM_STATE_EXPIRED;
590		if (!mod_timer(&x->timer, jiffies))
591			xfrm_state_hold(x);
592		return -EINVAL;
593	}
594
595	if (!x->km.dying &&
596	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
597	     x->curlft.packets >= x->lft.soft_packet_limit)) {
598		x->km.dying = 1;
599		km_state_expired(x, 0, 0);
600	}
601	return 0;
602}
603EXPORT_SYMBOL(xfrm_state_check_expire);
604
605static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
606{
607	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
608		- skb_headroom(skb);
609
610	if (nhead > 0)
611		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
612
613	/* Check tail too... */
614	return 0;
615}
616
/*
 * Check before using @x on @skb: lifetime limits first, then headroom.
 * Returns the first negative result from the expiry check, otherwise
 * the result of the space check.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int rc = xfrm_state_check_expire(x);

	if (rc < 0)
		return rc;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
627
628struct xfrm_state *
629xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
630		  unsigned short family)
631{
632	struct xfrm_state *x;
633	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
634	if (!afinfo)
635		return NULL;
636
637	spin_lock_bh(&xfrm_state_lock);
638	x = afinfo->state_lookup(daddr, spi, proto);
639	spin_unlock_bh(&xfrm_state_lock);
640	xfrm_state_put_afinfo(afinfo);
641	return x;
642}
643EXPORT_SYMBOL(xfrm_state_lookup);
644
645struct xfrm_state *
646xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
647	      xfrm_address_t *daddr, xfrm_address_t *saddr,
648	      int create, unsigned short family)
649{
650	struct xfrm_state *x;
651	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
652	if (!afinfo)
653		return NULL;
654
655	spin_lock_bh(&xfrm_state_lock);
656	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
657	spin_unlock_bh(&xfrm_state_lock);
658	xfrm_state_put_afinfo(afinfo);
659	return x;
660}
661EXPORT_SYMBOL(xfrm_find_acq);
662
663/* Silly enough, but I'm lazy to build resolution list */
664
665static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
666{
667	int i;
668	struct xfrm_state *x;
669
670	for (i = 0; i < XFRM_DST_HSIZE; i++) {
671		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
672			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
673				xfrm_state_hold(x);
674				return x;
675			}
676		}
677	}
678	return NULL;
679}
680
681struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
682{
683	struct xfrm_state *x;
684
685	spin_lock_bh(&xfrm_state_lock);
686	x = __xfrm_find_acq_byseq(seq);
687	spin_unlock_bh(&xfrm_state_lock);
688	return x;
689}
690EXPORT_SYMBOL(xfrm_find_acq_byseq);
691
692u32 xfrm_get_acqseq(void)
693{
694	u32 res;
695	static u32 acqseq;
696	static DEFINE_SPINLOCK(acqseq_lock);
697
698	spin_lock_bh(&acqseq_lock);
699	res = (++acqseq ? : ++acqseq);
700	spin_unlock_bh(&acqseq_lock);
701	return res;
702}
703EXPORT_SYMBOL(xfrm_get_acqseq);
704
705void
706xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
707{
708	u32 h;
709	struct xfrm_state *x0;
710
711	if (x->id.spi)
712		return;
713
714	if (minspi == maxspi) {
715		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
716		if (x0) {
717			xfrm_state_put(x0);
718			return;
719		}
720		x->id.spi = minspi;
721	} else {
722		u32 spi = 0;
723		minspi = ntohl(minspi);
724		maxspi = ntohl(maxspi);
725		for (h=0; h<maxspi-minspi+1; h++) {
726			spi = minspi + net_random()%(maxspi-minspi+1);
727			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
728			if (x0 == NULL) {
729				x->id.spi = htonl(spi);
730				break;
731			}
732			xfrm_state_put(x0);
733		}
734	}
735	if (x->id.spi) {
736		spin_lock_bh(&xfrm_state_lock);
737		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
738		list_add(&x->byspi, xfrm_state_byspi+h);
739		xfrm_state_hold(x);
740		spin_unlock_bh(&xfrm_state_lock);
741		wake_up(&km_waitq);
742	}
743}
744EXPORT_SYMBOL(xfrm_alloc_spi);
745
746int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
747		    void *data)
748{
749	int i;
750	struct xfrm_state *x;
751	int count = 0;
752	int err = 0;
753
754	spin_lock_bh(&xfrm_state_lock);
755	for (i = 0; i < XFRM_DST_HSIZE; i++) {
756		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
757			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
758				count++;
759		}
760	}
761	if (count == 0) {
762		err = -ENOENT;
763		goto out;
764	}
765
766	for (i = 0; i < XFRM_DST_HSIZE; i++) {
767		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
768			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
769				continue;
770			err = func(x, --count, data);
771			if (err)
772				goto out;
773		}
774	}
775out:
776	spin_unlock_bh(&xfrm_state_lock);
777	return err;
778}
779EXPORT_SYMBOL(xfrm_state_walk);
780
781
782void xfrm_replay_notify(struct xfrm_state *x, int event)
783{
784	struct km_event c;
785	/* we send notify messages in case
786	 *  1. we updated on of the sequence numbers, and the seqno difference
787	 *     is at least x->replay_maxdiff, in this case we also update the
788	 *     timeout of our timer function
789	 *  2. if x->replay_maxage has elapsed since last update,
790	 *     and there were changes
791	 *
792	 *  The state structure must be locked!
793	 */
794
795	switch (event) {
796	case XFRM_REPLAY_UPDATE:
797		if (x->replay_maxdiff &&
798		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
799		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))
800			return;
801
802		break;
803
804	case XFRM_REPLAY_TIMEOUT:
805		if ((x->replay.seq == x->preplay.seq) &&
806		    (x->replay.bitmap == x->preplay.bitmap) &&
807		    (x->replay.oseq == x->preplay.oseq))
808			return;
809
810		break;
811	}
812
813	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
814	c.event = XFRM_MSG_NEWAE;
815	c.data.aevent = event;
816	km_state_notify(x, &c);
817
818resched:
819	if (x->replay_maxage &&
820	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
821		xfrm_state_hold(x);
822
823}
824
825static void xfrm_replay_timer_handler(unsigned long data)
826{
827	struct xfrm_state *x = (struct xfrm_state*)data;
828
829	spin_lock(&x->lock);
830
831	if (xfrm_aevent_is_on() && x->km.state == XFRM_STATE_VALID)
832		xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
833
834	spin_unlock(&x->lock);
835}
836
837int xfrm_replay_check(struct xfrm_state *x, u32 seq)
838{
839	u32 diff;
840
841	seq = ntohl(seq);
842
843	if (unlikely(seq == 0))
844		return -EINVAL;
845
846	if (likely(seq > x->replay.seq))
847		return 0;
848
849	diff = x->replay.seq - seq;
850	if (diff >= x->props.replay_window) {
851		x->stats.replay_window++;
852		return -EINVAL;
853	}
854
855	if (x->replay.bitmap & (1U << diff)) {
856		x->stats.replay++;
857		return -EINVAL;
858	}
859	return 0;
860}
861EXPORT_SYMBOL(xfrm_replay_check);
862
863void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
864{
865	u32 diff;
866
867	seq = ntohl(seq);
868
869	if (seq > x->replay.seq) {
870		diff = seq - x->replay.seq;
871		if (diff < x->props.replay_window)
872			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
873		else
874			x->replay.bitmap = 1;
875		x->replay.seq = seq;
876	} else {
877		diff = x->replay.seq - seq;
878		x->replay.bitmap |= (1U << diff);
879	}
880
881	if (xfrm_aevent_is_on())
882		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
883}
884EXPORT_SYMBOL(xfrm_replay_advance);
885
886static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
887static DEFINE_RWLOCK(xfrm_km_lock);
888
889void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
890{
891	struct xfrm_mgr *km;
892
893	read_lock(&xfrm_km_lock);
894	list_for_each_entry(km, &xfrm_km_list, list)
895		if (km->notify_policy)
896			km->notify_policy(xp, dir, c);
897	read_unlock(&xfrm_km_lock);
898}
899
900void km_state_notify(struct xfrm_state *x, struct km_event *c)
901{
902	struct xfrm_mgr *km;
903	read_lock(&xfrm_km_lock);
904	list_for_each_entry(km, &xfrm_km_list, list)
905		if (km->notify)
906			km->notify(x, c);
907	read_unlock(&xfrm_km_lock);
908}
909
910EXPORT_SYMBOL(km_policy_notify);
911EXPORT_SYMBOL(km_state_notify);
912
913void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
914{
915	struct km_event c;
916
917	c.data.hard = hard;
918	c.pid = pid;
919	c.event = XFRM_MSG_EXPIRE;
920	km_state_notify(x, &c);
921
922	if (hard)
923		wake_up(&km_waitq);
924}
925
926EXPORT_SYMBOL(km_state_expired);
927/*
928 * We send to all registered managers regardless of failure
929 * We are happy with one success
930*/
931int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
932{
933	int err = -EINVAL, acqret;
934	struct xfrm_mgr *km;
935
936	read_lock(&xfrm_km_lock);
937	list_for_each_entry(km, &xfrm_km_list, list) {
938		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
939		if (!acqret)
940			err = acqret;
941	}
942	read_unlock(&xfrm_km_lock);
943	return err;
944}
945EXPORT_SYMBOL(km_query);
946
947int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
948{
949	int err = -EINVAL;
950	struct xfrm_mgr *km;
951
952	read_lock(&xfrm_km_lock);
953	list_for_each_entry(km, &xfrm_km_list, list) {
954		if (km->new_mapping)
955			err = km->new_mapping(x, ipaddr, sport);
956		if (!err)
957			break;
958	}
959	read_unlock(&xfrm_km_lock);
960	return err;
961}
962EXPORT_SYMBOL(km_new_mapping);
963
964void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
965{
966	struct km_event c;
967
968	c.data.hard = hard;
969	c.pid = pid;
970	c.event = XFRM_MSG_POLEXPIRE;
971	km_policy_notify(pol, dir, &c);
972
973	if (hard)
974		wake_up(&km_waitq);
975}
976
977int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
978{
979	int err;
980	u8 *data;
981	struct xfrm_mgr *km;
982	struct xfrm_policy *pol = NULL;
983
984	if (optlen <= 0 || optlen > PAGE_SIZE)
985		return -EMSGSIZE;
986
987	data = kmalloc(optlen, GFP_KERNEL);
988	if (!data)
989		return -ENOMEM;
990
991	err = -EFAULT;
992	if (copy_from_user(data, optval, optlen))
993		goto out;
994
995	err = -EINVAL;
996	read_lock(&xfrm_km_lock);
997	list_for_each_entry(km, &xfrm_km_list, list) {
998		pol = km->compile_policy(sk->sk_family, optname, data,
999					 optlen, &err);
1000		if (err >= 0)
1001			break;
1002	}
1003	read_unlock(&xfrm_km_lock);
1004
1005	if (err >= 0) {
1006		xfrm_sk_policy_insert(sk, err, pol);
1007		xfrm_pol_put(pol);
1008		err = 0;
1009	}
1010
1011out:
1012	kfree(data);
1013	return err;
1014}
1015EXPORT_SYMBOL(xfrm_user_policy);
1016
1017int xfrm_register_km(struct xfrm_mgr *km)
1018{
1019	write_lock_bh(&xfrm_km_lock);
1020	list_add_tail(&km->list, &xfrm_km_list);
1021	write_unlock_bh(&xfrm_km_lock);
1022	return 0;
1023}
1024EXPORT_SYMBOL(xfrm_register_km);
1025
1026int xfrm_unregister_km(struct xfrm_mgr *km)
1027{
1028	write_lock_bh(&xfrm_km_lock);
1029	list_del(&km->list);
1030	write_unlock_bh(&xfrm_km_lock);
1031	return 0;
1032}
1033EXPORT_SYMBOL(xfrm_unregister_km);
1034
1035int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1036{
1037	int err = 0;
1038	if (unlikely(afinfo == NULL))
1039		return -EINVAL;
1040	if (unlikely(afinfo->family >= NPROTO))
1041		return -EAFNOSUPPORT;
1042	write_lock(&xfrm_state_afinfo_lock);
1043	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1044		err = -ENOBUFS;
1045	else {
1046		afinfo->state_bydst = xfrm_state_bydst;
1047		afinfo->state_byspi = xfrm_state_byspi;
1048		xfrm_state_afinfo[afinfo->family] = afinfo;
1049	}
1050	write_unlock(&xfrm_state_afinfo_lock);
1051	return err;
1052}
1053EXPORT_SYMBOL(xfrm_state_register_afinfo);
1054
1055int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1056{
1057	int err = 0;
1058	if (unlikely(afinfo == NULL))
1059		return -EINVAL;
1060	if (unlikely(afinfo->family >= NPROTO))
1061		return -EAFNOSUPPORT;
1062	write_lock(&xfrm_state_afinfo_lock);
1063	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1064		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1065			err = -EINVAL;
1066		else {
1067			xfrm_state_afinfo[afinfo->family] = NULL;
1068			afinfo->state_byspi = NULL;
1069			afinfo->state_bydst = NULL;
1070		}
1071	}
1072	write_unlock(&xfrm_state_afinfo_lock);
1073	return err;
1074}
1075EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1076
1077static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1078{
1079	struct xfrm_state_afinfo *afinfo;
1080	if (unlikely(family >= NPROTO))
1081		return NULL;
1082	read_lock(&xfrm_state_afinfo_lock);
1083	afinfo = xfrm_state_afinfo[family];
1084	if (likely(afinfo != NULL))
1085		read_lock(&afinfo->lock);
1086	read_unlock(&xfrm_state_afinfo_lock);
1087	return afinfo;
1088}
1089
1090static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1091{
1092	if (unlikely(afinfo == NULL))
1093		return;
1094	read_unlock(&afinfo->lock);
1095}
1096
1097/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1098void xfrm_state_delete_tunnel(struct xfrm_state *x)
1099{
1100	if (x->tunnel) {
1101		struct xfrm_state *t = x->tunnel;
1102
1103		if (atomic_read(&t->tunnel_users) == 2)
1104			xfrm_state_delete(t);
1105		atomic_dec(&t->tunnel_users);
1106		xfrm_state_put(t);
1107		x->tunnel = NULL;
1108	}
1109}
1110EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1111
1112/*
1113 * This function is NOT optimal.  For example, with ESP it will give an
1114 * MTU that's usually two bytes short of being optimal.  However, it will
1115 * usually give an answer that's a multiple of 4 provided the input is
1116 * also a multiple of 4.
1117 */
1118int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1119{
1120	int res = mtu;
1121
1122	res -= x->props.header_len;
1123
1124	for (;;) {
1125		int m = res;
1126
1127		if (m < 68)
1128			return 68;
1129
1130		spin_lock_bh(&x->lock);
1131		if (x->km.state == XFRM_STATE_VALID &&
1132		    x->type && x->type->get_max_size)
1133			m = x->type->get_max_size(x, m);
1134		else
1135			m += x->props.header_len;
1136		spin_unlock_bh(&x->lock);
1137
1138		if (m <= mtu)
1139			break;
1140		res -= (m - mtu);
1141	}
1142
1143	return res;
1144}
1145
1146EXPORT_SYMBOL(xfrm_state_mtu);
1147
1148int xfrm_init_state(struct xfrm_state *x)
1149{
1150	struct xfrm_state_afinfo *afinfo;
1151	int family = x->props.family;
1152	int err;
1153
1154	err = -EAFNOSUPPORT;
1155	afinfo = xfrm_state_get_afinfo(family);
1156	if (!afinfo)
1157		goto error;
1158
1159	err = 0;
1160	if (afinfo->init_flags)
1161		err = afinfo->init_flags(x);
1162
1163	xfrm_state_put_afinfo(afinfo);
1164
1165	if (err)
1166		goto error;
1167
1168	err = -EPROTONOSUPPORT;
1169	x->type = xfrm_get_type(x->id.proto, family);
1170	if (x->type == NULL)
1171		goto error;
1172
1173	err = x->type->init_state(x);
1174	if (err)
1175		goto error;
1176
1177	x->km.state = XFRM_STATE_VALID;
1178
1179error:
1180	return err;
1181}
1182
1183EXPORT_SYMBOL(xfrm_init_state);
1184
1185void __init xfrm_state_init(void)
1186{
1187	int i;
1188
1189	for (i=0; i<XFRM_DST_HSIZE; i++) {
1190		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1191		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1192	}
1193	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1194}
1195
1196