xfrm_state.c revision c4028958b6ecad064b1a6303a6a5906d4fe48d73
1/*
2 * xfrm_state.c
3 *
4 * Changes:
5 *	Mitsuru KANDA @USAGI
6 * 	Kazunori MIYAZAWA @USAGI
7 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * 		IPv6 support
9 * 	YOSHIFUJI Hideaki @USAGI
10 * 		Split up af-specific functions
11 *	Derek Atkins <derek@ihtfp.com>
12 *		Add UDP Encapsulation
13 *
14 */
15
16#include <linux/workqueue.h>
17#include <net/xfrm.h>
18#include <linux/pfkeyv2.h>
19#include <linux/ipsec.h>
20#include <linux/module.h>
21#include <linux/cache.h>
22#include <asm/uaccess.h>
23
24#include "xfrm_hash.h"
25
/* Socket pointer defined and exported here; presumably the xfrm netlink
 * socket (it is not assigned anywhere in this part of the file) - TODO confirm. */
26struct sock *xfrm_nl;
27EXPORT_SYMBOL(xfrm_nl);
28
/* Exported tunable, initialised to XFRM_AE_ETIME. */
29u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
/* Exported tunable, initialised to XFRM_AE_SEQT_SIZE. */
32u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35/* Each xfrm_state may be linked to three tables:
36
37   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38   2. Hash table by (daddr,saddr,reqid,family) to find what SAs exist for
39      given destination/tunnel endpoint. (output)
   3. Hash table by (daddr,saddr,family) to find SA by address pair.
40 */
41
/* Taken (usually with _bh) around lookups in and updates of the three hash
 * tables below, and around changes to xfrm_state_hmask/xfrm_state_num. */
42static DEFINE_SPINLOCK(xfrm_state_lock);
43
44/* Hash table to find appropriate SA towards given target (endpoint
45 * of tunnel or destination of transport mode) allowed by selector.
46 *
47 * Main use is finding SA after policy selected tunnel or transport mode.
48 * Also, it can be used by ah/esp icmp error handler to find offending SA.
49 */
50static struct hlist_head *xfrm_state_bydst __read_mostly;
/* Same states, chained through ->bysrc and ->byspi respectively. */
51static struct hlist_head *xfrm_state_bysrc __read_mostly;
52static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Number of buckets in each table minus one; used as the hash mask. */
53static unsigned int xfrm_state_hmask __read_mostly;
/* The tables are not grown once they have this many buckets. */
54static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Count of states currently linked into the tables. */
55static unsigned int xfrm_state_num;
/* Incremented on every __xfrm_state_insert(); stamped into x->genid. */
56static unsigned int xfrm_state_genid;
57
58static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
59					 xfrm_address_t *saddr,
60					 u32 reqid,
61					 unsigned short family)
62{
63	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
64}
65
66static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
67					 xfrm_address_t *saddr,
68					 unsigned short family)
69{
70	return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
71}
72
73static inline unsigned int
74xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
75{
76	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
77}
78
79static void xfrm_hash_transfer(struct hlist_head *list,
80			       struct hlist_head *ndsttable,
81			       struct hlist_head *nsrctable,
82			       struct hlist_head *nspitable,
83			       unsigned int nhashmask)
84{
85	struct hlist_node *entry, *tmp;
86	struct xfrm_state *x;
87
88	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
89		unsigned int h;
90
91		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
92				    x->props.reqid, x->props.family,
93				    nhashmask);
94		hlist_add_head(&x->bydst, ndsttable+h);
95
96		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
97				    x->props.family,
98				    nhashmask);
99		hlist_add_head(&x->bysrc, nsrctable+h);
100
101		if (x->id.spi) {
102			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
103					    x->id.proto, x->props.family,
104					    nhashmask);
105			hlist_add_head(&x->byspi, nspitable+h);
106		}
107	}
108}
109
110static unsigned long xfrm_hash_new_size(void)
111{
112	return ((xfrm_state_hmask + 1) << 1) *
113		sizeof(struct hlist_head);
114}
115
116static DEFINE_MUTEX(hash_resize_mutex);
117
118static void xfrm_hash_resize(struct work_struct *__unused)
119{
120	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
121	unsigned long nsize, osize;
122	unsigned int nhashmask, ohashmask;
123	int i;
124
125	mutex_lock(&hash_resize_mutex);
126
127	nsize = xfrm_hash_new_size();
128	ndst = xfrm_hash_alloc(nsize);
129	if (!ndst)
130		goto out_unlock;
131	nsrc = xfrm_hash_alloc(nsize);
132	if (!nsrc) {
133		xfrm_hash_free(ndst, nsize);
134		goto out_unlock;
135	}
136	nspi = xfrm_hash_alloc(nsize);
137	if (!nspi) {
138		xfrm_hash_free(ndst, nsize);
139		xfrm_hash_free(nsrc, nsize);
140		goto out_unlock;
141	}
142
143	spin_lock_bh(&xfrm_state_lock);
144
145	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
146	for (i = xfrm_state_hmask; i >= 0; i--)
147		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
148				   nhashmask);
149
150	odst = xfrm_state_bydst;
151	osrc = xfrm_state_bysrc;
152	ospi = xfrm_state_byspi;
153	ohashmask = xfrm_state_hmask;
154
155	xfrm_state_bydst = ndst;
156	xfrm_state_bysrc = nsrc;
157	xfrm_state_byspi = nspi;
158	xfrm_state_hmask = nhashmask;
159
160	spin_unlock_bh(&xfrm_state_lock);
161
162	osize = (ohashmask + 1) * sizeof(struct hlist_head);
163	xfrm_hash_free(odst, osize);
164	xfrm_hash_free(osrc, osize);
165	xfrm_hash_free(ospi, osize);
166
167out_unlock:
168	mutex_unlock(&hash_resize_mutex);
169}
170
/* Work item that runs xfrm_hash_resize(); queued by xfrm_hash_grow_check(). */
171static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
172
/* Wait queue woken by this file whenever states are inserted, flushed,
 * expired or garbage-collected. */
173DECLARE_WAIT_QUEUE_HEAD(km_waitq);
174EXPORT_SYMBOL(km_waitq);
175
/* Per-family afinfo pointers, indexed up to NPROTO, with their rwlock.
 * Accessed via xfrm_state_get_afinfo()/xfrm_state_put_afinfo(), which are
 * defined outside this part of the file. */
176static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
177static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
178
/* Deferred destruction: __xfrm_state_destroy() chains dead states on
 * xfrm_state_gc_list (under xfrm_state_gc_lock) and schedules
 * xfrm_state_gc_work, whose handler frees them. */
179static struct work_struct xfrm_state_gc_work;
180static HLIST_HEAD(xfrm_state_gc_list);
181static DEFINE_SPINLOCK(xfrm_state_gc_lock);
182
/* Forward declarations for functions used before their definition. */
183int __xfrm_state_delete(struct xfrm_state *x);
184
185static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
186static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
187
188int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
189void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
190
191static void xfrm_state_gc_destroy(struct xfrm_state *x)
192{
193	del_timer_sync(&x->timer);
194	del_timer_sync(&x->rtimer);
195	kfree(x->aalg);
196	kfree(x->ealg);
197	kfree(x->calg);
198	kfree(x->encap);
199	kfree(x->coaddr);
200	if (x->mode)
201		xfrm_put_mode(x->mode);
202	if (x->type) {
203		x->type->destructor(x);
204		xfrm_put_type(x->type);
205	}
206	security_xfrm_state_free(x);
207	kfree(x);
208}
209
210static void xfrm_state_gc_task(struct work_struct *data)
211{
212	struct xfrm_state *x;
213	struct hlist_node *entry, *tmp;
214	struct hlist_head gc_list;
215
216	spin_lock_bh(&xfrm_state_gc_lock);
217	gc_list.first = xfrm_state_gc_list.first;
218	INIT_HLIST_HEAD(&xfrm_state_gc_list);
219	spin_unlock_bh(&xfrm_state_gc_lock);
220
221	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
222		xfrm_state_gc_destroy(x);
223
224	wake_up(&km_waitq);
225}
226
227static inline unsigned long make_jiffies(long secs)
228{
229	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
230		return MAX_SCHEDULE_TIMEOUT-1;
231	else
232	        return secs*HZ;
233}
234
/*
 * Lifetime timer callback for a state (installed on x->timer by
 * xfrm_state_alloc(); @data is the state pointer).
 *
 * Under x->lock it works out, in seconds, the nearest hard or soft
 * add/use expiry:
 *  - a hard limit that has passed (or a state already marked EXPIRED)
 *    takes the "expired" path;
 *  - a soft limit that has passed sets x->km.dying and reports a soft
 *    expiry through km_state_expired(x, 0, 0);
 *  - otherwise the timer is re-armed for the nearest remaining limit.
 * A DEAD state is left alone.
 */
235static void xfrm_timer_handler(unsigned long data)
236{
237	struct xfrm_state *x = (struct xfrm_state*)data;
238	unsigned long now = (unsigned long)xtime.tv_sec;
239	long next = LONG_MAX;
240	int warn = 0;
241
242	spin_lock(&x->lock);
243	if (x->km.state == XFRM_STATE_DEAD)
244		goto out;
245	if (x->km.state == XFRM_STATE_EXPIRED)
246		goto expired;
247	if (x->lft.hard_add_expires_seconds) {
248		long tmo = x->lft.hard_add_expires_seconds +
249			x->curlft.add_time - now;
250		if (tmo <= 0)
251			goto expired;
252		if (tmo < next)
253			next = tmo;
254	}
255	if (x->lft.hard_use_expires_seconds) {
		/* A state that has never been used counts from "now". */
256		long tmo = x->lft.hard_use_expires_seconds +
257			(x->curlft.use_time ? : now) - now;
258		if (tmo <= 0)
259			goto expired;
260		if (tmo < next)
261			next = tmo;
262	}
	/* Already flagged as dying: skip the soft limits, just re-arm. */
263	if (x->km.dying)
264		goto resched;
265	if (x->lft.soft_add_expires_seconds) {
266		long tmo = x->lft.soft_add_expires_seconds +
267			x->curlft.add_time - now;
268		if (tmo <= 0)
269			warn = 1;
270		else if (tmo < next)
271			next = tmo;
272	}
273	if (x->lft.soft_use_expires_seconds) {
274		long tmo = x->lft.soft_use_expires_seconds +
275			(x->curlft.use_time ? : now) - now;
276		if (tmo <= 0)
277			warn = 1;
278		else if (tmo < next)
279			next = tmo;
280	}
281
282	x->km.dying = warn;
283	if (warn)
284		km_state_expired(x, 0, 0);
285resched:
	/* next stays LONG_MAX when no limit is pending; then nothing is armed. */
286	if (next != LONG_MAX)
287		mod_timer(&x->timer, jiffies + make_jiffies(next));
288
289	goto out;
290
291expired:
	/* An ACQ state with no SPI is first only marked EXPIRED and revisited
	 * two seconds later; that second pass deletes it (see the
	 * XFRM_STATE_EXPIRED test above). */
292	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
293		x->km.state = XFRM_STATE_EXPIRED;
294		wake_up(&km_waitq);
295		next = 2;
296		goto resched;
297	}
	/* Report a hard expiry only if we did the delete and an SPI exists. */
298	if (!__xfrm_state_delete(x) && x->id.spi)
299		km_state_expired(x, 1, 0);
300
301out:
302	spin_unlock(&x->lock);
303}
304
305static void xfrm_replay_timer_handler(unsigned long data);
306
307struct xfrm_state *xfrm_state_alloc(void)
308{
309	struct xfrm_state *x;
310
311	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
312
313	if (x) {
314		atomic_set(&x->refcnt, 1);
315		atomic_set(&x->tunnel_users, 0);
316		INIT_HLIST_NODE(&x->bydst);
317		INIT_HLIST_NODE(&x->bysrc);
318		INIT_HLIST_NODE(&x->byspi);
319		init_timer(&x->timer);
320		x->timer.function = xfrm_timer_handler;
321		x->timer.data	  = (unsigned long)x;
322		init_timer(&x->rtimer);
323		x->rtimer.function = xfrm_replay_timer_handler;
324		x->rtimer.data     = (unsigned long)x;
325		x->curlft.add_time = (unsigned long)xtime.tv_sec;
326		x->lft.soft_byte_limit = XFRM_INF;
327		x->lft.soft_packet_limit = XFRM_INF;
328		x->lft.hard_byte_limit = XFRM_INF;
329		x->lft.hard_packet_limit = XFRM_INF;
330		x->replay_maxage = 0;
331		x->replay_maxdiff = 0;
332		spin_lock_init(&x->lock);
333	}
334	return x;
335}
336EXPORT_SYMBOL(xfrm_state_alloc);
337
338void __xfrm_state_destroy(struct xfrm_state *x)
339{
340	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
341
342	spin_lock_bh(&xfrm_state_gc_lock);
343	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
344	spin_unlock_bh(&xfrm_state_gc_lock);
345	schedule_work(&xfrm_state_gc_work);
346}
347EXPORT_SYMBOL(__xfrm_state_destroy);
348
349int __xfrm_state_delete(struct xfrm_state *x)
350{
351	int err = -ESRCH;
352
353	if (x->km.state != XFRM_STATE_DEAD) {
354		x->km.state = XFRM_STATE_DEAD;
355		spin_lock(&xfrm_state_lock);
356		hlist_del(&x->bydst);
357		hlist_del(&x->bysrc);
358		if (x->id.spi)
359			hlist_del(&x->byspi);
360		xfrm_state_num--;
361		spin_unlock(&xfrm_state_lock);
362
363		/* All xfrm_state objects are created by xfrm_state_alloc.
364		 * The xfrm_state_alloc call gives a reference, and that
365		 * is what we are dropping here.
366		 */
367		__xfrm_state_put(x);
368		err = 0;
369	}
370
371	return err;
372}
373EXPORT_SYMBOL(__xfrm_state_delete);
374
375int xfrm_state_delete(struct xfrm_state *x)
376{
377	int err;
378
379	spin_lock_bh(&x->lock);
380	err = __xfrm_state_delete(x);
381	spin_unlock_bh(&x->lock);
382
383	return err;
384}
385EXPORT_SYMBOL(xfrm_state_delete);
386
387void xfrm_state_flush(u8 proto)
388{
389	int i;
390
391	spin_lock_bh(&xfrm_state_lock);
392	for (i = 0; i <= xfrm_state_hmask; i++) {
393		struct hlist_node *entry;
394		struct xfrm_state *x;
395restart:
396		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
397			if (!xfrm_state_kern(x) &&
398			    xfrm_id_proto_match(x->id.proto, proto)) {
399				xfrm_state_hold(x);
400				spin_unlock_bh(&xfrm_state_lock);
401
402				xfrm_state_delete(x);
403				xfrm_state_put(x);
404
405				spin_lock_bh(&xfrm_state_lock);
406				goto restart;
407			}
408		}
409	}
410	spin_unlock_bh(&xfrm_state_lock);
411	wake_up(&km_waitq);
412}
413EXPORT_SYMBOL(xfrm_state_flush);
414
415static int
416xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
417		  struct xfrm_tmpl *tmpl,
418		  xfrm_address_t *daddr, xfrm_address_t *saddr,
419		  unsigned short family)
420{
421	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
422	if (!afinfo)
423		return -1;
424	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
425	xfrm_state_put_afinfo(afinfo);
426	return 0;
427}
428
429static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
430{
431	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
432	struct xfrm_state *x;
433	struct hlist_node *entry;
434
435	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
436		if (x->props.family != family ||
437		    x->id.spi       != spi ||
438		    x->id.proto     != proto)
439			continue;
440
441		switch (family) {
442		case AF_INET:
443			if (x->id.daddr.a4 != daddr->a4)
444				continue;
445			break;
446		case AF_INET6:
447			if (!ipv6_addr_equal((struct in6_addr *)daddr,
448					     (struct in6_addr *)
449					     x->id.daddr.a6))
450				continue;
451			break;
452		};
453
454		xfrm_state_hold(x);
455		return x;
456	}
457
458	return NULL;
459}
460
461static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
462{
463	unsigned int h = xfrm_src_hash(daddr, saddr, family);
464	struct xfrm_state *x;
465	struct hlist_node *entry;
466
467	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
468		if (x->props.family != family ||
469		    x->id.proto     != proto)
470			continue;
471
472		switch (family) {
473		case AF_INET:
474			if (x->id.daddr.a4 != daddr->a4 ||
475			    x->props.saddr.a4 != saddr->a4)
476				continue;
477			break;
478		case AF_INET6:
479			if (!ipv6_addr_equal((struct in6_addr *)daddr,
480					     (struct in6_addr *)
481					     x->id.daddr.a6) ||
482			    !ipv6_addr_equal((struct in6_addr *)saddr,
483					     (struct in6_addr *)
484					     x->props.saddr.a6))
485				continue;
486			break;
487		};
488
489		xfrm_state_hold(x);
490		return x;
491	}
492
493	return NULL;
494}
495
496static inline struct xfrm_state *
497__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
498{
499	if (use_spi)
500		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
501					   x->id.proto, family);
502	else
503		return __xfrm_state_lookup_byaddr(&x->id.daddr,
504						  &x->props.saddr,
505						  x->id.proto, family);
506}
507
508static void xfrm_hash_grow_check(int have_hash_collision)
509{
510	if (have_hash_collision &&
511	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
512	    xfrm_state_num > xfrm_state_hmask)
513		schedule_work(&xfrm_hash_work);
514}
515
/*
 * Find the state to use for flow @fl under template @tmpl / policy @pol.
 *
 * Scans the by-destination bucket for (daddr, saddr, tmpl->reqid, family)
 * with xfrm_state_lock held.  Among VALID states whose selector and
 * security context match, a non-dying state is preferred over a dying
 * one and, for equal "dying", the one with the later add_time.
 *
 * If nothing usable is found, no matching state is in ERROR/EXPIRED and no
 * acquire is already in progress, a new state is allocated, given a
 * temporary selector and handed to the key manager through km_query();
 * on success it is linked into the tables in XFRM_STATE_ACQ with a timer
 * of XFRM_ACQ_EXPIRES seconds.
 *
 * Returns the chosen (or newly created) state with an extra reference,
 * or NULL.  *err is written only when NULL is returned: -EAGAIN if an
 * acquire is in progress, otherwise the recorded error (possibly 0).
 */
516struct xfrm_state *
517xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
518		struct flowi *fl, struct xfrm_tmpl *tmpl,
519		struct xfrm_policy *pol, int *err,
520		unsigned short family)
521{
522	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
523	struct hlist_node *entry;
524	struct xfrm_state *x, *x0;
525	int acquire_in_progress = 0;
526	int error = 0;
527	struct xfrm_state *best = NULL;
528
529	spin_lock_bh(&xfrm_state_lock);
530	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
531		if (x->props.family == family &&
532		    x->props.reqid == tmpl->reqid &&
533		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
534		    xfrm_state_addr_check(x, daddr, saddr, family) &&
535		    tmpl->mode == x->props.mode &&
536		    tmpl->id.proto == x->id.proto &&
537		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
538			/* Resolution logic:
539			   1. There is a valid state with matching selector.
540			      Done.
541			   2. Valid state with inappropriate selector. Skip.
542
543			   Entering area of "sysdeps".
544
545			   3. If state is not valid, selector is temporary,
546			      it selects only session which triggered
547			      previous resolution. Key manager will do
548			      something to install a state with proper
549			      selector.
550			 */
551			if (x->km.state == XFRM_STATE_VALID) {
552				if (!xfrm_selector_match(&x->sel, fl, family) ||
553				    !security_xfrm_state_pol_flow_match(x, pol, fl))
554					continue;
555				if (!best ||
556				    best->km.dying > x->km.dying ||
557				    (best->km.dying == x->km.dying &&
558				     best->curlft.add_time < x->curlft.add_time))
559					best = x;
560			} else if (x->km.state == XFRM_STATE_ACQ) {
561				acquire_in_progress = 1;
562			} else if (x->km.state == XFRM_STATE_ERROR ||
563				   x->km.state == XFRM_STATE_EXPIRED) {
564 				if (xfrm_selector_match(&x->sel, fl, family) &&
565				    security_xfrm_state_pol_flow_match(x, pol, fl))
566					error = -ESRCH;
567			}
568		}
569	}
570
571	x = best;
572	if (!x && !error && !acquire_in_progress) {
		/* A template with a fixed SPI must not clash with an
		 * existing state for that SPI. */
573		if (tmpl->id.spi &&
574		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
575					      tmpl->id.proto, family)) != NULL) {
576			xfrm_state_put(x0);
577			error = -EEXIST;
578			goto out;
579		}
580		x = xfrm_state_alloc();
581		if (x == NULL) {
582			error = -ENOMEM;
583			goto out;
584		}
585		/* Initialize temporary selector matching only
586		 * to current session. */
		/* NOTE(review): the return value of xfrm_init_tempsel() is
		 * not checked here. */
587		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
588
589		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
590		if (error) {
591			x->km.state = XFRM_STATE_DEAD;
592			xfrm_state_put(x);
593			x = NULL;
594			goto out;
595		}
596
597		if (km_query(x, tmpl, pol) == 0) {
598			x->km.state = XFRM_STATE_ACQ;
599			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
600			h = xfrm_src_hash(daddr, saddr, family);
601			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
602			if (x->id.spi) {
603				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
604				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
605			}
606			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
607			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
608			add_timer(&x->timer);
609			xfrm_state_num++;
610			xfrm_hash_grow_check(x->bydst.next != NULL);
611		} else {
612			x->km.state = XFRM_STATE_DEAD;
613			xfrm_state_put(x);
614			x = NULL;
615			error = -ESRCH;
616		}
617	}
618out:
	/* The caller gets its own reference on whatever is returned. */
619	if (x)
620		xfrm_state_hold(x);
621	else
622		*err = acquire_in_progress ? -EAGAIN : error;
623	spin_unlock_bh(&xfrm_state_lock);
624	return x;
625}
626
627static void __xfrm_state_insert(struct xfrm_state *x)
628{
629	unsigned int h;
630
631	x->genid = ++xfrm_state_genid;
632
633	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
634			  x->props.reqid, x->props.family);
635	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
636
637	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
638	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
639
640	if (x->id.spi) {
641		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
642				  x->props.family);
643
644		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
645	}
646
647	mod_timer(&x->timer, jiffies + HZ);
648	if (x->replay_maxage)
649		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
650
651	wake_up(&km_waitq);
652
653	xfrm_state_num++;
654
655	xfrm_hash_grow_check(x->bydst.next != NULL);
656}
657
658/* xfrm_state_lock is held */
659static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
660{
661	unsigned short family = xnew->props.family;
662	u32 reqid = xnew->props.reqid;
663	struct xfrm_state *x;
664	struct hlist_node *entry;
665	unsigned int h;
666
667	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
668	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
669		if (x->props.family	== family &&
670		    x->props.reqid	== reqid &&
671		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
672		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
673			x->genid = xfrm_state_genid;
674	}
675}
676
677void xfrm_state_insert(struct xfrm_state *x)
678{
679	spin_lock_bh(&xfrm_state_lock);
680	__xfrm_state_bump_genids(x);
681	__xfrm_state_insert(x);
682	spin_unlock_bh(&xfrm_state_lock);
683}
684EXPORT_SYMBOL(xfrm_state_insert);
685
686/* xfrm_state_lock is held */
687static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
688{
689	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
690	struct hlist_node *entry;
691	struct xfrm_state *x;
692
693	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
694		if (x->props.reqid  != reqid ||
695		    x->props.mode   != mode ||
696		    x->props.family != family ||
697		    x->km.state     != XFRM_STATE_ACQ ||
698		    x->id.spi       != 0)
699			continue;
700
701		switch (family) {
702		case AF_INET:
703			if (x->id.daddr.a4    != daddr->a4 ||
704			    x->props.saddr.a4 != saddr->a4)
705				continue;
706			break;
707		case AF_INET6:
708			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
709					     (struct in6_addr *)daddr) ||
710			    !ipv6_addr_equal((struct in6_addr *)
711					     x->props.saddr.a6,
712					     (struct in6_addr *)saddr))
713				continue;
714			break;
715		};
716
717		xfrm_state_hold(x);
718		return x;
719	}
720
721	if (!create)
722		return NULL;
723
724	x = xfrm_state_alloc();
725	if (likely(x)) {
726		switch (family) {
727		case AF_INET:
728			x->sel.daddr.a4 = daddr->a4;
729			x->sel.saddr.a4 = saddr->a4;
730			x->sel.prefixlen_d = 32;
731			x->sel.prefixlen_s = 32;
732			x->props.saddr.a4 = saddr->a4;
733			x->id.daddr.a4 = daddr->a4;
734			break;
735
736		case AF_INET6:
737			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
738				       (struct in6_addr *)daddr);
739			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
740				       (struct in6_addr *)saddr);
741			x->sel.prefixlen_d = 128;
742			x->sel.prefixlen_s = 128;
743			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
744				       (struct in6_addr *)saddr);
745			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
746				       (struct in6_addr *)daddr);
747			break;
748		};
749
750		x->km.state = XFRM_STATE_ACQ;
751		x->id.proto = proto;
752		x->props.family = family;
753		x->props.mode = mode;
754		x->props.reqid = reqid;
755		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
756		xfrm_state_hold(x);
757		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
758		add_timer(&x->timer);
759		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
760		h = xfrm_src_hash(daddr, saddr, family);
761		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
762		wake_up(&km_waitq);
763
764		xfrm_state_num++;
765
766		xfrm_hash_grow_check(x->bydst.next != NULL);
767	}
768
769	return x;
770}
771
772static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
773
774int xfrm_state_add(struct xfrm_state *x)
775{
776	struct xfrm_state *x1;
777	int family;
778	int err;
779	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
780
781	family = x->props.family;
782
783	spin_lock_bh(&xfrm_state_lock);
784
785	x1 = __xfrm_state_locate(x, use_spi, family);
786	if (x1) {
787		xfrm_state_put(x1);
788		x1 = NULL;
789		err = -EEXIST;
790		goto out;
791	}
792
793	if (use_spi && x->km.seq) {
794		x1 = __xfrm_find_acq_byseq(x->km.seq);
795		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
796			xfrm_state_put(x1);
797			x1 = NULL;
798		}
799	}
800
801	if (use_spi && !x1)
802		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
803				     x->id.proto,
804				     &x->id.daddr, &x->props.saddr, 0);
805
806	__xfrm_state_bump_genids(x);
807	__xfrm_state_insert(x);
808	err = 0;
809
810out:
811	spin_unlock_bh(&xfrm_state_lock);
812
813	if (x1) {
814		xfrm_state_delete(x1);
815		xfrm_state_put(x1);
816	}
817
818	return err;
819}
820EXPORT_SYMBOL(xfrm_state_add);
821
/*
 * Update an installed state from @x.
 *
 * The existing state x1 is located by SPI or by address pair.  Then:
 *  - no such state:           -ESRCH
 *  - xfrm_state_kern(x1):     -EEXIST
 *  - x1 is a larval (ACQ):    @x itself is inserted and x1 deleted; 0
 *  - x1 is VALID:             encap, coaddr, (for non-SPI protocols) the
 *                             selector and the lifetime config are copied
 *                             from @x into x1, "dying" is cleared and the
 *                             timer re-armed; 0
 *  - any other x1 state:      -EINVAL
 *
 * In the copy case @x is not inserted and is not released here.
 */
822int xfrm_state_update(struct xfrm_state *x)
823{
824	struct xfrm_state *x1;
825	int err;
826	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
827
828	spin_lock_bh(&xfrm_state_lock);
829	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
830
831	err = -ESRCH;
832	if (!x1)
833		goto out;
834
835	if (xfrm_state_kern(x1)) {
836		xfrm_state_put(x1);
837		err = -EEXIST;
838		goto out;
839	}
840
841	if (x1->km.state == XFRM_STATE_ACQ) {
842		__xfrm_state_insert(x);
		/* x == NULL below means "x was inserted, retire x1". */
843		x = NULL;
844	}
845	err = 0;
846
847out:
848	spin_unlock_bh(&xfrm_state_lock);
849
850	if (err)
851		return err;
852
853	if (!x) {
854		xfrm_state_delete(x1);
855		xfrm_state_put(x1);
856		return 0;
857	}
858
859	err = -EINVAL;
860	spin_lock_bh(&x1->lock);
861	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		/* encap/coaddr are copied only when both sides have one. */
862		if (x->encap && x1->encap)
863			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
864		if (x->coaddr && x1->coaddr) {
865			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
866		}
867		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
868			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
869		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
870		x1->km.dying = 0;
871
872		mod_timer(&x1->timer, jiffies + HZ);
		/* Re-check the new limits right away if x1 was already used. */
873		if (x1->curlft.use_time)
874			xfrm_state_check_expire(x1);
875
876		err = 0;
877	}
878	spin_unlock_bh(&x1->lock);
879
880	xfrm_state_put(x1);
881
882	return err;
883}
884EXPORT_SYMBOL(xfrm_state_update);
885
886int xfrm_state_check_expire(struct xfrm_state *x)
887{
888	if (!x->curlft.use_time)
889		x->curlft.use_time = (unsigned long)xtime.tv_sec;
890
891	if (x->km.state != XFRM_STATE_VALID)
892		return -EINVAL;
893
894	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
895	    x->curlft.packets >= x->lft.hard_packet_limit) {
896		x->km.state = XFRM_STATE_EXPIRED;
897		mod_timer(&x->timer, jiffies);
898		return -EINVAL;
899	}
900
901	if (!x->km.dying &&
902	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
903	     x->curlft.packets >= x->lft.soft_packet_limit)) {
904		x->km.dying = 1;
905		km_state_expired(x, 0, 0);
906	}
907	return 0;
908}
909EXPORT_SYMBOL(xfrm_state_check_expire);
910
911static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
912{
913	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
914		- skb_headroom(skb);
915
916	if (nhead > 0)
917		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
918
919	/* Check tail too... */
920	return 0;
921}
922
/*
 * Pre-transmit check: lifetime limits first, then headroom.  Returns the
 * first negative result, otherwise the result of the headroom check.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int rc = xfrm_state_check_expire(x);

	if (rc < 0)
		return rc;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
933
934struct xfrm_state *
935xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
936		  unsigned short family)
937{
938	struct xfrm_state *x;
939
940	spin_lock_bh(&xfrm_state_lock);
941	x = __xfrm_state_lookup(daddr, spi, proto, family);
942	spin_unlock_bh(&xfrm_state_lock);
943	return x;
944}
945EXPORT_SYMBOL(xfrm_state_lookup);
946
947struct xfrm_state *
948xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
949			 u8 proto, unsigned short family)
950{
951	struct xfrm_state *x;
952
953	spin_lock_bh(&xfrm_state_lock);
954	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
955	spin_unlock_bh(&xfrm_state_lock);
956	return x;
957}
958EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
959
960struct xfrm_state *
961xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
962	      xfrm_address_t *daddr, xfrm_address_t *saddr,
963	      int create, unsigned short family)
964{
965	struct xfrm_state *x;
966
967	spin_lock_bh(&xfrm_state_lock);
968	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
969	spin_unlock_bh(&xfrm_state_lock);
970
971	return x;
972}
973EXPORT_SYMBOL(xfrm_find_acq);
974
975#ifdef CONFIG_XFRM_SUB_POLICY
976int
977xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
978	       unsigned short family)
979{
980	int err = 0;
981	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
982	if (!afinfo)
983		return -EAFNOSUPPORT;
984
985	spin_lock_bh(&xfrm_state_lock);
986	if (afinfo->tmpl_sort)
987		err = afinfo->tmpl_sort(dst, src, n);
988	spin_unlock_bh(&xfrm_state_lock);
989	xfrm_state_put_afinfo(afinfo);
990	return err;
991}
992EXPORT_SYMBOL(xfrm_tmpl_sort);
993
994int
995xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
996		unsigned short family)
997{
998	int err = 0;
999	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1000	if (!afinfo)
1001		return -EAFNOSUPPORT;
1002
1003	spin_lock_bh(&xfrm_state_lock);
1004	if (afinfo->state_sort)
1005		err = afinfo->state_sort(dst, src, n);
1006	spin_unlock_bh(&xfrm_state_lock);
1007	xfrm_state_put_afinfo(afinfo);
1008	return err;
1009}
1010EXPORT_SYMBOL(xfrm_state_sort);
1011#endif
1012
1013/* Silly enough, but I'm lazy to build resolution list */
1014
1015static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1016{
1017	int i;
1018
1019	for (i = 0; i <= xfrm_state_hmask; i++) {
1020		struct hlist_node *entry;
1021		struct xfrm_state *x;
1022
1023		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1024			if (x->km.seq == seq &&
1025			    x->km.state == XFRM_STATE_ACQ) {
1026				xfrm_state_hold(x);
1027				return x;
1028			}
1029		}
1030	}
1031	return NULL;
1032}
1033
1034struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1035{
1036	struct xfrm_state *x;
1037
1038	spin_lock_bh(&xfrm_state_lock);
1039	x = __xfrm_find_acq_byseq(seq);
1040	spin_unlock_bh(&xfrm_state_lock);
1041	return x;
1042}
1043EXPORT_SYMBOL(xfrm_find_acq_byseq);
1044
1045u32 xfrm_get_acqseq(void)
1046{
1047	u32 res;
1048	static u32 acqseq;
1049	static DEFINE_SPINLOCK(acqseq_lock);
1050
1051	spin_lock_bh(&acqseq_lock);
1052	res = (++acqseq ? : ++acqseq);
1053	spin_unlock_bh(&acqseq_lock);
1054	return res;
1055}
1056EXPORT_SYMBOL(xfrm_get_acqseq);
1057
1058void
1059xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1060{
1061	unsigned int h;
1062	struct xfrm_state *x0;
1063
1064	if (x->id.spi)
1065		return;
1066
1067	if (minspi == maxspi) {
1068		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1069		if (x0) {
1070			xfrm_state_put(x0);
1071			return;
1072		}
1073		x->id.spi = minspi;
1074	} else {
1075		u32 spi = 0;
1076		u32 low = ntohl(minspi);
1077		u32 high = ntohl(maxspi);
1078		for (h=0; h<high-low+1; h++) {
1079			spi = low + net_random()%(high-low+1);
1080			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1081			if (x0 == NULL) {
1082				x->id.spi = htonl(spi);
1083				break;
1084			}
1085			xfrm_state_put(x0);
1086		}
1087	}
1088	if (x->id.spi) {
1089		spin_lock_bh(&xfrm_state_lock);
1090		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1091		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1092		spin_unlock_bh(&xfrm_state_lock);
1093		wake_up(&km_waitq);
1094	}
1095}
1096EXPORT_SYMBOL(xfrm_alloc_spi);
1097
1098int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1099		    void *data)
1100{
1101	int i;
1102	struct xfrm_state *x;
1103	struct hlist_node *entry;
1104	int count = 0;
1105	int err = 0;
1106
1107	spin_lock_bh(&xfrm_state_lock);
1108	for (i = 0; i <= xfrm_state_hmask; i++) {
1109		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1110			if (xfrm_id_proto_match(x->id.proto, proto))
1111				count++;
1112		}
1113	}
1114	if (count == 0) {
1115		err = -ENOENT;
1116		goto out;
1117	}
1118
1119	for (i = 0; i <= xfrm_state_hmask; i++) {
1120		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1121			if (!xfrm_id_proto_match(x->id.proto, proto))
1122				continue;
1123			err = func(x, --count, data);
1124			if (err)
1125				goto out;
1126		}
1127	}
1128out:
1129	spin_unlock_bh(&xfrm_state_lock);
1130	return err;
1131}
1132EXPORT_SYMBOL(xfrm_state_walk);
1133
1134
/*
 * Possibly send a replay-state async event (XFRM_MSG_NEWAE) for @x.
 *
 * @event is XFRM_REPLAY_UPDATE (sequence numbers just changed) or
 * XFRM_REPLAY_TIMEOUT (the replay timer fired).  When a notification is
 * sent, the current replay state is saved in x->preplay as the baseline
 * for the next comparison and, if replay_maxage is set, the replay timer
 * is re-armed.
 */
1135void xfrm_replay_notify(struct xfrm_state *x, int event)
1136{
1137	struct km_event c;
1138	/* we send notify messages in case
1139	 *  1. we updated one of the sequence numbers, and the seqno difference
1140	 *     is at least x->replay_maxdiff, in this case we also update the
1141	 *     timeout of our timer function
1142	 *  2. if x->replay_maxage has elapsed since last update,
1143	 *     and there were changes
1144	 *
1145	 *  The state structure must be locked!
1146	 */
1147
1148	switch (event) {
1149	case XFRM_REPLAY_UPDATE:
		/* Below the maxdiff threshold: stay quiet, unless a timeout
		 * was deferred earlier, in which case deliver it now. */
1150		if (x->replay_maxdiff &&
1151		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1152		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1153			if (x->xflags & XFRM_TIME_DEFER)
1154				event = XFRM_REPLAY_TIMEOUT;
1155			else
1156				return;
1157		}
1158
1159		break;
1160
1161	case XFRM_REPLAY_TIMEOUT:
		/* Nothing changed since the last notification: remember
		 * that a timeout is owed and send nothing. */
1162		if ((x->replay.seq == x->preplay.seq) &&
1163		    (x->replay.bitmap == x->preplay.bitmap) &&
1164		    (x->replay.oseq == x->preplay.oseq)) {
1165			x->xflags |= XFRM_TIME_DEFER;
1166			return;
1167		}
1168
1169		break;
1170	}
1171
1172	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	/* NOTE(review): only these two fields of c are set before it is
	 * handed to km_state_notify(). */
1173	c.event = XFRM_MSG_NEWAE;
1174	c.data.aevent = event;
1175	km_state_notify(x, &c);
1176
	/* mod_timer() returns 0 when the timer was not pending. */
1177	if (x->replay_maxage &&
1178	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1179		x->xflags &= ~XFRM_TIME_DEFER;
1180}
1181EXPORT_SYMBOL(xfrm_replay_notify);
1182
1183static void xfrm_replay_timer_handler(unsigned long data)
1184{
1185	struct xfrm_state *x = (struct xfrm_state*)data;
1186
1187	spin_lock(&x->lock);
1188
1189	if (x->km.state == XFRM_STATE_VALID) {
1190		if (xfrm_aevent_is_on())
1191			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1192		else
1193			x->xflags |= XFRM_TIME_DEFER;
1194	}
1195
1196	spin_unlock(&x->lock);
1197}
1198
1199int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1200{
1201	u32 diff;
1202	u32 seq = ntohl(net_seq);
1203
1204	if (unlikely(seq == 0))
1205		return -EINVAL;
1206
1207	if (likely(seq > x->replay.seq))
1208		return 0;
1209
1210	diff = x->replay.seq - seq;
1211	if (diff >= x->props.replay_window) {
1212		x->stats.replay_window++;
1213		return -EINVAL;
1214	}
1215
1216	if (x->replay.bitmap & (1U << diff)) {
1217		x->stats.replay++;
1218		return -EINVAL;
1219	}
1220	return 0;
1221}
1222EXPORT_SYMBOL(xfrm_replay_check);
1223
1224void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1225{
1226	u32 diff;
1227	u32 seq = ntohl(net_seq);
1228
1229	if (seq > x->replay.seq) {
1230		diff = seq - x->replay.seq;
1231		if (diff < x->props.replay_window)
1232			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1233		else
1234			x->replay.bitmap = 1;
1235		x->replay.seq = seq;
1236	} else {
1237		diff = x->replay.seq - seq;
1238		x->replay.bitmap |= (1U << diff);
1239	}
1240
1241	if (xfrm_aevent_is_on())
1242		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1243}
1244EXPORT_SYMBOL(xfrm_replay_advance);
1245
1246static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1247static DEFINE_RWLOCK(xfrm_km_lock);
1248
1249void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1250{
1251	struct xfrm_mgr *km;
1252
1253	read_lock(&xfrm_km_lock);
1254	list_for_each_entry(km, &xfrm_km_list, list)
1255		if (km->notify_policy)
1256			km->notify_policy(xp, dir, c);
1257	read_unlock(&xfrm_km_lock);
1258}
1259
1260void km_state_notify(struct xfrm_state *x, struct km_event *c)
1261{
1262	struct xfrm_mgr *km;
1263	read_lock(&xfrm_km_lock);
1264	list_for_each_entry(km, &xfrm_km_list, list)
1265		if (km->notify)
1266			km->notify(x, c);
1267	read_unlock(&xfrm_km_lock);
1268}
1269
1270EXPORT_SYMBOL(km_policy_notify);
1271EXPORT_SYMBOL(km_state_notify);
1272
1273void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1274{
1275	struct km_event c;
1276
1277	c.data.hard = hard;
1278	c.pid = pid;
1279	c.event = XFRM_MSG_EXPIRE;
1280	km_state_notify(x, &c);
1281
1282	if (hard)
1283		wake_up(&km_waitq);
1284}
1285
1286EXPORT_SYMBOL(km_state_expired);
1287/*
1288 * We send to all registered managers regardless of failure
1289 * We are happy with one success
1290*/
1291int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1292{
1293	int err = -EINVAL, acqret;
1294	struct xfrm_mgr *km;
1295
1296	read_lock(&xfrm_km_lock);
1297	list_for_each_entry(km, &xfrm_km_list, list) {
1298		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1299		if (!acqret)
1300			err = acqret;
1301	}
1302	read_unlock(&xfrm_km_lock);
1303	return err;
1304}
1305EXPORT_SYMBOL(km_query);
1306
1307int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1308{
1309	int err = -EINVAL;
1310	struct xfrm_mgr *km;
1311
1312	read_lock(&xfrm_km_lock);
1313	list_for_each_entry(km, &xfrm_km_list, list) {
1314		if (km->new_mapping)
1315			err = km->new_mapping(x, ipaddr, sport);
1316		if (!err)
1317			break;
1318	}
1319	read_unlock(&xfrm_km_lock);
1320	return err;
1321}
1322EXPORT_SYMBOL(km_new_mapping);
1323
1324void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1325{
1326	struct km_event c;
1327
1328	c.data.hard = hard;
1329	c.pid = pid;
1330	c.event = XFRM_MSG_POLEXPIRE;
1331	km_policy_notify(pol, dir, &c);
1332
1333	if (hard)
1334		wake_up(&km_waitq);
1335}
1336EXPORT_SYMBOL(km_policy_expired);
1337
1338int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1339{
1340	int err = -EINVAL;
1341	int ret;
1342	struct xfrm_mgr *km;
1343
1344	read_lock(&xfrm_km_lock);
1345	list_for_each_entry(km, &xfrm_km_list, list) {
1346		if (km->report) {
1347			ret = km->report(proto, sel, addr);
1348			if (!ret)
1349				err = ret;
1350		}
1351	}
1352	read_unlock(&xfrm_km_lock);
1353	return err;
1354}
1355EXPORT_SYMBOL(km_report);
1356
1357int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1358{
1359	int err;
1360	u8 *data;
1361	struct xfrm_mgr *km;
1362	struct xfrm_policy *pol = NULL;
1363
1364	if (optlen <= 0 || optlen > PAGE_SIZE)
1365		return -EMSGSIZE;
1366
1367	data = kmalloc(optlen, GFP_KERNEL);
1368	if (!data)
1369		return -ENOMEM;
1370
1371	err = -EFAULT;
1372	if (copy_from_user(data, optval, optlen))
1373		goto out;
1374
1375	err = -EINVAL;
1376	read_lock(&xfrm_km_lock);
1377	list_for_each_entry(km, &xfrm_km_list, list) {
1378		pol = km->compile_policy(sk, optname, data,
1379					 optlen, &err);
1380		if (err >= 0)
1381			break;
1382	}
1383	read_unlock(&xfrm_km_lock);
1384
1385	if (err >= 0) {
1386		xfrm_sk_policy_insert(sk, err, pol);
1387		xfrm_pol_put(pol);
1388		err = 0;
1389	}
1390
1391out:
1392	kfree(data);
1393	return err;
1394}
1395EXPORT_SYMBOL(xfrm_user_policy);
1396
1397int xfrm_register_km(struct xfrm_mgr *km)
1398{
1399	write_lock_bh(&xfrm_km_lock);
1400	list_add_tail(&km->list, &xfrm_km_list);
1401	write_unlock_bh(&xfrm_km_lock);
1402	return 0;
1403}
1404EXPORT_SYMBOL(xfrm_register_km);
1405
1406int xfrm_unregister_km(struct xfrm_mgr *km)
1407{
1408	write_lock_bh(&xfrm_km_lock);
1409	list_del(&km->list);
1410	write_unlock_bh(&xfrm_km_lock);
1411	return 0;
1412}
1413EXPORT_SYMBOL(xfrm_unregister_km);
1414
1415int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1416{
1417	int err = 0;
1418	if (unlikely(afinfo == NULL))
1419		return -EINVAL;
1420	if (unlikely(afinfo->family >= NPROTO))
1421		return -EAFNOSUPPORT;
1422	write_lock_bh(&xfrm_state_afinfo_lock);
1423	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1424		err = -ENOBUFS;
1425	else
1426		xfrm_state_afinfo[afinfo->family] = afinfo;
1427	write_unlock_bh(&xfrm_state_afinfo_lock);
1428	return err;
1429}
1430EXPORT_SYMBOL(xfrm_state_register_afinfo);
1431
1432int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1433{
1434	int err = 0;
1435	if (unlikely(afinfo == NULL))
1436		return -EINVAL;
1437	if (unlikely(afinfo->family >= NPROTO))
1438		return -EAFNOSUPPORT;
1439	write_lock_bh(&xfrm_state_afinfo_lock);
1440	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1441		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1442			err = -EINVAL;
1443		else
1444			xfrm_state_afinfo[afinfo->family] = NULL;
1445	}
1446	write_unlock_bh(&xfrm_state_afinfo_lock);
1447	return err;
1448}
1449EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1450
1451static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1452{
1453	struct xfrm_state_afinfo *afinfo;
1454	if (unlikely(family >= NPROTO))
1455		return NULL;
1456	read_lock(&xfrm_state_afinfo_lock);
1457	afinfo = xfrm_state_afinfo[family];
1458	if (unlikely(!afinfo))
1459		read_unlock(&xfrm_state_afinfo_lock);
1460	return afinfo;
1461}
1462
1463static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1464{
1465	read_unlock(&xfrm_state_afinfo_lock);
1466}
1467
1468/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1469void xfrm_state_delete_tunnel(struct xfrm_state *x)
1470{
1471	if (x->tunnel) {
1472		struct xfrm_state *t = x->tunnel;
1473
1474		if (atomic_read(&t->tunnel_users) == 2)
1475			xfrm_state_delete(t);
1476		atomic_dec(&t->tunnel_users);
1477		xfrm_state_put(t);
1478		x->tunnel = NULL;
1479	}
1480}
1481EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1482
1483/*
1484 * This function is NOT optimal.  For example, with ESP it will give an
1485 * MTU that's usually two bytes short of being optimal.  However, it will
1486 * usually give an answer that's a multiple of 4 provided the input is
1487 * also a multiple of 4.
1488 */
1489int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1490{
1491	int res = mtu;
1492
1493	res -= x->props.header_len;
1494
1495	for (;;) {
1496		int m = res;
1497
1498		if (m < 68)
1499			return 68;
1500
1501		spin_lock_bh(&x->lock);
1502		if (x->km.state == XFRM_STATE_VALID &&
1503		    x->type && x->type->get_max_size)
1504			m = x->type->get_max_size(x, m);
1505		else
1506			m += x->props.header_len;
1507		spin_unlock_bh(&x->lock);
1508
1509		if (m <= mtu)
1510			break;
1511		res -= (m - mtu);
1512	}
1513
1514	return res;
1515}
1516
1517int xfrm_init_state(struct xfrm_state *x)
1518{
1519	struct xfrm_state_afinfo *afinfo;
1520	int family = x->props.family;
1521	int err;
1522
1523	err = -EAFNOSUPPORT;
1524	afinfo = xfrm_state_get_afinfo(family);
1525	if (!afinfo)
1526		goto error;
1527
1528	err = 0;
1529	if (afinfo->init_flags)
1530		err = afinfo->init_flags(x);
1531
1532	xfrm_state_put_afinfo(afinfo);
1533
1534	if (err)
1535		goto error;
1536
1537	err = -EPROTONOSUPPORT;
1538	x->type = xfrm_get_type(x->id.proto, family);
1539	if (x->type == NULL)
1540		goto error;
1541
1542	err = x->type->init_state(x);
1543	if (err)
1544		goto error;
1545
1546	x->mode = xfrm_get_mode(x->props.mode, family);
1547	if (x->mode == NULL)
1548		goto error;
1549
1550	x->km.state = XFRM_STATE_VALID;
1551
1552error:
1553	return err;
1554}
1555
1556EXPORT_SYMBOL(xfrm_init_state);
1557
1558void __init xfrm_state_init(void)
1559{
1560	unsigned int sz;
1561
1562	sz = sizeof(struct hlist_head) * 8;
1563
1564	xfrm_state_bydst = xfrm_hash_alloc(sz);
1565	xfrm_state_bysrc = xfrm_hash_alloc(sz);
1566	xfrm_state_byspi = xfrm_hash_alloc(sz);
1567	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1568		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1569	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1570
1571	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1572}
1573
1574