xfrm_state.c revision c9204d9ca79baac564b49d36d0228a69d7ded084
1/*
2 * xfrm_state.c
3 *
4 * Changes:
5 *	Mitsuru KANDA @USAGI
6 * 	Kazunori MIYAZAWA @USAGI
7 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * 		IPv6 support
9 * 	YOSHIFUJI Hideaki @USAGI
10 * 		Split up af-specific functions
11 *	Derek Atkins <derek@ihtfp.com>
12 *		Add UDP Encapsulation
13 *
14 */
15
16#include <linux/workqueue.h>
17#include <net/xfrm.h>
18#include <linux/pfkeyv2.h>
19#include <linux/ipsec.h>
20#include <linux/module.h>
21#include <linux/cache.h>
22#include <asm/uaccess.h>
23#include <linux/audit.h>
24
25#include "xfrm_hash.h"
26
/* Netlink socket used to talk to key managers / userspace. */
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

/* Sysctl-tunable default timer interval for async replay events. */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

/* Sysctl-tunable default sequence-number delta that triggers an aevent. */
u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);

/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

/* Protects the hash tables, xfrm_state_num and xfrm_state_genid below. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Number of buckets minus one (tables are power-of-two sized). */
static unsigned int xfrm_state_hmask __read_mostly;
/* Upper bound on bucket count; resizing stops once this is reached. */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Count of installed states; compared against hmask to decide growth. */
static unsigned int xfrm_state_num;
/* Bumped on every insert; copied into same-keyed states (see
 * __xfrm_state_bump_genids). */
static unsigned int xfrm_state_genid;
58
/* Bucket index in the bydst table for (daddr, saddr, reqid). */
static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
					 xfrm_address_t *saddr,
					 u32 reqid,
					 unsigned short family)
{
	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
}
66
/* Bucket index in the bysrc table for (daddr, saddr). */
static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
					 xfrm_address_t *saddr,
					 unsigned short family)
{
	return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
}
73
/* Bucket index in the byspi table for (daddr, spi, proto). */
static inline unsigned int
xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
{
	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
}
79
/* Move every state on one old bydst chain into the new tables, rehashing
 * with @nhashmask.  hlist_add_head() re-points each node's links; this is
 * safe because the _safe iterator caches the next pointer and the old
 * tables are freed wholesale afterwards (see xfrm_hash_resize).
 */
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
				    x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		/* Only states that already own an SPI live in the byspi
		 * table (larval ACQ states have spi == 0). */
		if (x->id.spi) {
			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
					    x->id.proto, x->props.family,
					    nhashmask);
			hlist_add_head(&x->byspi, nspitable+h);
		}
	}
}
110
111static unsigned long xfrm_hash_new_size(void)
112{
113	return ((xfrm_state_hmask + 1) << 1) *
114		sizeof(struct hlist_head);
115}
116
/* Serializes concurrent resize attempts scheduled from grow checks. */
static DEFINE_MUTEX(hash_resize_mutex);

/* Workqueue handler: grow all three state hash tables to double their
 * current size, rehash every state, then free the old tables.
 */
static void xfrm_hash_resize(struct work_struct *__unused)
{
	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	mutex_lock(&hash_resize_mutex);

	/* Allocate all three new tables up front; on any failure free
	 * what was obtained and bail out without touching live tables. */
	nsize = xfrm_hash_new_size();
	ndst = xfrm_hash_alloc(nsize);
	if (!ndst)
		goto out_unlock;
	nsrc = xfrm_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_hash_free(ndst, nsize);
		goto out_unlock;
	}
	nspi = xfrm_hash_alloc(nsize);
	if (!nspi) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		goto out_unlock;
	}

	spin_lock_bh(&xfrm_state_lock);

	/* Walk every old bydst chain; transfer rehashes each state into
	 * all three new tables. */
	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	for (i = xfrm_state_hmask; i >= 0; i--)
		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
				   nhashmask);

	odst = xfrm_state_bydst;
	osrc = xfrm_state_bysrc;
	ospi = xfrm_state_byspi;
	ohashmask = xfrm_state_hmask;

	/* Publish the new tables while still holding the table lock. */
	xfrm_state_bydst = ndst;
	xfrm_state_bysrc = nsrc;
	xfrm_state_byspi = nspi;
	xfrm_state_hmask = nhashmask;

	spin_unlock_bh(&xfrm_state_lock);

	/* Old tables are unreachable now; every node has been moved. */
	osize = (ohashmask + 1) * sizeof(struct hlist_head);
	xfrm_hash_free(odst, osize);
	xfrm_hash_free(osrc, osize);
	xfrm_hash_free(ospi, osize);

out_unlock:
	mutex_unlock(&hash_resize_mutex);
}
171
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

/* Woken when states are inserted, flushed, garbage-collected or expire. */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

/* Protects xfrm_state_afinfo[], the per-address-family ops table. */
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: dead states are parked on xfrm_state_gc_list
 * (under xfrm_state_gc_lock) and freed later by xfrm_state_gc_work,
 * where sleeping (del_timer_sync) is allowed. */
static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
191
/* Final teardown of a dead state: stop both timers synchronously and
 * release every attached resource.  Runs only from the GC work item,
 * so it may sleep (del_timer_sync).
 */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	del_timer_sync(&x->timer);
	del_timer_sync(&x->rtimer);
	/* kfree(NULL) is a no-op, so unset algorithms are harmless. */
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->mode)
		xfrm_put_mode(x->mode);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}
210
/* Workqueue handler: atomically steal the whole pending GC list, then
 * destroy each state outside the lock.  Dead states are chained through
 * their (now unused) bydst member — see __xfrm_state_destroy.
 */
static void xfrm_state_gc_task(struct work_struct *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_state_gc_lock);
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	wake_up(&km_waitq);
}
227
228static inline unsigned long make_jiffies(long secs)
229{
230	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
231		return MAX_SCHEDULE_TIMEOUT-1;
232	else
233	        return secs*HZ;
234}
235
/* Per-state lifetime timer.  Enforces hard add/use expiry (delete +
 * notify), soft expiry (warn the key manager, mark the state dying) and
 * reschedules itself for the nearest future deadline.
 */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;	/* seconds until the next deadline */
	int warn = 0;
	int err = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	/* Hard limits: past-due means the state expires right now. */
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* A never-used state counts as first used "now". */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft expiry was already signalled once; don't warn again. */
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);	/* soft expire notification */
resched:
	if (next != LONG_MAX)
		mod_timer(&x->timer, jiffies + make_jiffies(next));

	goto out;

expired:
	/* A larval state without an SPI just flips to EXPIRED and gets
	 * polled again shortly; waiters on km_waitq are woken. */
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}

	err = __xfrm_state_delete(x);
	if (!err && x->id.spi)
		km_state_expired(x, 1, 0);	/* hard expire notification */

	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
		       AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);

out:
	spin_unlock(&x->lock);
}
311
312static void xfrm_replay_timer_handler(unsigned long data);
313
/* Allocate and minimally initialize a new state with one reference held
 * by the caller.  Timers are set up but not armed; byte/packet limits
 * default to "infinite".  Returns NULL on allocation failure (GFP_ATOMIC).
 */
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_HLIST_NODE(&x->bydst);
		INIT_HLIST_NODE(&x->bysrc);
		INIT_HLIST_NODE(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data	  = (unsigned long)x;
		init_timer(&x->rtimer);
		x->rtimer.function = xfrm_replay_timer_handler;
		x->rtimer.data     = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->replay_maxage = 0;
		x->replay_maxdiff = 0;
		spin_lock_init(&x->lock);
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
344
/* Queue a dead (already unhashed) state for deferred destruction.  The
 * bydst member is reused as the GC-list link; actual freeing happens in
 * xfrm_state_gc_task where sleeping is allowed.
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
355
/* Mark @x dead and unhash it from all tables.  Caller must hold x->lock.
 * Returns 0 on success, -ESRCH if the state was already dead.
 */
int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		hlist_del(&x->bydst);
		hlist_del(&x->bysrc);
		if (x->id.spi)
			hlist_del(&x->byspi);
		xfrm_state_num--;
		spin_unlock(&xfrm_state_lock);

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		__xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
381
/* Locked wrapper around __xfrm_state_delete(): takes x->lock with BHs
 * disabled.  Returns 0 or -ESRCH (already dead).
 */
int xfrm_state_delete(struct xfrm_state *x)
{
	int err;

	spin_lock_bh(&x->lock);
	err = __xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_state_delete);
393
/* Delete (and audit) every non-kernel-internal state whose protocol
 * matches @proto.  The table lock must be dropped around each deletion,
 * so the scan of the current chain restarts from its head afterwards;
 * the extra hold keeps @x alive across the unlocked window.
 */
void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
{
	int i;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_audit_log(audit_info->loginuid,
					       audit_info->secid,
					       AUDIT_MAC_IPSEC_DELSA,
					       err ? 0 : 1, NULL, x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
426
/* Initialize the temporary selector of a larval state from the flow and
 * template via the per-family afinfo hook.  Returns 0 on success or -1
 * when the address family has no registered afinfo.
 */
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -1;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
	return 0;
}
440
441static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
442{
443	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
444	struct xfrm_state *x;
445	struct hlist_node *entry;
446
447	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
448		if (x->props.family != family ||
449		    x->id.spi       != spi ||
450		    x->id.proto     != proto)
451			continue;
452
453		switch (family) {
454		case AF_INET:
455			if (x->id.daddr.a4 != daddr->a4)
456				continue;
457			break;
458		case AF_INET6:
459			if (!ipv6_addr_equal((struct in6_addr *)daddr,
460					     (struct in6_addr *)
461					     x->id.daddr.a6))
462				continue;
463			break;
464		};
465
466		xfrm_state_hold(x);
467		return x;
468	}
469
470	return NULL;
471}
472
473static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
474{
475	unsigned int h = xfrm_src_hash(daddr, saddr, family);
476	struct xfrm_state *x;
477	struct hlist_node *entry;
478
479	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
480		if (x->props.family != family ||
481		    x->id.proto     != proto)
482			continue;
483
484		switch (family) {
485		case AF_INET:
486			if (x->id.daddr.a4 != daddr->a4 ||
487			    x->props.saddr.a4 != saddr->a4)
488				continue;
489			break;
490		case AF_INET6:
491			if (!ipv6_addr_equal((struct in6_addr *)daddr,
492					     (struct in6_addr *)
493					     x->id.daddr.a6) ||
494			    !ipv6_addr_equal((struct in6_addr *)saddr,
495					     (struct in6_addr *)
496					     x->props.saddr.a6))
497				continue;
498			break;
499		};
500
501		xfrm_state_hold(x);
502		return x;
503	}
504
505	return NULL;
506}
507
508static inline struct xfrm_state *
509__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
510{
511	if (use_spi)
512		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
513					   x->id.proto, family);
514	else
515		return __xfrm_state_lookup_byaddr(&x->id.daddr,
516						  &x->props.saddr,
517						  x->id.proto, family);
518}
519
520static void xfrm_hash_grow_check(int have_hash_collision)
521{
522	if (have_hash_collision &&
523	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
524	    xfrm_state_num > xfrm_state_hmask)
525		schedule_work(&xfrm_hash_work);
526}
527
/* Main SA resolution for output: find the best VALID state matching
 * (daddr, saddr, template, policy), or create a larval XFRM_STATE_ACQ
 * state and ask the key manager (km_query) to negotiate a real one.
 * On success returns a held state; otherwise NULL with *err set
 * (-EAGAIN while an acquisition is already in progress).
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				/* Prefer non-dying states; among equals,
				 * prefer the most recently added. */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		/* A state with the requested SPI already exists under a
		 * different key tuple: refuse to create a duplicate. */
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		if (km_query(x, tmpl, pol) == 0) {
			/* Key manager accepted the query: hash the larval
			 * state and give it XFRM_ACQ_EXPIRES seconds. */
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			h = xfrm_src_hash(daddr, saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
			xfrm_state_num++;
			xfrm_hash_grow_check(x->bydst.next != NULL);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
638
/* Link @x into all three hash tables, bump the generation counter and
 * arm its timers.  Caller holds xfrm_state_lock.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h;

	x->genid = ++xfrm_state_genid;

	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	hlist_add_head(&x->bydst, xfrm_state_bydst+h);

	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);

	/* Only states that already own an SPI go into the byspi table. */
	if (x->id.spi) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
	}

	/* Kick the lifetime timer soon so expiry checks start promptly,
	 * and the replay timer if a notification interval is set. */
	mod_timer(&x->timer, jiffies + HZ);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	wake_up(&km_waitq);

	xfrm_state_num++;

	xfrm_hash_grow_check(x->bydst.next != NULL);
}
669
/* xfrm_state_lock is held.
 *
 * Stamp every installed state that shares (daddr, saddr, reqid, family)
 * with @xnew with the current generation id, so users of genid can tell
 * those states were superseded.
 */
static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
{
	unsigned short family = xnew->props.family;
	u32 reqid = xnew->props.reqid;
	struct xfrm_state *x;
	struct hlist_node *entry;
	unsigned int h;

	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family	== family &&
		    x->props.reqid	== reqid &&
		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
			x->genid = xfrm_state_genid;
	}
}
688
/* Public insert: bump generation ids of same-keyed states, then hash
 * @x in, all under xfrm_state_lock.
 */
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);
697
698/* xfrm_state_lock is held */
699static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
700{
701	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
702	struct hlist_node *entry;
703	struct xfrm_state *x;
704
705	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
706		if (x->props.reqid  != reqid ||
707		    x->props.mode   != mode ||
708		    x->props.family != family ||
709		    x->km.state     != XFRM_STATE_ACQ ||
710		    x->id.spi       != 0)
711			continue;
712
713		switch (family) {
714		case AF_INET:
715			if (x->id.daddr.a4    != daddr->a4 ||
716			    x->props.saddr.a4 != saddr->a4)
717				continue;
718			break;
719		case AF_INET6:
720			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
721					     (struct in6_addr *)daddr) ||
722			    !ipv6_addr_equal((struct in6_addr *)
723					     x->props.saddr.a6,
724					     (struct in6_addr *)saddr))
725				continue;
726			break;
727		};
728
729		xfrm_state_hold(x);
730		return x;
731	}
732
733	if (!create)
734		return NULL;
735
736	x = xfrm_state_alloc();
737	if (likely(x)) {
738		switch (family) {
739		case AF_INET:
740			x->sel.daddr.a4 = daddr->a4;
741			x->sel.saddr.a4 = saddr->a4;
742			x->sel.prefixlen_d = 32;
743			x->sel.prefixlen_s = 32;
744			x->props.saddr.a4 = saddr->a4;
745			x->id.daddr.a4 = daddr->a4;
746			break;
747
748		case AF_INET6:
749			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
750				       (struct in6_addr *)daddr);
751			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
752				       (struct in6_addr *)saddr);
753			x->sel.prefixlen_d = 128;
754			x->sel.prefixlen_s = 128;
755			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
756				       (struct in6_addr *)saddr);
757			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
758				       (struct in6_addr *)daddr);
759			break;
760		};
761
762		x->km.state = XFRM_STATE_ACQ;
763		x->id.proto = proto;
764		x->props.family = family;
765		x->props.mode = mode;
766		x->props.reqid = reqid;
767		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
768		xfrm_state_hold(x);
769		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
770		add_timer(&x->timer);
771		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
772		h = xfrm_src_hash(daddr, saddr, family);
773		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
774		wake_up(&km_waitq);
775
776		xfrm_state_num++;
777
778		xfrm_hash_grow_check(x->bydst.next != NULL);
779	}
780
781	return x;
782}
783
784static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
785
/* Install a fully-specified state, failing with -EEXIST if an equivalent
 * state already exists.  A matching larval (ACQ) state — found via the
 * key-manager sequence number or via (mode, reqid, proto, addresses) —
 * is deleted after the insert so @x takes over cleanly.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	/* First try to find the larval state by the KM sequence number;
	 * discard it if it is for a different destination. */
	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	/* Fall back to lookup by key tuple (create == 0: lookup only). */
	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	/* Drop the superseded larval state outside the table lock. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
833
/* Update the installed state equivalent to @x.  If the resident state is
 * still larval (XFRM_STATE_ACQ), @x is inserted in its place and the
 * larva deleted.  Otherwise encap, coaddr, selector (for non-SPI
 * protocols) and lifetimes are copied into the resident state.
 * Returns 0 or a negative errno (-ESRCH none found, -EEXIST kernel-owned,
 * -EINVAL resident state no longer valid).
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Larval state: promote @x to be the real state. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		/* @x replaced the larva; remove the larval state now. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the timer so the new lifetimes take effect and
		 * re-check use-based expiry if the state has been used. */
		mod_timer(&x1->timer, jiffies + HZ);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
897
/* Check byte/packet lifetimes of @x.  Records first use, hard-expires the
 * state (via its timer) when a hard limit is reached, and sends a single
 * soft-expire notification when a soft limit is crossed.  Returns 0 while
 * the state remains usable, -EINVAL otherwise.
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		/* Fire the timer immediately to run the expiry path. */
		mod_timer(&x->timer, jiffies);
		return -EINVAL;
	}

	/* km.dying guards against repeated soft-expire notifications. */
	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
922
/* Ensure @skb has enough headroom for this state's transform header plus
 * the output device's link-layer reserve, expanding the head if needed.
 * Returns 0 or the pskb_expand_head() error.
 */
static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
		- skb_headroom(skb);

	if (nhead > 0)
		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

	/* Check tail too... */
	return 0;
}
934
/* Combined pre-output check: lifetime expiry first, then skb headroom.
 * Returns 0 when the packet may proceed, a negative errno otherwise.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
945
/* Locked wrapper around __xfrm_state_lookup(); returns a held state
 * or NULL.
 */
struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
		  unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_state_lookup(daddr, spi, proto, family);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);
958
/* Locked wrapper around __xfrm_state_lookup_byaddr(); returns a held
 * state or NULL.
 */
struct xfrm_state *
xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
			 u8 proto, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
971
/* Locked wrapper around __find_acq_core(): find (or, if @create, make)
 * a larval acquire state.  Returns a held state or NULL.
 */
struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
	      xfrm_address_t *daddr, xfrm_address_t *saddr,
	      int create, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
	spin_unlock_bh(&xfrm_state_lock);

	return x;
}
EXPORT_SYMBOL(xfrm_find_acq);
986
987#ifdef CONFIG_XFRM_SUB_POLICY
/* Sort @n templates from @src into @dst using the per-family hook (a
 * no-op if the family provides none).  Returns 0 or -EAFNOSUPPORT.
 */
int
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
	       unsigned short family)
{
	int err = 0;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	if (afinfo->tmpl_sort)
		err = afinfo->tmpl_sort(dst, src, n);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_tmpl_sort);
1005
/* Sort @n states from @src into @dst using the per-family hook (a no-op
 * if the family provides none).  Returns 0 or -EAFNOSUPPORT.
 */
int
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
		unsigned short family)
{
	int err = 0;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	if (afinfo->state_sort)
		err = afinfo->state_sort(dst, src, n);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_state_sort);
1023#endif
1024
/* Silly enough, but I'm lazy to build resolution list */

/* Linear scan of every bydst chain for a larval (ACQ) state with the
 * given key-manager sequence number.  Returns a held state or NULL.
 * Caller holds xfrm_state_lock.
 */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq &&
			    x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}
1045
/* Locked wrapper around __xfrm_find_acq_byseq(); returns a held state
 * or NULL.
 */
struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_find_acq_byseq(seq);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);
1056
1057u32 xfrm_get_acqseq(void)
1058{
1059	u32 res;
1060	static u32 acqseq;
1061	static DEFINE_SPINLOCK(acqseq_lock);
1062
1063	spin_lock_bh(&acqseq_lock);
1064	res = (++acqseq ? : ++acqseq);
1065	spin_unlock_bh(&acqseq_lock);
1066	return res;
1067}
1068EXPORT_SYMBOL(xfrm_get_acqseq);
1069
/* Assign an SPI to @x and hash it into the byspi table.  With
 * minspi == maxspi only that exact value is tried; otherwise up to
 * (high - low + 1) random probes are made in [low, high].  On collision
 * or exhaustion x->id.spi is left at zero and the state is not hashed.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			/* Requested SPI is already taken. */
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		u32 low = ntohl(minspi);
		u32 high = ntohl(maxspi);
		for (h=0; h<high-low+1; h++) {
			spi = low + net_random()%(high-low+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1109
/* Invoke @func for every state matching @proto.  Callbacks are deferred
 * by one entry so the final one can be distinguished: intermediate calls
 * receive the entry's 1-based position in @count, the final call passes
 * count == 0.  Aborts when @func returns nonzero; -ENOENT if no state
 * matched at all.  Runs entirely under xfrm_state_lock.
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x, *last = NULL;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			if (last) {
				err = func(last, count, data);
				if (err)
					goto out;
			}
			last = x;
			count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}
	/* Final entry is flagged by count == 0. */
	err = func(last, 0, data);
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1143
1144
/* Emit an XFRM_MSG_NEWAE notification for replay-counter changes and
 * snapshot the counters just reported into x->preplay.
 */
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated on of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		/* Below-threshold update: defer to the timer (or bail out
		 * entirely if no deferral is pending). */
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		/* Nothing changed since the last report: just mark a
		 * deferred notification for the next update. */
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	/* Restart the aging timer; clear the defer flag only if the timer
	 * was not already pending. */
	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		x->xflags &= ~XFRM_TIME_DEFER;
}
EXPORT_SYMBOL(xfrm_replay_notify);
1192
1193static void xfrm_replay_timer_handler(unsigned long data)
1194{
1195	struct xfrm_state *x = (struct xfrm_state*)data;
1196
1197	spin_lock(&x->lock);
1198
1199	if (x->km.state == XFRM_STATE_VALID) {
1200		if (xfrm_aevent_is_on())
1201			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1202		else
1203			x->xflags |= XFRM_TIME_DEFER;
1204	}
1205
1206	spin_unlock(&x->lock);
1207}
1208
1209int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1210{
1211	u32 diff;
1212	u32 seq = ntohl(net_seq);
1213
1214	if (unlikely(seq == 0))
1215		return -EINVAL;
1216
1217	if (likely(seq > x->replay.seq))
1218		return 0;
1219
1220	diff = x->replay.seq - seq;
1221	if (diff >= x->props.replay_window) {
1222		x->stats.replay_window++;
1223		return -EINVAL;
1224	}
1225
1226	if (x->replay.bitmap & (1U << diff)) {
1227		x->stats.replay++;
1228		return -EINVAL;
1229	}
1230	return 0;
1231}
1232EXPORT_SYMBOL(xfrm_replay_check);
1233
1234void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1235{
1236	u32 diff;
1237	u32 seq = ntohl(net_seq);
1238
1239	if (seq > x->replay.seq) {
1240		diff = seq - x->replay.seq;
1241		if (diff < x->props.replay_window)
1242			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1243		else
1244			x->replay.bitmap = 1;
1245		x->replay.seq = seq;
1246	} else {
1247		diff = x->replay.seq - seq;
1248		x->replay.bitmap |= (1U << diff);
1249	}
1250
1251	if (xfrm_aevent_is_on())
1252		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1253}
1254EXPORT_SYMBOL(xfrm_replay_advance);
1255
/* Registered key managers and the rwlock guarding the list: readers
 * hold it across notification fan-out, (un)registration takes it for
 * writing with BHs disabled. */
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1258
1259void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1260{
1261	struct xfrm_mgr *km;
1262
1263	read_lock(&xfrm_km_lock);
1264	list_for_each_entry(km, &xfrm_km_list, list)
1265		if (km->notify_policy)
1266			km->notify_policy(xp, dir, c);
1267	read_unlock(&xfrm_km_lock);
1268}
1269
1270void km_state_notify(struct xfrm_state *x, struct km_event *c)
1271{
1272	struct xfrm_mgr *km;
1273	read_lock(&xfrm_km_lock);
1274	list_for_each_entry(km, &xfrm_km_list, list)
1275		if (km->notify)
1276			km->notify(x, c);
1277	read_unlock(&xfrm_km_lock);
1278}
1279
1280EXPORT_SYMBOL(km_policy_notify);
1281EXPORT_SYMBOL(km_state_notify);
1282
1283void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1284{
1285	struct km_event c;
1286
1287	c.data.hard = hard;
1288	c.pid = pid;
1289	c.event = XFRM_MSG_EXPIRE;
1290	km_state_notify(x, &c);
1291
1292	if (hard)
1293		wake_up(&km_waitq);
1294}
1295
1296EXPORT_SYMBOL(km_state_expired);
1297/*
1298 * We send to all registered managers regardless of failure
1299 * We are happy with one success
1300*/
1301int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1302{
1303	int err = -EINVAL, acqret;
1304	struct xfrm_mgr *km;
1305
1306	read_lock(&xfrm_km_lock);
1307	list_for_each_entry(km, &xfrm_km_list, list) {
1308		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1309		if (!acqret)
1310			err = acqret;
1311	}
1312	read_unlock(&xfrm_km_lock);
1313	return err;
1314}
1315EXPORT_SYMBOL(km_query);
1316
1317int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1318{
1319	int err = -EINVAL;
1320	struct xfrm_mgr *km;
1321
1322	read_lock(&xfrm_km_lock);
1323	list_for_each_entry(km, &xfrm_km_list, list) {
1324		if (km->new_mapping)
1325			err = km->new_mapping(x, ipaddr, sport);
1326		if (!err)
1327			break;
1328	}
1329	read_unlock(&xfrm_km_lock);
1330	return err;
1331}
1332EXPORT_SYMBOL(km_new_mapping);
1333
1334void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1335{
1336	struct km_event c;
1337
1338	c.data.hard = hard;
1339	c.pid = pid;
1340	c.event = XFRM_MSG_POLEXPIRE;
1341	km_policy_notify(pol, dir, &c);
1342
1343	if (hard)
1344		wake_up(&km_waitq);
1345}
1346EXPORT_SYMBOL(km_policy_expired);
1347
1348int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1349{
1350	int err = -EINVAL;
1351	int ret;
1352	struct xfrm_mgr *km;
1353
1354	read_lock(&xfrm_km_lock);
1355	list_for_each_entry(km, &xfrm_km_list, list) {
1356		if (km->report) {
1357			ret = km->report(proto, sel, addr);
1358			if (!ret)
1359				err = ret;
1360		}
1361	}
1362	read_unlock(&xfrm_km_lock);
1363	return err;
1364}
1365EXPORT_SYMBOL(km_report);
1366
/* Handle the per-socket xfrm policy setsockopt: copy the user-supplied
 * policy blob into kernel memory, ask each registered key manager to
 * compile it into a struct xfrm_policy, and install the result on the
 * socket.
 *
 * compile_policy() communicates through @err: a negative value means
 * "rejected / not mine" and the scan continues; a non-negative value
 * is the policy direction and ends the scan.  Returns 0 on success or
 * a negative errno (-EMSGSIZE, -ENOMEM, -EFAULT, -EINVAL, or whatever
 * the last compile_policy() reported).
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	/* Reject empty or oversized option buffers up front. */
	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		/* err holds the policy direction at this point. */
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1406
/* Add a key manager to the global notification list.  Always returns
 * 0; new managers are appended, so they are notified last. */
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);
1415
/* Remove a key manager from the global notification list.  The caller
 * must ensure @km was previously registered; always returns 0. */
int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
1424
1425int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1426{
1427	int err = 0;
1428	if (unlikely(afinfo == NULL))
1429		return -EINVAL;
1430	if (unlikely(afinfo->family >= NPROTO))
1431		return -EAFNOSUPPORT;
1432	write_lock_bh(&xfrm_state_afinfo_lock);
1433	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1434		err = -ENOBUFS;
1435	else
1436		xfrm_state_afinfo[afinfo->family] = afinfo;
1437	write_unlock_bh(&xfrm_state_afinfo_lock);
1438	return err;
1439}
1440EXPORT_SYMBOL(xfrm_state_register_afinfo);
1441
1442int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1443{
1444	int err = 0;
1445	if (unlikely(afinfo == NULL))
1446		return -EINVAL;
1447	if (unlikely(afinfo->family >= NPROTO))
1448		return -EAFNOSUPPORT;
1449	write_lock_bh(&xfrm_state_afinfo_lock);
1450	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1451		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1452			err = -EINVAL;
1453		else
1454			xfrm_state_afinfo[afinfo->family] = NULL;
1455	}
1456	write_unlock_bh(&xfrm_state_afinfo_lock);
1457	return err;
1458}
1459EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1460
/* Look up the state AF operations for @family.
 *
 * NOTE: on success this returns with xfrm_state_afinfo_lock held for
 * reading; the caller must release it via xfrm_state_put_afinfo().
 * On failure (unknown or unregistered family) the lock is dropped
 * before NULL is returned. */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1472
/* Release the read lock taken by a successful xfrm_state_get_afinfo().
 * The @afinfo argument is unused; it only keeps get/put calls paired. */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1477
1478/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1479void xfrm_state_delete_tunnel(struct xfrm_state *x)
1480{
1481	if (x->tunnel) {
1482		struct xfrm_state *t = x->tunnel;
1483
1484		if (atomic_read(&t->tunnel_users) == 2)
1485			xfrm_state_delete(t);
1486		atomic_dec(&t->tunnel_users);
1487		xfrm_state_put(t);
1488		x->tunnel = NULL;
1489	}
1490}
1491EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1492
1493/*
1494 * This function is NOT optimal.  For example, with ESP it will give an
1495 * MTU that's usually two bytes short of being optimal.  However, it will
1496 * usually give an answer that's a multiple of 4 provided the input is
1497 * also a multiple of 4.
1498 */
1499int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1500{
1501	int res = mtu;
1502
1503	res -= x->props.header_len;
1504
1505	for (;;) {
1506		int m = res;
1507
1508		if (m < 68)
1509			return 68;
1510
1511		spin_lock_bh(&x->lock);
1512		if (x->km.state == XFRM_STATE_VALID &&
1513		    x->type && x->type->get_max_size)
1514			m = x->type->get_max_size(x, m);
1515		else
1516			m += x->props.header_len;
1517		spin_unlock_bh(&x->lock);
1518
1519		if (m <= mtu)
1520			break;
1521		res -= (m - mtu);
1522	}
1523
1524	return res;
1525}
1526
1527int xfrm_init_state(struct xfrm_state *x)
1528{
1529	struct xfrm_state_afinfo *afinfo;
1530	int family = x->props.family;
1531	int err;
1532
1533	err = -EAFNOSUPPORT;
1534	afinfo = xfrm_state_get_afinfo(family);
1535	if (!afinfo)
1536		goto error;
1537
1538	err = 0;
1539	if (afinfo->init_flags)
1540		err = afinfo->init_flags(x);
1541
1542	xfrm_state_put_afinfo(afinfo);
1543
1544	if (err)
1545		goto error;
1546
1547	err = -EPROTONOSUPPORT;
1548	x->type = xfrm_get_type(x->id.proto, family);
1549	if (x->type == NULL)
1550		goto error;
1551
1552	err = x->type->init_state(x);
1553	if (err)
1554		goto error;
1555
1556	x->mode = xfrm_get_mode(x->props.mode, family);
1557	if (x->mode == NULL)
1558		goto error;
1559
1560	x->km.state = XFRM_STATE_VALID;
1561
1562error:
1563	return err;
1564}
1565
1566EXPORT_SYMBOL(xfrm_init_state);
1567
/* Boot-time initialization: allocate the three state hash tables
 * (by destination, by source, by SPI) with an initial size of 8
 * buckets each, derive the index mask, and set up the state garbage
 * collection work item. */
void __init xfrm_state_init(void)
{
	unsigned int sz;

	/* 8 buckets per table to start with; sz is the byte size. */
	sz = sizeof(struct hlist_head) * 8;

	xfrm_state_bydst = xfrm_hash_alloc(sz);
	xfrm_state_bysrc = xfrm_hash_alloc(sz);
	xfrm_state_byspi = xfrm_hash_alloc(sz);
	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
	/* Bucket count is a power of two, so count - 1 works as a mask. */
	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
}
1583
1584