xfrm_state.c revision df01812eba19834e48abd43246abedfbc4feeb7e
1/*
2 * xfrm_state.c
3 *
4 * Changes:
5 *	Mitsuru KANDA @USAGI
6 * 	Kazunori MIYAZAWA @USAGI
7 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * 		IPv6 support
9 * 	YOSHIFUJI Hideaki @USAGI
10 * 		Split up af-specific functions
11 *	Derek Atkins <derek@ihtfp.com>
12 *		Add UDP Encapsulation
13 *
14 */
15
16#include <linux/workqueue.h>
17#include <net/xfrm.h>
18#include <linux/pfkeyv2.h>
19#include <linux/ipsec.h>
20#include <linux/module.h>
21#include <linux/cache.h>
22#include <asm/uaccess.h>
23
24#include "xfrm_hash.h"
25
26struct sock *xfrm_nl;
27EXPORT_SYMBOL(xfrm_nl);
28
29u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
30EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
32u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
33EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35u32 sysctl_xfrm_acq_expires __read_mostly = 30;
36
37/* Each xfrm_state may be linked to two tables:
38
39   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
40   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
41      destination/tunnel endpoint. (output)
42 */
43
44static DEFINE_SPINLOCK(xfrm_state_lock);
45
46/* Hash table to find appropriate SA towards given target (endpoint
47 * of tunnel or destination of transport mode) allowed by selector.
48 *
49 * Main use is finding SA after policy selected tunnel or transport mode.
50 * Also, it can be used by ah/esp icmp error handler to find offending SA.
51 */
52static struct hlist_head *xfrm_state_bydst __read_mostly;
53static struct hlist_head *xfrm_state_bysrc __read_mostly;
54static struct hlist_head *xfrm_state_byspi __read_mostly;
55static unsigned int xfrm_state_hmask __read_mostly;
56static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
57static unsigned int xfrm_state_num;
58static unsigned int xfrm_state_genid;
59
60static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
61static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
62
/* Bucket index in xfrm_state_bydst for (daddr, saddr, reqid, family),
 * bounded by the current xfrm_state_hmask.
 */
static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
					 xfrm_address_t *saddr,
					 u32 reqid,
					 unsigned short family)
{
	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
}
70
/* Bucket index in xfrm_state_bysrc for (daddr, saddr, family). */
static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
					 xfrm_address_t *saddr,
					 unsigned short family)
{
	return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
}
77
/* Bucket index in xfrm_state_byspi for (daddr, spi, proto, family). */
static inline unsigned int
xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
{
	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
}
83
/* Re-link every state on one old bydst chain into the three new tables,
 * hashing with the new mask.  hlist_add_head() rewrites the node links,
 * which is why the walk must use the _safe iterator; the old tables are
 * freed later by the caller (xfrm_hash_resize).
 */
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
				    x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		/* Only states with an assigned SPI live in the byspi table. */
		if (x->id.spi) {
			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
					    x->id.proto, x->props.family,
					    nhashmask);
			hlist_add_head(&x->byspi, nspitable+h);
		}
	}
}
114
115static unsigned long xfrm_hash_new_size(void)
116{
117	return ((xfrm_state_hmask + 1) << 1) *
118		sizeof(struct hlist_head);
119}
120
121static DEFINE_MUTEX(hash_resize_mutex);
122
123static void xfrm_hash_resize(struct work_struct *__unused)
124{
125	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
126	unsigned long nsize, osize;
127	unsigned int nhashmask, ohashmask;
128	int i;
129
130	mutex_lock(&hash_resize_mutex);
131
132	nsize = xfrm_hash_new_size();
133	ndst = xfrm_hash_alloc(nsize);
134	if (!ndst)
135		goto out_unlock;
136	nsrc = xfrm_hash_alloc(nsize);
137	if (!nsrc) {
138		xfrm_hash_free(ndst, nsize);
139		goto out_unlock;
140	}
141	nspi = xfrm_hash_alloc(nsize);
142	if (!nspi) {
143		xfrm_hash_free(ndst, nsize);
144		xfrm_hash_free(nsrc, nsize);
145		goto out_unlock;
146	}
147
148	spin_lock_bh(&xfrm_state_lock);
149
150	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
151	for (i = xfrm_state_hmask; i >= 0; i--)
152		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
153				   nhashmask);
154
155	odst = xfrm_state_bydst;
156	osrc = xfrm_state_bysrc;
157	ospi = xfrm_state_byspi;
158	ohashmask = xfrm_state_hmask;
159
160	xfrm_state_bydst = ndst;
161	xfrm_state_bysrc = nsrc;
162	xfrm_state_byspi = nspi;
163	xfrm_state_hmask = nhashmask;
164
165	spin_unlock_bh(&xfrm_state_lock);
166
167	osize = (ohashmask + 1) * sizeof(struct hlist_head);
168	xfrm_hash_free(odst, osize);
169	xfrm_hash_free(osrc, osize);
170	xfrm_hash_free(ospi, osize);
171
172out_unlock:
173	mutex_unlock(&hash_resize_mutex);
174}
175
176static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
177
178DECLARE_WAIT_QUEUE_HEAD(km_waitq);
179EXPORT_SYMBOL(km_waitq);
180
181static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
182static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
183
184static struct work_struct xfrm_state_gc_work;
185static HLIST_HEAD(xfrm_state_gc_list);
186static DEFINE_SPINLOCK(xfrm_state_gc_lock);
187
188int __xfrm_state_delete(struct xfrm_state *x);
189
190int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
191void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
192
/* Look up the per-family afinfo and return it with xfrm_state_afinfo_lock
 * held for writing.  On success the caller MUST release the lock via
 * xfrm_state_unlock_afinfo(); on failure (NULL return) the lock has
 * already been dropped here.
 */
static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	write_lock_bh(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		write_unlock_bh(&xfrm_state_afinfo_lock);
	return afinfo;
}
204
/* Counterpart of xfrm_state_lock_afinfo(); drops the write lock taken
 * there.  @afinfo is unused but keeps the pairing explicit at call sites.
 */
static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo)
{
	write_unlock_bh(&xfrm_state_afinfo_lock);
}
209
210int xfrm_register_type(struct xfrm_type *type, unsigned short family)
211{
212	struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
213	struct xfrm_type **typemap;
214	int err = 0;
215
216	if (unlikely(afinfo == NULL))
217		return -EAFNOSUPPORT;
218	typemap = afinfo->type_map;
219
220	if (likely(typemap[type->proto] == NULL))
221		typemap[type->proto] = type;
222	else
223		err = -EEXIST;
224	xfrm_state_unlock_afinfo(afinfo);
225	return err;
226}
227EXPORT_SYMBOL(xfrm_register_type);
228
229int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
230{
231	struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
232	struct xfrm_type **typemap;
233	int err = 0;
234
235	if (unlikely(afinfo == NULL))
236		return -EAFNOSUPPORT;
237	typemap = afinfo->type_map;
238
239	if (unlikely(typemap[type->proto] != type))
240		err = -ENOENT;
241	else
242		typemap[type->proto] = NULL;
243	xfrm_state_unlock_afinfo(afinfo);
244	return err;
245}
246EXPORT_SYMBOL(xfrm_unregister_type);
247
/* Look up the transform type for (proto, family) and take a module
 * reference on it.  If the type is absent (or its module is going away),
 * try once to load "xfrm-type-<family>-<proto>" and retry the lookup.
 *
 * Returns the type with its module pinned, or NULL.  Callers release
 * with xfrm_put_type().
 */
static struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_type **typemap;
	struct xfrm_type *type;
	int modload_attempted = 0;

retry:
	afinfo = xfrm_state_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;
	typemap = afinfo->type_map;

	type = typemap[proto];
	/* try_module_get() failing means the owner module is unloading. */
	if (unlikely(type && !try_module_get(type->owner)))
		type = NULL;
	if (!type && !modload_attempted) {
		xfrm_state_put_afinfo(afinfo);
		request_module("xfrm-type-%d-%d", family, proto);
		modload_attempted = 1;
		goto retry;
	}

	xfrm_state_put_afinfo(afinfo);
	return type;
}
274
/* Drop the module reference taken by xfrm_get_type(). */
static void xfrm_put_type(struct xfrm_type *type)
{
	module_put(type->owner);
}
279
/* Register an encapsulation mode (transport/tunnel/...) in the
 * per-family mode map, keyed by mode->encap.  Pins the afinfo owner
 * module for as long as the mode stays registered.
 *
 * Returns 0 on success, -EINVAL for an out-of-range encap value,
 * -EAFNOSUPPORT if the family has no afinfo, -EEXIST if the slot is
 * taken, -ENOENT if the afinfo owner module could not be pinned.
 */
int xfrm_register_mode(struct xfrm_mode *mode, int family)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_mode **modemap;
	int err;

	if (unlikely(mode->encap >= XFRM_MODE_MAX))
		return -EINVAL;

	afinfo = xfrm_state_lock_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	err = -EEXIST;
	modemap = afinfo->mode_map;
	if (modemap[mode->encap])
		goto out;

	err = -ENOENT;
	if (!try_module_get(afinfo->owner))
		goto out;

	mode->afinfo = afinfo;
	modemap[mode->encap] = mode;
	err = 0;

out:
	xfrm_state_unlock_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_register_mode);
311
/* Remove @mode from the per-family mode map and drop the afinfo owner
 * module reference taken at registration.
 *
 * Returns 0 on success, -EINVAL for an out-of-range encap value,
 * -EAFNOSUPPORT if the family has no afinfo, -ENOENT if the registered
 * entry is not @mode.
 */
int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_mode **modemap;
	int err;

	if (unlikely(mode->encap >= XFRM_MODE_MAX))
		return -EINVAL;

	afinfo = xfrm_state_lock_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	err = -ENOENT;
	modemap = afinfo->mode_map;
	if (likely(modemap[mode->encap] == mode)) {
		modemap[mode->encap] = NULL;
		module_put(mode->afinfo->owner);
		err = 0;
	}

	xfrm_state_unlock_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_unregister_mode);
337
/* Look up the mode for (encap, family) and pin its module, attempting a
 * one-shot load of "xfrm-mode-<family>-<encap>" if it is missing.
 * Mirrors xfrm_get_type(); release with xfrm_put_mode().
 */
static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_mode *mode;
	int modload_attempted = 0;

	if (unlikely(encap >= XFRM_MODE_MAX))
		return NULL;

retry:
	afinfo = xfrm_state_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;

	mode = afinfo->mode_map[encap];
	/* try_module_get() failing means the owner module is unloading. */
	if (unlikely(mode && !try_module_get(mode->owner)))
		mode = NULL;
	if (!mode && !modload_attempted) {
		xfrm_state_put_afinfo(afinfo);
		request_module("xfrm-mode-%d-%d", family, encap);
		modload_attempted = 1;
		goto retry;
	}

	xfrm_state_put_afinfo(afinfo);
	return mode;
}
365
/* Drop the module reference taken by xfrm_get_mode(). */
static void xfrm_put_mode(struct xfrm_mode *mode)
{
	module_put(mode->owner);
}
370
/* Final teardown of an unhashed, dead state.  Runs from the GC work
 * queue (process context), so del_timer_sync() is safe here.  Frees all
 * optional sub-allocations, releases mode/type module refs, lets the
 * type and LSM clean up, then frees the state itself.
 */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	del_timer_sync(&x->timer);
	del_timer_sync(&x->rtimer);
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->inner_mode)
		xfrm_put_mode(x->inner_mode);
	if (x->outer_mode)
		xfrm_put_mode(x->outer_mode);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}
391
/* GC worker: splice the pending list off under the GC lock, then destroy
 * each state outside it (destruction sleeps in del_timer_sync).  States
 * are chained onto the GC list via their bydst node — see
 * __xfrm_state_destroy().  Wakes km_waitq so waiters can re-check.
 */
static void xfrm_state_gc_task(struct work_struct *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_state_gc_lock);
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	wake_up(&km_waitq);
}
408
409static inline unsigned long make_jiffies(long secs)
410{
411	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
412		return MAX_SCHEDULE_TIMEOUT-1;
413	else
414		return secs*HZ;
415}
416
/* Per-state lifetime timer.  Checks hard and soft add/use expiry against
 * wall-clock seconds, then either re-arms itself for the nearest future
 * event, warns the key manager (soft limit), or deletes the state (hard
 * limit).  Runs in softirq context, hence plain spin_lock on x->lock.
 */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;	/* seconds until the nearest pending event */
	int warn = 0;		/* set when a soft limit has been crossed */
	int err = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	/* Hard lifetimes: crossing either one expires the state. */
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* use_time is 0 until first use; treat "never used" as now */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft limits warn only once; km.dying remembers a prior warning. */
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != LONG_MAX)
		mod_timer(&x->timer, jiffies + make_jiffies(next));

	goto out;

expired:
	/* An ACQ state without an SPI just flips to EXPIRED and polls
	 * again shortly, giving the key manager a chance to resolve it. */
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}

	err = __xfrm_state_delete(x);
	if (!err && x->id.spi)
		km_state_expired(x, 1, 0);

	xfrm_audit_state_delete(x, err ? 0 : 1,
				audit_get_loginuid(current->audit_context), 0);

out:
	spin_unlock(&x->lock);
}
492
493static void xfrm_replay_timer_handler(unsigned long data);
494
495struct xfrm_state *xfrm_state_alloc(void)
496{
497	struct xfrm_state *x;
498
499	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
500
501	if (x) {
502		atomic_set(&x->refcnt, 1);
503		atomic_set(&x->tunnel_users, 0);
504		INIT_HLIST_NODE(&x->bydst);
505		INIT_HLIST_NODE(&x->bysrc);
506		INIT_HLIST_NODE(&x->byspi);
507		setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x);
508		setup_timer(&x->rtimer, xfrm_replay_timer_handler,
509				(unsigned long)x);
510		x->curlft.add_time = get_seconds();
511		x->lft.soft_byte_limit = XFRM_INF;
512		x->lft.soft_packet_limit = XFRM_INF;
513		x->lft.hard_byte_limit = XFRM_INF;
514		x->lft.hard_packet_limit = XFRM_INF;
515		x->replay_maxage = 0;
516		x->replay_maxdiff = 0;
517		spin_lock_init(&x->lock);
518	}
519	return x;
520}
521EXPORT_SYMBOL(xfrm_state_alloc);
522
/* Queue a dead state for deferred destruction.  Called when the last
 * reference is dropped; the state must already be unhashed, so its
 * bydst node is reused as the GC-list linkage.  Actual teardown happens
 * in xfrm_state_gc_task().
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
533
/* Mark @x dead and unhash it from all three tables.  Caller holds
 * x->lock (see xfrm_state_delete); xfrm_state_lock is taken here for
 * the table manipulation.  Drops the base reference handed out by
 * xfrm_state_alloc().  Returns 0, or -ESRCH if already dead.
 */
int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		hlist_del(&x->bydst);
		hlist_del(&x->bysrc);
		if (x->id.spi)
			hlist_del(&x->byspi);
		xfrm_state_num--;
		spin_unlock(&xfrm_state_lock);

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
559
560int xfrm_state_delete(struct xfrm_state *x)
561{
562	int err;
563
564	spin_lock_bh(&x->lock);
565	err = __xfrm_state_delete(x);
566	spin_unlock_bh(&x->lock);
567
568	return err;
569}
570EXPORT_SYMBOL(xfrm_state_delete);
571
#ifdef CONFIG_SECURITY_NETWORK_XFRM
/* Pre-flight for xfrm_state_flush(): ask the LSM whether every state
 * matching @proto may be deleted.  On the first refusal, audit the
 * denial and return the LSM error so the flush aborts before touching
 * anything.  Called with xfrm_state_lock held.
 */
static inline int
xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
{
	int i, err = 0;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto) &&
			   (err = security_xfrm_state_delete(x)) != 0) {
				xfrm_audit_state_delete(x, 0,
							audit_info->loginuid,
							audit_info->secid);
				return err;
			}
		}
	}

	return err;
}
#else
/* No LSM support compiled in: every flush is permitted. */
static inline int
xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif
602
/* Delete every non-kernel-owned state whose protocol matches @proto,
 * auditing each deletion.  xfrm_state_lock must be dropped around
 * xfrm_state_delete() (it takes x->lock and the state lock itself), so
 * after each deletion the bucket walk restarts from the head — the
 * chain may have changed while the lock was released.
 */
int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
{
	int i, err = 0;

	spin_lock_bh(&xfrm_state_lock);
	err = xfrm_state_flush_secctx_check(proto, audit_info);
	if (err)
		goto out;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				/* Hold a ref so the state survives the
				 * unlocked window below. */
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_audit_state_delete(x, err ? 0 : 1,
							audit_info->loginuid,
							audit_info->secid);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
	return err;
}
EXPORT_SYMBOL(xfrm_state_flush);
641
/* Snapshot SAD statistics (state count, current hash mask, hash max)
 * under xfrm_state_lock so the three values are mutually consistent.
 */
void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
{
	spin_lock_bh(&xfrm_state_lock);
	si->sadcnt = xfrm_state_num;
	si->sadhcnt = xfrm_state_hmask;
	si->sadhmcnt = xfrm_state_hashmax;
	spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_sad_getinfo);
651
/* Fill in the temporary selector of an ACQUIRE state from the flow and
 * template via the per-family init_tempsel hook.  Returns 0 on success
 * or -1 if the family has no afinfo registered.
 */
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -1;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
	return 0;
}
665
/* Find a state by (daddr, spi, proto, family) in the byspi table and
 * return it with a reference held, or NULL.  Caller must hold
 * xfrm_state_lock.
 */
static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
		if (x->props.family != family ||
		    x->id.spi       != spi ||
		    x->id.proto     != proto)
			continue;

		/* Address comparison is family-specific. */
		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}
697
/* Find a state by (daddr, saddr, proto, family) in the bysrc table —
 * used for SPI-less protocols.  Returns the state with a reference
 * held, or NULL.  Caller must hold xfrm_state_lock.
 */
static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_src_hash(daddr, saddr, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
		if (x->props.family != family ||
		    x->id.proto     != proto)
			continue;

		/* Both endpoint addresses must match, per family. */
		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6) ||
			    !ipv6_addr_equal((struct in6_addr *)saddr,
					     (struct in6_addr *)
					     x->props.saddr.a6))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}
732
733static inline struct xfrm_state *
734__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
735{
736	if (use_spi)
737		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
738					   x->id.proto, family);
739	else
740		return __xfrm_state_lookup_byaddr(&x->id.daddr,
741						  &x->props.saddr,
742						  x->id.proto, family);
743}
744
745static void xfrm_hash_grow_check(int have_hash_collision)
746{
747	if (have_hash_collision &&
748	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
749	    xfrm_state_num > xfrm_state_hmask)
750		schedule_work(&xfrm_hash_work);
751}
752
/* Resolve a state for output: given the flow, template, and policy,
 * return a matching VALID state (best candidate by dying-ness, then
 * age), or create a larval ACQ state and query the key manager.  On
 * failure *err is set (-EAGAIN while an acquire is in progress, -ESRCH,
 * -EEXIST, -ENOMEM).  The returned state carries a reference.
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, x->sel.family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				/* Prefer non-dying states; among equals,
				 * prefer the most recently added. */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		/* A state with the template's exact SPI already exists
		 * under a different reqid/mode: refuse to shadow it. */
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		if (km_query(x, tmpl, pol) == 0) {
			/* Hash the larval state and arm its expiry so an
			 * unanswered acquire cannot linger forever. */
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			h = xfrm_src_hash(daddr, saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
			}
			x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
			x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
			add_timer(&x->timer);
			xfrm_state_num++;
			xfrm_hash_grow_check(x->bydst.next != NULL);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
863
/* Lookup-only variant of xfrm_state_find(): return the first VALID
 * state matching (daddr, saddr, family, mode, proto, reqid), with a
 * reference held, or NULL.  Never creates an ACQ state or contacts the
 * key manager.
 *
 * NOTE(review): this takes plain spin_lock (not _bh) on xfrm_state_lock
 * unlike the other entry points — presumably callers run with BHs
 * already disabled; confirm against call sites.
 */
struct xfrm_state *
xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		    unsigned short family, u8 mode, u8 proto, u32 reqid)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
	struct xfrm_state *rx = NULL, *x = NULL;
	struct hlist_node *entry;

	spin_lock(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    mode == x->props.mode &&
		    proto == x->id.proto &&
		    x->km.state == XFRM_STATE_VALID) {
			rx = x;
			break;
		}
	}

	if (rx)
		xfrm_state_hold(rx);
	spin_unlock(&xfrm_state_lock);


	return rx;
}
EXPORT_SYMBOL(xfrm_stateonly_find);
894
/* Link @x into the bydst/bysrc (and, if it has an SPI, byspi) tables,
 * stamp it with a fresh genid, arm its timers, and bump the state
 * count.  Caller must hold xfrm_state_lock.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h;

	x->genid = ++xfrm_state_genid;

	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	hlist_add_head(&x->bydst, xfrm_state_bydst+h);

	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);

	if (x->id.spi) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
	}

	/* Fire the lifetime timer soon so limits are evaluated promptly. */
	mod_timer(&x->timer, jiffies + HZ);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	wake_up(&km_waitq);

	xfrm_state_num++;

	/* Non-NULL next means we landed in a non-empty bucket. */
	xfrm_hash_grow_check(x->bydst.next != NULL);
}
925
/* xfrm_state_lock is held */
/* Invalidate cached bundles of states that share @xnew's key
 * (family, reqid, daddr, saddr) by bumping their genid to the current
 * global value, so the new state takes precedence.
 */
static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
{
	unsigned short family = xnew->props.family;
	u32 reqid = xnew->props.reqid;
	struct xfrm_state *x;
	struct hlist_node *entry;
	unsigned int h;

	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family	== family &&
		    x->props.reqid	== reqid &&
		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
			x->genid = xfrm_state_genid;
	}
}
944
/* Insert @x into the SAD unconditionally (no duplicate check — compare
 * xfrm_state_add), first bumping genids of any states it supersedes.
 */
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);
953
954/* xfrm_state_lock is held */
955static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
956{
957	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
958	struct hlist_node *entry;
959	struct xfrm_state *x;
960
961	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
962		if (x->props.reqid  != reqid ||
963		    x->props.mode   != mode ||
964		    x->props.family != family ||
965		    x->km.state     != XFRM_STATE_ACQ ||
966		    x->id.spi       != 0 ||
967		    x->id.proto	    != proto)
968			continue;
969
970		switch (family) {
971		case AF_INET:
972			if (x->id.daddr.a4    != daddr->a4 ||
973			    x->props.saddr.a4 != saddr->a4)
974				continue;
975			break;
976		case AF_INET6:
977			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
978					     (struct in6_addr *)daddr) ||
979			    !ipv6_addr_equal((struct in6_addr *)
980					     x->props.saddr.a6,
981					     (struct in6_addr *)saddr))
982				continue;
983			break;
984		}
985
986		xfrm_state_hold(x);
987		return x;
988	}
989
990	if (!create)
991		return NULL;
992
993	x = xfrm_state_alloc();
994	if (likely(x)) {
995		switch (family) {
996		case AF_INET:
997			x->sel.daddr.a4 = daddr->a4;
998			x->sel.saddr.a4 = saddr->a4;
999			x->sel.prefixlen_d = 32;
1000			x->sel.prefixlen_s = 32;
1001			x->props.saddr.a4 = saddr->a4;
1002			x->id.daddr.a4 = daddr->a4;
1003			break;
1004
1005		case AF_INET6:
1006			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
1007				       (struct in6_addr *)daddr);
1008			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
1009				       (struct in6_addr *)saddr);
1010			x->sel.prefixlen_d = 128;
1011			x->sel.prefixlen_s = 128;
1012			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
1013				       (struct in6_addr *)saddr);
1014			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
1015				       (struct in6_addr *)daddr);
1016			break;
1017		}
1018
1019		x->km.state = XFRM_STATE_ACQ;
1020		x->id.proto = proto;
1021		x->props.family = family;
1022		x->props.mode = mode;
1023		x->props.reqid = reqid;
1024		x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
1025		xfrm_state_hold(x);
1026		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
1027		add_timer(&x->timer);
1028		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
1029		h = xfrm_src_hash(daddr, saddr, family);
1030		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
1031
1032		xfrm_state_num++;
1033
1034		xfrm_hash_grow_check(x->bydst.next != NULL);
1035	}
1036
1037	return x;
1038}
1039
1040static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
1041
/* Add @x to the SAD, failing with -EEXIST if an equivalent state is
 * already present.  If @x resolves a pending acquire (matched by
 * km.seq or by the larval ACQ key), that larval state is deleted after
 * the insert, outside xfrm_state_lock.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	/* Try to pair @x with the ACQ state it answers, first by the key
	 * manager sequence number... */
	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && ((x1->id.proto != x->id.proto) ||
		    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	/* ...then by the (mode, reqid, proto, addresses) key. */
	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	/* The superseded larval state is removed with locks dropped. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
1090
1091#ifdef CONFIG_XFRM_MIGRATE
/* Deep-copy @orig into a new state for migration: identity, selector,
 * lifetimes, properties, algorithms, encapsulation and care-of address
 * are duplicated; the copy is then re-initialized via xfrm_init_state().
 * On failure, *errp (if non-NULL) receives the error and NULL is
 * returned.
 *
 * NOTE(review): the error path kfree()s the partially built state
 * directly rather than going through __xfrm_state_destroy(); the timers
 * set up by xfrm_state_alloc() have not been armed at that point —
 * confirm no path can have started them before an error here.
 */
struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
{
	int err = -ENOMEM;
	struct xfrm_state *x = xfrm_state_alloc();
	if (!x)
		goto error;

	memcpy(&x->id, &orig->id, sizeof(x->id));
	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
	x->props.mode = orig->props.mode;
	x->props.replay_window = orig->props.replay_window;
	x->props.reqid = orig->props.reqid;
	x->props.family = orig->props.family;
	x->props.saddr = orig->props.saddr;

	if (orig->aalg) {
		x->aalg = xfrm_algo_clone(orig->aalg);
		if (!x->aalg)
			goto error;
	}
	x->props.aalgo = orig->props.aalgo;

	if (orig->ealg) {
		x->ealg = xfrm_algo_clone(orig->ealg);
		if (!x->ealg)
			goto error;
	}
	x->props.ealgo = orig->props.ealgo;

	if (orig->calg) {
		x->calg = xfrm_algo_clone(orig->calg);
		if (!x->calg)
			goto error;
	}
	x->props.calgo = orig->props.calgo;

	if (orig->encap) {
		x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
		if (!x->encap)
			goto error;
	}

	if (orig->coaddr) {
		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
				    GFP_KERNEL);
		if (!x->coaddr)
			goto error;
	}

	err = xfrm_init_state(x);
	if (err)
		goto error;

	x->props.flags = orig->props.flags;

	x->curlft.add_time = orig->curlft.add_time;
	x->km.state = orig->km.state;
	x->km.seq = orig->km.seq;

	return x;

 error:
	if (errp)
		*errp = err;
	if (x) {
		/* kfree(NULL) is a no-op, so unconditionally free all
		 * optional sub-allocations. */
		kfree(x->aalg);
		kfree(x->ealg);
		kfree(x->calg);
		kfree(x->encap);
		kfree(x->coaddr);
	}
	kfree(x);
	return NULL;
}
EXPORT_SYMBOL(xfrm_state_clone);
1168
1169/* xfrm_state_lock is held */
1170struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
1171{
1172	unsigned int h;
1173	struct xfrm_state *x;
1174	struct hlist_node *entry;
1175
1176	if (m->reqid) {
1177		h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
1178				  m->reqid, m->old_family);
1179		hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
1180			if (x->props.mode != m->mode ||
1181			    x->id.proto != m->proto)
1182				continue;
1183			if (m->reqid && x->props.reqid != m->reqid)
1184				continue;
1185			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1186					  m->old_family) ||
1187			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1188					  m->old_family))
1189				continue;
1190			xfrm_state_hold(x);
1191			return x;
1192		}
1193	} else {
1194		h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
1195				  m->old_family);
1196		hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
1197			if (x->props.mode != m->mode ||
1198			    x->id.proto != m->proto)
1199				continue;
1200			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1201					  m->old_family) ||
1202			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1203					  m->old_family))
1204				continue;
1205			xfrm_state_hold(x);
1206			return x;
1207		}
1208	}
1209
1210	return NULL;
1211}
1212EXPORT_SYMBOL(xfrm_migrate_state_find);
1213
1214struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
1215				       struct xfrm_migrate *m)
1216{
1217	struct xfrm_state *xc;
1218	int err;
1219
1220	xc = xfrm_state_clone(x, &err);
1221	if (!xc)
1222		return NULL;
1223
1224	memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1225	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1226
1227	/* add state */
1228	if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
1229		/* a care is needed when the destination address of the
1230		   state is to be updated as it is a part of triplet */
1231		xfrm_state_insert(xc);
1232	} else {
1233		if ((err = xfrm_state_add(xc)) < 0)
1234			goto error;
1235	}
1236
1237	return xc;
1238error:
1239	kfree(xc);
1240	return NULL;
1241}
1242EXPORT_SYMBOL(xfrm_state_migrate);
1243#endif
1244
/* Update an existing SA with the parameters carried in @x (the
 * SADB_UPDATE / XFRM_MSG_UPDSA path).  If the matching entry is a
 * larval ACQ state, @x simply replaces it; otherwise selected fields of
 * the existing state are refreshed in place.  Returns 0 on success,
 * -ESRCH if no matching state exists, -EEXIST for kernel-owned states,
 * -EINVAL if the found state is no longer valid.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	/* Kernel-owned states may not be replaced from userspace. */
	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Larval entry: insert @x as the real SA.  x is cleared
		 * so the "replace" branch below runs instead of the
		 * in-place update. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		/* @x replaced the ACQ entry; retire the old one. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		/* In-place update: only fields that are safe to change
		 * on a live SA (encap, care-of address, selector when no
		 * SPI is in play, and lifetimes) are copied over. */
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the timer so the new lifetimes take effect. */
		mod_timer(&x1->timer, jiffies + HZ);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
1308
1309int xfrm_state_check_expire(struct xfrm_state *x)
1310{
1311	if (!x->curlft.use_time)
1312		x->curlft.use_time = get_seconds();
1313
1314	if (x->km.state != XFRM_STATE_VALID)
1315		return -EINVAL;
1316
1317	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1318	    x->curlft.packets >= x->lft.hard_packet_limit) {
1319		x->km.state = XFRM_STATE_EXPIRED;
1320		mod_timer(&x->timer, jiffies);
1321		return -EINVAL;
1322	}
1323
1324	if (!x->km.dying &&
1325	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
1326	     x->curlft.packets >= x->lft.soft_packet_limit)) {
1327		x->km.dying = 1;
1328		km_state_expired(x, 0, 0);
1329	}
1330	return 0;
1331}
1332EXPORT_SYMBOL(xfrm_state_check_expire);
1333
1334struct xfrm_state *
1335xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
1336		  unsigned short family)
1337{
1338	struct xfrm_state *x;
1339
1340	spin_lock_bh(&xfrm_state_lock);
1341	x = __xfrm_state_lookup(daddr, spi, proto, family);
1342	spin_unlock_bh(&xfrm_state_lock);
1343	return x;
1344}
1345EXPORT_SYMBOL(xfrm_state_lookup);
1346
1347struct xfrm_state *
1348xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1349			 u8 proto, unsigned short family)
1350{
1351	struct xfrm_state *x;
1352
1353	spin_lock_bh(&xfrm_state_lock);
1354	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1355	spin_unlock_bh(&xfrm_state_lock);
1356	return x;
1357}
1358EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1359
1360struct xfrm_state *
1361xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1362	      xfrm_address_t *daddr, xfrm_address_t *saddr,
1363	      int create, unsigned short family)
1364{
1365	struct xfrm_state *x;
1366
1367	spin_lock_bh(&xfrm_state_lock);
1368	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1369	spin_unlock_bh(&xfrm_state_lock);
1370
1371	return x;
1372}
1373EXPORT_SYMBOL(xfrm_find_acq);
1374
1375#ifdef CONFIG_XFRM_SUB_POLICY
1376int
1377xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1378	       unsigned short family)
1379{
1380	int err = 0;
1381	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1382	if (!afinfo)
1383		return -EAFNOSUPPORT;
1384
1385	spin_lock_bh(&xfrm_state_lock);
1386	if (afinfo->tmpl_sort)
1387		err = afinfo->tmpl_sort(dst, src, n);
1388	spin_unlock_bh(&xfrm_state_lock);
1389	xfrm_state_put_afinfo(afinfo);
1390	return err;
1391}
1392EXPORT_SYMBOL(xfrm_tmpl_sort);
1393
1394int
1395xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1396		unsigned short family)
1397{
1398	int err = 0;
1399	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1400	if (!afinfo)
1401		return -EAFNOSUPPORT;
1402
1403	spin_lock_bh(&xfrm_state_lock);
1404	if (afinfo->state_sort)
1405		err = afinfo->state_sort(dst, src, n);
1406	spin_unlock_bh(&xfrm_state_lock);
1407	xfrm_state_put_afinfo(afinfo);
1408	return err;
1409}
1410EXPORT_SYMBOL(xfrm_state_sort);
1411#endif
1412
1413/* Silly enough, but I'm lazy to build resolution list */
1414
1415static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1416{
1417	int i;
1418
1419	for (i = 0; i <= xfrm_state_hmask; i++) {
1420		struct hlist_node *entry;
1421		struct xfrm_state *x;
1422
1423		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1424			if (x->km.seq == seq &&
1425			    x->km.state == XFRM_STATE_ACQ) {
1426				xfrm_state_hold(x);
1427				return x;
1428			}
1429		}
1430	}
1431	return NULL;
1432}
1433
1434struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1435{
1436	struct xfrm_state *x;
1437
1438	spin_lock_bh(&xfrm_state_lock);
1439	x = __xfrm_find_acq_byseq(seq);
1440	spin_unlock_bh(&xfrm_state_lock);
1441	return x;
1442}
1443EXPORT_SYMBOL(xfrm_find_acq_byseq);
1444
1445u32 xfrm_get_acqseq(void)
1446{
1447	u32 res;
1448	static u32 acqseq;
1449	static DEFINE_SPINLOCK(acqseq_lock);
1450
1451	spin_lock_bh(&acqseq_lock);
1452	res = (++acqseq ? : ++acqseq);
1453	spin_unlock_bh(&acqseq_lock);
1454	return res;
1455}
1456EXPORT_SYMBOL(xfrm_get_acqseq);
1457
/* Assign an SPI to @x: the exact value when low == high, otherwise a
 * random unused value from [low, high], then hash the state into the
 * byspi table.  Returns 0 on success (or if @x already has an SPI),
 * -ENOENT if the state is dead or no free SPI was found.
 * Takes x->lock, and nests xfrm_state_lock inside it for the hash
 * insertion.
 */
int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
{
	unsigned int h;
	struct xfrm_state *x0;
	int err = -ENOENT;
	__be32 minspi = htonl(low);
	__be32 maxspi = htonl(high);

	spin_lock_bh(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto unlock;

	err = 0;
	if (x->id.spi)
		goto unlock;

	err = -ENOENT;

	if (minspi == maxspi) {
		/* Caller requested one specific SPI: take it iff unused. */
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			goto unlock;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		/* Randomized probing, at most high-low+1 attempts; h is
		 * reused here as the retry counter.  NOTE(review):
		 * high-low+1 wraps to 0 for the full 32-bit range, which
		 * would skip the loop entirely -- presumably callers
		 * never pass that range; verify. */
		for (h=0; h<high-low+1; h++) {
			spi = low + net_random()%(high-low+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* Publish the state in the byspi hash so inbound lookup
		 * by SPI can find it. */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		spin_unlock_bh(&xfrm_state_lock);

		err = 0;
	}

unlock:
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1510
/* Invoke @func on every state whose protocol matches @proto.
 * The callback is deliberately deferred by one element ("last") so the
 * final invocation can be made with count == 0, which lets callers
 * recognize the last entry of the walk.  Returns -ENOENT when nothing
 * matched, otherwise the first nonzero return of @func (0 on success).
 * Runs entirely under xfrm_state_lock, so @func must not sleep.
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x, *last = NULL;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			if (last) {
				/* Deliver the previously seen state now
				 * that we know it was not the last one. */
				err = func(last, count, data);
				if (err)
					goto out;
			}
			last = x;
			count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}
	/* Final entry: count argument 0 marks the end of the walk. */
	err = func(last, 0, data);
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1544
1545
/* Decide whether a replay-counter aevent should be sent to the key
 * managers for @x, and send it (XFRM_MSG_NEWAE) if so.
 */
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated on of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			/* Not enough change to announce yet; but if a
			 * timeout notification was deferred earlier,
			 * convert this event into it instead of staying
			 * silent. */
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			/* Nothing changed since the last notification;
			 * remember to notify on the next update. */
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	/* Snapshot the counters being announced so the next call can
	 * detect further changes. */
	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		x->xflags &= ~XFRM_TIME_DEFER;
}
1592
1593static void xfrm_replay_timer_handler(unsigned long data)
1594{
1595	struct xfrm_state *x = (struct xfrm_state*)data;
1596
1597	spin_lock(&x->lock);
1598
1599	if (x->km.state == XFRM_STATE_VALID) {
1600		if (xfrm_aevent_is_on())
1601			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1602		else
1603			x->xflags |= XFRM_TIME_DEFER;
1604	}
1605
1606	spin_unlock(&x->lock);
1607}
1608
1609int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1610{
1611	u32 diff;
1612	u32 seq = ntohl(net_seq);
1613
1614	if (unlikely(seq == 0))
1615		return -EINVAL;
1616
1617	if (likely(seq > x->replay.seq))
1618		return 0;
1619
1620	diff = x->replay.seq - seq;
1621	if (diff >= min_t(unsigned int, x->props.replay_window,
1622			  sizeof(x->replay.bitmap) * 8)) {
1623		x->stats.replay_window++;
1624		return -EINVAL;
1625	}
1626
1627	if (x->replay.bitmap & (1U << diff)) {
1628		x->stats.replay++;
1629		return -EINVAL;
1630	}
1631	return 0;
1632}
1633EXPORT_SYMBOL(xfrm_replay_check);
1634
1635void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1636{
1637	u32 diff;
1638	u32 seq = ntohl(net_seq);
1639
1640	if (seq > x->replay.seq) {
1641		diff = seq - x->replay.seq;
1642		if (diff < x->props.replay_window)
1643			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1644		else
1645			x->replay.bitmap = 1;
1646		x->replay.seq = seq;
1647	} else {
1648		diff = x->replay.seq - seq;
1649		x->replay.bitmap |= (1U << diff);
1650	}
1651
1652	if (xfrm_aevent_is_on())
1653		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1654}
1655EXPORT_SYMBOL(xfrm_replay_advance);
1656
/* List of registered key managers, guarded by xfrm_km_lock: entries are
 * added/removed under the write lock in xfrm_register_km() /
 * xfrm_unregister_km(); event broadcasts walk it under the read lock. */
static LIST_HEAD(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1659
1660void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1661{
1662	struct xfrm_mgr *km;
1663
1664	read_lock(&xfrm_km_lock);
1665	list_for_each_entry(km, &xfrm_km_list, list)
1666		if (km->notify_policy)
1667			km->notify_policy(xp, dir, c);
1668	read_unlock(&xfrm_km_lock);
1669}
1670
1671void km_state_notify(struct xfrm_state *x, struct km_event *c)
1672{
1673	struct xfrm_mgr *km;
1674	read_lock(&xfrm_km_lock);
1675	list_for_each_entry(km, &xfrm_km_list, list)
1676		if (km->notify)
1677			km->notify(x, c);
1678	read_unlock(&xfrm_km_lock);
1679}
1680
1681EXPORT_SYMBOL(km_policy_notify);
1682EXPORT_SYMBOL(km_state_notify);
1683
1684void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1685{
1686	struct km_event c;
1687
1688	c.data.hard = hard;
1689	c.pid = pid;
1690	c.event = XFRM_MSG_EXPIRE;
1691	km_state_notify(x, &c);
1692
1693	if (hard)
1694		wake_up(&km_waitq);
1695}
1696
1697EXPORT_SYMBOL(km_state_expired);
1698/*
1699 * We send to all registered managers regardless of failure
1700 * We are happy with one success
1701*/
1702int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1703{
1704	int err = -EINVAL, acqret;
1705	struct xfrm_mgr *km;
1706
1707	read_lock(&xfrm_km_lock);
1708	list_for_each_entry(km, &xfrm_km_list, list) {
1709		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1710		if (!acqret)
1711			err = acqret;
1712	}
1713	read_unlock(&xfrm_km_lock);
1714	return err;
1715}
1716EXPORT_SYMBOL(km_query);
1717
1718int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1719{
1720	int err = -EINVAL;
1721	struct xfrm_mgr *km;
1722
1723	read_lock(&xfrm_km_lock);
1724	list_for_each_entry(km, &xfrm_km_list, list) {
1725		if (km->new_mapping)
1726			err = km->new_mapping(x, ipaddr, sport);
1727		if (!err)
1728			break;
1729	}
1730	read_unlock(&xfrm_km_lock);
1731	return err;
1732}
1733EXPORT_SYMBOL(km_new_mapping);
1734
1735void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1736{
1737	struct km_event c;
1738
1739	c.data.hard = hard;
1740	c.pid = pid;
1741	c.event = XFRM_MSG_POLEXPIRE;
1742	km_policy_notify(pol, dir, &c);
1743
1744	if (hard)
1745		wake_up(&km_waitq);
1746}
1747EXPORT_SYMBOL(km_policy_expired);
1748
1749#ifdef CONFIG_XFRM_MIGRATE
1750int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1751	       struct xfrm_migrate *m, int num_migrate)
1752{
1753	int err = -EINVAL;
1754	int ret;
1755	struct xfrm_mgr *km;
1756
1757	read_lock(&xfrm_km_lock);
1758	list_for_each_entry(km, &xfrm_km_list, list) {
1759		if (km->migrate) {
1760			ret = km->migrate(sel, dir, type, m, num_migrate);
1761			if (!ret)
1762				err = ret;
1763		}
1764	}
1765	read_unlock(&xfrm_km_lock);
1766	return err;
1767}
1768EXPORT_SYMBOL(km_migrate);
1769#endif
1770
1771int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1772{
1773	int err = -EINVAL;
1774	int ret;
1775	struct xfrm_mgr *km;
1776
1777	read_lock(&xfrm_km_lock);
1778	list_for_each_entry(km, &xfrm_km_list, list) {
1779		if (km->report) {
1780			ret = km->report(proto, sel, addr);
1781			if (!ret)
1782				err = ret;
1783		}
1784	}
1785	read_unlock(&xfrm_km_lock);
1786	return err;
1787}
1788EXPORT_SYMBOL(km_report);
1789
/* Handle the per-socket IPsec policy setsockopt: copy the userspace
 * policy blob (bounded by PAGE_SIZE), ask each key manager to compile
 * it into an xfrm_policy, and attach the result to @sk.  A key
 * manager's non-negative return value from compile_policy is
 * presumably the policy direction passed to xfrm_sk_policy_insert() --
 * verify against the compile_policy implementations.  Returns 0 on
 * success or a negative errno.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	/* Try each registered key manager until one can parse the blob. */
	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1829
1830int xfrm_register_km(struct xfrm_mgr *km)
1831{
1832	write_lock_bh(&xfrm_km_lock);
1833	list_add_tail(&km->list, &xfrm_km_list);
1834	write_unlock_bh(&xfrm_km_lock);
1835	return 0;
1836}
1837EXPORT_SYMBOL(xfrm_register_km);
1838
1839int xfrm_unregister_km(struct xfrm_mgr *km)
1840{
1841	write_lock_bh(&xfrm_km_lock);
1842	list_del(&km->list);
1843	write_unlock_bh(&xfrm_km_lock);
1844	return 0;
1845}
1846EXPORT_SYMBOL(xfrm_unregister_km);
1847
1848int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1849{
1850	int err = 0;
1851	if (unlikely(afinfo == NULL))
1852		return -EINVAL;
1853	if (unlikely(afinfo->family >= NPROTO))
1854		return -EAFNOSUPPORT;
1855	write_lock_bh(&xfrm_state_afinfo_lock);
1856	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1857		err = -ENOBUFS;
1858	else
1859		xfrm_state_afinfo[afinfo->family] = afinfo;
1860	write_unlock_bh(&xfrm_state_afinfo_lock);
1861	return err;
1862}
1863EXPORT_SYMBOL(xfrm_state_register_afinfo);
1864
1865int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1866{
1867	int err = 0;
1868	if (unlikely(afinfo == NULL))
1869		return -EINVAL;
1870	if (unlikely(afinfo->family >= NPROTO))
1871		return -EAFNOSUPPORT;
1872	write_lock_bh(&xfrm_state_afinfo_lock);
1873	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1874		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1875			err = -EINVAL;
1876		else
1877			xfrm_state_afinfo[afinfo->family] = NULL;
1878	}
1879	write_unlock_bh(&xfrm_state_afinfo_lock);
1880	return err;
1881}
1882EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1883
/* Look up the per-family state operations.  On success the afinfo read
 * lock is still held and must be dropped via xfrm_state_put_afinfo();
 * on failure (bad family or none registered) the lock has already been
 * released and NULL is returned. */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1895
/* Drop the read lock taken by a successful xfrm_state_get_afinfo();
 * @afinfo itself is unused. */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1900
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Detach and release the inner tunnel state referenced by @x.  When
 * tunnel_users is exactly 2 (presumably @x plus the tunnel owner --
 * verify against the tunnel setup code) the tunnel state itself is
 * deleted before the count and reference are dropped. */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1915
1916int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1917{
1918	int res;
1919
1920	spin_lock_bh(&x->lock);
1921	if (x->km.state == XFRM_STATE_VALID &&
1922	    x->type && x->type->get_mtu)
1923		res = x->type->get_mtu(x, mtu);
1924	else
1925		res = mtu - x->props.header_len;
1926	spin_unlock_bh(&x->lock);
1927	return res;
1928}
1929
/* Finish constructing @x: apply per-family init flags, resolve the
 * inner/outer modes and the transform type, run the type's init, and
 * mark the state valid.  Returns 0 on success or a negative errno; on
 * failure the state is left not-VALID for the caller to dispose of.
 */
int xfrm_init_state(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	int family = x->props.family;
	int err;

	err = -EAFNOSUPPORT;
	afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		goto error;

	err = 0;
	if (afinfo->init_flags)
		err = afinfo->init_flags(x);

	/* get_afinfo returned with the read lock held; release it. */
	xfrm_state_put_afinfo(afinfo);

	if (err)
		goto error;

	err = -EPROTONOSUPPORT;
	/* Inner mode is keyed by the selector family, which may differ
	 * from props.family for inter-family tunnels. */
	x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
	if (x->inner_mode == NULL)
		goto error;

	/* Only tunnel modes may bridge two different families. */
	if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
	    family != x->sel.family)
		goto error;

	x->type = xfrm_get_type(x->id.proto, family);
	if (x->type == NULL)
		goto error;

	err = x->type->init_state(x);
	if (err)
		goto error;

	x->outer_mode = xfrm_get_mode(x->props.mode, family);
	if (x->outer_mode == NULL)
		goto error;

	x->km.state = XFRM_STATE_VALID;

error:
	return err;
}

EXPORT_SYMBOL(xfrm_init_state);
1978
1979void __init xfrm_state_init(void)
1980{
1981	unsigned int sz;
1982
1983	sz = sizeof(struct hlist_head) * 8;
1984
1985	xfrm_state_bydst = xfrm_hash_alloc(sz);
1986	xfrm_state_bysrc = xfrm_hash_alloc(sz);
1987	xfrm_state_byspi = xfrm_hash_alloc(sz);
1988	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1989		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1990	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1991
1992	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1993}
1994
1995#ifdef CONFIG_AUDITSYSCALL
1996static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x,
1997					       struct audit_buffer *audit_buf)
1998{
1999	if (x->security)
2000		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2001				 x->security->ctx_alg, x->security->ctx_doi,
2002				 x->security->ctx_str);
2003
2004	switch(x->props.family) {
2005	case AF_INET:
2006		audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
2007				 NIPQUAD(x->props.saddr.a4),
2008				 NIPQUAD(x->id.daddr.a4));
2009		break;
2010	case AF_INET6:
2011		{
2012			struct in6_addr saddr6, daddr6;
2013
2014			memcpy(&saddr6, x->props.saddr.a6,
2015				sizeof(struct in6_addr));
2016			memcpy(&daddr6, x->id.daddr.a6,
2017				sizeof(struct in6_addr));
2018			audit_log_format(audit_buf,
2019					 " src=" NIP6_FMT " dst=" NIP6_FMT,
2020					 NIP6(saddr6), NIP6(daddr6));
2021		}
2022		break;
2023	}
2024}
2025
2026void
2027xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid)
2028{
2029	struct audit_buffer *audit_buf;
2030	u32 spi;
2031	extern int audit_enabled;
2032
2033	if (audit_enabled == 0)
2034		return;
2035	audit_buf = xfrm_audit_start(auid, sid);
2036	if (audit_buf == NULL)
2037		return;
2038	audit_log_format(audit_buf, " op=SAD-add res=%u",result);
2039	xfrm_audit_common_stateinfo(x, audit_buf);
2040	spi = ntohl(x->id.spi);
2041	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2042	audit_log_end(audit_buf);
2043}
2044EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2045
2046void
2047xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid)
2048{
2049	struct audit_buffer *audit_buf;
2050	u32 spi;
2051	extern int audit_enabled;
2052
2053	if (audit_enabled == 0)
2054		return;
2055	audit_buf = xfrm_audit_start(auid, sid);
2056	if (audit_buf == NULL)
2057		return;
2058	audit_log_format(audit_buf, " op=SAD-delete res=%u",result);
2059	xfrm_audit_common_stateinfo(x, audit_buf);
2060	spi = ntohl(x->id.spi);
2061	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2062	audit_log_end(audit_buf);
2063}
2064EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2065#endif /* CONFIG_AUDITSYSCALL */
2066