xfrm_policy.c revision c5d18e984a313adf5a1a4ae69e0b1d93cf410229
1/*
2 * xfrm_policy.c
3 *
4 * Changes:
5 *	Mitsuru KANDA @USAGI
6 * 	Kazunori MIYAZAWA @USAGI
7 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * 		IPv6 support
9 * 	Kazunori MIYAZAWA @USAGI
10 * 	YOSHIFUJI Hideaki
11 * 		Split up af-specific portion
12 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
13 *
14 */
15
16#include <linux/err.h>
17#include <linux/slab.h>
18#include <linux/kmod.h>
19#include <linux/list.h>
20#include <linux/spinlock.h>
21#include <linux/workqueue.h>
22#include <linux/notifier.h>
23#include <linux/netdevice.h>
24#include <linux/netfilter.h>
25#include <linux/module.h>
26#include <linux/cache.h>
27#include <linux/audit.h>
28#include <net/dst.h>
29#include <net/xfrm.h>
30#include <net/ip.h>
31#ifdef CONFIG_XFRM_STATISTICS
32#include <net/snmp.h>
33#endif
34
35#include "xfrm_hash.h"
36
int sysctl_xfrm_larval_drop __read_mostly;

#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
EXPORT_SYMBOL(xfrm_statistics);
#endif

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

/* Protects the SPD hash tables, the per-type lists and the counters below. */
static DEFINE_RWLOCK(xfrm_policy_lock);

/* All policies of a given type, in insertion order (used by the walker). */
static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX];
/* Per-direction policy counts; the upper half counts per-socket policies
 * (linked at XFRM_POLICY_MAX + dir, see xfrm_sk_policy_insert()). */
unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_count);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static struct kmem_cache *xfrm_dst_cache __read_mostly;

/* Deferred destruction of dead policies (see xfrm_policy_kill()). */
static struct work_struct xfrm_policy_gc_work;
static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);
65
66static inline int
67__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
68{
69	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
70		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
71		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
72		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
73		(fl->proto == sel->proto || !sel->proto) &&
74		(fl->oif == sel->ifindex || !sel->ifindex);
75}
76
77static inline int
78__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
79{
80	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
81		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
82		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
83		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
84		(fl->proto == sel->proto || !sel->proto) &&
85		(fl->oif == sel->ifindex || !sel->ifindex);
86}
87
88int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
89		    unsigned short family)
90{
91	switch (family) {
92	case AF_INET:
93		return __xfrm4_selector_match(sel, fl);
94	case AF_INET6:
95		return __xfrm6_selector_match(sel, fl);
96	}
97	return 0;
98}
99
100static inline struct dst_entry *__xfrm_dst_lookup(int tos,
101						  xfrm_address_t *saddr,
102						  xfrm_address_t *daddr,
103						  int family)
104{
105	struct xfrm_policy_afinfo *afinfo;
106	struct dst_entry *dst;
107
108	afinfo = xfrm_policy_get_afinfo(family);
109	if (unlikely(afinfo == NULL))
110		return ERR_PTR(-EAFNOSUPPORT);
111
112	dst = afinfo->dst_lookup(tos, saddr, daddr);
113
114	xfrm_policy_put_afinfo(afinfo);
115
116	return dst;
117}
118
119static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
120						xfrm_address_t *prev_saddr,
121						xfrm_address_t *prev_daddr,
122						int family)
123{
124	xfrm_address_t *saddr = &x->props.saddr;
125	xfrm_address_t *daddr = &x->id.daddr;
126	struct dst_entry *dst;
127
128	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
129		saddr = x->coaddr;
130		daddr = prev_daddr;
131	}
132	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
133		saddr = prev_saddr;
134		daddr = x->coaddr;
135	}
136
137	dst = __xfrm_dst_lookup(tos, saddr, daddr, family);
138
139	if (!IS_ERR(dst)) {
140		if (prev_saddr != saddr)
141			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
142		if (prev_daddr != daddr)
143			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
144	}
145
146	return dst;
147}
148
149static inline unsigned long make_jiffies(long secs)
150{
151	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
152		return MAX_SCHEDULE_TIMEOUT-1;
153	else
154		return secs*HZ;
155}
156
/* Per-policy lifetime timer.  A hard expiry deletes the policy, a soft
 * expiry only notifies the key manager; otherwise the timer is re-armed
 * for the nearest pending expiry.  The invocation itself owns one
 * reference on the policy, dropped at the end. */
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy*)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	/* Hard limits: past due means the policy expires right now. */
	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft limits: warn the key manager and re-check after
	 * XFRM_KM_TIMEOUT seconds. */
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	/* Re-arm; a successfully armed timer keeps its own reference. */
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}
226
227
228/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
229 * SPD calls.
230 */
231
232struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
233{
234	struct xfrm_policy *policy;
235
236	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
237
238	if (policy) {
239		INIT_LIST_HEAD(&policy->bytype);
240		INIT_HLIST_NODE(&policy->bydst);
241		INIT_HLIST_NODE(&policy->byidx);
242		rwlock_init(&policy->lock);
243		atomic_set(&policy->refcnt, 1);
244		setup_timer(&policy->timer, xfrm_policy_timer,
245				(unsigned long)policy);
246	}
247	return policy;
248}
249EXPORT_SYMBOL(xfrm_policy_alloc);
250
251/* Destroy xfrm_policy: descendant resources must be released to this moment. */
252
void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	/* The caller must have marked the policy dead, released its
	 * cached bundles and stopped the timer; a still-pending timer
	 * here would mean a reference is still outstanding. */
	BUG_ON(!policy->dead);

	BUG_ON(policy->bundles);

	if (del_timer(&policy->timer))
		BUG();

	write_lock_bh(&xfrm_policy_lock);
	list_del(&policy->bytype);
	write_unlock_bh(&xfrm_policy_lock);

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);
270
/* Final teardown of a dead policy, run from the GC work queue. */
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	/* Release every cached bundle hanging off this policy. */
	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	/* A pending timer holds a reference; cancelling it drops that. */
	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	/* Extra references at this point are presumably flow cache
	 * entries; flush the cache so they get dropped. */
	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}
288
/* Work handler: destroy all policies queued by xfrm_policy_kill(). */
static void xfrm_policy_gc_task(struct work_struct *work)
{
	struct xfrm_policy *policy;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	/* Atomically steal the pending list so the entries can be
	 * destroyed without holding the gc lock. */
	spin_lock_bh(&xfrm_policy_gc_lock);
	gc_list.first = xfrm_policy_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
		xfrm_policy_gc_kill(policy);
}
303
304/* Rule must be locked. Release descentant resources, announce
305 * entry dead. The rule must be unlinked from lists to the moment.
306 */
307
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->dead;
	policy->dead = 1;
	write_unlock_bh(&policy->lock);

	/* Killing the same policy twice indicates a bug in the caller. */
	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	/* Queue for deferred destruction; the bydst node is reusable
	 * here because the policy is already unlinked from the SPD. */
	spin_lock(&xfrm_policy_gc_lock);
	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
	spin_unlock(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}
328
/* One resizable hash table per policy direction. */
struct xfrm_policy_hash {
	struct hlist_head	*table;
	unsigned int		hmask;	/* table size - 1 */
};

/* Policies whose selector is too wide to hash (see policy_hash_bysel()). */
static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
static struct hlist_head *xfrm_policy_byidx __read_mostly;
static unsigned int xfrm_idx_hmask __read_mostly;
/* Upper bound on table size; resizing stops once this is reached. */
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
339
/* Bucket of @index in the by-index hash table at its current size. */
static inline unsigned int idx_hash(u32 index)
{
	return __idx_hash(index, xfrm_idx_hmask);
}
344
345static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
346{
347	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
348	unsigned int hash = __sel_hash(sel, family, hmask);
349
350	return (hash == hmask + 1 ?
351		&xfrm_policy_inexact[dir] :
352		xfrm_policy_bydst[dir].table + hash);
353}
354
355static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
356{
357	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
358	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
359
360	return xfrm_policy_bydst[dir].table + hash;
361}
362
/* Move every policy on @list into @ndsttable.  Entries that rehash to
 * the same new bucket are re-inserted one after another so that their
 * relative order on the chain is preserved. */
static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			/* First entry of this pass defines the target
			 * bucket for the pass. */
			hlist_del(entry);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			/* Entries hashing elsewhere are left for a
			 * later pass (see the redo loop below). */
			if (h != h0)
				continue;
			hlist_del(entry);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = entry;
	}
	/* Entries skipped above (different target bucket) remain on
	 * @list; run further passes until it is empty. */
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}
394
/* Rehash every policy on @list into @nidxtable.  No hlist_del is done:
 * the old table is freed wholesale by the caller (xfrm_byidx_resize). */
static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}
409
/* Next hash mask after a resize: double the table size, so the mask is
 * the next (power of two) - 1. */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int doubled = (old_hmask + 1) * 2;

	return doubled - 1;
}
414
/* Double the by-destination hash table for direction @dir and rehash
 * all entries into it. */
static void xfrm_bydst_resize(int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	/* Allocation failure is harmless: keep the old, smaller table. */
	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	xfrm_policy_bydst[dir].table = ndst;
	xfrm_policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
439
/* Double the by-index hash table and rehash all entries into it. */
static void xfrm_byidx_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = xfrm_policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	/* Allocation failure is harmless: keep the old, smaller table. */
	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	xfrm_policy_byidx = nidx;
	xfrm_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}
464
465static inline int xfrm_bydst_should_resize(int dir, int *total)
466{
467	unsigned int cnt = xfrm_policy_count[dir];
468	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
469
470	if (total)
471		*total += cnt;
472
473	if ((hmask + 1) < xfrm_policy_hashmax &&
474	    cnt > hmask)
475		return 1;
476
477	return 0;
478}
479
480static inline int xfrm_byidx_should_resize(int total)
481{
482	unsigned int hmask = xfrm_idx_hmask;
483
484	if ((hmask + 1) < xfrm_policy_hashmax &&
485	    total > hmask)
486		return 1;
487
488	return 0;
489}
490
/* Snapshot SPD statistics: per-direction policy counts and hash table
 * geometry. */
void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
	/* Per-socket policies live in the upper half of the array. */
	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = xfrm_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
505
static DEFINE_MUTEX(hash_resize_mutex);

/* Work handler: grow any per-direction table that has outgrown its
 * size, then the by-index table.  Serialized by hash_resize_mutex. */
static void xfrm_hash_resize(struct work_struct *__unused)
{
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(dir, &total))
			xfrm_bydst_resize(dir);
	}
	if (xfrm_byidx_should_resize(total))
		xfrm_byidx_resize(total);

	mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
525
526/* Generate new index... KAME seems to generate them ordered by cost
527 * of an absolute inpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(u8 type, int dir)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_node *entry;
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		/* Indices advance in steps of 8 with the direction OR'ed
		 * into the low bits (cf. xfrm_policy_id2dir()).  Index 0
		 * is never handed out. */
		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		list = xfrm_policy_byidx + idx_hash(idx);
		found = 0;
		hlist_for_each_entry(p, entry, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		/* Collision is possible once the generator has wrapped;
		 * keep trying until an unused index turns up. */
		if (!found)
			return idx;
	}
}
555
556static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
557{
558	u32 *p1 = (u32 *) s1;
559	u32 *p2 = (u32 *) s2;
560	int len = sizeof(struct xfrm_selector) / sizeof(u32);
561	int i;
562
563	for (i = 0; i < len; i++) {
564		if (p1[i] != p2[i])
565			return 1;
566	}
567
568	return 0;
569}
570
/* Insert @policy into the SPD for direction @dir.  An existing policy
 * with the same type, selector and security context is replaced (the
 * replacement inherits its index) unless @excl is set, in which case
 * -EEXIST is returned.  Cached bundles of policies following the new
 * entry on its chain are freed afterwards. */
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *entry, *newpos;
	struct dst_entry *gc_list;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	/* Locate the policy to replace and/or the insertion point that
	 * keeps the chain ordered by priority. */
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	xfrm_policy_count[dir]++;
	/* Invalidate cached flow lookups. */
	atomic_inc(&flow_cache_genid);
	if (delpol) {
		hlist_del(&delpol->bydst);
		hlist_del(&delpol->byidx);
		xfrm_policy_count[dir]--;
	}
	/* A replaced policy hands its index down to the replacement. */
	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	/* Arm the lifetime timer; an armed timer holds a reference. */
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);

	/* Collect the cached bundles of all policies following the new
	 * entry on its chain; the new entry may now shadow them. */
	read_lock_bh(&xfrm_policy_lock);
	gc_list = NULL;
	entry = &policy->bydst;
	hlist_for_each_entry_continue(policy, entry, bydst) {
		struct dst_entry *dst;

		write_lock(&policy->lock);
		dst = policy->bundles;
		if (dst) {
			struct dst_entry *tail = dst;
			while (tail->next)
				tail = tail->next;
			tail->next = gc_list;
			gc_list = dst;

			policy->bundles = NULL;
		}
		write_unlock(&policy->lock);
	}
	read_unlock_bh(&xfrm_policy_lock);

	/* Free the collected bundles outside the SPD lock. */
	while (gc_list) {
		struct dst_entry *dst = gc_list;

		gc_list = dst->next;
		dst_free(dst);
	}

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
659
/* Find the policy of @type matching @sel and @ctx exactly; with @delete
 * it is also unlinked.  Returns a held reference or NULL.  If the
 * security hook refuses the delete, *err is set and the still-linked
 * policy is returned. */
struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
					  struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == type &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	/* Destruction happens outside the SPD lock. */
	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
702
/* Find the policy of @type with index @id; with @delete it is also
 * unlinked.  The direction is encoded in the index and must match @dir.
 * Returns a held reference or NULL; on a refused security delete, *err
 * is set and the still-linked policy is returned. */
struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
				     int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = xfrm_policy_byidx + idx_hash(id);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, byidx) {
		if (pol->type == type && pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	/* Destruction happens outside the SPD lock. */
	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
745
#ifdef CONFIG_SECURITY_NETWORK_XFRM
/* Pre-flight for xfrm_policy_flush(): ask the security module about
 * every policy of @type before any is removed.  The first refusal is
 * audited and aborts the whole flush.  Called with xfrm_policy_lock
 * held by xfrm_policy_flush(). */
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

		/* Inexact policies first ... */
		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->secid);
				return err;
			}
		}
		/* ... then every hash chain. */
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif
795
/* Delete every policy of @type from the SPD.  The SPD lock is dropped
 * around each audit/destruction, so every chain is rescanned from its
 * head after re-acquiring the lock. */
int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	write_lock_bh(&xfrm_policy_lock);

	/* All-or-nothing: bail out before touching anything if the
	 * security module refuses any single delete. */
	err = xfrm_policy_flush_secctx_check(type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i, killed;

		killed = 0;
	again1:
		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			hlist_del(&pol->bydst);
			hlist_del(&pol->byidx);
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->secid);

			xfrm_policy_kill(pol);
			killed++;

			/* The chain may have changed while unlocked. */
			write_lock_bh(&xfrm_policy_lock);
			goto again1;
		}

		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				write_unlock_bh(&xfrm_policy_lock);

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->secid);
				xfrm_policy_kill(pol);
				killed++;

				write_lock_bh(&xfrm_policy_lock);
				goto again2;
			}
		}

		xfrm_policy_count[dir] -= killed;
	}
	atomic_inc(&flow_cache_genid);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);
861
/* Resumable iteration over all policies of walk->type, invoking @func
 * for each live one.  Position and count are kept in @walk so a dumper
 * can continue where a previous call stopped; the resume entry is kept
 * alive via a reference held in walk->policy. */
int xfrm_policy_walk(struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *old, *pol, *last = NULL;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	/* A finished walk (entries seen, no saved position) is a no-op. */
	if (walk->policy == NULL && walk->count != 0)
		return 0;

	old = pol = walk->policy;
	walk->policy = NULL;
	read_lock_bh(&xfrm_policy_lock);

	for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) {
		if (walk->type != walk->cur_type &&
		    walk->type != XFRM_POLICY_TYPE_ANY)
			continue;

		if (pol == NULL) {
			pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type],
					       struct xfrm_policy, bytype);
		}
		list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) {
			if (pol->dead)
				continue;
			/* Deliver one entry behind the cursor so that on
			 * error the current entry becomes the saved
			 * resume point. */
			if (last) {
				error = func(last, xfrm_policy_id2dir(last->index),
					     walk->count, data);
				if (error) {
					xfrm_pol_hold(last);
					walk->policy = last;
					goto out;
				}
			}
			last = pol;
			walk->count++;
		}
		pol = NULL;
	}
	if (walk->count == 0) {
		error = -ENOENT;
		goto out;
	}
	/* The final entry is delivered with count 0 to mark the end. */
	if (last)
		error = func(last, xfrm_policy_id2dir(last->index), 0, data);
out:
	read_unlock_bh(&xfrm_policy_lock);
	if (old != NULL)
		xfrm_pol_put(old);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
919
920/*
921 * Find policy to apply to this flow.
922 *
923 * Returns 0 if policy found, else an -errno.
924 */
925static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
926			     u8 type, u16 family, int dir)
927{
928	struct xfrm_selector *sel = &pol->selector;
929	int match, ret = -ESRCH;
930
931	if (pol->family != family ||
932	    pol->type != type)
933		return ret;
934
935	match = xfrm_selector_match(sel, fl, family);
936	if (match)
937		ret = security_xfrm_policy_lookup(pol->security, fl->secid,
938						  dir);
939
940	return ret;
941}
942
/* Find the best policy of @type for flow @fl: search the hashed exact
 * chain first, then the inexact list, keeping whichever match has the
 * numerically lowest priority.  Returns a held policy, NULL when
 * nothing matches, or ERR_PTR() on a security lookup error. */
static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	xfrm_address_t *daddr, *saddr;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	/* An inexact policy only beats the hashed match if its priority
	 * value is strictly lower. */
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}
998
/* Flow-cache resolver: look up a policy for @fl, trying sub-policies
 * before main ones when sub-policy support is configured.  On success
 * *objp gets the held policy and *obj_refp its refcount. */
static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;
	int err = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
	/* A sub-policy hit (or a hard error) short-circuits the main
	 * table lookup. */
	if (pol || err)
		goto end;
#endif
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
#ifdef CONFIG_XFRM_SUB_POLICY
end:
#endif
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
	return err;
}
1026
1027static inline int policy_to_flow_dir(int dir)
1028{
1029	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1030	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1031	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
1032		return dir;
1033	switch (dir) {
1034	default:
1035	case XFRM_POLICY_IN:
1036		return FLOW_DIR_IN;
1037	case XFRM_POLICY_OUT:
1038		return FLOW_DIR_OUT;
1039	case XFRM_POLICY_FWD:
1040		return FLOW_DIR_FWD;
1041	}
1042}
1043
/* Check the socket's own policy for direction @dir against @fl.
 * Returns a held policy on a positive match, NULL when there is none
 * (or the security hook reports -ESRCH), or ERR_PTR() on other
 * security errors. */
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		int err = 0;

		if (match) {
			err = security_xfrm_policy_lookup(pol->security,
						      fl->secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}
1070
/* Link @pol into the bydst and byidx tables for direction @dir, taking
 * a reference for the tables.  Caller holds xfrm_policy_lock. */
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
						     pol->family, dir);

	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
	xfrm_policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);
}
1084
/* Reverse of __xfrm_policy_link(): remove @pol from both hash tables.
 * Returns @pol if it was linked, NULL otherwise.  Caller holds
 * xfrm_policy_lock. */
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del(&pol->bydst);
	hlist_del(&pol->byidx);
	xfrm_policy_count[dir]--;

	return pol;
}
1097
1098int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1099{
1100	write_lock_bh(&xfrm_policy_lock);
1101	pol = __xfrm_policy_unlink(pol, dir);
1102	write_unlock_bh(&xfrm_policy_lock);
1103	if (pol) {
1104		if (dir < XFRM_POLICY_MAX)
1105			atomic_inc(&flow_cache_genid);
1106		xfrm_policy_kill(pol);
1107		return 0;
1108	}
1109	return -ENOENT;
1110}
1111EXPORT_SYMBOL(xfrm_policy_delete);
1112
/* Install @pol as the socket's policy for direction @dir (NULL clears
 * it).  Socket policies are linked at XFRM_POLICY_MAX+dir so they are
 * accounted separately from the main SPD.  Any previous policy is
 * destroyed. */
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	/* Only main-type policies may be attached to sockets. */
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}
1139
/* Duplicate a socket policy for a newly cloned socket.  Returns the
 * new, already-linked policy, or NULL on allocation or security-clone
 * failure. */
static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		/* Drop the allocation reference; the link holds its own. */
		xfrm_pol_put(newp);
	}
	return newp;
}
1167
1168int __xfrm_sk_clone_policy(struct sock *sk)
1169{
1170	struct xfrm_policy *p0 = sk->sk_policy[0],
1171			   *p1 = sk->sk_policy[1];
1172
1173	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1174	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1175		return -ENOMEM;
1176	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1177		return -ENOMEM;
1178	return 0;
1179}
1180
1181static int
1182xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
1183	       unsigned short family)
1184{
1185	int err;
1186	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1187
1188	if (unlikely(afinfo == NULL))
1189		return -EINVAL;
1190	err = afinfo->get_saddr(local, remote);
1191	xfrm_policy_put_afinfo(afinfo);
1192	return err;
1193}
1194
1195/* Resolve list of templates for the flow, given policy. */
1196
/* Resolve each template of @policy to a valid xfrm_state for flow @fl.
 * Returns the number of states stored in @xfrm (each with a reference
 * held), or a negative errno with all taken references released. */
static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
		      struct xfrm_state **xfrm,
		      unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		/* Tunnel/BEET templates open a new addressing scope; an
		 * unspecified local address is resolved via routing. */
		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			family = tmpl->encap_family;
			if (xfrm_addr_any(local, family)) {
				error = xfrm_get_saddr(&tmp, remote, family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			/* Subsequent templates operate on this hop's
			 * endpoints. */
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}

		/* Optional templates may simply be skipped on failure. */
		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	/* Drop the references taken on already-resolved states. */
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}
1251
/* Resolve the templates of all @npols policies in @pols into @xfrm.
 * With more than one policy the states are first gathered into a
 * temporary array and then sorted into outbound processing order.
 * Returns the total state count or a negative errno. */
static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	/* Single policy: resolve directly into the caller's array. */
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	/* Release every state resolved before the failure. */
	for (cnx--; cnx>=0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}
1290
1291/* Check that the bundle accepts the flow and its components are
1292 * still valid.
1293 */
1294
1295static struct dst_entry *
1296xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1297{
1298	struct dst_entry *x;
1299	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1300	if (unlikely(afinfo == NULL))
1301		return ERR_PTR(-EINVAL);
1302	x = afinfo->find_bundle(fl, policy);
1303	xfrm_policy_put_afinfo(afinfo);
1304	return x;
1305}
1306
1307static inline int xfrm_get_tos(struct flowi *fl, int family)
1308{
1309	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1310	int tos;
1311
1312	if (!afinfo)
1313		return -EINVAL;
1314
1315	tos = afinfo->get_tos(fl);
1316
1317	xfrm_policy_put_afinfo(afinfo);
1318
1319	return tos;
1320}
1321
1322static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1323{
1324	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1325	struct xfrm_dst *xdst;
1326
1327	if (!afinfo)
1328		return ERR_PTR(-EINVAL);
1329
1330	xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1331
1332	xfrm_policy_put_afinfo(afinfo);
1333
1334	return xdst;
1335}
1336
1337static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1338				 int nfheader_len)
1339{
1340	struct xfrm_policy_afinfo *afinfo =
1341		xfrm_policy_get_afinfo(dst->ops->family);
1342	int err;
1343
1344	if (!afinfo)
1345		return -EINVAL;
1346
1347	err = afinfo->init_path(path, dst, nfheader_len);
1348
1349	xfrm_policy_put_afinfo(afinfo);
1350
1351	return err;
1352}
1353
1354static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1355{
1356	struct xfrm_policy_afinfo *afinfo =
1357		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1358	int err;
1359
1360	if (!afinfo)
1361		return -EINVAL;
1362
1363	err = afinfo->fill_dst(xdst, dev);
1364
1365	xfrm_policy_put_afinfo(afinfo);
1366
1367	return err;
1368}
1369
1370/* Allocate chain of dst_entry's, attach known xfrm's, calculate
1371 * all the metrics... Shortly, bundle a bundle.
1372 */
1373
1374static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1375					    struct xfrm_state **xfrm, int nx,
1376					    struct flowi *fl,
1377					    struct dst_entry *dst)
1378{
1379	unsigned long now = jiffies;
1380	struct net_device *dev;
1381	struct dst_entry *dst_prev = NULL;
1382	struct dst_entry *dst0 = NULL;
1383	int i = 0;
1384	int err;
1385	int header_len = 0;
1386	int nfheader_len = 0;
1387	int trailer_len = 0;
1388	int tos;
1389	int family = policy->selector.family;
1390	xfrm_address_t saddr, daddr;
1391
1392	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1393
1394	tos = xfrm_get_tos(fl, family);
1395	err = tos;
1396	if (tos < 0)
1397		goto put_states;
1398
1399	dst_hold(dst);
1400
1401	for (; i < nx; i++) {
1402		struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1403		struct dst_entry *dst1 = &xdst->u.dst;
1404
1405		err = PTR_ERR(xdst);
1406		if (IS_ERR(xdst)) {
1407			dst_release(dst);
1408			goto put_states;
1409		}
1410
1411		if (!dst_prev)
1412			dst0 = dst1;
1413		else {
1414			dst_prev->child = dst_clone(dst1);
1415			dst1->flags |= DST_NOHASH;
1416		}
1417
1418		xdst->route = dst;
1419		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1420
1421		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1422			family = xfrm[i]->props.family;
1423			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
1424					      family);
1425			err = PTR_ERR(dst);
1426			if (IS_ERR(dst))
1427				goto put_states;
1428		} else
1429			dst_hold(dst);
1430
1431		dst1->xfrm = xfrm[i];
1432		xdst->genid = xfrm[i]->genid;
1433
1434		dst1->obsolete = -1;
1435		dst1->flags |= DST_HOST;
1436		dst1->lastuse = now;
1437
1438		dst1->input = dst_discard;
1439		dst1->output = xfrm[i]->outer_mode->afinfo->output;
1440
1441		dst1->next = dst_prev;
1442		dst_prev = dst1;
1443
1444		header_len += xfrm[i]->props.header_len;
1445		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1446			nfheader_len += xfrm[i]->props.header_len;
1447		trailer_len += xfrm[i]->props.trailer_len;
1448	}
1449
1450	dst_prev->child = dst;
1451	dst0->path = dst;
1452
1453	err = -ENODEV;
1454	dev = dst->dev;
1455	if (!dev)
1456		goto free_dst;
1457
1458	/* Copy neighbout for reachability confirmation */
1459	dst0->neighbour = neigh_clone(dst->neighbour);
1460
1461	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1462	xfrm_init_pmtu(dst_prev);
1463
1464	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1465		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1466
1467		err = xfrm_fill_dst(xdst, dev);
1468		if (err)
1469			goto free_dst;
1470
1471		dst_prev->header_len = header_len;
1472		dst_prev->trailer_len = trailer_len;
1473		header_len -= xdst->u.dst.xfrm->props.header_len;
1474		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1475	}
1476
1477out:
1478	return dst0;
1479
1480put_states:
1481	for (; i < nx; i++)
1482		xfrm_state_put(xfrm[i]);
1483free_dst:
1484	if (dst0)
1485		dst_free(dst0);
1486	dst0 = ERR_PTR(err);
1487	goto out;
1488}
1489
1490static int inline
1491xfrm_dst_alloc_copy(void **target, void *src, int size)
1492{
1493	if (!*target) {
1494		*target = kmalloc(size, GFP_ATOMIC);
1495		if (!*target)
1496			return -ENOMEM;
1497	}
1498	memcpy(*target, src, size);
1499	return 0;
1500}
1501
/* Remember the sub-policy selector on the bundle so later lookups can
 * re-check it (xfrm_bundle_ok); no-op without CONFIG_XFRM_SUB_POLICY. */
static int inline
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}
1513
/* Remember the originating flow on the bundle so later lookups can
 * re-check it (xfrm_bundle_ok); no-op without CONFIG_XFRM_SUB_POLICY. */
static int inline
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}
1524
1525static int stale_bundle(struct dst_entry *dst);
1526
1527/* Main function: finds/creates a bundle for given flow.
1528 *
1529 * At the moment we eat a raw IP route. Mostly to speed up lookups
1530 * on interfaces with disabled IPsec.
1531 */
int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		  struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols;
	int pol_dead;
	int xfrm_nr;
	int pi;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);

	/* Retried from scratch whenever the flow cache generation moved
	 * while we were resolving states. */
restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
		pols[pi] = NULL;
	npols = 0;
	pol_dead = 0;
	xfrm_nr = 0;

	/* A per-socket policy takes precedence over the global tables. */
	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !xfrm_policy_count[XFRM_POLICY_OUT])
			goto nopol;

		policy = flow_cache_lookup(fl, dst_orig->ops->family,
					   dir, xfrm_policy_lookup);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy)
		goto nopol;

	family = dst_orig->ops->family;
	pols[0] = policy;
	npols ++;
	xfrm_nr += pols[0]->xfrm_nr;

	err = -ENOENT;
	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
		goto error;

	policy->curlft.use_time = get_seconds();

	switch (policy->action) {
	default:
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;

	case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
		if (policy->xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}
#endif

		/* Try to find matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = PTR_ERR(dst);
			goto error;
		}

		if (dst)
			break;

#ifdef CONFIG_XFRM_SUB_POLICY
		/* A sub-policy match must also consult the main table. */
		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
							    fl, family,
							    XFRM_POLICY_OUT);
			if (pols[1]) {
				if (IS_ERR(pols[1])) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
					err = PTR_ERR(pols[1]);
					goto error;
				}
				if (pols[1]->action == XFRM_POLICY_BLOCK) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
					err = -EPERM;
					goto error;
				}
				npols ++;
				xfrm_nr += pols[1]->xfrm_nr;
			}
		}

		/*
		 * Because neither flowi nor bundle information knows about
		 * transformation template size. On more than one policy usage
		 * we can realize whether all of them is bypass or not after
		 * they are searched. See above not-transformed bypass
		 * is surrounded by non-sub policy configuration, too.
		 */
		if (xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

#endif
		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
				/* EREMOTE tells the caller to generate
				 * a one-shot blackhole route.
				 */
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				xfrm_pol_put(policy);
				return -EREMOTE;
			}
			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
				DECLARE_WAITQUEUE(wait, current);

				/* Sleep until the key manager makes progress
				 * (or a signal arrives), then retry. */
				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pols_put(pols, npols);
					goto restart;
				}
				err = nx;
			}
			if (err < 0) {
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				goto error;
			}
		}
		if (nx == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
		err = PTR_ERR(dst);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
			goto error;
		}

		/* Re-check that no policy died while we resolved states. */
		for (pi = 0; pi < npols; pi++) {
			read_lock_bh(&pols[pi]->lock);
			pol_dead |= pols[pi]->dead;
			read_unlock_bh(&pols[pi]->lock);
		}

		write_lock_bh(&policy->lock);
		if (unlikely(pol_dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist new bundle to dead object.
			 * We can't enlist stable bundles either.
			 */
			write_unlock_bh(&policy->lock);
			if (dst)
				dst_free(dst);

			if (pol_dead)
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
			else
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = -EHOSTUNREACH;
			goto error;
		}

		if (npols > 1)
			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
		else
			err = xfrm_dst_update_origin(dst, fl);
		if (unlikely(err)) {
			write_unlock_bh(&policy->lock);
			if (dst)
				dst_free(dst);
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			goto error;
		}

		/* Enlist the new bundle on the policy for reuse/pruning. */
		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pols_put(pols, npols);
	return 0;

error:
	xfrm_pols_put(pols, npols);
dropdst:
	dst_release(dst_orig);
	*dst_p = NULL;
	return err;

nopol:
	err = -ENOENT;
	if (flags & XFRM_LOOKUP_ICMP)
		goto dropdst;
	return 0;
}
EXPORT_SYMBOL(__xfrm_lookup);
1777
1778int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1779		struct sock *sk, int flags)
1780{
1781	int err = __xfrm_lookup(dst_p, fl, sk, flags);
1782
1783	if (err == -EREMOTE) {
1784		dst_release(*dst_p);
1785		*dst_p = NULL;
1786		err = -EAGAIN;
1787	}
1788
1789	return err;
1790}
1791EXPORT_SYMBOL(xfrm_lookup);
1792
1793static inline int
1794xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1795{
1796	struct xfrm_state *x;
1797
1798	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1799		return 0;
1800	x = skb->sp->xvec[idx];
1801	if (!x->type->reject)
1802		return 0;
1803	return x->type->reject(x, skb, fl);
1804}
1805
1806/* When skb is transformed back to its "native" form, we have to
1807 * check policy restrictions. At the moment we make this in maximally
1808 * stupid way. Shame on me. :-) Of course, connected sockets must
1809 * have policy cached at them.
1810 */
1811
/* Does state @x satisfy template @tmpl?  Kernel-owned states pass only
 * for optional templates whose addresses do not conflict; otherwise
 * proto, spi, reqid, mode, auth algorithms and - for non-transport
 * modes - the endpoint addresses must all agree. */
static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}
1827
1828/*
1829 * 0 or more than 0 is returned when validation is succeeded (either bypass
1830 * because of optional transport mode, or next index of the mathced secpath
1831 * state with the template.
1832 * -1 is returned when no matching template is found.
1833 * Otherwise "-2 - errored_index" is returned.
1834 */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		/* An optional transport-mode template may be bypassed. */
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			/* A non-transport state acts as a barrier; encode
			 * the failing index as "-2 - idx" for reporting. */
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}
1857
1858int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1859			  unsigned int family, int reverse)
1860{
1861	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1862	int err;
1863
1864	if (unlikely(afinfo == NULL))
1865		return -EAFNOSUPPORT;
1866
1867	afinfo->decode_session(skb, fl, reverse);
1868	err = security_xfrm_decode_session(skb, &fl->secid);
1869	xfrm_policy_put_afinfo(afinfo);
1870	return err;
1871}
1872EXPORT_SYMBOL(__xfrm_decode_session);
1873
1874static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1875{
1876	for (; k < sp->len; k++) {
1877		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1878			*idxp = k;
1879			return 1;
1880		}
1881	}
1882
1883	return 0;
1884}
1885
/* Inbound policy check: verify that the transformations recorded in the
 * packet's secpath satisfy the applicable policy (per-socket first, then
 * the flow cache / main table).  Returns 1 to accept, 0 to drop. */
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	/* The "reverse" flag arrives in the bits above XFRM_POLICY_MASK. */
	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

		for (i=skb->sp->len-1; i>=0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol)
		pol = flow_cache_lookup(&fl, family, fl_dir,
					xfrm_policy_lookup);

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		/* No policy: plain traffic passes, but a tunnel-mode state
		 * on the secpath without policy backing is rejected. */
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols ++;
#ifdef CONFIG_XFRM_SUB_POLICY
	/* A sub-policy match must also consult the main table. */
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols ++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		/* Gather the templates of every matched policy. */
		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		/* Any leftover non-transport state is unaccounted for. */
		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
2035
2036int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2037{
2038	struct flowi fl;
2039
2040	if (xfrm_decode_session(skb, &fl, family) < 0) {
2041		/* XXX: we should have something like FWDHDRERROR here. */
2042		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
2043		return 0;
2044	}
2045
2046	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
2047}
2048EXPORT_SYMBOL(__xfrm_route_forward);
2049
2050/* Optimize later using cookies and generation ids. */
2051
2052static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2053{
2054	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2055	 * to "-1" to force all XFRM destinations to get validated by
2056	 * dst_ops->check on every use.  We do this because when a
2057	 * normal route referenced by an XFRM dst is obsoleted we do
2058	 * not go looking around for all parent referencing XFRM dsts
2059	 * so that we can invalidate them.  It is just too much work.
2060	 * Instead we make the checks here on every use.  For example:
2061	 *
2062	 *	XFRM dst A --> IPv4 dst X
2063	 *
2064	 * X is the "xdst->route" of A (X is also the "dst->path" of A
2065	 * in this example).  If X is marked obsolete, "A" will not
2066	 * notice.  That's what we are validating here via the
2067	 * stale_bundle() check.
2068	 *
2069	 * When a policy's bundle is pruned, we dst_free() the XFRM
2070	 * dst which causes it's ->obsolete field to be set to a
2071	 * positive non-zero integer.  If an XFRM dst has been pruned
2072	 * like this, we want to force a new route lookup.
2073	 */
2074	if (dst->obsolete < 0 && !stale_bundle(dst))
2075		return dst;
2076
2077	return NULL;
2078}
2079
2080static int stale_bundle(struct dst_entry *dst)
2081{
2082	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2083}
2084
2085void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2086{
2087	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2088		dst->dev = dev_net(dev)->loopback_dev;
2089		dev_hold(dst->dev);
2090		dev_put(dev);
2091	}
2092}
2093EXPORT_SYMBOL(xfrm_dst_ifdown);
2094
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible: such a dst must be popped before it reaches the
	 * point of failure, so there is nothing to do here. */
}
2100
2101static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2102{
2103	if (dst) {
2104		if (dst->obsolete) {
2105			dst_release(dst);
2106			dst = NULL;
2107		}
2108	}
2109	return dst;
2110}
2111
/* Move every bundle of @pol for which @func returns true off the
 * policy's bundle list and onto *gc_list_p (chained via dst->next),
 * so the caller can free them outside the policy lock. */
static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
{
	struct dst_entry *dst, **dstp;

	write_lock(&pol->lock);
	dstp = &pol->bundles;
	while ((dst=*dstp) != NULL) {
		if (func(dst)) {
			/* Unlink and chain onto the garbage list. */
			*dstp = dst->next;
			dst->next = *gc_list_p;
			*gc_list_p = dst;
		} else {
			dstp = &dst->next;
		}
	}
	write_unlock(&pol->lock);
}
2129
/* Walk every policy (the inexact lists and every bydst hash chain, in
 * every direction) collecting the bundles selected by @func, then free
 * them after the policy lock has been dropped. */
static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
	struct dst_entry *gc_list = NULL;
	int dir;

	read_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		struct hlist_head *table;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst)
			prune_one_bundle(pol, func, &gc_list);

		table = xfrm_policy_bydst[dir].table;
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry, table + i, bydst)
				prune_one_bundle(pol, func, &gc_list);
		}
	}
	read_unlock_bh(&xfrm_policy_lock);

	/* Free the collected bundles outside the policy lock. */
	while (gc_list) {
		struct dst_entry *dst = gc_list;
		gc_list = dst->next;
		dst_free(dst);
	}
}
2160
2161static int unused_bundle(struct dst_entry *dst)
2162{
2163	return !atomic_read(&dst->__refcnt);
2164}
2165
/* Reap bundles that no longer have any users (refcount zero). */
static void __xfrm_garbage_collect(void)
{
	xfrm_prune_bundles(unused_bundle);
}
2170
/* Prune every bundle that has gone stale (e.g. after a device event).
 * Always returns 0 so it fits where a status code is expected. */
static int xfrm_flush_bundles(void)
{
	xfrm_prune_bundles(stale_bundle);
	return 0;
}
2176
2177static void xfrm_init_pmtu(struct dst_entry *dst)
2178{
2179	do {
2180		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2181		u32 pmtu, route_mtu_cached;
2182
2183		pmtu = dst_mtu(dst->child);
2184		xdst->child_mtu_cached = pmtu;
2185
2186		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2187
2188		route_mtu_cached = dst_mtu(xdst->route);
2189		xdst->route_mtu_cached = route_mtu_cached;
2190
2191		if (pmtu > route_mtu_cached)
2192			pmtu = route_mtu_cached;
2193
2194		dst->metrics[RTAX_MTU-1] = pmtu;
2195	} while ((dst = dst->next));
2196}
2197
2198/* Check that the bundle accepts the flow and its components are
2199 * still valid.
2200 */
2201
/* Validate the bundle headed by @first against flow @fl: the underlying
 * route must still be fresh, every state valid and matching, and the
 * cached MTUs are refreshed in place when they changed.  Returns 1 when
 * the bundle may be used, 0 when it is stale. */
int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
		struct flowi *fl, int family, int strict)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;
#ifdef CONFIG_XFRM_SUB_POLICY
	/* Re-check the flow/selector remembered at bundle creation. */
	if (fl) {
		if (first->origin && !flow_cache_uli_match(first->origin, fl))
			return 0;
		if (first->partner &&
		    !xfrm_selector_match(first->partner, fl, family))
			return 0;
	}
#endif

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
			return 0;
		if (fl && pol &&
		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
			return 0;
		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->genid != dst->xfrm->genid)
			return 0;

		if (strict && fl &&
		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
			return 0;

		/* Track the deepest entry whose cached MTU changed. */
		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	/* Propagate the refreshed MTU back up from the innermost change. */
	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst->metrics[RTAX_MTU-1] = mtu;

		if (last == first)
			break;

		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

EXPORT_SYMBOL(xfrm_bundle_ok);
2282
/* Register the address-family policy operations for @afinfo->family,
 * wiring the shared XFRM implementations into any dst_ops hooks the
 * family left unset.  Returns 0, -EINVAL, -EAFNOSUPPORT, or -ENOBUFS
 * when the family is already registered. */
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		/* Fill in only the hooks the family did not provide. */
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = __xfrm_garbage_collect;
		xfrm_policy_afinfo[afinfo->family] = afinfo;
	}
	write_unlock_bh(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2311
/* Unregister @afinfo and clear the dst_ops hooks that were shared with
 * the XFRM core.  Returns 0, or -EINVAL/-EAFNOSUPPORT when @afinfo does
 * not match what is registered for its family. */
int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			struct dst_ops *dst_ops = afinfo->dst_ops;
			xfrm_policy_afinfo[afinfo->family] = NULL;
			dst_ops->kmem_cachep = NULL;
			dst_ops->check = NULL;
			dst_ops->negative_advice = NULL;
			dst_ops->link_failure = NULL;
			afinfo->garbage_collect = NULL;
		}
	}
	write_unlock_bh(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2337
/* Look up the afinfo for @family.  On success the afinfo read lock is
 * left held and must be released with xfrm_policy_put_afinfo(); on
 * failure the lock is dropped here and NULL is returned. */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_policy_afinfo_lock);
	afinfo = xfrm_policy_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_policy_afinfo_lock);
	return afinfo;
}
2349
/* Release the read lock taken by a successful xfrm_policy_get_afinfo().
 * @afinfo itself is unused; it documents what is being released. */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	read_unlock(&xfrm_policy_afinfo_lock);
}
2354
2355static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2356{
2357	struct net_device *dev = ptr;
2358
2359	if (dev_net(dev) != &init_net)
2360		return NOTIFY_DONE;
2361
2362	switch (event) {
2363	case NETDEV_DOWN:
2364		xfrm_flush_bundles();
2365	}
2366	return NOTIFY_DONE;
2367}
2368
2369static struct notifier_block xfrm_dev_notifier = {
2370	xfrm_dev_event,
2371	NULL,
2372	0
2373};
2374
2375#ifdef CONFIG_XFRM_STATISTICS
2376static int __init xfrm_statistics_init(void)
2377{
2378	if (snmp_mib_init((void **)xfrm_statistics,
2379			  sizeof(struct linux_xfrm_mib)) < 0)
2380		return -ENOMEM;
2381	return 0;
2382}
2383#endif
2384
2385static void __init xfrm_policy_init(void)
2386{
2387	unsigned int hmask, sz;
2388	int dir;
2389
2390	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2391					   sizeof(struct xfrm_dst),
2392					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2393					   NULL);
2394
2395	hmask = 8 - 1;
2396	sz = (hmask+1) * sizeof(struct hlist_head);
2397
2398	xfrm_policy_byidx = xfrm_hash_alloc(sz);
2399	xfrm_idx_hmask = hmask;
2400	if (!xfrm_policy_byidx)
2401		panic("XFRM: failed to allocate byidx hash\n");
2402
2403	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2404		struct xfrm_policy_hash *htab;
2405
2406		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2407
2408		htab = &xfrm_policy_bydst[dir];
2409		htab->table = xfrm_hash_alloc(sz);
2410		htab->hmask = hmask;
2411		if (!htab->table)
2412			panic("XFRM: failed to allocate bydst hash\n");
2413	}
2414
2415	for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++)
2416		INIT_LIST_HEAD(&xfrm_policy_bytype[dir]);
2417
2418	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2419	register_netdevice_notifier(&xfrm_dev_notifier);
2420}
2421
/* Top-level XFRM bring-up: optional statistics, state and policy
 * databases, input hooks, and the optional /proc interface.
 * NOTE(review): xfrm_statistics_init() can fail with -ENOMEM but the
 * return value is ignored here — confirm whether that is intentional. */
void __init xfrm_init(void)
{
#ifdef CONFIG_XFRM_STATISTICS
	xfrm_statistics_init();
#endif
	xfrm_state_init();
	xfrm_policy_init();
	xfrm_input_init();
#ifdef CONFIG_XFRM_STATISTICS
	xfrm_proc_init();
#endif
}
2433}
2434
2435#ifdef CONFIG_AUDITSYSCALL
2436static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2437					 struct audit_buffer *audit_buf)
2438{
2439	struct xfrm_sec_ctx *ctx = xp->security;
2440	struct xfrm_selector *sel = &xp->selector;
2441
2442	if (ctx)
2443		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2444				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2445
2446	switch(sel->family) {
2447	case AF_INET:
2448		audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2449				 NIPQUAD(sel->saddr.a4));
2450		if (sel->prefixlen_s != 32)
2451			audit_log_format(audit_buf, " src_prefixlen=%d",
2452					 sel->prefixlen_s);
2453		audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2454				 NIPQUAD(sel->daddr.a4));
2455		if (sel->prefixlen_d != 32)
2456			audit_log_format(audit_buf, " dst_prefixlen=%d",
2457					 sel->prefixlen_d);
2458		break;
2459	case AF_INET6:
2460		audit_log_format(audit_buf, " src=" NIP6_FMT,
2461				 NIP6(*(struct in6_addr *)sel->saddr.a6));
2462		if (sel->prefixlen_s != 128)
2463			audit_log_format(audit_buf, " src_prefixlen=%d",
2464					 sel->prefixlen_s);
2465		audit_log_format(audit_buf, " dst=" NIP6_FMT,
2466				 NIP6(*(struct in6_addr *)sel->daddr.a6));
2467		if (sel->prefixlen_d != 128)
2468			audit_log_format(audit_buf, " dst_prefixlen=%d",
2469					 sel->prefixlen_d);
2470		break;
2471	}
2472}
2473
2474void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2475			   u32 auid, u32 secid)
2476{
2477	struct audit_buffer *audit_buf;
2478
2479	audit_buf = xfrm_audit_start("SPD-add");
2480	if (audit_buf == NULL)
2481		return;
2482	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2483	audit_log_format(audit_buf, " res=%u", result);
2484	xfrm_audit_common_policyinfo(xp, audit_buf);
2485	audit_log_end(audit_buf);
2486}
2487EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2488
2489void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2490			      u32 auid, u32 secid)
2491{
2492	struct audit_buffer *audit_buf;
2493
2494	audit_buf = xfrm_audit_start("SPD-delete");
2495	if (audit_buf == NULL)
2496		return;
2497	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2498	audit_log_format(audit_buf, " res=%u", result);
2499	xfrm_audit_common_policyinfo(xp, audit_buf);
2500	audit_log_end(audit_buf);
2501}
2502EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2503#endif
2504
2505#ifdef CONFIG_XFRM_MIGRATE
2506static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2507				       struct xfrm_selector *sel_tgt)
2508{
2509	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2510		if (sel_tgt->family == sel_cmp->family &&
2511		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2512				  sel_cmp->family) == 0 &&
2513		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2514				  sel_cmp->family) == 0 &&
2515		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2516		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2517			return 1;
2518		}
2519	} else {
2520		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2521			return 1;
2522		}
2523	}
2524	return 0;
2525}
2526
/* Find the policy a MIGRATE request targets: first probe the exact
 * bydst hash for (sel, dir), then scan the inexact list, letting an
 * inexact entry win only if its priority is strictly better (lower)
 * than the hashed hit.  Returns the policy with a reference held, or
 * NULL when nothing of the requested type matches. */
static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
						     u8 dir, u8 type)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;	/* worst possible, so any hashed hit records its own */

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		/* Only override the hashed result with a strictly
		 * lower-numbered (= higher precedence) priority. */
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type &&
		    pol->priority < priority) {
			ret = pol;
			break;
		}
	}

	if (ret)
		xfrm_pol_hold(ret);

	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}
2562
2563static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2564{
2565	int match = 0;
2566
2567	if (t->mode == m->mode && t->id.proto == m->proto &&
2568	    (m->reqid == 0 || t->reqid == m->reqid)) {
2569		switch (t->mode) {
2570		case XFRM_MODE_TUNNEL:
2571		case XFRM_MODE_BEET:
2572			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2573					  m->old_family) == 0 &&
2574			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2575					  m->old_family) == 0) {
2576				match = 1;
2577			}
2578			break;
2579		case XFRM_MODE_TRANSPORT:
2580			/* in case of transport mode, template does not store
2581			   any IP addresses, hence we just compare mode and
2582			   protocol */
2583			match = 1;
2584			break;
2585		default:
2586			break;
2587		}
2588	}
2589	return match;
2590}
2591
/* Update the endpoint address(es) of the policy template(s) matched by
 * the migrate entries, and flush any bundles cached on the policy.
 * Returns 0 on success, -ENOENT if the policy died in the meantime,
 * -ENODATA when no template matched any entry. */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	struct dst_entry *dst;
	int i, j, n = 0;	/* n counts template/entry matches */

	write_lock_bh(&pol->lock);
	if (unlikely(pol->dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			/* Only tunnel/BEET templates carry addresses to
			 * rewrite; transport-mode matches just count. */
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles: they were built from the old
			 * addresses and are now stale */
			while ((dst = pol->bundles) != NULL) {
				pol->bundles = dst->next;
				dst_free(dst);
			}
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}
2636
2637static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2638{
2639	int i, j;
2640
2641	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2642		return -EINVAL;
2643
2644	for (i = 0; i < num_migrate; i++) {
2645		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2646				   m[i].old_family) == 0) &&
2647		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2648				   m[i].old_family) == 0))
2649			return -EINVAL;
2650		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2651		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2652			return -EINVAL;
2653
2654		/* check if there is any duplicated entry */
2655		for (j = i + 1; j < num_migrate; j++) {
2656			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2657				    sizeof(m[i].old_daddr)) &&
2658			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2659				    sizeof(m[i].old_saddr)) &&
2660			    m[i].proto == m[j].proto &&
2661			    m[i].mode == m[j].mode &&
2662			    m[i].reqid == m[j].reqid &&
2663			    m[i].old_family == m[j].old_family)
2664				return -EINVAL;
2665		}
2666	}
2667
2668	return 0;
2669}
2670
/* Migrate the policy selected by (sel, dir, type) and its matching
 * states to the new endpoints in m[0..num_migrate-1].
 *
 * Stages: (1) validate the request, (2) find the target policy,
 * (3) look up each current state and clone it to the new addresses,
 * (4) rewrite the policy templates, (5) drop and delete the superseded
 * states, (6) announce the migration to key managers.  A mid-way
 * failure jumps to restore_state, which deletes the half-created
 * replacement states and releases what was looked up so far. */
int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];	/* states being replaced */
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];	/* their migrated clones */
	struct xfrm_migrate *mp;

	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	/* Stage 1 - find policy */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	/* Roll back: release the policy and the states looked up in
	 * stage 2, and delete the freshly created replacement states. */
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
2734EXPORT_SYMBOL(xfrm_migrate);
2735#endif
2736