xfrm_policy.c revision d5654efd3ff1cd0baa935a0c9a5d89862f07d009
/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

int sysctl_xfrm_larval_drop __read_mostly = 1;

#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
EXPORT_SYMBOL(xfrm_statistics);
#endif

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_RWLOCK(xfrm_policy_lock);

static struct list_head xfrm_policy_all;
unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_count);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);

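/* The port checks below use the usual mask trick: (a ^ b) & mask is zero
 * iff a and b agree on every bit selected by the mask, so an all-zero
 * mask ("any port") always matches.
 */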
static inline int
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

static inline int
__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
		    unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return 0;
}

static inline struct dst_entry *__xfrm_dst_lookup(int tos,
						  xfrm_address_t *saddr,
						  xfrm_address_t *daddr,
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(tos, saddr, daddr);

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(tos, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
	}

	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

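/* Per-policy lifetime timer. A hard add/use expiry deletes the policy and
 * notifies the key managers; a soft expiry only warns them via
 * km_policy_expired() and re-checks after XFRM_KM_TIMEOUT. Otherwise the
 * timer is re-armed for the nearest pending deadline.
 */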
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->walk.dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}


/* Allocate xfrm_policy. Not used here; it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must have been released by this moment. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->walk.dead);

	BUG_ON(policy->bundles);

	if (del_timer(&policy->timer))
		BUG();

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(struct work_struct *work)
{
	struct xfrm_policy *policy;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_policy_gc_lock);
	gc_list.first = xfrm_policy_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
		xfrm_policy_gc_kill(policy);
}

/* Rule must be locked. Release descendant resources, announce
 * the entry dead. The rule must already be unlinked from all lists
 * by this moment.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->walk.dead;
	policy->walk.dead = 1;
	write_unlock_bh(&policy->lock);

	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	spin_lock_bh(&xfrm_policy_gc_lock);
	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}

struct xfrm_policy_hash {
	struct hlist_head	*table;
	unsigned int		hmask;
};

static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
static struct hlist_head *xfrm_policy_byidx __read_mostly;
static unsigned int xfrm_idx_hmask __read_mostly;
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

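/* Only selectors precise enough to hash (full-length address prefixes)
 * live in the per-direction bydst tables; everything else goes onto the
 * per-direction inexact list and is searched linearly. __sel_hash() is
 * expected to signal the inexact case by returning hmask + 1.
 */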
static inline unsigned int idx_hash(u32 index)
{
	return __idx_hash(index, xfrm_idx_hmask);
}

static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&xfrm_policy_inexact[dir] :
		xfrm_policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

	return xfrm_policy_bydst[dir].table + hash;
}

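/* Rehash one bydst chain into the new table. Entries that land in the
 * same new bucket are re-added right after the previously moved entry,
 * preserving their relative (priority) order; the "redo" loop restarts
 * for whatever went to other buckets.
 */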
static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			hlist_del(entry);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del(entry);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = entry;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

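/* The tables grow by doubling once the number of policies exceeds the
 * number of buckets, capped by xfrm_policy_hashmax. Resizing runs from
 * the xfrm_hash_work workqueue under hash_resize_mutex and takes
 * xfrm_policy_lock only around the actual transfer.
 */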
static void xfrm_bydst_resize(int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	xfrm_policy_bydst[dir].table = ndst;
	xfrm_policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = xfrm_policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	xfrm_policy_byidx = nidx;
	xfrm_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(int dir, int *total)
{
	unsigned int cnt = xfrm_policy_count[dir];
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = xfrm_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *__unused)
{
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(dir, &total))
			xfrm_bydst_resize(dir);
	}
	if (xfrm_byidx_should_resize(total))
		xfrm_byidx_resize(total);

	mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

/* Generate a new index... KAME seems to generate them ordered by cost
 * at the price of completely unpredictable rule ordering. That will not
 * do here: the low bits of the index encode the direction, which is why
 * the generator below advances in steps of 8.
 */
static u32 xfrm_gen_index(u8 type, int dir)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_node *entry;
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		list = xfrm_policy_byidx + idx_hash(idx);
		found = 0;
		hlist_for_each_entry(p, entry, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}

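/* Word-by-word selector comparison; this relies on struct xfrm_selector
 * being a multiple of u32 in size and fully initialized, making it
 * behave like memcmp() != 0.
 */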
static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

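/* Insert a policy into its bydst chain, which is kept ordered by
 * ascending priority. An existing policy of the same type with an
 * identical selector and security context is replaced (or the insert
 * fails with -EEXIST when excl is set); cached bundles of the policies
 * remaining behind it in the chain are flushed afterwards.
 */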
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *entry, *newpos;
	struct dst_entry *gc_list;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	xfrm_policy_count[dir]++;
	atomic_inc(&flow_cache_genid);
	if (delpol) {
		hlist_del(&delpol->bydst);
		hlist_del(&delpol->byidx);
		list_del(&delpol->walk.all);
		xfrm_policy_count[dir]--;
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	list_add(&policy->walk.all, &xfrm_policy_all);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);

	read_lock_bh(&xfrm_policy_lock);
	gc_list = NULL;
	entry = &policy->bydst;
	hlist_for_each_entry_continue(policy, entry, bydst) {
		struct dst_entry *dst;

		write_lock(&policy->lock);
		dst = policy->bundles;
		if (dst) {
			struct dst_entry *tail = dst;
			while (tail->next)
				tail = tail->next;
			tail->next = gc_list;
			gc_list = dst;

			policy->bundles = NULL;
		}
		write_unlock(&policy->lock);
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;

		gc_list = dst->next;
		dst_free(dst);
	}

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
					  struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == type &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				list_del(&pol->walk.all);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
				     int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = xfrm_policy_byidx + idx_hash(id);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, byidx) {
		if (pol->type == type && pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				list_del(&pol->walk.all);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->sessionid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif

int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	write_lock_bh(&xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i, killed;

		killed = 0;
	again1:
		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			hlist_del(&pol->bydst);
			hlist_del(&pol->byidx);
			list_del(&pol->walk.all);
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->sessionid,
						 audit_info->secid);

			xfrm_policy_kill(pol);
			killed++;

			write_lock_bh(&xfrm_policy_lock);
			goto again1;
		}

		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				list_del(&pol->walk.all);
				write_unlock_bh(&xfrm_policy_lock);

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				xfrm_policy_kill(pol);
				killed++;

				write_lock_bh(&xfrm_policy_lock);
				goto again2;
			}
		}

		xfrm_policy_count[dir] -= killed;
	}
	atomic_inc(&flow_cache_genid);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
	list_for_each_entry_from(x, &xfrm_policy_all, all) {
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	INIT_LIST_HEAD(&walk->walk.all);
	walk->walk.dead = 1;
	walk->type = type;
	walk->seq = 0;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
{
	if (list_empty(&walk->walk.all))
		return;

	write_lock_bh(&xfrm_policy_lock);
	list_del(&walk->walk.all);
	write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	struct xfrm_selector *sel = &pol->selector;
	int match, ret = -ESRCH;

	if (pol->family != family ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol->security, fl->secid,
						  dir);

	return ret;
}

static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	xfrm_address_t *daddr, *saddr;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;
	int err = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
	if (pol || err)
		goto end;
#endif
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
#ifdef CONFIG_XFRM_SUB_POLICY
end:
#endif
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
	return err;
}

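/* When the XFRM_POLICY_* and FLOW_DIR_* values happen to line up, the
 * first test is decided at compile time and the function reduces to a
 * plain return of dir.
 */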
static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		int err = 0;

		if (match) {
			err = security_xfrm_policy_lookup(pol->security,
						      fl->secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
						     pol->family, dir);

	list_add(&pol->walk.all, &xfrm_policy_all);
	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
	xfrm_policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del(&pol->bydst);
	hlist_del(&pol->byidx);
	list_del(&pol->walk.all);
	xfrm_policy_count[dir]--;

	return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		if (dir < XFRM_POLICY_MAX)
			atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}

static int
xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
		      struct xfrm_state **xfrm,
		      unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			family = tmpl->encap_family;
			if (xfrm_addr_any(local, family)) {
				error = xfrm_get_saddr(&tmp, remote, family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}
		else if (error == -ESRCH)
			error = -EAGAIN;

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx>=0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}

/* Find a bundle created earlier for this flow, if the policy has one
 * cached; the caller still has to validate it.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
	struct dst_entry *x;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EINVAL);
	x = afinfo->find_bundle(fl, policy);
	xfrm_policy_put_afinfo(afinfo);
	return x;
}

static inline int xfrm_get_tos(struct flowi *fl, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;

	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

static inline struct xfrm_dst *xfrm_alloc_dst(int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->fill_dst(xdst, dev);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

/* Allocate a chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... In short, bundle a bundle.
 */

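/* The result is a chain of xfrm_dsts, one per state, terminated by the
 * ordinary route: each xdst->route remembers the route used at that hop,
 * and dst0->path points at the final route so that stale_bundle() can
 * revalidate the whole construction later.
 */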
static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
					    struct flowi *fl,
					    struct dst_entry *dst)
{
	unsigned long now = jiffies;
	struct net_device *dev;
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
	int nfheader_len = 0;
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
		struct xfrm_dst *xdst = xfrm_alloc_dst(family);
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
					      family);
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
		xdst->genid = xfrm[i]->genid;

		dst1->obsolete = -1;
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
		dst1->output = xfrm[i]->outer_mode->afinfo->output;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	/* Copy neighbour for reachability confirmation */
	dst0->neighbour = neigh_clone(dst->neighbour);

	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

		err = xfrm_fill_dst(xdst, dev);
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}

static inline int
xfrm_dst_alloc_copy(void **target, void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}
	memcpy(*target, src, size);
	return 0;
}

static inline int
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static inline int
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		  struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols;
	int pol_dead;
	int xfrm_nr;
	int pi;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);

restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
		pols[pi] = NULL;
	npols = 0;
	pol_dead = 0;
	xfrm_nr = 0;

	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !xfrm_policy_count[XFRM_POLICY_OUT])
			goto nopol;

		policy = flow_cache_lookup(fl, dst_orig->ops->family,
					   dir, xfrm_policy_lookup);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy)
		goto nopol;

	family = dst_orig->ops->family;
	pols[0] = policy;
	npols++;
	xfrm_nr += pols[0]->xfrm_nr;

	err = -ENOENT;
	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
		goto error;

	policy->curlft.use_time = get_seconds();

	switch (policy->action) {
	default:
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;

	case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
		if (policy->xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}
#endif

		/* Try to find matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = PTR_ERR(dst);
			goto error;
		}

		if (dst)
			break;

#ifdef CONFIG_XFRM_SUB_POLICY
		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
							    fl, family,
							    XFRM_POLICY_OUT);
			if (pols[1]) {
				if (IS_ERR(pols[1])) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
					err = PTR_ERR(pols[1]);
					goto error;
				}
				if (pols[1]->action == XFRM_POLICY_BLOCK) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
					err = -EPERM;
					goto error;
				}
				npols++;
				xfrm_nr += pols[1]->xfrm_nr;
			}
		}

		/*
		 * Neither flowi nor bundle information knows about the
		 * transformation template size, so when more than one
		 * policy is in use we can only tell whether all of them
		 * are bypass after they have been searched. Note that the
		 * not-transformed bypass above is likewise guarded by the
		 * non-sub-policy configuration.
		 */
		if (xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

#endif
		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
				/* EREMOTE tells the caller to generate
				 * a one-shot blackhole route.
				 */
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				xfrm_pol_put(policy);
				return -EREMOTE;
			}
			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pols_put(pols, npols);
					goto restart;
				}
				err = nx;
			}
			if (err < 0) {
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				goto error;
			}
		}
		if (nx == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
		err = PTR_ERR(dst);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
			goto error;
		}

		for (pi = 0; pi < npols; pi++) {
			read_lock_bh(&pols[pi]->lock);
			pol_dead |= pols[pi]->walk.dead;
			read_unlock_bh(&pols[pi]->lock);
		}

		write_lock_bh(&policy->lock);
		if (unlikely(pol_dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist a new bundle on a dead
			 * object. We can't enlist stale bundles either.
			 */
			write_unlock_bh(&policy->lock);
			dst_free(dst);

			if (pol_dead)
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
			else
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = -EHOSTUNREACH;
			goto error;
		}

		if (npols > 1)
			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
		else
			err = xfrm_dst_update_origin(dst, fl);
		if (unlikely(err)) {
			write_unlock_bh(&policy->lock);
			dst_free(dst);
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			goto error;
		}

		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pols_put(pols, npols);
	return 0;

error:
	xfrm_pols_put(pols, npols);
dropdst:
	dst_release(dst_orig);
	*dst_p = NULL;
	return err;

nopol:
	err = -ENOENT;
	if (flags & XFRM_LOOKUP_ICMP)
		goto dropdst;
	return 0;
}
EXPORT_SYMBOL(__xfrm_lookup);

int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	int err = __xfrm_lookup(dst_p, fl, sk, flags);

	if (err == -EREMOTE) {
		dst_release(*dst_p);
		*dst_p = NULL;
		err = -EAGAIN;
	}

	return err;
}
EXPORT_SYMBOL(xfrm_lookup);

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}

/* When the skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have their policy cached.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * 0 or a positive value is returned when validation succeeds (either a
 * bypass because of an optional transport-mode template, or the next
 * index after the secpath state matched against the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}

int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl, reverse);
	err = security_xfrm_decode_session(skb, &fl->secid);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(__xfrm_decode_session);

static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

		for (i=skb->sp->len-1; i>=0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol)
		pol = flow_cache_lookup(&fl, family, fl_dir,
					xfrm_policy_lookup);

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct flowi fl;

	if (xfrm_decode_session(skb, &fl, family) < 0) {
		/* XXX: we should have something like FWDHDRERROR here. */
		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to "-1" to force all XFRM destinations to get validated by
	 * dst_ops->check on every use.  We do this because when a
	 * normal route referenced by an XFRM dst is obsoleted we do
	 * not go looking around for all parent referencing XFRM dsts
	 * so that we can invalidate them.  It is just too much work.
	 * Instead we make the checks here on every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to a
	 * positive non-zero integer.  If an XFRM dst has been pruned
	 * like this, we want to force a new route lookup.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = dev_net(dev)->loopback_dev;
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

2110static void xfrm_link_failure(struct sk_buff *skb)
2111{
2112	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
2113	return;
2114}
2115
2116static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2117{
2118	if (dst && dst->obsolete) {
2119		dst_release(dst);
2120		dst = NULL;
2121	}
2124	return dst;
2125}
2126
2127static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
2128{
2129	struct dst_entry *dst, **dstp;
2130
2131	write_lock(&pol->lock);
2132	dstp = &pol->bundles;
2133	while ((dst = *dstp) != NULL) {
2134		if (func(dst)) {
2135			*dstp = dst->next;
2136			dst->next = *gc_list_p;
2137			*gc_list_p = dst;
2138		} else {
2139			dstp = &dst->next;
2140		}
2141	}
2142	write_unlock(&pol->lock);
2143}
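
/* prune_one_bundle() uses the classic pointer-to-pointer deletion
 * idiom: dstp always points at the link (either pol->bundles itself or
 * the previous dst's ->next) that leads to the current entry, so an
 * entry can be unlinked in place without tracking a "prev" node:
 *
 *	*dstp = dst->next;	- splice the entry out of the list
 *	dst->next = *gc_list_p;	- push it onto the local gc list
 *	*gc_list_p = dst;
 */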
2144
2145static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
2146{
2147	struct dst_entry *gc_list = NULL;
2148	int dir;
2149
2150	read_lock_bh(&xfrm_policy_lock);
2151	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2152		struct xfrm_policy *pol;
2153		struct hlist_node *entry;
2154		struct hlist_head *table;
2155		int i;
2156
2157		hlist_for_each_entry(pol, entry,
2158				     &xfrm_policy_inexact[dir], bydst)
2159			prune_one_bundle(pol, func, &gc_list);
2160
2161		table = xfrm_policy_bydst[dir].table;
2162		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
2163			hlist_for_each_entry(pol, entry, table + i, bydst)
2164				prune_one_bundle(pol, func, &gc_list);
2165		}
2166	}
2167	read_unlock_bh(&xfrm_policy_lock);
2168
2169	while (gc_list) {
2170		struct dst_entry *dst = gc_list;
2171		gc_list = dst->next;
2172		dst_free(dst);
2173	}
2174}
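
/* Note that matching bundles are only collected onto gc_list while
 * xfrm_policy_lock and each pol->lock are held; the dst_free() calls
 * happen afterwards, presumably to keep destructor work out of the
 * locked sections.
 */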
2175
2176static int unused_bundle(struct dst_entry *dst)
2177{
2178	return !atomic_read(&dst->__refcnt);
2179}
2180
2181static void __xfrm_garbage_collect(void)
2182{
2183	xfrm_prune_bundles(unused_bundle);
2184}
2185
2186static int xfrm_flush_bundles(void)
2187{
2188	xfrm_prune_bundles(stale_bundle);
2189	return 0;
2190}
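
/* Two predicates drive xfrm_prune_bundles():
 *   unused_bundle - the refcount has dropped to zero; used by
 *                   __xfrm_garbage_collect() for routine GC.
 *   stale_bundle  - the bundle no longer validates via
 *                   xfrm_bundle_ok(); used by xfrm_flush_bundles()
 *                   when a device goes down (see xfrm_dev_event()).
 */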
2191
2192static void xfrm_init_pmtu(struct dst_entry *dst)
2193{
2194	do {
2195		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2196		u32 pmtu, route_mtu_cached;
2197
2198		pmtu = dst_mtu(dst->child);
2199		xdst->child_mtu_cached = pmtu;
2200
2201		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2202
2203		route_mtu_cached = dst_mtu(xdst->route);
2204		xdst->route_mtu_cached = route_mtu_cached;
2205
2206		if (pmtu > route_mtu_cached)
2207			pmtu = route_mtu_cached;
2208
2209		dst->metrics[RTAX_MTU-1] = pmtu;
2210	} while ((dst = dst->next));
2211}
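
/* Illustrative numbers (not from the source): with a child route MTU
 * of 1500 and an ESP state whose overhead makes xfrm_state_mtu()
 * return 1438, a cached route MTU of 1400 would win the comparison
 * above and the bundle's RTAX_MTU would be set to 1400.
 */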
2212
2213/* Check that the bundle accepts the flow and its components are
2214 * still valid.
2215 */
2216
2217int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2218		struct flowi *fl, int family, int strict)
2219{
2220	struct dst_entry *dst = &first->u.dst;
2221	struct xfrm_dst *last;
2222	u32 mtu;
2223
2224	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2225	    (dst->dev && !netif_running(dst->dev)))
2226		return 0;
2227#ifdef CONFIG_XFRM_SUB_POLICY
2228	if (fl) {
2229		if (first->origin && !flow_cache_uli_match(first->origin, fl))
2230			return 0;
2231		if (first->partner &&
2232		    !xfrm_selector_match(first->partner, fl, family))
2233			return 0;
2234	}
2235#endif
2236
2237	last = NULL;
2238
2239	do {
2240		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2241
2242		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2243			return 0;
2244		if (fl && pol &&
2245		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2246			return 0;
2247		if (dst->xfrm->km.state != XFRM_STATE_VALID)
2248			return 0;
2249		if (xdst->genid != dst->xfrm->genid)
2250			return 0;
2251
2252		if (strict && fl &&
2253		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2254		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2255			return 0;
2256
2257		mtu = dst_mtu(dst->child);
2258		if (xdst->child_mtu_cached != mtu) {
2259			last = xdst;
2260			xdst->child_mtu_cached = mtu;
2261		}
2262
2263		if (!dst_check(xdst->route, xdst->route_cookie))
2264			return 0;
2265		mtu = dst_mtu(xdst->route);
2266		if (xdst->route_mtu_cached != mtu) {
2267			last = xdst;
2268			xdst->route_mtu_cached = mtu;
2269		}
2270
2271		dst = dst->child;
2272	} while (dst->xfrm);
2273
2274	if (likely(!last))
2275		return 1;
2276
2277	mtu = last->child_mtu_cached;
2278	for (;;) {
2279		dst = &last->u.dst;
2280
2281		mtu = xfrm_state_mtu(dst->xfrm, mtu);
2282		if (mtu > last->route_mtu_cached)
2283			mtu = last->route_mtu_cached;
2284		dst->metrics[RTAX_MTU-1] = mtu;
2285
2286		if (last == first)
2287			break;
2288
2289		last = (struct xfrm_dst *)last->u.dst.next;
2290		last->child_mtu_cached = mtu;
2291	}
2292
2293	return 1;
2294}
2295
2296EXPORT_SYMBOL(xfrm_bundle_ok);
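
/* xfrm_bundle_ok() handles MTU in two phases: the downward walk above
 * refreshes the cached child/route MTUs and remembers in "last" the
 * deepest xfrm_dst whose cache changed; only if something changed does
 * the final loop walk back up from "last" to "first", re-applying
 * xfrm_state_mtu() and the route MTU cap at each level, mirroring
 * xfrm_init_pmtu().
 */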
2297
2298int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2299{
2300	int err = 0;
2301	if (unlikely(afinfo == NULL))
2302		return -EINVAL;
2303	if (unlikely(afinfo->family >= NPROTO))
2304		return -EAFNOSUPPORT;
2305	write_lock_bh(&xfrm_policy_afinfo_lock);
2306	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2307		err = -ENOBUFS;
2308	else {
2309		struct dst_ops *dst_ops = afinfo->dst_ops;
2310		if (likely(dst_ops->kmem_cachep == NULL))
2311			dst_ops->kmem_cachep = xfrm_dst_cache;
2312		if (likely(dst_ops->check == NULL))
2313			dst_ops->check = xfrm_dst_check;
2314		if (likely(dst_ops->negative_advice == NULL))
2315			dst_ops->negative_advice = xfrm_negative_advice;
2316		if (likely(dst_ops->link_failure == NULL))
2317			dst_ops->link_failure = xfrm_link_failure;
2318		if (likely(afinfo->garbage_collect == NULL))
2319			afinfo->garbage_collect = __xfrm_garbage_collect;
2320		xfrm_policy_afinfo[afinfo->family] = afinfo;
2321	}
2322	write_unlock_bh(&xfrm_policy_afinfo_lock);
2323	return err;
2324}
2325EXPORT_SYMBOL(xfrm_policy_register_afinfo);
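
/* Typical usage, sketched after the address-family modules (the names
 * follow the IPv4 module but are illustrative here):
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family		= AF_INET,
 *		.dst_ops	= &xfrm4_dst_ops,
 *		.dst_lookup	= xfrm4_dst_lookup,
 *		...
 *	};
 *
 *	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 *
 * Any dst_ops hooks the module leaves NULL are filled in above with
 * the generic xfrm versions.
 */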
2326
2327int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2328{
2329	int err = 0;
2330	if (unlikely(afinfo == NULL))
2331		return -EINVAL;
2332	if (unlikely(afinfo->family >= NPROTO))
2333		return -EAFNOSUPPORT;
2334	write_lock_bh(&xfrm_policy_afinfo_lock);
2335	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2336		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2337			err = -EINVAL;
2338		else {
2339			struct dst_ops *dst_ops = afinfo->dst_ops;
2340			xfrm_policy_afinfo[afinfo->family] = NULL;
2341			dst_ops->kmem_cachep = NULL;
2342			dst_ops->check = NULL;
2343			dst_ops->negative_advice = NULL;
2344			dst_ops->link_failure = NULL;
2345			afinfo->garbage_collect = NULL;
2346		}
2347	}
2348	write_unlock_bh(&xfrm_policy_afinfo_lock);
2349	return err;
2350}
2351EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2352
2353static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2354{
2355	struct xfrm_policy_afinfo *afinfo;
2356	if (unlikely(family >= NPROTO))
2357		return NULL;
2358	read_lock(&xfrm_policy_afinfo_lock);
2359	afinfo = xfrm_policy_afinfo[family];
2360	if (unlikely(!afinfo))
2361		read_unlock(&xfrm_policy_afinfo_lock);
2362	return afinfo;
2363}
2364
2365static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2366{
2367	read_unlock(&xfrm_policy_afinfo_lock);
2368}
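
/* Note the asymmetric locking: xfrm_policy_get_afinfo() returns with
 * xfrm_policy_afinfo_lock read-held on success (and drops it itself on
 * failure), so every successful get must be paired with
 * xfrm_policy_put_afinfo(), which performs the read_unlock.  This
 * keeps the afinfo from being unregistered while a caller is using it.
 */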
2369
2370static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2371{
2372	struct net_device *dev = ptr;
2373
2374	if (!net_eq(dev_net(dev), &init_net))
2375		return NOTIFY_DONE;
2376
2377	switch (event) {
2378	case NETDEV_DOWN:
2379		xfrm_flush_bundles();
2380	}
2381	return NOTIFY_DONE;
2382}
2383
2384static struct notifier_block xfrm_dev_notifier = {
2385	.notifier_call	= xfrm_dev_event,
2386	.next		= NULL,
2387	.priority	= 0,
2388};
2389
2390#ifdef CONFIG_XFRM_STATISTICS
2391static int __init xfrm_statistics_init(void)
2392{
2393	if (snmp_mib_init((void **)xfrm_statistics,
2394			  sizeof(struct linux_xfrm_mib)) < 0)
2395		return -ENOMEM;
2396	return 0;
2397}
2398#endif
2399
2400static void __init xfrm_policy_init(void)
2401{
2402	unsigned int hmask, sz;
2403	int dir;
2404
2405	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2406					   sizeof(struct xfrm_dst),
2407					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2408					   NULL);
2409
2410	hmask = 8 - 1;
2411	sz = (hmask+1) * sizeof(struct hlist_head);
2412
2413	xfrm_policy_byidx = xfrm_hash_alloc(sz);
2414	xfrm_idx_hmask = hmask;
2415	if (!xfrm_policy_byidx)
2416		panic("XFRM: failed to allocate byidx hash\n");
2417
2418	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2419		struct xfrm_policy_hash *htab;
2420
2421		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2422
2423		htab = &xfrm_policy_bydst[dir];
2424		htab->table = xfrm_hash_alloc(sz);
2425		htab->hmask = hmask;
2426		if (!htab->table)
2427			panic("XFRM: failed to allocate bydst hash\n");
2428	}
2429
2430	INIT_LIST_HEAD(&xfrm_policy_all);
2431	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2432	register_netdevice_notifier(&xfrm_dev_notifier);
2433}
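
/* Sizing arithmetic: hmask = 8 - 1 gives hmask + 1 = 8 initial buckets
 * per table, i.e. sz = 8 * sizeof(struct hlist_head) bytes each, for
 * the byidx table plus one bydst table per direction.  With
 * XFRM_POLICY_MAX == 3 (IN/OUT/FWD) and the *2 covering the per-socket
 * policy slots, that is 6 bydst tables; they can grow later via
 * rehashing.
 */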
2434
2435void __init xfrm_init(void)
2436{
2437#ifdef CONFIG_XFRM_STATISTICS
2438	xfrm_statistics_init();
2439#endif
2440	xfrm_state_init();
2441	xfrm_policy_init();
2442	xfrm_input_init();
2443#ifdef CONFIG_XFRM_STATISTICS
2444	xfrm_proc_init();
2445#endif
2446}
2447
2448#ifdef CONFIG_AUDITSYSCALL
2449static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2450					 struct audit_buffer *audit_buf)
2451{
2452	struct xfrm_sec_ctx *ctx = xp->security;
2453	struct xfrm_selector *sel = &xp->selector;
2454
2455	if (ctx)
2456		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2457				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2458
2459	switch (sel->family) {
2460	case AF_INET:
2461		audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2462				 NIPQUAD(sel->saddr.a4));
2463		if (sel->prefixlen_s != 32)
2464			audit_log_format(audit_buf, " src_prefixlen=%d",
2465					 sel->prefixlen_s);
2466		audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2467				 NIPQUAD(sel->daddr.a4));
2468		if (sel->prefixlen_d != 32)
2469			audit_log_format(audit_buf, " dst_prefixlen=%d",
2470					 sel->prefixlen_d);
2471		break;
2472	case AF_INET6:
2473		audit_log_format(audit_buf, " src=" NIP6_FMT,
2474				 NIP6(*(struct in6_addr *)sel->saddr.a6));
2475		if (sel->prefixlen_s != 128)
2476			audit_log_format(audit_buf, " src_prefixlen=%d",
2477					 sel->prefixlen_s);
2478		audit_log_format(audit_buf, " dst=" NIP6_FMT,
2479				 NIP6(*(struct in6_addr *)sel->daddr.a6));
2480		if (sel->prefixlen_d != 128)
2481			audit_log_format(audit_buf, " dst_prefixlen=%d",
2482					 sel->prefixlen_d);
2483		break;
2484	}
2485}
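
/* Given the format strings above, an AF_INET selector yields audit
 * fields of the form (values illustrative, not from the source):
 *
 *	sec_alg=1 sec_doi=1 sec_obj=system_u:object_r:ipsec_spd_t:s0
 *	src=192.168.1.0 src_prefixlen=24 dst=10.0.0.1
 *
 * where the prefixlen fields are omitted for full-length (host)
 * prefixes, as the checks above show.
 */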
2486
2487void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2488			   uid_t auid, u32 sessionid, u32 secid)
2489{
2490	struct audit_buffer *audit_buf;
2491
2492	audit_buf = xfrm_audit_start("SPD-add");
2493	if (audit_buf == NULL)
2494		return;
2495	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2496	audit_log_format(audit_buf, " res=%u", result);
2497	xfrm_audit_common_policyinfo(xp, audit_buf);
2498	audit_log_end(audit_buf);
2499}
2500EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2501
2502void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2503			      uid_t auid, u32 sessionid, u32 secid)
2504{
2505	struct audit_buffer *audit_buf;
2506
2507	audit_buf = xfrm_audit_start("SPD-delete");
2508	if (audit_buf == NULL)
2509		return;
2510	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2511	audit_log_format(audit_buf, " res=%u", result);
2512	xfrm_audit_common_policyinfo(xp, audit_buf);
2513	audit_log_end(audit_buf);
2514}
2515EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2516#endif
2517
2518#ifdef CONFIG_XFRM_MIGRATE
2519static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2520				       struct xfrm_selector *sel_tgt)
2521{
2522	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2523		if (sel_tgt->family == sel_cmp->family &&
2524		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2525				  sel_cmp->family) == 0 &&
2526		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2527				  sel_cmp->family) == 0 &&
2528		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2529		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2530			return 1;
2531		}
2532	} else {
2533		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2534			return 1;
2535		}
2536	}
2537	return 0;
2538}
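
/* In other words: with sel_cmp->proto == IPSEC_ULPROTO_ANY only the
 * address/prefix/family part of the selector has to line up, so a
 * wildcard-protocol migrate request can match policies for any upper
 * layer protocol; otherwise the two selectors must be byte-for-byte
 * identical.
 */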
2539
2540static struct xfrm_policy *xfrm_migrate_policy_find(struct xfrm_selector *sel,
2541						     u8 dir, u8 type)
2542{
2543	struct xfrm_policy *pol, *ret = NULL;
2544	struct hlist_node *entry;
2545	struct hlist_head *chain;
2546	u32 priority = ~0U;
2547
2548	read_lock_bh(&xfrm_policy_lock);
2549	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2550	hlist_for_each_entry(pol, entry, chain, bydst) {
2551		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2552		    pol->type == type) {
2553			ret = pol;
2554			priority = ret->priority;
2555			break;
2556		}
2557	}
2558	chain = &xfrm_policy_inexact[dir];
2559	hlist_for_each_entry(pol, entry, chain, bydst) {
2560		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2561		    pol->type == type &&
2562		    pol->priority < priority) {
2563			ret = pol;
2564			break;
2565		}
2566	}
2567
2568	if (ret)
2569		xfrm_pol_hold(ret);
2570
2571	read_unlock_bh(&xfrm_policy_lock);
2572
2573	return ret;
2574}
2575
2576static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2577{
2578	int match = 0;
2579
2580	if (t->mode == m->mode && t->id.proto == m->proto &&
2581	    (m->reqid == 0 || t->reqid == m->reqid)) {
2582		switch (t->mode) {
2583		case XFRM_MODE_TUNNEL:
2584		case XFRM_MODE_BEET:
2585			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2586					  m->old_family) == 0 &&
2587			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2588					  m->old_family) == 0) {
2589				match = 1;
2590			}
2591			break;
2592		case XFRM_MODE_TRANSPORT:
2593			/* In transport mode the template does not store
2594			   any IP addresses, so we just compare mode and
2595			   protocol. */
2596			match = 1;
2597			break;
2598		default:
2599			break;
2600		}
2601	}
2602	return match;
2603}
2604
2605/* update endpoint address(es) of template(s) */
2606static int xfrm_policy_migrate(struct xfrm_policy *pol,
2607			       struct xfrm_migrate *m, int num_migrate)
2608{
2609	struct xfrm_migrate *mp;
2610	struct dst_entry *dst;
2611	int i, j, n = 0;
2612
2613	write_lock_bh(&pol->lock);
2614	if (unlikely(pol->walk.dead)) {
2615		/* target policy has been deleted */
2616		write_unlock_bh(&pol->lock);
2617		return -ENOENT;
2618	}
2619
2620	for (i = 0; i < pol->xfrm_nr; i++) {
2621		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2622			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2623				continue;
2624			n++;
2625			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2626			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2627				continue;
2628			/* update endpoints */
2629			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2630			       sizeof(pol->xfrm_vec[i].id.daddr));
2631			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2632			       sizeof(pol->xfrm_vec[i].saddr));
2633			pol->xfrm_vec[i].encap_family = mp->new_family;
2634			/* flush bundles */
2635			while ((dst = pol->bundles) != NULL) {
2636				pol->bundles = dst->next;
2637				dst_free(dst);
2638			}
2639		}
2640	}
2641
2642	write_unlock_bh(&pol->lock);
2643
2644	if (!n)
2645		return -ENODATA;
2646
2647	return 0;
2648}
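
/* The bundle flush above matters because pol->bundles caches routes
 * built for the old endpoint addresses; once id.daddr/saddr have been
 * rewritten, those cached dsts would steer traffic to the stale
 * endpoints, so they are freed here and rebuilt on the next lookup.
 */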
2649
2650static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2651{
2652	int i, j;
2653
2654	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2655		return -EINVAL;
2656
2657	for (i = 0; i < num_migrate; i++) {
2658		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2659				   m[i].old_family) == 0) &&
2660		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2661				   m[i].old_family) == 0))
2662			return -EINVAL;
2663		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2664		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2665			return -EINVAL;
2666
2667		/* check for duplicated entries */
2668		for (j = i + 1; j < num_migrate; j++) {
2669			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2670				    sizeof(m[i].old_daddr)) &&
2671			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2672				    sizeof(m[i].old_saddr)) &&
2673			    m[i].proto == m[j].proto &&
2674			    m[i].mode == m[j].mode &&
2675			    m[i].reqid == m[j].reqid &&
2676			    m[i].old_family == m[j].old_family)
2677				return -EINVAL;
2678		}
2679	}
2680
2681	return 0;
2682}
2683
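/* xfrm_migrate() runs in five stages (mirrored by the comments below):
 * validate the request, find the policy, clone each matching old state
 * to its new address via xfrm_state_migrate(), rewrite the policy
 * templates, then delete the old states and announce via km_migrate().
 * On failure the restore_state path deletes only the freshly created
 * x_new states; the original x_cur states are put but left installed.
 */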
2684int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2685		 struct xfrm_migrate *m, int num_migrate,
2686		 struct xfrm_kmaddress *k)
2687{
2688	int i, err, nx_cur = 0, nx_new = 0;
2689	struct xfrm_policy *pol = NULL;
2690	struct xfrm_state *x, *xc;
2691	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2692	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2693	struct xfrm_migrate *mp;
2694
2695	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2696		goto out;
2697
2698	/* Stage 1 - find policy */
2699	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2700		err = -ENOENT;
2701		goto out;
2702	}
2703
2704	/* Stage 2 - find and update state(s) */
2705	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2706		if ((x = xfrm_migrate_state_find(mp))) {
2707			x_cur[nx_cur] = x;
2708			nx_cur++;
2709			if ((xc = xfrm_state_migrate(x, mp))) {
2710				x_new[nx_new] = xc;
2711				nx_new++;
2712			} else {
2713				err = -ENODATA;
2714				goto restore_state;
2715			}
2716		}
2717	}
2718
2719	/* Stage 3 - update policy */
2720	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2721		goto restore_state;
2722
2723	/* Stage 4 - delete old state(s) */
2724	if (nx_cur) {
2725		xfrm_states_put(x_cur, nx_cur);
2726		xfrm_states_delete(x_cur, nx_cur);
2727	}
2728
2729	/* Stage 5 - announce */
2730	km_migrate(sel, dir, type, m, num_migrate, k);
2731
2732	xfrm_pol_put(pol);
2733
2734	return 0;
2735out:
2736	return err;
2737
2738restore_state:
2739	if (pol)
2740		xfrm_pol_put(pol);
2741	if (nx_cur)
2742		xfrm_states_put(x_cur, nx_cur);
2743	if (nx_new)
2744		xfrm_states_delete(x_new, nx_new);
2745
2746	return err;
2747}
2748EXPORT_SYMBOL(xfrm_migrate);
2749#endif
2750