xfrm_policy.c revision ea2dea9dacc256fe927857feb423872051642ae7
1/*
2 * xfrm_policy.c
3 *
4 * Changes:
5 *	Mitsuru KANDA @USAGI
6 * 	Kazunori MIYAZAWA @USAGI
7 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * 		IPv6 support
9 * 	Kazunori MIYAZAWA @USAGI
10 * 	YOSHIFUJI Hideaki
11 * 		Split up af-specific portion
12 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
13 *
14 */
15
16#include <linux/err.h>
17#include <linux/slab.h>
18#include <linux/kmod.h>
19#include <linux/list.h>
20#include <linux/spinlock.h>
21#include <linux/workqueue.h>
22#include <linux/notifier.h>
23#include <linux/netdevice.h>
24#include <linux/netfilter.h>
25#include <linux/module.h>
26#include <linux/cache.h>
27#include <linux/audit.h>
28#include <net/dst.h>
29#include <net/xfrm.h>
30#include <net/ip.h>
31#ifdef CONFIG_XFRM_STATISTICS
32#include <net/snmp.h>
33#endif
34
35#include "xfrm_hash.h"
36
37DEFINE_MUTEX(xfrm_cfg_mutex);
38EXPORT_SYMBOL(xfrm_cfg_mutex);
39
40static DEFINE_RWLOCK(xfrm_policy_lock);
41
42static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
43static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
44
45static struct kmem_cache *xfrm_dst_cache __read_mostly;
46
47static HLIST_HEAD(xfrm_policy_gc_list);
48static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
49
50static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
51static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
52static void xfrm_init_pmtu(struct dst_entry *dst);
53
54static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
55						int dir);
56
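/* Match a flow against a policy selector: addresses are compared under the
 * selector prefix lengths, ports under the port masks, and protocol and
 * output interface only when the selector constrains them.
 */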
57static inline int
58__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
59{
60	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
61		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
62		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
63		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
64		(fl->proto == sel->proto || !sel->proto) &&
65		(fl->oif == sel->ifindex || !sel->ifindex);
66}
67
68static inline int
69__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
70{
71	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
72		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
73		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
74		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
75		(fl->proto == sel->proto || !sel->proto) &&
76		(fl->oif == sel->ifindex || !sel->ifindex);
77}
78
79int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
80		    unsigned short family)
81{
82	switch (family) {
83	case AF_INET:
84		return __xfrm4_selector_match(sel, fl);
85	case AF_INET6:
86		return __xfrm6_selector_match(sel, fl);
87	}
88	return 0;
89}
90
91static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
92						  xfrm_address_t *saddr,
93						  xfrm_address_t *daddr,
94						  int family)
95{
96	struct xfrm_policy_afinfo *afinfo;
97	struct dst_entry *dst;
98
99	afinfo = xfrm_policy_get_afinfo(family);
100	if (unlikely(afinfo == NULL))
101		return ERR_PTR(-EAFNOSUPPORT);
102
103	dst = afinfo->dst_lookup(net, tos, saddr, daddr);
104
105	xfrm_policy_put_afinfo(afinfo);
106
107	return dst;
108}
109
110static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
111						xfrm_address_t *prev_saddr,
112						xfrm_address_t *prev_daddr,
113						int family)
114{
115	struct net *net = xs_net(x);
116	xfrm_address_t *saddr = &x->props.saddr;
117	xfrm_address_t *daddr = &x->id.daddr;
118	struct dst_entry *dst;
119
120	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
121		saddr = x->coaddr;
122		daddr = prev_daddr;
123	}
124	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
125		saddr = prev_saddr;
126		daddr = x->coaddr;
127	}
128
129	dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);
130
131	if (!IS_ERR(dst)) {
132		if (prev_saddr != saddr)
133			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
134		if (prev_daddr != daddr)
135			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
136	}
137
138	return dst;
139}
140
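/* Convert a timeout in seconds to jiffies, clamped just below
 * MAX_SCHEDULE_TIMEOUT.
 */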
141static inline unsigned long make_jiffies(long secs)
142{
143	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
144		return MAX_SCHEDULE_TIMEOUT-1;
145	else
146		return secs*HZ;
147}
148
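/* Per-policy lifetime timer: a soft expiry notifies the key manager via
 * km_policy_expired(), a hard expiry deletes the policy outright.
 */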
149static void xfrm_policy_timer(unsigned long data)
150{
151	struct xfrm_policy *xp = (struct xfrm_policy*)data;
152	unsigned long now = get_seconds();
153	long next = LONG_MAX;
154	int warn = 0;
155	int dir;
156
157	read_lock(&xp->lock);
158
159	if (unlikely(xp->walk.dead))
160		goto out;
161
162	dir = xfrm_policy_id2dir(xp->index);
163
164	if (xp->lft.hard_add_expires_seconds) {
165		long tmo = xp->lft.hard_add_expires_seconds +
166			xp->curlft.add_time - now;
167		if (tmo <= 0)
168			goto expired;
169		if (tmo < next)
170			next = tmo;
171	}
172	if (xp->lft.hard_use_expires_seconds) {
173		long tmo = xp->lft.hard_use_expires_seconds +
174			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
175		if (tmo <= 0)
176			goto expired;
177		if (tmo < next)
178			next = tmo;
179	}
180	if (xp->lft.soft_add_expires_seconds) {
181		long tmo = xp->lft.soft_add_expires_seconds +
182			xp->curlft.add_time - now;
183		if (tmo <= 0) {
184			warn = 1;
185			tmo = XFRM_KM_TIMEOUT;
186		}
187		if (tmo < next)
188			next = tmo;
189	}
190	if (xp->lft.soft_use_expires_seconds) {
191		long tmo = xp->lft.soft_use_expires_seconds +
192			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
193		if (tmo <= 0) {
194			warn = 1;
195			tmo = XFRM_KM_TIMEOUT;
196		}
197		if (tmo < next)
198			next = tmo;
199	}
200
201	if (warn)
202		km_policy_expired(xp, dir, 0, 0);
203	if (next != LONG_MAX &&
204	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
205		xfrm_pol_hold(xp);
206
207out:
208	read_unlock(&xp->lock);
209	xfrm_pol_put(xp);
210	return;
211
212expired:
213	read_unlock(&xp->lock);
214	if (!xfrm_policy_delete(xp, dir))
215		km_policy_expired(xp, dir, 1, 0);
216	xfrm_pol_put(xp);
217}
218
219
220/* Allocate an xfrm_policy. Not used here; it is intended for use by
221 * pfkeyv2 SPD calls.
222 */
223
224struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
225{
226	struct xfrm_policy *policy;
227
228	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
229
230	if (policy) {
231		write_pnet(&policy->xp_net, net);
232		INIT_LIST_HEAD(&policy->walk.all);
233		INIT_HLIST_NODE(&policy->bydst);
234		INIT_HLIST_NODE(&policy->byidx);
235		rwlock_init(&policy->lock);
236		atomic_set(&policy->refcnt, 1);
237		setup_timer(&policy->timer, xfrm_policy_timer,
238				(unsigned long)policy);
239	}
240	return policy;
241}
242EXPORT_SYMBOL(xfrm_policy_alloc);
243
244/* Destroy xfrm_policy: descendant resources must have been released by this point. */
245
246void xfrm_policy_destroy(struct xfrm_policy *policy)
247{
248	BUG_ON(!policy->walk.dead);
249
250	BUG_ON(policy->bundles);
251
252	if (del_timer(&policy->timer))
253		BUG();
254
255	security_xfrm_policy_free(policy->security);
256	kfree(policy);
257}
258EXPORT_SYMBOL(xfrm_policy_destroy);
259
260static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
261{
262	struct dst_entry *dst;
263
264	while ((dst = policy->bundles) != NULL) {
265		policy->bundles = dst->next;
266		dst_free(dst);
267	}
268
269	if (del_timer(&policy->timer))
270		atomic_dec(&policy->refcnt);
271
272	if (atomic_read(&policy->refcnt) > 1)
273		flow_cache_flush();
274
275	xfrm_pol_put(policy);
276}
277
278static void xfrm_policy_gc_task(struct work_struct *work)
279{
280	struct xfrm_policy *policy;
281	struct hlist_node *entry, *tmp;
282	struct hlist_head gc_list;
283
284	spin_lock_bh(&xfrm_policy_gc_lock);
285	gc_list.first = xfrm_policy_gc_list.first;
286	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
287	spin_unlock_bh(&xfrm_policy_gc_lock);
288
289	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
290		xfrm_policy_gc_kill(policy);
291}
292static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
293
294/* Rule must be locked. Release descendant resources, announce
295 * the entry dead. The rule must already be unlinked from all lists.
296 */
297
298static void xfrm_policy_kill(struct xfrm_policy *policy)
299{
300	policy->walk.dead = 1;
301
302	spin_lock_bh(&xfrm_policy_gc_lock);
303	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
304	spin_unlock_bh(&xfrm_policy_gc_lock);
305
306	schedule_work(&xfrm_policy_gc_work);
307}
308
309static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
310
311static inline unsigned int idx_hash(struct net *net, u32 index)
312{
313	return __idx_hash(index, net->xfrm.policy_idx_hmask);
314}
315
316static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir)
317{
318	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
319	unsigned int hash = __sel_hash(sel, family, hmask);
320
321	return (hash == hmask + 1 ?
322		&net->xfrm.policy_inexact[dir] :
323		net->xfrm.policy_bydst[dir].table + hash);
324}
325
326static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
327{
328	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
329	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
330
331	return net->xfrm.policy_bydst[dir].table + hash;
332}
333
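/* Rehash one chain into the new bydst table. Entries that land in the same
 * new bucket are re-linked one after another, preserving their relative
 * order and thus their lookup precedence.
 */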
334static void xfrm_dst_hash_transfer(struct hlist_head *list,
335				   struct hlist_head *ndsttable,
336				   unsigned int nhashmask)
337{
338	struct hlist_node *entry, *tmp, *entry0 = NULL;
339	struct xfrm_policy *pol;
340	unsigned int h0 = 0;
341
342redo:
343	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
344		unsigned int h;
345
346		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
347				pol->family, nhashmask);
348		if (!entry0) {
349			hlist_del(entry);
350			hlist_add_head(&pol->bydst, ndsttable+h);
351			h0 = h;
352		} else {
353			if (h != h0)
354				continue;
355			hlist_del(entry);
356			hlist_add_after(entry0, &pol->bydst);
357		}
358		entry0 = entry;
359	}
360	if (!hlist_empty(list)) {
361		entry0 = NULL;
362		goto redo;
363	}
364}
365
366static void xfrm_idx_hash_transfer(struct hlist_head *list,
367				   struct hlist_head *nidxtable,
368				   unsigned int nhashmask)
369{
370	struct hlist_node *entry, *tmp;
371	struct xfrm_policy *pol;
372
373	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
374		unsigned int h;
375
376		h = __idx_hash(pol->index, nhashmask);
377		hlist_add_head(&pol->byidx, nidxtable+h);
378	}
379}
380
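/* Double the table size: the new mask is 2 * (old_hmask + 1) - 1. */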
381static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
382{
383	return ((old_hmask + 1) << 1) - 1;
384}
385
386static void xfrm_bydst_resize(struct net *net, int dir)
387{
388	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
389	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
390	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
391	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
392	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
393	int i;
394
395	if (!ndst)
396		return;
397
398	write_lock_bh(&xfrm_policy_lock);
399
400	for (i = hmask; i >= 0; i--)
401		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
402
403	net->xfrm.policy_bydst[dir].table = ndst;
404	net->xfrm.policy_bydst[dir].hmask = nhashmask;
405
406	write_unlock_bh(&xfrm_policy_lock);
407
408	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
409}
410
411static void xfrm_byidx_resize(struct net *net, int total)
412{
413	unsigned int hmask = net->xfrm.policy_idx_hmask;
414	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
415	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
416	struct hlist_head *oidx = net->xfrm.policy_byidx;
417	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
418	int i;
419
420	if (!nidx)
421		return;
422
423	write_lock_bh(&xfrm_policy_lock);
424
425	for (i = hmask; i >= 0; i--)
426		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
427
428	net->xfrm.policy_byidx = nidx;
429	net->xfrm.policy_idx_hmask = nhashmask;
430
431	write_unlock_bh(&xfrm_policy_lock);
432
433	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
434}
435
436static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
437{
438	unsigned int cnt = net->xfrm.policy_count[dir];
439	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
440
441	if (total)
442		*total += cnt;
443
444	if ((hmask + 1) < xfrm_policy_hashmax &&
445	    cnt > hmask)
446		return 1;
447
448	return 0;
449}
450
451static inline int xfrm_byidx_should_resize(struct net *net, int total)
452{
453	unsigned int hmask = net->xfrm.policy_idx_hmask;
454
455	if ((hmask + 1) < xfrm_policy_hashmax &&
456	    total > hmask)
457		return 1;
458
459	return 0;
460}
461
462void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
463{
464	read_lock_bh(&xfrm_policy_lock);
465	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
466	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
467	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
468	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
469	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
470	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
471	si->spdhcnt = net->xfrm.policy_idx_hmask;
472	si->spdhmcnt = xfrm_policy_hashmax;
473	read_unlock_bh(&xfrm_policy_lock);
474}
475EXPORT_SYMBOL(xfrm_spd_getinfo);
476
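/* Hash tables are resized from a workqueue so policy insertion never pays
 * the rehash cost inline; hash_resize_mutex serializes the resizers.
 */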
477static DEFINE_MUTEX(hash_resize_mutex);
478static void xfrm_hash_resize(struct work_struct *work)
479{
480	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
481	int dir, total;
482
483	mutex_lock(&hash_resize_mutex);
484
485	total = 0;
486	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
487		if (xfrm_bydst_should_resize(net, dir, &total))
488			xfrm_bydst_resize(net, dir);
489	}
490	if (xfrm_byidx_should_resize(net, total))
491		xfrm_byidx_resize(net, total);
492
493	mutex_unlock(&hash_resize_mutex);
494}
495
496/* Generate a new index... KAME seems to generate them ordered by cost,
497 * at the price of absolutely unpredictable rule ordering. This will not pass. */
498static u32 xfrm_gen_index(struct net *net, int dir)
499{
500	static u32 idx_generator;
501
502	for (;;) {
503		struct hlist_node *entry;
504		struct hlist_head *list;
505		struct xfrm_policy *p;
506		u32 idx;
507		int found;
508
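		/* Indices advance in steps of 8, leaving the low bits free to
		 * encode the direction; xfrm_policy_id2dir() recovers it.
		 */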
509		idx = (idx_generator | dir);
510		idx_generator += 8;
511		if (idx == 0)
512			idx = 8;
513		list = net->xfrm.policy_byidx + idx_hash(net, idx);
514		found = 0;
515		hlist_for_each_entry(p, entry, list, byidx) {
516			if (p->index == idx) {
517				found = 1;
518				break;
519			}
520		}
521		if (!found)
522			return idx;
523	}
524}
525
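/* Word-by-word comparison of two selectors; returns 0 when identical. */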
526static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
527{
528	u32 *p1 = (u32 *) s1;
529	u32 *p2 = (u32 *) s2;
530	int len = sizeof(struct xfrm_selector) / sizeof(u32);
531	int i;
532
533	for (i = 0; i < len; i++) {
534		if (p1[i] != p2[i])
535			return 1;
536	}
537
538	return 0;
539}
540
541int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
542{
543	struct net *net = xp_net(policy);
544	struct xfrm_policy *pol;
545	struct xfrm_policy *delpol;
546	struct hlist_head *chain;
547	struct hlist_node *entry, *newpos;
548	struct dst_entry *gc_list;
549	u32 mark = policy->mark.v & policy->mark.m;
550
551	write_lock_bh(&xfrm_policy_lock);
552	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
553	delpol = NULL;
554	newpos = NULL;
555	hlist_for_each_entry(pol, entry, chain, bydst) {
556		if (pol->type == policy->type &&
557		    !selector_cmp(&pol->selector, &policy->selector) &&
558		    (mark & pol->mark.m) == pol->mark.v &&
559		    xfrm_sec_ctx_match(pol->security, policy->security) &&
560		    !WARN_ON(delpol)) {
561			if (excl) {
562				write_unlock_bh(&xfrm_policy_lock);
563				return -EEXIST;
564			}
565			delpol = pol;
566			if (policy->priority > pol->priority)
567				continue;
568		} else if (policy->priority >= pol->priority) {
569			newpos = &pol->bydst;
570			continue;
571		}
572		if (delpol)
573			break;
574	}
575	if (newpos)
576		hlist_add_after(newpos, &policy->bydst);
577	else
578		hlist_add_head(&policy->bydst, chain);
579	xfrm_pol_hold(policy);
580	net->xfrm.policy_count[dir]++;
581	atomic_inc(&flow_cache_genid);
582	if (delpol)
583		__xfrm_policy_unlink(delpol, dir);
584	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
585	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
586	policy->curlft.add_time = get_seconds();
587	policy->curlft.use_time = 0;
588	if (!mod_timer(&policy->timer, jiffies + HZ))
589		xfrm_pol_hold(policy);
590	list_add(&policy->walk.all, &net->xfrm.policy_all);
591	write_unlock_bh(&xfrm_policy_lock);
592
593	if (delpol)
594		xfrm_policy_kill(delpol);
595	else if (xfrm_bydst_should_resize(net, dir, NULL))
596		schedule_work(&net->xfrm.policy_hash_work);
597
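	/* The new policy may shadow lower-precedence entries further down
	 * the chain, so drop any bundles those policies have cached.
	 */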
598	read_lock_bh(&xfrm_policy_lock);
599	gc_list = NULL;
600	entry = &policy->bydst;
601	hlist_for_each_entry_continue(policy, entry, bydst) {
602		struct dst_entry *dst;
603
604		write_lock(&policy->lock);
605		dst = policy->bundles;
606		if (dst) {
607			struct dst_entry *tail = dst;
608			while (tail->next)
609				tail = tail->next;
610			tail->next = gc_list;
611			gc_list = dst;
612
613			policy->bundles = NULL;
614		}
615		write_unlock(&policy->lock);
616	}
617	read_unlock_bh(&xfrm_policy_lock);
618
619	while (gc_list) {
620		struct dst_entry *dst = gc_list;
621
622		gc_list = dst->next;
623		dst_free(dst);
624	}
625
626	return 0;
627}
628EXPORT_SYMBOL(xfrm_policy_insert);
629
630struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
631					  int dir, struct xfrm_selector *sel,
632					  struct xfrm_sec_ctx *ctx, int delete,
633					  int *err)
634{
635	struct xfrm_policy *pol, *ret;
636	struct hlist_head *chain;
637	struct hlist_node *entry;
638
639	*err = 0;
640	write_lock_bh(&xfrm_policy_lock);
641	chain = policy_hash_bysel(net, sel, sel->family, dir);
642	ret = NULL;
643	hlist_for_each_entry(pol, entry, chain, bydst) {
644		if (pol->type == type &&
645		    (mark & pol->mark.m) == pol->mark.v &&
646		    !selector_cmp(sel, &pol->selector) &&
647		    xfrm_sec_ctx_match(ctx, pol->security)) {
648			xfrm_pol_hold(pol);
649			if (delete) {
650				*err = security_xfrm_policy_delete(
651								pol->security);
652				if (*err) {
653					write_unlock_bh(&xfrm_policy_lock);
654					return pol;
655				}
656				__xfrm_policy_unlink(pol, dir);
657			}
658			ret = pol;
659			break;
660		}
661	}
662	write_unlock_bh(&xfrm_policy_lock);
663
664	if (ret && delete) {
665		atomic_inc(&flow_cache_genid);
666		xfrm_policy_kill(ret);
667	}
668	return ret;
669}
670EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
671
672struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
673				     int dir, u32 id, int delete, int *err)
674{
675	struct xfrm_policy *pol, *ret;
676	struct hlist_head *chain;
677	struct hlist_node *entry;
678
679	*err = -ENOENT;
680	if (xfrm_policy_id2dir(id) != dir)
681		return NULL;
682
683	*err = 0;
684	write_lock_bh(&xfrm_policy_lock);
685	chain = net->xfrm.policy_byidx + idx_hash(net, id);
686	ret = NULL;
687	hlist_for_each_entry(pol, entry, chain, byidx) {
688		if (pol->type == type && pol->index == id &&
689		    (mark & pol->mark.m) == pol->mark.v) {
690			xfrm_pol_hold(pol);
691			if (delete) {
692				*err = security_xfrm_policy_delete(
693								pol->security);
694				if (*err) {
695					write_unlock_bh(&xfrm_policy_lock);
696					return pol;
697				}
698				__xfrm_policy_unlink(pol, dir);
699			}
700			ret = pol;
701			break;
702		}
703	}
704	write_unlock_bh(&xfrm_policy_lock);
705
706	if (ret && delete) {
707		atomic_inc(&flow_cache_genid);
708		xfrm_policy_kill(ret);
709	}
710	return ret;
711}
712EXPORT_SYMBOL(xfrm_policy_byid);
713
714#ifdef CONFIG_SECURITY_NETWORK_XFRM
715static inline int
716xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
717{
718	int dir, err = 0;
719
720	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
721		struct xfrm_policy *pol;
722		struct hlist_node *entry;
723		int i;
724
725		hlist_for_each_entry(pol, entry,
726				     &net->xfrm.policy_inexact[dir], bydst) {
727			if (pol->type != type)
728				continue;
729			err = security_xfrm_policy_delete(pol->security);
730			if (err) {
731				xfrm_audit_policy_delete(pol, 0,
732							 audit_info->loginuid,
733							 audit_info->sessionid,
734							 audit_info->secid);
735				return err;
736			}
737		}
738		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
739			hlist_for_each_entry(pol, entry,
740					     net->xfrm.policy_bydst[dir].table + i,
741					     bydst) {
742				if (pol->type != type)
743					continue;
744				err = security_xfrm_policy_delete(
745								pol->security);
746				if (err) {
747					xfrm_audit_policy_delete(pol, 0,
748							audit_info->loginuid,
749							audit_info->sessionid,
750							audit_info->secid);
751					return err;
752				}
753			}
754		}
755	}
756	return err;
757}
758#else
759static inline int
760xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
761{
762	return 0;
763}
764#endif
765
766int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
767{
768	int dir, err = 0, cnt = 0;
769
770	write_lock_bh(&xfrm_policy_lock);
771
772	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
773	if (err)
774		goto out;
775
776	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
777		struct xfrm_policy *pol;
778		struct hlist_node *entry;
779		int i;
780
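	/* Unlink one policy at a time, dropping the lock to audit and kill
	 * it, then rescan from the top since the chain may have changed.
	 */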
781	again1:
782		hlist_for_each_entry(pol, entry,
783				     &net->xfrm.policy_inexact[dir], bydst) {
784			if (pol->type != type)
785				continue;
786			__xfrm_policy_unlink(pol, dir);
787			write_unlock_bh(&xfrm_policy_lock);
788			cnt++;
789
790			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
791						 audit_info->sessionid,
792						 audit_info->secid);
793
794			xfrm_policy_kill(pol);
795
796			write_lock_bh(&xfrm_policy_lock);
797			goto again1;
798		}
799
800		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
801	again2:
802			hlist_for_each_entry(pol, entry,
803					     net->xfrm.policy_bydst[dir].table + i,
804					     bydst) {
805				if (pol->type != type)
806					continue;
807				__xfrm_policy_unlink(pol, dir);
808				write_unlock_bh(&xfrm_policy_lock);
809				cnt++;
810
811				xfrm_audit_policy_delete(pol, 1,
812							 audit_info->loginuid,
813							 audit_info->sessionid,
814							 audit_info->secid);
815				xfrm_policy_kill(pol);
816
817				write_lock_bh(&xfrm_policy_lock);
818				goto again2;
819			}
820		}
821
822	}
823	if (!cnt)
824		err = -ESRCH;
825	atomic_inc(&flow_cache_genid);
826out:
827	write_unlock_bh(&xfrm_policy_lock);
828	return err;
829}
830EXPORT_SYMBOL(xfrm_policy_flush);
831
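/* Resumable policy dump: the walker entry stays linked on
 * net->xfrm.policy_all between calls, so a walk interrupted by func()
 * returning an error (e.g. a full dump buffer) resumes where it stopped.
 */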
832int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
833		     int (*func)(struct xfrm_policy *, int, int, void*),
834		     void *data)
835{
836	struct xfrm_policy *pol;
837	struct xfrm_policy_walk_entry *x;
838	int error = 0;
839
840	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
841	    walk->type != XFRM_POLICY_TYPE_ANY)
842		return -EINVAL;
843
844	if (list_empty(&walk->walk.all) && walk->seq != 0)
845		return 0;
846
847	write_lock_bh(&xfrm_policy_lock);
848	if (list_empty(&walk->walk.all))
849		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
850	else
851		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
852	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
853		if (x->dead)
854			continue;
855		pol = container_of(x, struct xfrm_policy, walk);
856		if (walk->type != XFRM_POLICY_TYPE_ANY &&
857		    walk->type != pol->type)
858			continue;
859		error = func(pol, xfrm_policy_id2dir(pol->index),
860			     walk->seq, data);
861		if (error) {
862			list_move_tail(&walk->walk.all, &x->all);
863			goto out;
864		}
865		walk->seq++;
866	}
867	if (walk->seq == 0) {
868		error = -ENOENT;
869		goto out;
870	}
871	list_del_init(&walk->walk.all);
872out:
873	write_unlock_bh(&xfrm_policy_lock);
874	return error;
875}
876EXPORT_SYMBOL(xfrm_policy_walk);
877
878void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
879{
880	INIT_LIST_HEAD(&walk->walk.all);
881	walk->walk.dead = 1;
882	walk->type = type;
883	walk->seq = 0;
884}
885EXPORT_SYMBOL(xfrm_policy_walk_init);
886
887void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
888{
889	if (list_empty(&walk->walk.all))
890		return;
891
892	write_lock_bh(&xfrm_policy_lock);
893	list_del(&walk->walk.all);
894	write_unlock_bh(&xfrm_policy_lock);
895}
896EXPORT_SYMBOL(xfrm_policy_walk_done);
897
898/*
899 * Find the policy to apply to this flow.
900 *
901 * Returns 0 if a policy matches, otherwise a negative errno.
902 */
903static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
904			     u8 type, u16 family, int dir)
905{
906	struct xfrm_selector *sel = &pol->selector;
907	int match, ret = -ESRCH;
908
909	if (pol->family != family ||
910	    (fl->mark & pol->mark.m) != pol->mark.v ||
911	    pol->type != type)
912		return ret;
913
914	match = xfrm_selector_match(sel, fl, family);
915	if (match)
916		ret = security_xfrm_policy_lookup(pol->security, fl->secid,
917						  dir);
918
919	return ret;
920}
921
922static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
923						     struct flowi *fl,
924						     u16 family, u8 dir)
925{
926	int err;
927	struct xfrm_policy *pol, *ret;
928	xfrm_address_t *daddr, *saddr;
929	struct hlist_node *entry;
930	struct hlist_head *chain;
931	u32 priority = ~0U;
932
933	daddr = xfrm_flowi_daddr(fl, family);
934	saddr = xfrm_flowi_saddr(fl, family);
935	if (unlikely(!daddr || !saddr))
936		return NULL;
937
938	read_lock_bh(&xfrm_policy_lock);
939	chain = policy_hash_direct(net, daddr, saddr, family, dir);
940	ret = NULL;
941	hlist_for_each_entry(pol, entry, chain, bydst) {
942		err = xfrm_policy_match(pol, fl, type, family, dir);
943		if (err) {
944			if (err == -ESRCH)
945				continue;
946			else {
947				ret = ERR_PTR(err);
948				goto fail;
949			}
950		} else {
951			ret = pol;
952			priority = ret->priority;
953			break;
954		}
955	}
956	chain = &net->xfrm.policy_inexact[dir];
957	hlist_for_each_entry(pol, entry, chain, bydst) {
958		err = xfrm_policy_match(pol, fl, type, family, dir);
959		if (err) {
960			if (err == -ESRCH)
961				continue;
962			else {
963				ret = ERR_PTR(err);
964				goto fail;
965			}
966		} else if (pol->priority < priority) {
967			ret = pol;
968			break;
969		}
970	}
971	if (ret)
972		xfrm_pol_hold(ret);
973fail:
974	read_unlock_bh(&xfrm_policy_lock);
975
976	return ret;
977}
978
979static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
980			      u8 dir, void **objp, atomic_t **obj_refp)
981{
982	struct xfrm_policy *pol;
983	int err = 0;
984
985#ifdef CONFIG_XFRM_SUB_POLICY
986	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
987	if (IS_ERR(pol)) {
988		err = PTR_ERR(pol);
989		pol = NULL;
990	}
991	if (pol || err)
992		goto end;
993#endif
994	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
995	if (IS_ERR(pol)) {
996		err = PTR_ERR(pol);
997		pol = NULL;
998	}
999#ifdef CONFIG_XFRM_SUB_POLICY
1000end:
1001#endif
1002	if ((*objp = (void *) pol) != NULL)
1003		*obj_refp = &pol->refcnt;
1004	return err;
1005}
1006
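/* XFRM_POLICY_* and FLOW_DIR_* normally share values, in which case this
 * collapses to an identity mapping at compile time; the switch is only a
 * fallback.
 */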
1007static inline int policy_to_flow_dir(int dir)
1008{
1009	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1010	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1011	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
1012		return dir;
1013	switch (dir) {
1014	default:
1015	case XFRM_POLICY_IN:
1016		return FLOW_DIR_IN;
1017	case XFRM_POLICY_OUT:
1018		return FLOW_DIR_OUT;
1019	case XFRM_POLICY_FWD:
1020		return FLOW_DIR_FWD;
1021	}
1022}
1023
1024static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
1025{
1026	struct xfrm_policy *pol;
1027
1028	read_lock_bh(&xfrm_policy_lock);
1029	if ((pol = sk->sk_policy[dir]) != NULL) {
1030		int match = xfrm_selector_match(&pol->selector, fl,
1031						sk->sk_family);
1032		int err = 0;
1033
1034		if (match) {
1035			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
1036				pol = NULL;
1037				goto out;
1038			}
1039			err = security_xfrm_policy_lookup(pol->security,
1040						      fl->secid,
1041						      policy_to_flow_dir(dir));
1042			if (!err)
1043				xfrm_pol_hold(pol);
1044			else if (err == -ESRCH)
1045				pol = NULL;
1046			else
1047				pol = ERR_PTR(err);
1048		} else
1049			pol = NULL;
1050	}
1051out:
1052	read_unlock_bh(&xfrm_policy_lock);
1053	return pol;
1054}
1055
1056static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1057{
1058	struct net *net = xp_net(pol);
1059	struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
1060						     pol->family, dir);
1061
1062	list_add(&pol->walk.all, &net->xfrm.policy_all);
1063	hlist_add_head(&pol->bydst, chain);
1064	hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
1065	net->xfrm.policy_count[dir]++;
1066	xfrm_pol_hold(pol);
1067
1068	if (xfrm_bydst_should_resize(net, dir, NULL))
1069		schedule_work(&net->xfrm.policy_hash_work);
1070}
1071
1072static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1073						int dir)
1074{
1075	struct net *net = xp_net(pol);
1076
1077	if (hlist_unhashed(&pol->bydst))
1078		return NULL;
1079
1080	hlist_del(&pol->bydst);
1081	hlist_del(&pol->byidx);
1082	list_del(&pol->walk.all);
1083	net->xfrm.policy_count[dir]--;
1084
1085	return pol;
1086}
1087
1088int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1089{
1090	write_lock_bh(&xfrm_policy_lock);
1091	pol = __xfrm_policy_unlink(pol, dir);
1092	write_unlock_bh(&xfrm_policy_lock);
1093	if (pol) {
1094		if (dir < XFRM_POLICY_MAX)
1095			atomic_inc(&flow_cache_genid);
1096		xfrm_policy_kill(pol);
1097		return 0;
1098	}
1099	return -ENOENT;
1100}
1101EXPORT_SYMBOL(xfrm_policy_delete);
1102
1103int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1104{
1105	struct net *net = xp_net(pol);
1106	struct xfrm_policy *old_pol;
1107
1108#ifdef CONFIG_XFRM_SUB_POLICY
1109	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1110		return -EINVAL;
1111#endif
1112
1113	write_lock_bh(&xfrm_policy_lock);
1114	old_pol = sk->sk_policy[dir];
1115	sk->sk_policy[dir] = pol;
1116	if (pol) {
1117		pol->curlft.add_time = get_seconds();
1118		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir);
1119		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1120	}
1121	if (old_pol)
1122		/* Unlinking always succeeds. This is the only function
1123		 * allowed to delete or replace a socket policy.
1124		 */
1125		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1126	write_unlock_bh(&xfrm_policy_lock);
1127
1128	if (old_pol) {
1129		xfrm_policy_kill(old_pol);
1130	}
1131	return 0;
1132}
1133
1134static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
1135{
1136	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1137
1138	if (newp) {
1139		newp->selector = old->selector;
1140		if (security_xfrm_policy_clone(old->security,
1141					       &newp->security)) {
1142			kfree(newp);
1143			return NULL;  /* ENOMEM */
1144		}
1145		newp->lft = old->lft;
1146		newp->curlft = old->curlft;
1147		newp->mark = old->mark;
1148		newp->action = old->action;
1149		newp->flags = old->flags;
1150		newp->xfrm_nr = old->xfrm_nr;
1151		newp->index = old->index;
1152		newp->type = old->type;
1153		memcpy(newp->xfrm_vec, old->xfrm_vec,
1154		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1155		write_lock_bh(&xfrm_policy_lock);
1156		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1157		write_unlock_bh(&xfrm_policy_lock);
1158		xfrm_pol_put(newp);
1159	}
1160	return newp;
1161}
1162
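/* Duplicate the per-socket policies onto a freshly cloned socket,
 * e.g. a child socket created by accept().
 */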
1163int __xfrm_sk_clone_policy(struct sock *sk)
1164{
1165	struct xfrm_policy *p0 = sk->sk_policy[0],
1166			   *p1 = sk->sk_policy[1];
1167
1168	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1169	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1170		return -ENOMEM;
1171	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1172		return -ENOMEM;
1173	return 0;
1174}
1175
1176static int
1177xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1178	       unsigned short family)
1179{
1180	int err;
1181	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1182
1183	if (unlikely(afinfo == NULL))
1184		return -EINVAL;
1185	err = afinfo->get_saddr(net, local, remote);
1186	xfrm_policy_put_afinfo(afinfo);
1187	return err;
1188}
1189
1190/* Resolve list of templates for the flow, given policy. */
1191
1192static int
1193xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
1194		      struct xfrm_state **xfrm,
1195		      unsigned short family)
1196{
1197	struct net *net = xp_net(policy);
1198	int nx;
1199	int i, error;
1200	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1201	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1202	xfrm_address_t tmp;
1203
1204	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1205		struct xfrm_state *x;
1206		xfrm_address_t *remote = daddr;
1207		xfrm_address_t *local  = saddr;
1208		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1209
1210		if (tmpl->mode == XFRM_MODE_TUNNEL ||
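		/* Tunnel and BEET templates supply their own endpoint
		 * addresses; a wildcard local address is filled in via a
		 * route lookup.
		 */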
1211		    tmpl->mode == XFRM_MODE_BEET) {
1212			remote = &tmpl->id.daddr;
1213			local = &tmpl->saddr;
1214			family = tmpl->encap_family;
1215			if (xfrm_addr_any(local, family)) {
1216				error = xfrm_get_saddr(net, &tmp, remote, family);
1217				if (error)
1218					goto fail;
1219				local = &tmp;
1220			}
1221		}
1222
1223		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1224
1225		if (x && x->km.state == XFRM_STATE_VALID) {
1226			xfrm[nx++] = x;
1227			daddr = remote;
1228			saddr = local;
1229			continue;
1230		}
1231		if (x) {
1232			error = (x->km.state == XFRM_STATE_ERROR ?
1233				 -EINVAL : -EAGAIN);
1234			xfrm_state_put(x);
1235		}
1236		else if (error == -ESRCH)
1237			error = -EAGAIN;
1238
1239		if (!tmpl->optional)
1240			goto fail;
1241	}
1242	return nx;
1243
1244fail:
1245	for (nx--; nx>=0; nx--)
1246		xfrm_state_put(xfrm[nx]);
1247	return error;
1248}
1249
1250static int
1251xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1252		  struct xfrm_state **xfrm,
1253		  unsigned short family)
1254{
1255	struct xfrm_state *tp[XFRM_MAX_DEPTH];
1256	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1257	int cnx = 0;
1258	int error;
1259	int ret;
1260	int i;
1261
1262	for (i = 0; i < npols; i++) {
1263		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1264			error = -ENOBUFS;
1265			goto fail;
1266		}
1267
1268		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1269		if (ret < 0) {
1270			error = ret;
1271			goto fail;
1272		} else
1273			cnx += ret;
1274	}
1275
1276	/* found states are sorted for outbound processing */
1277	if (npols > 1)
1278		xfrm_state_sort(xfrm, tpp, cnx, family);
1279
1280	return cnx;
1281
1282 fail:
1283	for (cnx--; cnx>=0; cnx--)
1284		xfrm_state_put(tpp[cnx]);
1285	return error;
1286
1287}
1288
1289/* Check that the bundle accepts the flow and that its components
1290 * are still valid.
1291 */
1292
1293static struct dst_entry *
1294xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1295{
1296	struct dst_entry *x;
1297	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1298	if (unlikely(afinfo == NULL))
1299		return ERR_PTR(-EINVAL);
1300	x = afinfo->find_bundle(fl, policy);
1301	xfrm_policy_put_afinfo(afinfo);
1302	return x;
1303}
1304
1305static inline int xfrm_get_tos(struct flowi *fl, int family)
1306{
1307	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1308	int tos;
1309
1310	if (!afinfo)
1311		return -EINVAL;
1312
1313	tos = afinfo->get_tos(fl);
1314
1315	xfrm_policy_put_afinfo(afinfo);
1316
1317	return tos;
1318}
1319
1320static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1321{
1322	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1323	struct dst_ops *dst_ops;
1324	struct xfrm_dst *xdst;
1325
1326	if (!afinfo)
1327		return ERR_PTR(-EINVAL);
1328
1329	switch (family) {
1330	case AF_INET:
1331		dst_ops = &net->xfrm.xfrm4_dst_ops;
1332		break;
1333#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1334	case AF_INET6:
1335		dst_ops = &net->xfrm.xfrm6_dst_ops;
1336		break;
1337#endif
1338	default:
1339		BUG();
1340	}
1341	xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
1342
1343	xfrm_policy_put_afinfo(afinfo);
1344
1345	return xdst;
1346}
1347
1348static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1349				 int nfheader_len)
1350{
1351	struct xfrm_policy_afinfo *afinfo =
1352		xfrm_policy_get_afinfo(dst->ops->family);
1353	int err;
1354
1355	if (!afinfo)
1356		return -EINVAL;
1357
1358	err = afinfo->init_path(path, dst, nfheader_len);
1359
1360	xfrm_policy_put_afinfo(afinfo);
1361
1362	return err;
1363}
1364
1365static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1366				struct flowi *fl)
1367{
1368	struct xfrm_policy_afinfo *afinfo =
1369		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1370	int err;
1371
1372	if (!afinfo)
1373		return -EINVAL;
1374
1375	err = afinfo->fill_dst(xdst, dev, fl);
1376
1377	xfrm_policy_put_afinfo(afinfo);
1378
1379	return err;
1380}
1381
1382/* Allocate a chain of dst_entry's, attach known xfrms, and calculate
1383 * all the metrics... In short, bundle a bundle.
1384 */
1385
1386static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1387					    struct xfrm_state **xfrm, int nx,
1388					    struct flowi *fl,
1389					    struct dst_entry *dst)
1390{
1391	struct net *net = xp_net(policy);
1392	unsigned long now = jiffies;
1393	struct net_device *dev;
1394	struct dst_entry *dst_prev = NULL;
1395	struct dst_entry *dst0 = NULL;
1396	int i = 0;
1397	int err;
1398	int header_len = 0;
1399	int nfheader_len = 0;
1400	int trailer_len = 0;
1401	int tos;
1402	int family = policy->selector.family;
1403	xfrm_address_t saddr, daddr;
1404
1405	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1406
1407	tos = xfrm_get_tos(fl, family);
1408	err = tos;
1409	if (tos < 0)
1410		goto put_states;
1411
1412	dst_hold(dst);
1413
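	/* Build the bundle outermost transform first: each xfrm_dst's
	 * ->child points one step inward, and the innermost child is the
	 * original route.
	 */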
1414	for (; i < nx; i++) {
1415		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1416		struct dst_entry *dst1 = &xdst->u.dst;
1417
1418		err = PTR_ERR(xdst);
1419		if (IS_ERR(xdst)) {
1420			dst_release(dst);
1421			goto put_states;
1422		}
1423
1424		if (!dst_prev)
1425			dst0 = dst1;
1426		else {
1427			dst_prev->child = dst_clone(dst1);
1428			dst1->flags |= DST_NOHASH;
1429		}
1430
1431		xdst->route = dst;
1432		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1433
1434		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1435			family = xfrm[i]->props.family;
1436			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
1437					      family);
1438			err = PTR_ERR(dst);
1439			if (IS_ERR(dst))
1440				goto put_states;
1441		} else
1442			dst_hold(dst);
1443
1444		dst1->xfrm = xfrm[i];
1445		xdst->genid = xfrm[i]->genid;
1446
1447		dst1->obsolete = -1;
1448		dst1->flags |= DST_HOST;
1449		dst1->lastuse = now;
1450
1451		dst1->input = dst_discard;
1452		dst1->output = xfrm[i]->outer_mode->afinfo->output;
1453
1454		dst1->next = dst_prev;
1455		dst_prev = dst1;
1456
1457		header_len += xfrm[i]->props.header_len;
1458		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1459			nfheader_len += xfrm[i]->props.header_len;
1460		trailer_len += xfrm[i]->props.trailer_len;
1461	}
1462
1463	dst_prev->child = dst;
1464	dst0->path = dst;
1465
1466	err = -ENODEV;
1467	dev = dst->dev;
1468	if (!dev)
1469		goto free_dst;
1470
1471	/* Copy neighbour for reachability confirmation */
1472	dst0->neighbour = neigh_clone(dst->neighbour);
1473
1474	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1475	xfrm_init_pmtu(dst_prev);
1476
1477	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1478		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1479
1480		err = xfrm_fill_dst(xdst, dev, fl);
1481		if (err)
1482			goto free_dst;
1483
1484		dst_prev->header_len = header_len;
1485		dst_prev->trailer_len = trailer_len;
1486		header_len -= xdst->u.dst.xfrm->props.header_len;
1487		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1488	}
1489
1490out:
1491	return dst0;
1492
1493put_states:
1494	for (; i < nx; i++)
1495		xfrm_state_put(xfrm[i]);
1496free_dst:
1497	if (dst0)
1498		dst_free(dst0);
1499	dst0 = ERR_PTR(err);
1500	goto out;
1501}
1502
1503static inline int
1504xfrm_dst_alloc_copy(void **target, void *src, int size)
1505{
1506	if (!*target) {
1507		*target = kmalloc(size, GFP_ATOMIC);
1508		if (!*target)
1509			return -ENOMEM;
1510	}
1511	memcpy(*target, src, size);
1512	return 0;
1513}
1514
1515static inline int
1516xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
1517{
1518#ifdef CONFIG_XFRM_SUB_POLICY
1519	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1520	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
1521				   sel, sizeof(*sel));
1522#else
1523	return 0;
1524#endif
1525}
1526
1527static inline int
1528xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
1529{
1530#ifdef CONFIG_XFRM_SUB_POLICY
1531	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1532	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
1533#else
1534	return 0;
1535#endif
1536}
1537
1538static int stale_bundle(struct dst_entry *dst);
1539
1540/* Main function: finds or creates a bundle for the given flow.
1541 *
1542 * At the moment we eat a raw IP route, mostly to speed up lookups
1543 * on interfaces with IPsec disabled.
1544 */
1545int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
1546		  struct sock *sk, int flags)
1547{
1548	struct xfrm_policy *policy;
1549	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1550	int npols;
1551	int pol_dead;
1552	int xfrm_nr;
1553	int pi;
1554	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1555	struct dst_entry *dst, *dst_orig = *dst_p;
1556	int nx = 0;
1557	int err;
1558	u32 genid;
1559	u16 family;
1560	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1561
1562restart:
1563	genid = atomic_read(&flow_cache_genid);
1564	policy = NULL;
1565	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1566		pols[pi] = NULL;
1567	npols = 0;
1568	pol_dead = 0;
1569	xfrm_nr = 0;
1570
1571	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1572		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1573		err = PTR_ERR(policy);
1574		if (IS_ERR(policy)) {
1575			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1576			goto dropdst;
1577		}
1578	}
1579
1580	if (!policy) {
1581		/* To accelerate a bit...  */
1582		if ((dst_orig->flags & DST_NOXFRM) ||
1583		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
1584			goto nopol;
1585
1586		policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
1587					   dir, xfrm_policy_lookup);
1588		err = PTR_ERR(policy);
1589		if (IS_ERR(policy)) {
1590			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1591			goto dropdst;
1592		}
1593	}
1594
1595	if (!policy)
1596		goto nopol;
1597
1598	family = dst_orig->ops->family;
1599	pols[0] = policy;
1600	npols++;
1601	xfrm_nr += pols[0]->xfrm_nr;
1602
1603	err = -ENOENT;
1604	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1605		goto error;
1606
1607	policy->curlft.use_time = get_seconds();
1608
1609	switch (policy->action) {
1610	default:
1611	case XFRM_POLICY_BLOCK:
1612		/* Prohibit the flow */
1613		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
1614		err = -EPERM;
1615		goto error;
1616
1617	case XFRM_POLICY_ALLOW:
1618#ifndef CONFIG_XFRM_SUB_POLICY
1619		if (policy->xfrm_nr == 0) {
1620			/* Flow passes not transformed. */
1621			xfrm_pol_put(policy);
1622			return 0;
1623		}
1624#endif
1625
1626		/* Try to find a matching bundle.
1627		 *
1628		 * LATER: get help from the flow cache. That is optional,
1629		 * since bundles are required only for output policy.
1630		 */
1631		dst = xfrm_find_bundle(fl, policy, family);
1632		if (IS_ERR(dst)) {
1633			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1634			err = PTR_ERR(dst);
1635			goto error;
1636		}
1637
1638		if (dst)
1639			break;
1640
1641#ifdef CONFIG_XFRM_SUB_POLICY
1642		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1643			pols[1] = xfrm_policy_lookup_bytype(net,
1644							    XFRM_POLICY_TYPE_MAIN,
1645							    fl, family,
1646							    XFRM_POLICY_OUT);
1647			if (pols[1]) {
1648				if (IS_ERR(pols[1])) {
1649					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1650					err = PTR_ERR(pols[1]);
1651					goto error;
1652				}
1653				if (pols[1]->action == XFRM_POLICY_BLOCK) {
1654					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
1655					err = -EPERM;
1656					goto error;
1657				}
1658				npols++;
1659				xfrm_nr += pols[1]->xfrm_nr;
1660			}
1661		}
1662
1663		/*
1664		 * Neither the flowi nor the bundle information knows the
1665		 * transformation template size, so when more than one policy
1666		 * is in use we can only tell whether all of them are bypass
1667		 * after they have been looked up. Note that the bypass above
1668		 * is likewise guarded by the non-sub-policy configuration.
1669		 */
1670		if (xfrm_nr == 0) {
1671			/* Flow passes not transformed. */
1672			xfrm_pols_put(pols, npols);
1673			return 0;
1674		}
1675
1676#endif
1677		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1678
1679		if (unlikely(nx<0)) {
1680			err = nx;
1681			if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
1682				/* EREMOTE tells the caller to generate
1683				 * a one-shot blackhole route.
1684				 */
1685				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1686				xfrm_pol_put(policy);
1687				return -EREMOTE;
1688			}
1689			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1690				DECLARE_WAITQUEUE(wait, current);
1691
1692				add_wait_queue(&net->xfrm.km_waitq, &wait);
1693				set_current_state(TASK_INTERRUPTIBLE);
1694				schedule();
1695				set_current_state(TASK_RUNNING);
1696				remove_wait_queue(&net->xfrm.km_waitq, &wait);
1697
1698				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1699
1700				if (nx == -EAGAIN && signal_pending(current)) {
1701					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1702					err = -ERESTART;
1703					goto error;
1704				}
1705				if (nx == -EAGAIN ||
1706				    genid != atomic_read(&flow_cache_genid)) {
1707					xfrm_pols_put(pols, npols);
1708					goto restart;
1709				}
1710				err = nx;
1711			}
1712			if (err < 0) {
1713				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1714				goto error;
1715			}
1716		}
1717		if (nx == 0) {
1718			/* Flow passes not transformed. */
1719			xfrm_pols_put(pols, npols);
1720			return 0;
1721		}
1722
1723		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1724		err = PTR_ERR(dst);
1725		if (IS_ERR(dst)) {
1726			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1727			goto error;
1728		}
1729
1730		for (pi = 0; pi < npols; pi++)
1731			pol_dead |= pols[pi]->walk.dead;
1732
1733		write_lock_bh(&policy->lock);
1734		if (unlikely(pol_dead || stale_bundle(dst))) {
1735			/* Wow! While we were resolving, this policy
1736			 * went away. Retry. It is not paranoia: we just
1737			 * cannot enlist a new bundle on a dead object,
1738			 * nor can we enlist stale bundles.
1739			 */
1740			write_unlock_bh(&policy->lock);
1741			dst_free(dst);
1742
1743			if (pol_dead)
1744				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
1745			else
1746				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1747			err = -EHOSTUNREACH;
1748			goto error;
1749		}
1750
1751		if (npols > 1)
1752			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1753		else
1754			err = xfrm_dst_update_origin(dst, fl);
1755		if (unlikely(err)) {
1756			write_unlock_bh(&policy->lock);
1757			dst_free(dst);
1758			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1759			goto error;
1760		}
1761
1762		dst->next = policy->bundles;
1763		policy->bundles = dst;
1764		dst_hold(dst);
1765		write_unlock_bh(&policy->lock);
1766	}
1767	*dst_p = dst;
1768	dst_release(dst_orig);
1769	xfrm_pols_put(pols, npols);
1770	return 0;
1771
1772error:
1773	xfrm_pols_put(pols, npols);
1774dropdst:
1775	dst_release(dst_orig);
1776	*dst_p = NULL;
1777	return err;
1778
1779nopol:
1780	err = -ENOENT;
1781	if (flags & XFRM_LOOKUP_ICMP)
1782		goto dropdst;
1783	return 0;
1784}
1785EXPORT_SYMBOL(__xfrm_lookup);
1786
1787int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
1788		struct sock *sk, int flags)
1789{
1790	int err = __xfrm_lookup(net, dst_p, fl, sk, flags);
1791
1792	if (err == -EREMOTE) {
1793		dst_release(*dst_p);
1794		*dst_p = NULL;
1795		err = -EAGAIN;
1796	}
1797
1798	return err;
1799}
1800EXPORT_SYMBOL(xfrm_lookup);
1801
1802static inline int
1803xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1804{
1805	struct xfrm_state *x;
1806
1807	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1808		return 0;
1809	x = skb->sp->xvec[idx];
1810	if (!x->type->reject)
1811		return 0;
1812	return x->type->reject(x, skb, fl);
1813}
1814
1815/* When an skb is transformed back to its "native" form, we have to
1816 * check policy restrictions. At the moment we do this in a maximally
1817 * stupid way. Shame on me. :-) Of course, connected sockets must
1818 * have the policy cached on them.
1819 */
1820
1821static inline int
1822xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1823	      unsigned short family)
1824{
1825	if (xfrm_state_kern(x))
1826		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1827	return	x->id.proto == tmpl->id.proto &&
1828		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1829		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1830		x->props.mode == tmpl->mode &&
1831		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
1832		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1833		!(x->props.mode != XFRM_MODE_TRANSPORT &&
1834		  xfrm_state_addr_cmp(tmpl, x, family));
1835}
1836
1837/*
1838 * Zero or a positive value is returned when validation succeeds (either a
1839 * bypass due to an optional transport-mode template, or the index just past
1840 * the secpath state that matched the template).
1841 * -1 is returned when no matching template is found.
1842 * Otherwise "-2 - errored_index" is returned.
1843 */
1844static inline int
1845xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1846	       unsigned short family)
1847{
1848	int idx = start;
1849
1850	if (tmpl->optional) {
1851		if (tmpl->mode == XFRM_MODE_TRANSPORT)
1852			return start;
1853	} else
1854		start = -1;
1855	for (; idx < sp->len; idx++) {
1856		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1857			return ++idx;
1858		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1859			if (start == -1)
1860				start = -2-idx;
1861			break;
1862		}
1863	}
1864	return start;
1865}
1866
1867int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1868			  unsigned int family, int reverse)
1869{
1870	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1871	int err;
1872
1873	if (unlikely(afinfo == NULL))
1874		return -EAFNOSUPPORT;
1875
1876	afinfo->decode_session(skb, fl, reverse);
1877	err = security_xfrm_decode_session(skb, &fl->secid);
1878	xfrm_policy_put_afinfo(afinfo);
1879	return err;
1880}
1881EXPORT_SYMBOL(__xfrm_decode_session);
1882
1883static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1884{
1885	for (; k < sp->len; k++) {
1886		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1887			*idxp = k;
1888			return 1;
1889		}
1890	}
1891
1892	return 0;
1893}
1894
1895int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1896			unsigned short family)
1897{
1898	struct net *net = dev_net(skb->dev);
1899	struct xfrm_policy *pol;
1900	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1901	int npols = 0;
1902	int xfrm_nr;
1903	int pi;
1904	int reverse;
1905	struct flowi fl;
1906	u8 fl_dir;
1907	int xerr_idx = -1;
1908
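	/* Bits above XFRM_POLICY_MASK in 'dir' request a reverse flow
	 * decode (used for ICMP errors); peel them off before using the
	 * direction.
	 */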
1909	reverse = dir & ~XFRM_POLICY_MASK;
1910	dir &= XFRM_POLICY_MASK;
1911	fl_dir = policy_to_flow_dir(dir);
1912
1913	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1914		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
1915		return 0;
1916	}
1917
1918	nf_nat_decode_session(skb, &fl, family);
1919
1920	/* First, check used SA against their selectors. */
1921	if (skb->sp) {
1922		int i;
1923
1924		for (i=skb->sp->len-1; i>=0; i--) {
1925			struct xfrm_state *x = skb->sp->xvec[i];
1926			if (!xfrm_selector_match(&x->sel, &fl, family)) {
1927				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
1928				return 0;
1929			}
1930		}
1931	}
1932
1933	pol = NULL;
1934	if (sk && sk->sk_policy[dir]) {
1935		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1936		if (IS_ERR(pol)) {
1937			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
1938			return 0;
1939		}
1940	}
1941
1942	if (!pol)
1943		pol = flow_cache_lookup(net, &fl, family, fl_dir,
1944					xfrm_policy_lookup);
1945
1946	if (IS_ERR(pol)) {
1947		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
1948		return 0;
1949	}
1950
1951	if (!pol) {
1952		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1953			xfrm_secpath_reject(xerr_idx, skb, &fl);
1954			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
1955			return 0;
1956		}
1957		return 1;
1958	}
1959
1960	pol->curlft.use_time = get_seconds();
1961
1962	pols[0] = pol;
1963	npols++;
1964#ifdef CONFIG_XFRM_SUB_POLICY
1965	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1966		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
1967						    &fl, family,
1968						    XFRM_POLICY_IN);
1969		if (pols[1]) {
1970			if (IS_ERR(pols[1])) {
1971				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
1972				return 0;
1973			}
1974			pols[1]->curlft.use_time = get_seconds();
1975			npols++;
1976		}
1977	}
1978#endif
1979
1980	if (pol->action == XFRM_POLICY_ALLOW) {
1981		struct sec_path *sp;
1982		static struct sec_path dummy;
1983		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1984		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1985		struct xfrm_tmpl **tpp = tp;
1986		int ti = 0;
1987		int i, k;
1988
1989		if ((sp = skb->sp) == NULL)
1990			sp = &dummy;
1991
1992		for (pi = 0; pi < npols; pi++) {
1993			if (pols[pi] != pol &&
1994			    pols[pi]->action != XFRM_POLICY_ALLOW) {
1995				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
1996				goto reject;
1997			}
1998			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
1999				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2000				goto reject_error;
2001			}
2002			for (i = 0; i < pols[pi]->xfrm_nr; i++)
2003				tpp[ti++] = &pols[pi]->xfrm_vec[i];
2004		}
2005		xfrm_nr = ti;
2006		if (npols > 1) {
2007			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
2008			tpp = stp;
2009		}
2010
2011		/* For each tunnel xfrm, find the first matching tmpl.
2012		 * For each tmpl before that, find corresponding xfrm.
2013		 * Order is _important_. Later we will implement
2014		 * some barriers, but at the moment barriers
2015		 * are implied between every two transformations.
2016		 */
2017		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
2018			k = xfrm_policy_ok(tpp[i], sp, k, family);
2019			if (k < 0) {
2020				if (k < -1)
2021					/* "-2 - errored_index" returned */
2022					xerr_idx = -(2+k);
2023				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2024				goto reject;
2025			}
2026		}
2027
2028		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
2029			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2030			goto reject;
2031		}
2032
2033		xfrm_pols_put(pols, npols);
2034		return 1;
2035	}
2036	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2037
2038reject:
2039	xfrm_secpath_reject(xerr_idx, skb, &fl);
2040reject_error:
2041	xfrm_pols_put(pols, npols);
2042	return 0;
2043}
2044EXPORT_SYMBOL(__xfrm_policy_check);
2045
2046int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2047{
2048	struct net *net = dev_net(skb->dev);
2049	struct flowi fl;
2050	struct dst_entry *dst;
2051	int res;
2052
2053	if (xfrm_decode_session(skb, &fl, family) < 0) {
2054		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
2055		return 0;
2056	}
2057
2058	dst = skb_dst(skb);
2059
2060	res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0;
2061	skb_dst_set(skb, dst);
2062	return res;
2063}
2064EXPORT_SYMBOL(__xfrm_route_forward);
2065
2066/* Optimize later using cookies and generation ids. */
2067
2068static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2069{
2070	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2071	 * to "-1" to force all XFRM destinations to get validated by
2072	 * dst_ops->check on every use.  We do this because when a
2073	 * normal route referenced by an XFRM dst is obsoleted we do
2074	 * not go looking for all the parent XFRM dsts that reference
2075	 * it in order to invalidate them.  It is just too much work.
2076	 * Instead we make the checks here on every use.  For example:
2077	 *
2078	 *	XFRM dst A --> IPv4 dst X
2079	 *
2080	 * X is the "xdst->route" of A (X is also the "dst->path" of A
2081	 * in this example).  If X is marked obsolete, "A" will not
2082	 * notice.  That's what we are validating here via the
2083	 * stale_bundle() check.
2084	 *
2085	 * When a policy's bundle is pruned, we dst_free() the XFRM
2086	 * dst which causes its ->obsolete field to be set to a
2087	 * positive non-zero integer.  If an XFRM dst has been pruned
2088	 * like this, we want to force a new route lookup.
2089	 */
2090	if (dst->obsolete < 0 && !stale_bundle(dst))
2091		return dst;
2092
2093	return NULL;
2094}
2095
2096static int stale_bundle(struct dst_entry *dst)
2097{
2098	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2099}
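
/*
 * Commentary: with a NULL policy and flow and family AF_UNSPEC, only
 * the liveness checks in xfrm_bundle_ok() apply here (path cookie,
 * device state, xfrm state validity, cached MTU refresh); no selector
 * or security matching is attempted for a pure staleness probe.
 */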
2100
2101void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2102{
2103	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2104		dst->dev = dev_net(dev)->loopback_dev;
2105		dev_hold(dst->dev);
2106		dev_put(dev);
2107	}
2108}
2109EXPORT_SYMBOL(xfrm_dst_ifdown);
2110
2111static void xfrm_link_failure(struct sk_buff *skb)
2112{
2113	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
2114	return;
2115}
2116
2117static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2118{
2119	if (dst) {
2120		if (dst->obsolete) {
2121			dst_release(dst);
2122			dst = NULL;
2123		}
2124	}
2125	return dst;
2126}
2127
2128static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
2129{
2130	struct dst_entry *dst, **dstp;
2131
2132	write_lock(&pol->lock);
2133	dstp = &pol->bundles;
2134	while ((dst = *dstp) != NULL) {
2135		if (func(dst)) {
2136			*dstp = dst->next;
2137			dst->next = *gc_list_p;
2138			*gc_list_p = dst;
2139		} else {
2140			dstp = &dst->next;
2141		}
2142	}
2143	write_unlock(&pol->lock);
2144}
2145
2146static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
2147{
2148	struct dst_entry *gc_list = NULL;
2149	int dir;
2150
2151	read_lock_bh(&xfrm_policy_lock);
2152	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2153		struct xfrm_policy *pol;
2154		struct hlist_node *entry;
2155		struct hlist_head *table;
2156		int i;
2157
2158		hlist_for_each_entry(pol, entry,
2159				     &net->xfrm.policy_inexact[dir], bydst)
2160			prune_one_bundle(pol, func, &gc_list);
2161
2162		table = net->xfrm.policy_bydst[dir].table;
2163		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
2164			hlist_for_each_entry(pol, entry, table + i, bydst)
2165				prune_one_bundle(pol, func, &gc_list);
2166		}
2167	}
2168	read_unlock_bh(&xfrm_policy_lock);
2169
2170	while (gc_list) {
2171		struct dst_entry *dst = gc_list;
2172		gc_list = dst->next;
2173		dst_free(dst);
2174	}
2175}
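
/*
 * Commentary on the two-phase prune above: matching bundles are only
 * unlinked onto the private gc_list while the policy locks are held;
 * the dst_free() calls happen after every lock has been dropped, which
 * keeps lock hold times short. The same walk serves both users in
 * this file:
 *
 *	xfrm_prune_bundles(net, stale_bundle);	// flush dead bundles
 *	xfrm_prune_bundles(net, unused_bundle);	// GC unreferenced ones
 */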
2176
2177static int unused_bundle(struct dst_entry *dst)
2178{
2179	return !atomic_read(&dst->__refcnt);
2180}
2181
2182static void __xfrm_garbage_collect(struct net *net)
2183{
2184	xfrm_prune_bundles(net, unused_bundle);
2185}
2186
2187static int xfrm_flush_bundles(struct net *net)
2188{
2189	xfrm_prune_bundles(net, stale_bundle);
2190	return 0;
2191}
2192
2193static void xfrm_init_pmtu(struct dst_entry *dst)
2194{
2195	do {
2196		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2197		u32 pmtu, route_mtu_cached;
2198
2199		pmtu = dst_mtu(dst->child);
2200		xdst->child_mtu_cached = pmtu;
2201
2202		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2203
2204		route_mtu_cached = dst_mtu(xdst->route);
2205		xdst->route_mtu_cached = route_mtu_cached;
2206
2207		if (pmtu > route_mtu_cached)
2208			pmtu = route_mtu_cached;
2209
2210		dst->metrics[RTAX_MTU-1] = pmtu;
2211	} while ((dst = dst->next));
2212}
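
/*
 * Worked example for xfrm_init_pmtu() (figures illustrative only):
 * for a one-level ESP tunnel whose child route reports an MTU of
 * 1500, xfrm_state_mtu() subtracts the ESP and outer header overhead,
 * say 1500 - 62 = 1438; if the outer route also reports 1500, the
 * bundle advertises min(1438, 1500) = 1438 in RTAX_MTU.
 */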
2213
2214/* Check that the bundle accepts the flow and its components are
2215 * still valid.
2216 */
2217
2218int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2219		struct flowi *fl, int family, int strict)
2220{
2221	struct dst_entry *dst = &first->u.dst;
2222	struct xfrm_dst *last;
2223	u32 mtu;
2224
2225	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2226	    (dst->dev && !netif_running(dst->dev)))
2227		return 0;
2228#ifdef CONFIG_XFRM_SUB_POLICY
2229	if (fl) {
2230		if (first->origin && !flow_cache_uli_match(first->origin, fl))
2231			return 0;
2232		if (first->partner &&
2233		    !xfrm_selector_match(first->partner, fl, family))
2234			return 0;
2235	}
2236#endif
2237
2238	last = NULL;
2239
2240	do {
2241		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2242
2243		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2244			return 0;
2245		if (fl && pol &&
2246		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2247			return 0;
2248		if (dst->xfrm->km.state != XFRM_STATE_VALID)
2249			return 0;
2250		if (xdst->genid != dst->xfrm->genid)
2251			return 0;
2252
2253		if (strict && fl &&
2254		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2255		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2256			return 0;
2257
2258		mtu = dst_mtu(dst->child);
2259		if (xdst->child_mtu_cached != mtu) {
2260			last = xdst;
2261			xdst->child_mtu_cached = mtu;
2262		}
2263
2264		if (!dst_check(xdst->route, xdst->route_cookie))
2265			return 0;
2266		mtu = dst_mtu(xdst->route);
2267		if (xdst->route_mtu_cached != mtu) {
2268			last = xdst;
2269			xdst->route_mtu_cached = mtu;
2270		}
2271
2272		dst = dst->child;
2273	} while (dst->xfrm);
2274
2275	if (likely(!last))
2276		return 1;
2277
2278	mtu = last->child_mtu_cached;
2279	for (;;) {
2280		dst = &last->u.dst;
2281
2282		mtu = xfrm_state_mtu(dst->xfrm, mtu);
2283		if (mtu > last->route_mtu_cached)
2284			mtu = last->route_mtu_cached;
2285		dst->metrics[RTAX_MTU-1] = mtu;
2286
2287		if (last == first)
2288			break;
2289
2290		last = (struct xfrm_dst *)last->u.dst.next;
2291		last->child_mtu_cached = mtu;
2292	}
2293
2294	return 1;
2295}
2296
2297EXPORT_SYMBOL(xfrm_bundle_ok);
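
/*
 * Commentary: xfrm_bundle_ok() is two-pass. The first loop walks the
 * bundle from the top via ->child, validating every level and noting
 * in "last" the deepest xfrm_dst whose cached child or route MTU went
 * stale. Only when something changed does the second loop walk back
 * up via ->u.dst.next, recomputing RTAX_MTU at each level so that no
 * parent ever advertises a larger MTU than its children allow.
 */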
2298
2299int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2300{
2301	struct net *net;
2302	int err = 0;
2303	if (unlikely(afinfo == NULL))
2304		return -EINVAL;
2305	if (unlikely(afinfo->family >= NPROTO))
2306		return -EAFNOSUPPORT;
2307	write_lock_bh(&xfrm_policy_afinfo_lock);
2308	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2309		err = -ENOBUFS;
2310	else {
2311		struct dst_ops *dst_ops = afinfo->dst_ops;
2312		if (likely(dst_ops->kmem_cachep == NULL))
2313			dst_ops->kmem_cachep = xfrm_dst_cache;
2314		if (likely(dst_ops->check == NULL))
2315			dst_ops->check = xfrm_dst_check;
2316		if (likely(dst_ops->negative_advice == NULL))
2317			dst_ops->negative_advice = xfrm_negative_advice;
2318		if (likely(dst_ops->link_failure == NULL))
2319			dst_ops->link_failure = xfrm_link_failure;
2320		if (likely(afinfo->garbage_collect == NULL))
2321			afinfo->garbage_collect = __xfrm_garbage_collect;
2322		xfrm_policy_afinfo[afinfo->family] = afinfo;
2323	}
2324	write_unlock_bh(&xfrm_policy_afinfo_lock);
2325
2326	rtnl_lock();
2327	for_each_net(net) {
2328		struct dst_ops *xfrm_dst_ops;
2329
2330		switch (afinfo->family) {
2331		case AF_INET:
2332			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
2333			break;
2334#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2335		case AF_INET6:
2336			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
2337			break;
2338#endif
2339		default:
2340			BUG();
2341		}
2342		*xfrm_dst_ops = *afinfo->dst_ops;
2343	}
2344	rtnl_unlock();
2345
2346	return err;
2347}
2348EXPORT_SYMBOL(xfrm_policy_register_afinfo);
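
/*
 * Registration sketch (illustrative; see net/ipv4/xfrm4_policy.c for
 * the real initializer):
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family		= AF_INET,
 *		.dst_ops	= &xfrm4_dst_ops,
 *		.dst_lookup	= xfrm4_dst_lookup,
 *		...
 *	};
 *
 *	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 *
 * Hooks the caller leaves NULL (check, negative_advice, link_failure,
 * garbage_collect) are filled in with the xfrm defaults above, and
 * every existing namespace then gets a copy of the dst_ops template.
 */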
2349
2350int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2351{
2352	int err = 0;
2353	if (unlikely(afinfo == NULL))
2354		return -EINVAL;
2355	if (unlikely(afinfo->family >= NPROTO))
2356		return -EAFNOSUPPORT;
2357	write_lock_bh(&xfrm_policy_afinfo_lock);
2358	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2359		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2360			err = -EINVAL;
2361		else {
2362			struct dst_ops *dst_ops = afinfo->dst_ops;
2363			xfrm_policy_afinfo[afinfo->family] = NULL;
2364			dst_ops->kmem_cachep = NULL;
2365			dst_ops->check = NULL;
2366			dst_ops->negative_advice = NULL;
2367			dst_ops->link_failure = NULL;
2368			afinfo->garbage_collect = NULL;
2369		}
2370	}
2371	write_unlock_bh(&xfrm_policy_afinfo_lock);
2372	return err;
2373}
2374EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2375
2376static void __net_init xfrm_dst_ops_init(struct net *net)
2377{
2378	struct xfrm_policy_afinfo *afinfo;
2379
2380	read_lock_bh(&xfrm_policy_afinfo_lock);
2381	afinfo = xfrm_policy_afinfo[AF_INET];
2382	if (afinfo)
2383		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
2384#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2385	afinfo = xfrm_policy_afinfo[AF_INET6];
2386	if (afinfo)
2387		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
2388#endif
2389	read_unlock_bh(&xfrm_policy_afinfo_lock);
2390}
2391
2392static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2393{
2394	struct xfrm_policy_afinfo *afinfo;
2395	if (unlikely(family >= NPROTO))
2396		return NULL;
2397	read_lock(&xfrm_policy_afinfo_lock);
2398	afinfo = xfrm_policy_afinfo[family];
2399	if (unlikely(!afinfo))
2400		read_unlock(&xfrm_policy_afinfo_lock);
2401	return afinfo;
2402}
2403
2404static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2405{
2406	read_unlock(&xfrm_policy_afinfo_lock);
2407}
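
/*
 * Commentary: note the asymmetric locking above. A successful
 * xfrm_policy_get_afinfo() returns with xfrm_policy_afinfo_lock held
 * for reading, and the matching xfrm_policy_put_afinfo() releases it,
 * preventing the afinfo from being unregistered while in use:
 *
 *	afinfo = xfrm_policy_get_afinfo(family);
 *	if (unlikely(afinfo == NULL))
 *		return -EAFNOSUPPORT;
 *	... use afinfo ...
 *	xfrm_policy_put_afinfo(afinfo);
 */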
2408
2409static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2410{
2411	struct net_device *dev = ptr;
2412
2413	switch (event) {
2414	case NETDEV_DOWN:
2415		xfrm_flush_bundles(dev_net(dev));
2416	}
2417	return NOTIFY_DONE;
2418}
2419
2420static struct notifier_block xfrm_dev_notifier = {
2421	.notifier_call	= xfrm_dev_event,
2422};
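
/*
 * Commentary: the notifier ties device teardown to bundle pruning. On
 * NETDEV_DOWN every stale bundle in the namespace is flushed, so no
 * cached dst keeps a reference to the disappearing device.
 */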
2423
2424#ifdef CONFIG_XFRM_STATISTICS
2425static int __net_init xfrm_statistics_init(struct net *net)
2426{
2427	int rv;
2428
2429	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
2430			  sizeof(struct linux_xfrm_mib)) < 0)
2431		return -ENOMEM;
2432	rv = xfrm_proc_init(net);
2433	if (rv < 0)
2434		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
2435	return rv;
2436}
2437
2438static void xfrm_statistics_fini(struct net *net)
2439{
2440	xfrm_proc_fini(net);
2441	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
2442}
2443#else
2444static int __net_init xfrm_statistics_init(struct net *net)
2445{
2446	return 0;
2447}
2448
2449static void xfrm_statistics_fini(struct net *net)
2450{
2451}
2452#endif
2453
2454static int __net_init xfrm_policy_init(struct net *net)
2455{
2456	unsigned int hmask, sz;
2457	int dir;
2458
2459	if (net_eq(net, &init_net))
2460		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2461					   sizeof(struct xfrm_dst),
2462					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2463					   NULL);
2464
2465	hmask = 8 - 1;
2466	sz = (hmask+1) * sizeof(struct hlist_head);
2467
2468	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
2469	if (!net->xfrm.policy_byidx)
2470		goto out_byidx;
2471	net->xfrm.policy_idx_hmask = hmask;
2472
2473	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2474		struct xfrm_policy_hash *htab;
2475
2476		net->xfrm.policy_count[dir] = 0;
2477		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
2478
2479		htab = &net->xfrm.policy_bydst[dir];
2480		htab->table = xfrm_hash_alloc(sz);
2481		if (!htab->table)
2482			goto out_bydst;
2483		htab->hmask = hmask;
2484	}
2485
2486	INIT_LIST_HEAD(&net->xfrm.policy_all);
2487	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
2488	if (net_eq(net, &init_net))
2489		register_netdevice_notifier(&xfrm_dev_notifier);
2490	return 0;
2491
2492out_bydst:
2493	for (dir--; dir >= 0; dir--) {
2494		struct xfrm_policy_hash *htab;
2495
2496		htab = &net->xfrm.policy_bydst[dir];
2497		xfrm_hash_free(htab->table, sz);
2498	}
2499	xfrm_hash_free(net->xfrm.policy_byidx, sz);
2500out_byidx:
2501	return -ENOMEM;
2502}
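
/*
 * Commentary: each namespace starts with deliberately small tables;
 * hmask = 7 gives 8 buckets for the by-index hash and for each
 * per-direction by-destination hash. xfrm_hash_resize(), scheduled on
 * policy_hash_work, grows them later once the policy count outpaces
 * the table size.
 */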
2503
2504static void xfrm_policy_fini(struct net *net)
2505{
2506	struct xfrm_audit audit_info;
2507	unsigned int sz;
2508	int dir;
2509
2510	flush_work(&net->xfrm.policy_hash_work);
2511#ifdef CONFIG_XFRM_SUB_POLICY
2512	audit_info.loginuid = -1;
2513	audit_info.sessionid = -1;
2514	audit_info.secid = 0;
2515	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
2516#endif
2517	audit_info.loginuid = -1;
2518	audit_info.sessionid = -1;
2519	audit_info.secid = 0;
2520	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
2521	flush_work(&xfrm_policy_gc_work);
2522
2523	WARN_ON(!list_empty(&net->xfrm.policy_all));
2524
2525	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2526		struct xfrm_policy_hash *htab;
2527
2528		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
2529
2530		htab = &net->xfrm.policy_bydst[dir];
2531		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
2532		WARN_ON(!hlist_empty(htab->table));
2533		xfrm_hash_free(htab->table, sz);
2534	}
2535
2536	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
2537	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
2538	xfrm_hash_free(net->xfrm.policy_byidx, sz);
2539}
2540
2541static int __net_init xfrm_net_init(struct net *net)
2542{
2543	int rv;
2544
2545	rv = xfrm_statistics_init(net);
2546	if (rv < 0)
2547		goto out_statistics;
2548	rv = xfrm_state_init(net);
2549	if (rv < 0)
2550		goto out_state;
2551	rv = xfrm_policy_init(net);
2552	if (rv < 0)
2553		goto out_policy;
2554	xfrm_dst_ops_init(net);
2555	rv = xfrm_sysctl_init(net);
2556	if (rv < 0)
2557		goto out_sysctl;
2558	return 0;
2559
2560out_sysctl:
2561	xfrm_policy_fini(net);
2562out_policy:
2563	xfrm_state_fini(net);
2564out_state:
2565	xfrm_statistics_fini(net);
2566out_statistics:
2567	return rv;
2568}
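
/*
 * Commentary: xfrm_net_init() uses the conventional goto-unwind error
 * handling; each out_* label tears down exactly the stages that had
 * already succeeded, in reverse order, so a failed namespace setup
 * leaves no partial xfrm state behind.
 */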
2569
2570static void __net_exit xfrm_net_exit(struct net *net)
2571{
2572	xfrm_sysctl_fini(net);
2573	xfrm_policy_fini(net);
2574	xfrm_state_fini(net);
2575	xfrm_statistics_fini(net);
2576}
2577
2578static struct pernet_operations __net_initdata xfrm_net_ops = {
2579	.init = xfrm_net_init,
2580	.exit = xfrm_net_exit,
2581};
2582
2583void __init xfrm_init(void)
2584{
2585	register_pernet_subsys(&xfrm_net_ops);
2586	xfrm_input_init();
2587}
2588
2589#ifdef CONFIG_AUDITSYSCALL
2590static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2591					 struct audit_buffer *audit_buf)
2592{
2593	struct xfrm_sec_ctx *ctx = xp->security;
2594	struct xfrm_selector *sel = &xp->selector;
2595
2596	if (ctx)
2597		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2598				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2599
2600	switch (sel->family) {
2601	case AF_INET:
2602		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
2603		if (sel->prefixlen_s != 32)
2604			audit_log_format(audit_buf, " src_prefixlen=%d",
2605					 sel->prefixlen_s);
2606		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
2607		if (sel->prefixlen_d != 32)
2608			audit_log_format(audit_buf, " dst_prefixlen=%d",
2609					 sel->prefixlen_d);
2610		break;
2611	case AF_INET6:
2612		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
2613		if (sel->prefixlen_s != 128)
2614			audit_log_format(audit_buf, " src_prefixlen=%d",
2615					 sel->prefixlen_s);
2616		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
2617		if (sel->prefixlen_d != 128)
2618			audit_log_format(audit_buf, " dst_prefixlen=%d",
2619					 sel->prefixlen_d);
2620		break;
2621	}
2622}
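
/*
 * Example audit fragment produced above (values illustrative):
 *
 *	sec_alg=1 sec_doi=1 sec_obj=system_u:object_r:ipsec_spd_t:s0
 *	src=10.0.0.0 src_prefixlen=24 dst=10.0.1.1
 *
 * Note that src_prefixlen/dst_prefixlen are logged only for non-host
 * prefixes; /32 for IPv4 and /128 for IPv6 are omitted.
 */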
2623
2624void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2625			   uid_t auid, u32 sessionid, u32 secid)
2626{
2627	struct audit_buffer *audit_buf;
2628
2629	audit_buf = xfrm_audit_start("SPD-add");
2630	if (audit_buf == NULL)
2631		return;
2632	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2633	audit_log_format(audit_buf, " res=%u", result);
2634	xfrm_audit_common_policyinfo(xp, audit_buf);
2635	audit_log_end(audit_buf);
2636}
2637EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2638
2639void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2640			      uid_t auid, u32 sessionid, u32 secid)
2641{
2642	struct audit_buffer *audit_buf;
2643
2644	audit_buf = xfrm_audit_start("SPD-delete");
2645	if (audit_buf == NULL)
2646		return;
2647	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2648	audit_log_format(audit_buf, " res=%u", result);
2649	xfrm_audit_common_policyinfo(xp, audit_buf);
2650	audit_log_end(audit_buf);
2651}
2652EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2653#endif
2654
2655#ifdef CONFIG_XFRM_MIGRATE
2656static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2657				       struct xfrm_selector *sel_tgt)
2658{
2659	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2660		if (sel_tgt->family == sel_cmp->family &&
2661		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2662				  sel_cmp->family) == 0 &&
2663		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2664				  sel_cmp->family) == 0 &&
2665		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2666		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2667			return 1;
2668		}
2669	} else {
2670		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2671			return 1;
2672		}
2673	}
2674	return 0;
2675}
2676
2677static struct xfrm_policy *xfrm_migrate_policy_find(struct xfrm_selector *sel,
2678						     u8 dir, u8 type)
2679{
2680	struct xfrm_policy *pol, *ret = NULL;
2681	struct hlist_node *entry;
2682	struct hlist_head *chain;
2683	u32 priority = ~0U;
2684
2685	read_lock_bh(&xfrm_policy_lock);
2686	chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir);
2687	hlist_for_each_entry(pol, entry, chain, bydst) {
2688		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2689		    pol->type == type) {
2690			ret = pol;
2691			priority = ret->priority;
2692			break;
2693		}
2694	}
2695	chain = &init_net.xfrm.policy_inexact[dir];
2696	hlist_for_each_entry(pol, entry, chain, bydst) {
2697		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2698		    pol->type == type &&
2699		    pol->priority < priority) {
2700			ret = pol;
2701			break;
2702		}
2703	}
2704
2705	if (ret)
2706		xfrm_pol_hold(ret);
2707
2708	read_unlock_bh(&xfrm_policy_lock);
2709
2710	return ret;
2711}
2712
2713static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2714{
2715	int match = 0;
2716
2717	if (t->mode == m->mode && t->id.proto == m->proto &&
2718	    (m->reqid == 0 || t->reqid == m->reqid)) {
2719		switch (t->mode) {
2720		case XFRM_MODE_TUNNEL:
2721		case XFRM_MODE_BEET:
2722			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2723					  m->old_family) == 0 &&
2724			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2725					  m->old_family) == 0) {
2726				match = 1;
2727			}
2728			break;
2729		case XFRM_MODE_TRANSPORT:
2730			/* In transport mode the template does not store
2731			   any IP addresses, hence we only compare mode
2732			   and protocol. */
2733			match = 1;
2734			break;
2735		default:
2736			break;
2737		}
2738	}
2739	return match;
2740}
2741
2742/* update endpoint address(es) of template(s) */
2743static int xfrm_policy_migrate(struct xfrm_policy *pol,
2744			       struct xfrm_migrate *m, int num_migrate)
2745{
2746	struct xfrm_migrate *mp;
2747	struct dst_entry *dst;
2748	int i, j, n = 0;
2749
2750	write_lock_bh(&pol->lock);
2751	if (unlikely(pol->walk.dead)) {
2752		/* target policy has been deleted */
2753		write_unlock_bh(&pol->lock);
2754		return -ENOENT;
2755	}
2756
2757	for (i = 0; i < pol->xfrm_nr; i++) {
2758		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2759			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2760				continue;
2761			n++;
2762			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2763			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2764				continue;
2765			/* update endpoints */
2766			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2767			       sizeof(pol->xfrm_vec[i].id.daddr));
2768			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2769			       sizeof(pol->xfrm_vec[i].saddr));
2770			pol->xfrm_vec[i].encap_family = mp->new_family;
2771			/* flush bundles */
2772			while ((dst = pol->bundles) != NULL) {
2773				pol->bundles = dst->next;
2774				dst_free(dst);
2775			}
2776		}
2777	}
2778
2779	write_unlock_bh(&pol->lock);
2780
2781	if (!n)
2782		return -ENODATA;
2783
2784	return 0;
2785}
2786
2787static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2788{
2789	int i, j;
2790
2791	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2792		return -EINVAL;
2793
2794	for (i = 0; i < num_migrate; i++) {
2795		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2796				   m[i].old_family) == 0) &&
2797		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2798				   m[i].old_family) == 0))
2799			return -EINVAL;
2800		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2801		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2802			return -EINVAL;
2803
2804		/* check if there is any duplicated entry */
2805		for (j = i + 1; j < num_migrate; j++) {
2806			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2807				    sizeof(m[i].old_daddr)) &&
2808			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2809				    sizeof(m[i].old_saddr)) &&
2810			    m[i].proto == m[j].proto &&
2811			    m[i].mode == m[j].mode &&
2812			    m[i].reqid == m[j].reqid &&
2813			    m[i].old_family == m[j].old_family)
2814				return -EINVAL;
2815		}
2816	}
2817
2818	return 0;
2819}
2820
2821int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2822		 struct xfrm_migrate *m, int num_migrate,
2823		 struct xfrm_kmaddress *k)
2824{
2825	int i, err, nx_cur = 0, nx_new = 0;
2826	struct xfrm_policy *pol = NULL;
2827	struct xfrm_state *x, *xc;
2828	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2829	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2830	struct xfrm_migrate *mp;
2831
2832	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2833		goto out;
2834
2835	/* Stage 1 - find policy */
2836	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2837		err = -ENOENT;
2838		goto out;
2839	}
2840
2841	/* Stage 2 - find and update state(s) */
2842	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2843		if ((x = xfrm_migrate_state_find(mp))) {
2844			x_cur[nx_cur] = x;
2845			nx_cur++;
2846			if ((xc = xfrm_state_migrate(x, mp))) {
2847				x_new[nx_new] = xc;
2848				nx_new++;
2849			} else {
2850				err = -ENODATA;
2851				goto restore_state;
2852			}
2853		}
2854	}
2855
2856	/* Stage 3 - update policy */
2857	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2858		goto restore_state;
2859
2860	/* Stage 4 - delete old state(s) */
2861	if (nx_cur) {
2862		xfrm_states_put(x_cur, nx_cur);
2863		xfrm_states_delete(x_cur, nx_cur);
2864	}
2865
2866	/* Stage 5 - announce */
2867	km_migrate(sel, dir, type, m, num_migrate, k);
2868
2869	xfrm_pol_put(pol);
2870
2871	return 0;
2872out:
2873	return err;
2874
2875restore_state:
2876	if (pol)
2877		xfrm_pol_put(pol);
2878	if (nx_cur)
2879		xfrm_states_put(x_cur, nx_cur);
2880	if (nx_new)
2881		xfrm_states_delete(x_new, nx_new);
2882
2883	return err;
2884}
2885EXPORT_SYMBOL(xfrm_migrate);
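
/*
 * Usage note: xfrm_migrate() is the kernel side of IPsec endpoint
 * migration (e.g. Mobile IPv6 or MOBIKE-style address updates). The
 * caller passes up to XFRM_MAX_DEPTH (old_saddr, old_daddr) ->
 * (new_saddr, new_daddr) pairs; the matching policy templates are
 * rewritten, the affected states are cloned to the new addresses
 * before the old ones are deleted, and key managers are notified
 * through km_migrate().
 */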
2886#endif
2887