/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
static struct dst_entry *xfrm_policy_sk_bundles;
static DEFINE_RWLOCK(xfrm_policy_lock);

static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(unsigned long arg);

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
}

static inline bool
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
}

bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return false;
}
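
/* Illustrative sketch (not part of the original file; "sel" and
 * "matched" are hypothetical locals): a selector for 10.0.0.0/8, UDP,
 * dport 500 matches the flow built below, since 10.1.2.3 agrees with
 * 10/8 on the first 8 bits and (htons(500) ^ sel.dport) & sel.dport_mask
 * is zero:
 *
 *	struct flowi fl = {};
 *
 *	fl.u.ip4.daddr = htonl(0x0a010203);		// 10.1.2.3
 *	fl.u.ip4.flowi4_proto = IPPROTO_UDP;
 *	fl.u.ip4.uli.ports.dport = htons(500);
 *	matched = xfrm_selector_match(&sel, &fl, AF_INET);
 */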

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= NPROTO))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}

static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(net, tos, saddr, daddr);

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr, sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr, sizeof(*prev_daddr));
	}

	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (unlikely(xp->walk.dead))
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}
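
/* Worked example (illustrative): with soft_add_expires_seconds == 3300
 * and hard_add_expires_seconds == 3600, a policy added at time t0 fires
 * this timer around t0+3300, emits the soft warning via
 * km_policy_expired(xp, dir, 0, 0) and re-arms; at t0+3600 the hard
 * branch deletes the policy and reports km_policy_expired(xp, dir, 1, 0).
 */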

static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(pol->walk.dead))
		flo = NULL;
	else
		xfrm_pol_hold(pol);

	return flo;
}

static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	return !pol->walk.dead;
}

static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
}

static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};
/* Allocate xfrm_policy. Not used here; it is meant to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		skb_queue_head_init(&policy->polq.hold_queue);
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
			    (unsigned long)policy);
		policy->flo.ops = &xfrm_policy_fc_ops;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
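
/* Usage sketch (illustrative; error handling elided and the field
 * assignments are placeholders): configuration code such as the pfkeyv2
 * or netlink paths pairs the allocator with xfrm_policy_insert():
 *
 *	struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL);
 *
 *	if (xp) {
 *		xp->family = AF_INET;	// plus selector, lifetimes, action
 *		err = xfrm_policy_insert(XFRM_POLICY_OUT, xp, 0);
 *	}
 */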

/* Destroy xfrm_policy: descendant resources must have been released by this point. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->walk.dead);

	if (del_timer(&policy->timer))
		BUG();

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

static void xfrm_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
/* Rule must be locked. Release descendant resources, announce the
 * entry dead. The rule must already be unlinked from the lists at
 * this point.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	policy->walk.dead = 1;

	atomic_inc(&policy->genid);

	del_timer(&policy->polq.hold_timer);
	xfrm_queue_purge(&policy->polq.hold_queue);

	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
}

static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

	return net->xfrm.policy_bydst[dir].table + hash;
}

static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			hlist_del(&pol->bydst);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del(&pol->bydst);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = &pol->bydst;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(struct net *net, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = net->xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
{
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}
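
/* Example (illustrative): with hmask == 15 (16 buckets) a direction is
 * resized once it holds more than 15 policies; xfrm_new_hash_mask(15)
 * returns 31, so every resize doubles the bucket count, bounded above
 * by xfrm_policy_hashmax.
 */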

void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
	}
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);

	mutex_unlock(&hash_resize_mutex);
}
/* Generate new index... KAME seems to generate them ordered by cost
 * at the price of absolute unpredictability of rule ordering. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
		found = 0;
		hlist_for_each_entry(p, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}
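
/* Example (illustrative): idx_generator advances in steps of 8 and the
 * direction is OR'ed into the low bits, so indices look like 8|dir,
 * 16|dir, ... and xfrm_policy_id2dir() can recover the direction from
 * the index alone, as the timer callback above relies on.
 */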

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
	del_timer(&pq->hold_timer);
	spin_unlock_bh(&pq->hold_queue.lock);

	if (skb_queue_empty(&list))
		return;

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
	mod_timer(&pq->hold_timer, jiffies);
	spin_unlock_bh(&pq->hold_queue.lock);
}

static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	u32 mark = policy->mark.v & policy->mark.m;

	if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
		return true;

	if ((mark & pol->mark.m) == pol->mark.v &&
	    policy->priority == pol->priority)
		return true;

	return false;
}
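
/* Example (illustrative): a policy with mark.v == 0x1, mark.m == 0xf
 * matches a pol carrying the identical value/mask pair regardless of
 * priority, and also one with mark.v == 0x1, mark.m == 0x1 at equal
 * priority, since (0x1 & 0x1) == 0x1; two unmarked policies (0/0)
 * trivially match via the equality test.
 */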

int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct net *net = xp_net(policy);
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *newpos;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_policy_mark_match(policy, pol) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	net->xfrm.policy_count[dir]++;
	atomic_inc(&flow_cache_genid);
	rt_genid_bump(net);
	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
		__xfrm_policy_unlink(delpol, dir);
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	list_add(&policy->walk.all, &net->xfrm.policy_all);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == type &&
		    (mark & pol->mark.m) == pol->mark.v &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    (mark & pol->mark.m) == pol->mark.v) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->sessionid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif

int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

	again1:
		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			__xfrm_policy_unlink(pol, dir);
			write_unlock_bh(&xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->sessionid,
						 audit_info->secid);

			xfrm_policy_kill(pol);

			write_lock_bh(&xfrm_policy_lock);
			goto again1;
		}

		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				xfrm_policy_kill(pol);

				write_lock_bh(&xfrm_policy_lock);
				goto again2;
			}
		}

	}
	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	INIT_LIST_HEAD(&walk->walk.all);
	walk->walk.dead = 1;
	walk->type = type;
	walk->seq = 0;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
{
	if (list_empty(&walk->walk.all))
		return;

	write_lock_bh(&xfrm_policy_lock);
	list_del(&walk->walk.all);
	write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	const struct xfrm_selector *sel = &pol->selector;
	int ret = -ESRCH;
	bool match;

	if (pol->family != family ||
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
						  dir);

	return ret;
}

static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
						     const struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	const xfrm_address_t *daddr, *saddr;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static struct xfrm_policy *
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}

static int flow_to_policy_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	switch (dir) {
	default:
	case FLOW_DIR_IN:
		return XFRM_POLICY_IN;
	case FLOW_DIR_OUT:
		return XFRM_POLICY_OUT;
	case FLOW_DIR_FWD:
		return XFRM_POLICY_FWD;
	}
}

static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
{
	struct xfrm_policy *pol;

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));

	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
	if (IS_ERR_OR_NULL(pol))
		return ERR_CAST(pol);

	/* The resolver returns two references: one for the cache and
	 * one for the caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
						 const struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
		int err = 0;

		if (match) {
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
			err = security_xfrm_policy_lookup(pol->security,
						      fl->flowi_secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
out:
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct net *net = xp_net(pol);
	struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
						     pol->family, dir);

	list_add(&pol->walk.all, &net->xfrm.policy_all);
	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
	net->xfrm.policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	struct net *net = xp_net(pol);

	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del(&pol->bydst);
	hlist_del(&pol->byidx);
	list_del(&pol->walk.all);
	net->xfrm.policy_count[dir]--;

	return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct net *net = xp_net(pol);
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

		/* Unlinking always succeeds; this is the only function
		 * allowed to delete or replace a socket policy.
		 */
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->mark = old->mark;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}

static int
xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(net, local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
{
	struct net *net = xp_net(policy);
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			if (xfrm_addr_any(local, tmpl->encap_family)) {
				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		} else if (error == -ESRCH)
			error = -EAGAIN;

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx >= 0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx >= 0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static inline int xfrm_get_tos(const struct flowi *fl, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;

	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (xdst->route == NULL) {
		/* Dummy bundle: if it has xfrms, we failed to build the
		 * bundle because template resolution failed, so
		 * resolution needs to be retried. */
		if (xdst->num_xfrms > 0)
			return NULL;
	} else if (dst->flags & DST_XFRM_QUEUE) {
		return NULL;
	} else {
		/* Real bundle */
		if (stale_bundle(dst))
			return NULL;
	}

	dst_hold(dst);
	return flo;
}

static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (!xdst->route)
		return 0;
	if (stale_bundle(dst))
		return 0;

	return 1;
}

static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	dst_free(dst);
}

static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};

static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_ops *dst_ops;
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);

	if (likely(xdst)) {
		struct dst_entry *dst = &xdst->u.dst;

		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
		xdst->flo.ops = &xfrm_bundle_fc_ops;
		if (afinfo->init_dst)
			afinfo->init_dst(net, xdst);
	} else
		xdst = ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
				const struct flowi *fl)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->fill_dst(xdst, dev, fl);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}


/* Allocate a chain of dst_entry's, attach known xfrm's, and calculate
 * all the metrics... In short: bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
					    const struct flowi *fl,
					    struct dst_entry *dst)
{
	struct net *net = xp_net(policy);
	unsigned long now = jiffies;
	struct net_device *dev;
	struct xfrm_mode *inner_mode;
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
	int nfheader_len = 0;
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

		if (xfrm[i]->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(xfrm[i],
							xfrm_af2proto(family));
			if (!inner_mode) {
				err = -EAFNOSUPPORT;
				dst_release(dst);
				goto put_states;
			}
		} else
			inner_mode = xfrm[i]->inner_mode;

		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
		dst_copy_metrics(dst1, dst);

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
					      family);
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
		xdst->xfrm_genid = xfrm[i]->genid;

		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
		dst1->output = inner_mode->afinfo->output;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

		err = xfrm_fill_dst(xdst, dev, fl);
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}
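
/* Shape of the result (illustrative), for nx == 2 states resolved over
 * a routing entry "rt": each level is an xfrm_dst whose ->child points
 * at the next transformation and finally at the route itself:
 *
 *	dst0(xfrm[0]) --child--> dst1(xfrm[1]) --child--> rt
 *
 * dst0->path == rt, and each level's ->route holds the routing entry
 * that was current when that level was created. header_len/trailer_len
 * are accumulated over all states and then peeled off hop by hop in
 * the second loop above.
 */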

static inline int
xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}
	memcpy(*target, src, size);
	return 0;
}

static inline int
xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static inline int
xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

static int xfrm_expand_policies(const struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int i;

	if (*num_pols == 0 || !pols[0]) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(pols[0]))
		return PTR_ERR(pols[0]);

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				xfrm_pols_put(pols, *num_pols);
				return PTR_ERR(pols[1]);
			}
			(*num_pols)++;
			(*num_xfrms) += pols[1]->xfrm_nr;
		}
	}
#endif
	for (i = 0; i < *num_pols; i++) {
		if (pols[i]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;

}
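
/* Worked example (illustrative): under CONFIG_XFRM_SUB_POLICY, if
 * pols[0] is an allowing SUB policy with xfrm_nr == 1 and the matching
 * MAIN policy contributes xfrm_nr == 2, the function leaves *num_pols
 * == 2 and *num_xfrms == 3; a single XFRM_POLICY_BLOCK entry instead
 * forces *num_xfrms to -1, which callers treat as "prohibit".
 */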

static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst;
	struct xfrm_dst *xdst;
	int err;

	/* Try to instantiate a bundle */
	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
	if (err <= 0) {
		if (err != 0 && err != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		return ERR_PTR(err);
	}

	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(dst);
	}

	xdst = (struct xfrm_dst *)dst;
	xdst->num_xfrms = err;
	if (num_pols > 1)
		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
	else
		err = xfrm_dst_update_origin(dst, fl);
	if (unlikely(err)) {
		dst_free(dst);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}

static void xfrm_policy_queue_process(unsigned long arg)
{
	int err = 0;
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct net_device *dev;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	dst_hold(dst->path);
	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
			  sk, 0);
	if (IS_ERR(dst))
		goto purge_queue;

	if (dst->flags & DST_XFRM_QUEUE) {
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
		mod_timer(&pq->hold_timer, jiffies + pq->timeout);
		return;
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
				  &fl, skb->sk, 0);
		if (IS_ERR(dst)) {
			dev_put(skb->dev);
			kfree_skb(skb);
			continue;
		}

		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

		dev = skb->dev;
		err = dst_output(skb);
		dev_put(dev);
	}

	return;

purge_queue:
	pq->timeout = 0;
	xfrm_queue_purge(&pq->hold_queue);
}

static int xdst_queue_output(struct sk_buff *skb)
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
	struct xfrm_policy_queue *pq = &xdst->pols[0]->polq;

	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);
	dev_hold(skb->dev);

	spin_lock_bh(&pq->hold_queue.lock);

	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
	}

	__skb_queue_tail(&pq->hold_queue, skb);
	mod_timer(&pq->hold_timer, sched_next);

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}
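
/* Timer behaviour (illustrative): the first packet queued arms the
 * hold timer with XFRM_QUEUE_TMO_MIN (HZ/10). Every run of
 * xfrm_policy_queue_process() that still sees a DST_XFRM_QUEUE route
 * doubles pq->timeout, so retries happen after HZ/10, HZ/5, 2*HZ/5, ...
 * until XFRM_QUEUE_TMO_MAX (60*HZ) is reached and the queue is purged.
 */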

static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct dst_entry *dst,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	int err;
	struct net_device *dev;
	struct dst_entry *dst1;
	struct xfrm_dst *xdst;

	xdst = xfrm_alloc_dst(net, family);
	if (IS_ERR(xdst))
		return xdst;

	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 ||
	    (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP))
		return xdst;

	dst1 = &xdst->u.dst;
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dst1, dst);

	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
	dst1->lastuse = jiffies;

	dst1->input = dst_discard;
	dst1->output = xdst_queue_output;

	dst_hold(dst);
	dst1->child = dst;
	dst1->path = dst;

	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto free_dst;

out:
	return xdst;

free_dst:
	dst_release(dst1);
	xdst = ERR_PTR(err);
	goto out;
}

static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
		   struct flow_cache_object *oldflo, void *ctx)
{
	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		if (pol_dead) {
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
		pols[0] = __xfrm_policy_lookup(net, fl, family,
					       flow_to_policy_dir(dir));
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
	} else if (new_xdst == NULL) {
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen by the newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* The flow cache does not hold a reference (it uses dst_free()),
	 * but we do need to return one reference to the original caller */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there are no bundles to instantiate:
	 * either the policy blocks, it has no transformations, or we
	 * could not build a template (no xfrm_states). */
	xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}

static struct dst_entry *make_blackhole(struct net *net, u16 family,
					struct dst_entry *dst_orig)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_entry *ret;

	if (!afinfo) {
		dst_release(dst_orig);
		return ERR_PTR(-EINVAL);
	} else {
		ret = afinfo->blackhole_route(net, dst_orig);
	}
	xfrm_policy_put_afinfo(afinfo);

	return ret;
}

/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      struct sock *sk, int flags)
{
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
	struct dst_entry *dst, *route;
	u16 family = dst_orig->ops->family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;

restart:
	dst = NULL;
	xdst = NULL;
	route = NULL;

	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto dropdst;

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
			}

			dst_hold(&xdst->u.dst);

			spin_lock_bh(&xfrm_policy_sk_bundle_lock);
			xdst->u.dst.next = xfrm_policy_sk_bundles;
			xfrm_policy_sk_bundles = &xdst->u.dst;
			spin_unlock_bh(&xfrm_policy_sk_bundle_lock);

			route = xdst->route;
		}
	}

	if (xdst == NULL) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
			goto nopol;

		flo = flow_cache_lookup(net, fl, family, dir,
					xfrm_bundle_lookup, dst_orig);
		if (flo == NULL)
			goto nopol;
		if (IS_ERR(flo)) {
			err = PTR_ERR(flo);
			goto dropdst;
		}
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case in which xfrm_bundle_lookup() returns a
		 * bundle with a null route is when the template could
		 * not be resolved. It means policies are there, but the
		 * bundle could not be created, since we don't yet
		 * have the xfrm_states. We need to wait for the KM to
		 * negotiate new SAs or bail out with an error. */
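		/* With xfrm.sysctl_larval_drop set, the packet is sent
		 * to a one-shot blackhole route right away instead of
		 * sleeping below until the KM has resolved the states. */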
		if (net->xfrm.sysctl_larval_drop) {
			/* EREMOTE tells the caller to generate
			 * a one-shot blackhole route. */
			dst_release(dst);
			xfrm_pols_put(pols, drop_pols);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);

			return make_blackhole(net, family, dst_orig);
		}
		if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) {
			DECLARE_WAITQUEUE(wait, current);

			add_wait_queue(&net->xfrm.km_waitq, &wait);
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
			set_current_state(TASK_RUNNING);
			remove_wait_queue(&net->xfrm.km_waitq, &wait);

			if (!signal_pending(current)) {
				dst_release(dst);
				goto restart;
			}

			err = -ERESTART;
		} else
			err = -EAGAIN;

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
	}

no_transform:
	if (num_pols == 0)
		goto nopol;

	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
		goto error;
	}

	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();

	if (num_xfrms < 0) {
		/* Prohibit the flow */
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
		dst = dst_orig;
	}
ok:
	xfrm_pols_put(pols, drop_pols);
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
	return dst;

nopol:
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
		goto ok;
	}
	err = -ENOENT;
error:
	dst_release(dst);
dropdst:
	dst_release(dst_orig);
	xfrm_pols_put(pols, drop_pols);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup);

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}

/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have the policy cached on them.
 */

static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * 0 or more than 0 is returned when validation succeeds (either a bypass
 * because of an optional transport-mode template, or the next index past
 * the secpath state matched against the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
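/* An illustrative example (hypothetical states, for clarity): with a
 * secpath of { AH in transport mode, ESP in tunnel mode } and a template
 * that matches the ESP/tunnel state at index 1, a call with start == 0
 * skips index 0 (transport mode) and returns 2.  If instead no state
 * matches and the scan stops at the tunnel-mode state at index 1,
 * -2 - 1 == -3 is returned, encoding errored_index == 1.
 */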
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2 - idx;
			break;
		}
	}
	return start;
}

int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl, reverse);
	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(__xfrm_decode_session);

static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check the used SAs against their selectors. */
	if (skb->sp) {
		int i;

		for (i = skb->sp->len - 1; i >= 0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find the corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between every two transformations.
		 */
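		/* A sketch of the walk below: templates are consumed from
		 * the end of tpp[] (i = xfrm_nr - 1 down to 0) while k only
		 * ever advances through the secpath, so each template must
		 * be satisfied at or after the previous match; on failure
		 * the offending secpath index is encoded as -2 - idx and
		 * recovered into xerr_idx. */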
		for (i = xfrm_nr - 1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct flowi fl;
	struct dst_entry *dst;
	int res = 1;

	if (xfrm_decode_session(skb, &fl, family) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
		return 0;
	}

	skb_dst_force(skb);

	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
	if (IS_ERR(dst)) {
		res = 0;
		dst = NULL;
	}
	skb_dst_set(skb, dst);
	return res;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use.  We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted, we do not go looking around for all the parent
	 * XFRM dsts referencing it in order to invalidate them.  It
	 * is just too much work.  Instead we make the checks here on
	 * every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to
	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
	 * this, we want to force a new route lookup.
	 */
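	/* Note: DST_OBSOLETE_FORCE_CHK is negative while
	 * DST_OBSOLETE_DEAD is positive, so the "dst->obsolete < 0"
	 * test below selects the force-check case described above. */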
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok((struct xfrm_dst *)dst);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = dev_net(dev)->loopback_dev;
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such a dst must be popped before it reaches the
	 * point of failure. */
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst && dst->obsolete) {
		dst_release(dst);
		dst = NULL;
	}
	return dst;
}

static void __xfrm_garbage_collect(struct net *net)
{
	struct dst_entry *head, *next;

	spin_lock_bh(&xfrm_policy_sk_bundle_lock);
	head = xfrm_policy_sk_bundles;
	xfrm_policy_sk_bundles = NULL;
	spin_unlock_bh(&xfrm_policy_sk_bundle_lock);

	while (head) {
		next = head->next;
		dst_free(head);
		head = next;
	}
}

void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush();
	__xfrm_garbage_collect(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);

static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred();
	__xfrm_garbage_collect(net);
}

static void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst_metric_set(dst, RTAX_MTU, pmtu);
	} while ((dst = dst->next));
}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

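/* Bundle layout, as assumed by the walk below:
 *
 *	first (xfrm_dst) --child--> xfrm_dst --child--> ... --child--> route
 *
 * every dst on the xfrm level has ->xfrm set; the walk stops at the
 * first child without it, i.e. the plain route at the bottom.
 */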
static int xfrm_bundle_ok(struct xfrm_dst *first)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

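	/* Dummy bundles created for packet queueing (DST_XFRM_QUEUE)
	 * carry no xfrm_states or cached MTUs to validate, so they are
	 * always considered OK. */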
	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst_metric_set(dst, RTAX_MTU, mtu);

		if (last == first)
			break;

		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	return dst_metric_advmss(dst->path);
}

static unsigned int xfrm_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst_mtu(dst->path);
}

static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
}

int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	struct net *net;
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);

	rtnl_lock();
	for_each_net(net) {
		struct dst_ops *xfrm_dst_ops;

		switch (afinfo->family) {
		case AF_INET:
			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
			break;
#if IS_ENABLED(CONFIG_IPV6)
		case AF_INET6:
			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
			break;
#endif
		default:
			BUG();
		}
		*xfrm_dst_ops = *afinfo->dst_ops;
	}
	rtnl_unlock();

	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
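
/* A registration sketch (an assumption for illustration, modelled on
 * the IPv4 side; the real initializer lives outside this file):
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family		= AF_INET,
 *		.dst_ops	= &xfrm4_dst_ops,
 *		...
 *	};
 *
 *	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 *
 * Any dst_ops hooks the caller leaves NULL are filled in with the
 * xfrm defaults above.
 */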

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
					 NULL);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);
	if (!err) {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		synchronize_rcu();

		dst_ops->kmem_cachep = NULL;
		dst_ops->check = NULL;
		dst_ops->negative_advice = NULL;
		dst_ops->link_failure = NULL;
		afinfo->garbage_collect = NULL;
	}
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static void __net_init xfrm_dst_ops_init(struct net *net)
{
	struct xfrm_policy_afinfo *afinfo;

	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
	if (afinfo)
		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
#if IS_ENABLED(CONFIG_IPV6)
	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
	if (afinfo)
		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
#endif
	rcu_read_unlock();
}

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_DOWN:
		xfrm_garbage_collect(dev_net(dev));
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
	.notifier_call	= xfrm_dev_event,
};

#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;

	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
			  sizeof(struct linux_xfrm_mib),
			  __alignof__(struct linux_xfrm_mib)) < 0)
		return -ENOMEM;
	rv = xfrm_proc_init(net);
	if (rv < 0)
		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
	return rv;
}

static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
}
#else
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

static void xfrm_statistics_fini(struct net *net)
{
}
#endif

static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

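	/* Start with a modest 8-bucket hash (hmask = 8 - 1); it is grown
	 * later by the xfrm_hash_resize() work scheduled below as
	 * policies accumulate. */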
	hmask = 8 - 1;
	sz = (hmask + 1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		goto out_byidx;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		net->xfrm.policy_count[dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
	}

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	for (dir--; dir >= 0; dir--) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
	struct xfrm_audit audit_info;
	unsigned int sz;
	int dir;

	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
	audit_info.loginuid = INVALID_UID;
	audit_info.sessionid = -1;
	audit_info.secid = 0;
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
#endif
	audit_info.loginuid = INVALID_UID;
	audit_info.sessionid = -1;
	audit_info.secid = 0;
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);

	WARN_ON(!list_empty(&net->xfrm.policy_all));

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));

		htab = &net->xfrm.policy_bydst[dir];
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
	}

	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
}

static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	xfrm_dst_ops_init(net);
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	return 0;

out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

static void __net_exit xfrm_net_exit(struct net *net)
{
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}

static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
	xfrm_input_init();
}

#ifdef CONFIG_AUDITSYSCALL
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}

void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
			   kuid_t auid, u32 sessionid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-add");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
			      kuid_t auid, u32 sessionid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-delete");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

#ifdef CONFIG_XFRM_MIGRATE
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
				    sel_cmp->family) &&
		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
				    sel_cmp->family) &&
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
			return true;
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
			return true;
		}
	}
	return false;
}

static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_head *chain;
	u32 priority = ~0U;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &init_net.xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type &&
		    pol->priority < priority) {
			ret = pol;
			break;
		}
	}

	if (ret)
		xfrm_pol_hold(ret);

	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* in case of transport mode, template does not store
			 * any IP addresses, hence we just compare mode and
			 * protocol */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update the endpoint address(es) of the template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
	if (unlikely(pol->walk.dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
			atomic_inc(&pol->genid);
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicate entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}

int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	/* Stage 1 - find policy */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
#endif