addrlabel.c revision 0c6ce78abf6e228d44c3840edb8a4ae0c1299825
1/*
2 * IPv6 Address Label subsystem
3 * for the IPv6 "Default" Source Address Selection
4 *
5 * Copyright (C)2007 USAGI/WIDE Project
6 */
7/*
8 * Author:
9 * 	YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
10 */
11
12#include <linux/kernel.h>
13#include <linux/list.h>
14#include <linux/rcupdate.h>
15#include <linux/in6.h>
16#include <net/addrconf.h>
17#include <linux/if_addrlabel.h>
18#include <linux/netlink.h>
19#include <linux/rtnetlink.h>
20
/*
 * Debug tracing hook.  Change the condition below to 1 to have every
 * ADDRLABEL() call forwarded to printk(); otherwise it expands to an
 * empty statement.
 */
#if 0
#define ADDRLABEL(x...) printk(x)
#else
#define ADDRLABEL(x...) do { } while (0)
#endif
26
27/*
28 * Policy Table
29 */
30struct ip6addrlbl_entry
31{
32#ifdef CONFIG_NET_NS
33	struct net *lbl_net;
34#endif
35	struct in6_addr prefix;
36	int prefixlen;
37	int ifindex;
38	int addrtype;
39	u32 label;
40	struct hlist_node list;
41	atomic_t refcnt;
42	struct rcu_head rcu;
43};
44
45static struct ip6addrlbl_table
46{
47	struct hlist_head head;
48	spinlock_t lock;
49	u32 seq;
50} ip6addrlbl_table;
51
52static inline
53struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
54{
55#ifdef CONFIG_NET_NS
56	return lbl->lbl_net;
57#else
58	return &init_net;
59#endif
60}
61
62/*
63 * Default policy table (RFC3484 + extensions)
64 *
65 * prefix		addr_type	label
66 * -------------------------------------------------------------------------
67 * ::1/128		LOOPBACK	0
68 * ::/0			N/A		1
69 * 2002::/16		N/A		2
70 * ::/96		COMPATv4	3
71 * ::ffff:0:0/96	V4MAPPED	4
72 * fc00::/7		N/A		5		ULA (RFC 4193)
73 * 2001::/32		N/A		6		Teredo (RFC 4380)
74 * 2001:10::/28		N/A		7		ORCHID (RFC 4843)
75 *
76 * Note: 0xffffffff is used if we do not have any policies.
77 */
78
79#define IPV6_ADDR_LABEL_DEFAULT	0xffffffffUL
80
81static const __net_initdata struct ip6addrlbl_init_table
82{
83	const struct in6_addr *prefix;
84	int prefixlen;
85	u32 label;
86} ip6addrlbl_init_table[] = {
87	{	/* ::/0 */
88		.prefix = &in6addr_any,
89		.label = 1,
90	},{	/* fc00::/7 */
91		.prefix = &(struct in6_addr){{{ 0xfc }}},
92		.prefixlen = 7,
93		.label = 5,
94	},{	/* 2002::/16 */
95		.prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
96		.prefixlen = 16,
97		.label = 2,
98	},{	/* 2001::/32 */
99		.prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
100		.prefixlen = 32,
101		.label = 6,
102	},{	/* 2001:10::/28 */
103		.prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
104		.prefixlen = 28,
105		.label = 7,
106	},{	/* ::ffff:0:0 */
107		.prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
108		.prefixlen = 96,
109		.label = 4,
110	},{	/* ::/96 */
111		.prefix = &in6addr_any,
112		.prefixlen = 96,
113		.label = 3,
114	},{	/* ::1/128 */
115		.prefix = &in6addr_loopback,
116		.prefixlen = 128,
117		.label = 0,
118	}
119};
120
/* Object management */

/*
 * Release an entry immediately: drop the namespace reference taken in
 * ip6addrlbl_alloc() (CONFIG_NET_NS only) and free the memory.
 */
static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
{
#ifdef CONFIG_NET_NS
	struct net *owner = p->lbl_net;

	release_net(owner);
#endif
	kfree(p);
}
129
130static void ip6addrlbl_free_rcu(struct rcu_head *h)
131{
132	ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
133}
134
135static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
136{
137	return atomic_inc_not_zero(&p->refcnt);
138}
139
140static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
141{
142	if (atomic_dec_and_test(&p->refcnt))
143		call_rcu(&p->rcu, ip6addrlbl_free_rcu);
144}
145
146/* Find label */
147static int __ip6addrlbl_match(struct net *net,
148			      struct ip6addrlbl_entry *p,
149			      const struct in6_addr *addr,
150			      int addrtype, int ifindex)
151{
152	if (!net_eq(ip6addrlbl_net(p), net))
153		return 0;
154	if (p->ifindex && p->ifindex != ifindex)
155		return 0;
156	if (p->addrtype && p->addrtype != addrtype)
157		return 0;
158	if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
159		return 0;
160	return 1;
161}
162
163static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
164						  const struct in6_addr *addr,
165						  int type, int ifindex)
166{
167	struct hlist_node *pos;
168	struct ip6addrlbl_entry *p;
169	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
170		if (__ip6addrlbl_match(net, p, addr, type, ifindex))
171			return p;
172	}
173	return NULL;
174}
175
176u32 ipv6_addr_label(struct net *net,
177		    const struct in6_addr *addr, int type, int ifindex)
178{
179	u32 label;
180	struct ip6addrlbl_entry *p;
181
182	type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
183
184	rcu_read_lock();
185	p = __ipv6_addr_label(net, addr, type, ifindex);
186	label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
187	rcu_read_unlock();
188
189	ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n",
190		  __func__, addr, type, ifindex, label);
191
192	return label;
193}
194
195/* allocate one entry */
196static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
197						 const struct in6_addr *prefix,
198						 int prefixlen, int ifindex,
199						 u32 label)
200{
201	struct ip6addrlbl_entry *newp;
202	int addrtype;
203
204	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n",
205		  __func__, prefix, prefixlen, ifindex, (unsigned int)label);
206
207	addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
208
209	switch (addrtype) {
210	case IPV6_ADDR_MAPPED:
211		if (prefixlen > 96)
212			return ERR_PTR(-EINVAL);
213		if (prefixlen < 96)
214			addrtype = 0;
215		break;
216	case IPV6_ADDR_COMPATv4:
217		if (prefixlen != 96)
218			addrtype = 0;
219		break;
220	case IPV6_ADDR_LOOPBACK:
221		if (prefixlen != 128)
222			addrtype = 0;
223		break;
224	}
225
226	newp = kmalloc(sizeof(*newp), GFP_KERNEL);
227	if (!newp)
228		return ERR_PTR(-ENOMEM);
229
230	ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
231	newp->prefixlen = prefixlen;
232	newp->ifindex = ifindex;
233	newp->addrtype = addrtype;
234	newp->label = label;
235	INIT_HLIST_NODE(&newp->list);
236#ifdef CONFIG_NET_NS
237	newp->lbl_net = hold_net(net);
238#endif
239	atomic_set(&newp->refcnt, 1);
240	return newp;
241}
242
243/* add a label */
244static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
245{
246	int ret = 0;
247
248	ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
249			__func__,
250			newp, replace);
251
252	if (hlist_empty(&ip6addrlbl_table.head)) {
253		hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
254	} else {
255		struct hlist_node *pos, *n;
256		struct ip6addrlbl_entry *p = NULL;
257		hlist_for_each_entry_safe(p, pos, n,
258					  &ip6addrlbl_table.head, list) {
259			if (p->prefixlen == newp->prefixlen &&
260			    net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
261			    p->ifindex == newp->ifindex &&
262			    ipv6_addr_equal(&p->prefix, &newp->prefix)) {
263				if (!replace) {
264					ret = -EEXIST;
265					goto out;
266				}
267				hlist_replace_rcu(&p->list, &newp->list);
268				ip6addrlbl_put(p);
269				goto out;
270			} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
271				   (p->prefixlen < newp->prefixlen)) {
272				hlist_add_before_rcu(&newp->list, &p->list);
273				goto out;
274			}
275		}
276		hlist_add_after_rcu(&p->list, &newp->list);
277	}
278out:
279	if (!ret)
280		ip6addrlbl_table.seq++;
281	return ret;
282}
283
284/* add a label */
285static int ip6addrlbl_add(struct net *net,
286			  const struct in6_addr *prefix, int prefixlen,
287			  int ifindex, u32 label, int replace)
288{
289	struct ip6addrlbl_entry *newp;
290	int ret = 0;
291
292	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
293		  __func__, prefix, prefixlen, ifindex, (unsigned int)label,
294		  replace);
295
296	newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
297	if (IS_ERR(newp))
298		return PTR_ERR(newp);
299	spin_lock(&ip6addrlbl_table.lock);
300	ret = __ip6addrlbl_add(newp, replace);
301	spin_unlock(&ip6addrlbl_table.lock);
302	if (ret)
303		ip6addrlbl_free(newp);
304	return ret;
305}
306
307/* remove a label */
308static int __ip6addrlbl_del(struct net *net,
309			    const struct in6_addr *prefix, int prefixlen,
310			    int ifindex)
311{
312	struct ip6addrlbl_entry *p = NULL;
313	struct hlist_node *pos, *n;
314	int ret = -ESRCH;
315
316	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n",
317		  __func__, prefix, prefixlen, ifindex);
318
319	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
320		if (p->prefixlen == prefixlen &&
321		    net_eq(ip6addrlbl_net(p), net) &&
322		    p->ifindex == ifindex &&
323		    ipv6_addr_equal(&p->prefix, prefix)) {
324			hlist_del_rcu(&p->list);
325			ip6addrlbl_put(p);
326			ret = 0;
327			break;
328		}
329	}
330	return ret;
331}
332
333static int ip6addrlbl_del(struct net *net,
334			  const struct in6_addr *prefix, int prefixlen,
335			  int ifindex)
336{
337	struct in6_addr prefix_buf;
338	int ret;
339
340	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n",
341		  __func__, prefix, prefixlen, ifindex);
342
343	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
344	spin_lock(&ip6addrlbl_table.lock);
345	ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
346	spin_unlock(&ip6addrlbl_table.lock);
347	return ret;
348}
349
350/* add default label */
351static int __net_init ip6addrlbl_net_init(struct net *net)
352{
353	int err = 0;
354	int i;
355
356	ADDRLABEL(KERN_DEBUG "%s()\n", __func__);
357
358	for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
359		int ret = ip6addrlbl_add(net,
360					 ip6addrlbl_init_table[i].prefix,
361					 ip6addrlbl_init_table[i].prefixlen,
362					 0,
363					 ip6addrlbl_init_table[i].label, 0);
364		/* XXX: should we free all rules when we catch an error? */
365		if (ret && (!err || err != -ENOMEM))
366			err = ret;
367	}
368	return err;
369}
370
371static void __net_exit ip6addrlbl_net_exit(struct net *net)
372{
373	struct ip6addrlbl_entry *p = NULL;
374	struct hlist_node *pos, *n;
375
376	/* Remove all labels belonging to the exiting net */
377	spin_lock(&ip6addrlbl_table.lock);
378	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
379		if (net_eq(ip6addrlbl_net(p), net)) {
380			hlist_del_rcu(&p->list);
381			ip6addrlbl_put(p);
382		}
383	}
384	spin_unlock(&ip6addrlbl_table.lock);
385}
386
387static struct pernet_operations ipv6_addr_label_ops = {
388	.init = ip6addrlbl_net_init,
389	.exit = ip6addrlbl_net_exit,
390};
391
392int __init ipv6_addr_label_init(void)
393{
394	spin_lock_init(&ip6addrlbl_table.lock);
395
396	return register_pernet_subsys(&ipv6_addr_label_ops);
397}
398
399static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
400	[IFAL_ADDRESS]		= { .len = sizeof(struct in6_addr), },
401	[IFAL_LABEL]		= { .len = sizeof(u32), },
402};
403
404static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
405			     void *arg)
406{
407	struct net *net = sock_net(skb->sk);
408	struct ifaddrlblmsg *ifal;
409	struct nlattr *tb[IFAL_MAX+1];
410	struct in6_addr *pfx;
411	u32 label;
412	int err = 0;
413
414	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
415	if (err < 0)
416		return err;
417
418	ifal = nlmsg_data(nlh);
419
420	if (ifal->ifal_family != AF_INET6 ||
421	    ifal->ifal_prefixlen > 128)
422		return -EINVAL;
423
424	if (ifal->ifal_index &&
425	    !__dev_get_by_index(net, ifal->ifal_index))
426		return -EINVAL;
427
428	if (!tb[IFAL_ADDRESS])
429		return -EINVAL;
430
431	pfx = nla_data(tb[IFAL_ADDRESS]);
432	if (!pfx)
433		return -EINVAL;
434
435	if (!tb[IFAL_LABEL])
436		return -EINVAL;
437	label = nla_get_u32(tb[IFAL_LABEL]);
438	if (label == IPV6_ADDR_LABEL_DEFAULT)
439		return -EINVAL;
440
441	switch(nlh->nlmsg_type) {
442	case RTM_NEWADDRLABEL:
443		err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
444				     ifal->ifal_index, label,
445				     nlh->nlmsg_flags & NLM_F_REPLACE);
446		break;
447	case RTM_DELADDRLABEL:
448		err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen,
449				     ifal->ifal_index);
450		break;
451	default:
452		err = -EOPNOTSUPP;
453	}
454	return err;
455}
456
457static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
458				     int prefixlen, int ifindex, u32 lseq)
459{
460	struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
461	ifal->ifal_family = AF_INET6;
462	ifal->ifal_prefixlen = prefixlen;
463	ifal->ifal_flags = 0;
464	ifal->ifal_index = ifindex;
465	ifal->ifal_seq = lseq;
466};
467
468static int ip6addrlbl_fill(struct sk_buff *skb,
469			   struct ip6addrlbl_entry *p,
470			   u32 lseq,
471			   u32 pid, u32 seq, int event,
472			   unsigned int flags)
473{
474	struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
475					 sizeof(struct ifaddrlblmsg), flags);
476	if (!nlh)
477		return -EMSGSIZE;
478
479	ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
480
481	if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
482	    nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
483		nlmsg_cancel(skb, nlh);
484		return -EMSGSIZE;
485	}
486
487	return nlmsg_end(skb, nlh);
488}
489
490static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
491{
492	struct net *net = sock_net(skb->sk);
493	struct ip6addrlbl_entry *p;
494	struct hlist_node *pos;
495	int idx = 0, s_idx = cb->args[0];
496	int err;
497
498	rcu_read_lock();
499	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
500		if (idx >= s_idx &&
501		    net_eq(ip6addrlbl_net(p), net)) {
502			if ((err = ip6addrlbl_fill(skb, p,
503						   ip6addrlbl_table.seq,
504						   NETLINK_CB(cb->skb).pid,
505						   cb->nlh->nlmsg_seq,
506						   RTM_NEWADDRLABEL,
507						   NLM_F_MULTI)) <= 0)
508				break;
509		}
510		idx++;
511	}
512	rcu_read_unlock();
513	cb->args[0] = idx;
514	return skb->len;
515}
516
517static inline int ip6addrlbl_msgsize(void)
518{
519	return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
520		+ nla_total_size(16)	/* IFAL_ADDRESS */
521		+ nla_total_size(4)	/* IFAL_LABEL */
522	);
523}
524
525static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
526			  void *arg)
527{
528	struct net *net = sock_net(in_skb->sk);
529	struct ifaddrlblmsg *ifal;
530	struct nlattr *tb[IFAL_MAX+1];
531	struct in6_addr *addr;
532	u32 lseq;
533	int err = 0;
534	struct ip6addrlbl_entry *p;
535	struct sk_buff *skb;
536
537	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
538	if (err < 0)
539		return err;
540
541	ifal = nlmsg_data(nlh);
542
543	if (ifal->ifal_family != AF_INET6 ||
544	    ifal->ifal_prefixlen != 128)
545		return -EINVAL;
546
547	if (ifal->ifal_index &&
548	    !__dev_get_by_index(net, ifal->ifal_index))
549		return -EINVAL;
550
551	if (!tb[IFAL_ADDRESS])
552		return -EINVAL;
553
554	addr = nla_data(tb[IFAL_ADDRESS]);
555	if (!addr)
556		return -EINVAL;
557
558	rcu_read_lock();
559	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
560	if (p && ip6addrlbl_hold(p))
561		p = NULL;
562	lseq = ip6addrlbl_table.seq;
563	rcu_read_unlock();
564
565	if (!p) {
566		err = -ESRCH;
567		goto out;
568	}
569
570	if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
571		ip6addrlbl_put(p);
572		return -ENOBUFS;
573	}
574
575	err = ip6addrlbl_fill(skb, p, lseq,
576			      NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
577			      RTM_NEWADDRLABEL, 0);
578
579	ip6addrlbl_put(p);
580
581	if (err < 0) {
582		WARN_ON(err == -EMSGSIZE);
583		kfree_skb(skb);
584		goto out;
585	}
586
587	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
588out:
589	return err;
590}
591
592void __init ipv6_addr_label_rtnl_register(void)
593{
594	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL);
595	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL);
596	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump);
597}
598
599