addrlabel.c revision 0dc47877a3de00ceadea0005189656ae8dc52669
1/*
2 * IPv6 Address Label subsystem
3 * for the IPv6 "Default" Source Address Selection
4 *
5 * Copyright (C)2007 USAGI/WIDE Project
6 */
7/*
8 * Author:
9 * 	YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
10 */
11
12#include <linux/kernel.h>
13#include <linux/list.h>
14#include <linux/rcupdate.h>
15#include <linux/in6.h>
16#include <net/addrconf.h>
17#include <linux/if_addrlabel.h>
18#include <linux/netlink.h>
19#include <linux/rtnetlink.h>
20
/*
 * Debug tracing for this file.  Change "#if 0" to "#if 1" to route
 * ADDRLABEL() through printk(); otherwise it expands to an empty statement
 * and its arguments are discarded by the preprocessor.
 */
#if 0
#define ADDRLABEL(x...) printk(x)
#else
#define ADDRLABEL(x...) do { } while (0)
#endif
26
27/*
28 * Policy Table
29 */
30struct ip6addrlbl_entry
31{
32	struct in6_addr prefix;
33	int prefixlen;
34	int ifindex;
35	int addrtype;
36	u32 label;
37	struct hlist_node list;
38	atomic_t refcnt;
39	struct rcu_head rcu;
40};
41
42static struct ip6addrlbl_table
43{
44	struct hlist_head head;
45	spinlock_t lock;
46	u32 seq;
47} ip6addrlbl_table;
48
49/*
50 * Default policy table (RFC3484 + extensions)
51 *
52 * prefix		addr_type	label
53 * -------------------------------------------------------------------------
54 * ::1/128		LOOPBACK	0
55 * ::/0			N/A		1
56 * 2002::/16		N/A		2
57 * ::/96		COMPATv4	3
58 * ::ffff:0:0/96	V4MAPPED	4
59 * fc00::/7		N/A		5		ULA (RFC 4193)
60 * 2001::/32		N/A		6		Teredo (RFC 4380)
61 * 2001:10::/28		N/A		7		ORCHID (RFC 4843)
62 *
63 * Note: 0xffffffff is used if we do not have any policies.
64 */
65
/*
 * Label returned by ipv6_addr_label() when no table entry matches.
 * ip6addrlbl_newdel() rejects it as a user-supplied label.
 */
#define IPV6_ADDR_LABEL_DEFAULT	0xffffffffUL
67
68static const __initdata struct ip6addrlbl_init_table
69{
70	const struct in6_addr *prefix;
71	int prefixlen;
72	u32 label;
73} ip6addrlbl_init_table[] = {
74	{	/* ::/0 */
75		.prefix = &in6addr_any,
76		.label = 1,
77	},{	/* fc00::/7 */
78		.prefix = &(struct in6_addr){{{ 0xfc }}},
79		.prefixlen = 7,
80		.label = 5,
81	},{	/* 2002::/16 */
82		.prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
83		.prefixlen = 16,
84		.label = 2,
85	},{	/* 2001::/32 */
86		.prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
87		.prefixlen = 32,
88		.label = 6,
89	},{	/* 2001:10::/28 */
90		.prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
91		.prefixlen = 28,
92		.label = 7,
93	},{	/* ::ffff:0:0 */
94		.prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
95		.prefixlen = 96,
96		.label = 4,
97	},{	/* ::/96 */
98		.prefix = &in6addr_any,
99		.prefixlen = 96,
100		.label = 3,
101	},{	/* ::1/128 */
102		.prefix = &in6addr_loopback,
103		.prefixlen = 128,
104		.label = 0,
105	}
106};
107
108/* Object management */
/*
 * Release an entry's memory right away, without waiting for an RCU grace
 * period.
 */
static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
{
	kfree(p);
}
113
114static void ip6addrlbl_free_rcu(struct rcu_head *h)
115{
116	ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
117}
118
119static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
120{
121	return atomic_inc_not_zero(&p->refcnt);
122}
123
124static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
125{
126	if (atomic_dec_and_test(&p->refcnt))
127		call_rcu(&p->rcu, ip6addrlbl_free_rcu);
128}
129
130/* Find label */
131static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
132			      const struct in6_addr *addr,
133			      int addrtype, int ifindex)
134{
135	if (p->ifindex && p->ifindex != ifindex)
136		return 0;
137	if (p->addrtype && p->addrtype != addrtype)
138		return 0;
139	if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
140		return 0;
141	return 1;
142}
143
144static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr,
145						  int type, int ifindex)
146{
147	struct hlist_node *pos;
148	struct ip6addrlbl_entry *p;
149	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
150		if (__ip6addrlbl_match(p, addr, type, ifindex))
151			return p;
152	}
153	return NULL;
154}
155
156u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
157{
158	u32 label;
159	struct ip6addrlbl_entry *p;
160
161	type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
162
163	rcu_read_lock();
164	p = __ipv6_addr_label(addr, type, ifindex);
165	label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
166	rcu_read_unlock();
167
168	ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n",
169			__func__,
170			NIP6(*addr), type, ifindex,
171			label);
172
173	return label;
174}
175
176/* allocate one entry */
177static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
178						 int prefixlen, int ifindex,
179						 u32 label)
180{
181	struct ip6addrlbl_entry *newp;
182	int addrtype;
183
184	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n",
185			__func__,
186			NIP6(*prefix), prefixlen,
187			ifindex,
188			(unsigned int)label);
189
190	addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
191
192	switch (addrtype) {
193	case IPV6_ADDR_MAPPED:
194		if (prefixlen > 96)
195			return ERR_PTR(-EINVAL);
196		if (prefixlen < 96)
197			addrtype = 0;
198		break;
199	case IPV6_ADDR_COMPATv4:
200		if (prefixlen != 96)
201			addrtype = 0;
202		break;
203	case IPV6_ADDR_LOOPBACK:
204		if (prefixlen != 128)
205			addrtype = 0;
206		break;
207	}
208
209	newp = kmalloc(sizeof(*newp), GFP_KERNEL);
210	if (!newp)
211		return ERR_PTR(-ENOMEM);
212
213	ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
214	newp->prefixlen = prefixlen;
215	newp->ifindex = ifindex;
216	newp->addrtype = addrtype;
217	newp->label = label;
218	INIT_HLIST_NODE(&newp->list);
219	atomic_set(&newp->refcnt, 1);
220	return newp;
221}
222
223/* add a label */
224static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
225{
226	int ret = 0;
227
228	ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
229			__func__,
230			newp, replace);
231
232	if (hlist_empty(&ip6addrlbl_table.head)) {
233		hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
234	} else {
235		struct hlist_node *pos, *n;
236		struct ip6addrlbl_entry *p = NULL;
237		hlist_for_each_entry_safe(p, pos, n,
238					  &ip6addrlbl_table.head, list) {
239			if (p->prefixlen == newp->prefixlen &&
240			    p->ifindex == newp->ifindex &&
241			    ipv6_addr_equal(&p->prefix, &newp->prefix)) {
242				if (!replace) {
243					ret = -EEXIST;
244					goto out;
245				}
246				hlist_replace_rcu(&p->list, &newp->list);
247				ip6addrlbl_put(p);
248				goto out;
249			} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
250				   (p->prefixlen < newp->prefixlen)) {
251				hlist_add_before_rcu(&newp->list, &p->list);
252				goto out;
253			}
254		}
255		hlist_add_after_rcu(&p->list, &newp->list);
256	}
257out:
258	if (!ret)
259		ip6addrlbl_table.seq++;
260	return ret;
261}
262
263/* add a label */
264static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
265			  int ifindex, u32 label, int replace)
266{
267	struct ip6addrlbl_entry *newp;
268	int ret = 0;
269
270	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
271			__func__,
272			NIP6(*prefix), prefixlen,
273			ifindex,
274			(unsigned int)label,
275			replace);
276
277	newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
278	if (IS_ERR(newp))
279		return PTR_ERR(newp);
280	spin_lock(&ip6addrlbl_table.lock);
281	ret = __ip6addrlbl_add(newp, replace);
282	spin_unlock(&ip6addrlbl_table.lock);
283	if (ret)
284		ip6addrlbl_free(newp);
285	return ret;
286}
287
288/* remove a label */
289static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
290			    int ifindex)
291{
292	struct ip6addrlbl_entry *p = NULL;
293	struct hlist_node *pos, *n;
294	int ret = -ESRCH;
295
296	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
297			__func__,
298			NIP6(*prefix), prefixlen,
299			ifindex);
300
301	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
302		if (p->prefixlen == prefixlen &&
303		    p->ifindex == ifindex &&
304		    ipv6_addr_equal(&p->prefix, prefix)) {
305			hlist_del_rcu(&p->list);
306			ip6addrlbl_put(p);
307			ret = 0;
308			break;
309		}
310	}
311	return ret;
312}
313
314static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
315			  int ifindex)
316{
317	struct in6_addr prefix_buf;
318	int ret;
319
320	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
321			__func__,
322			NIP6(*prefix), prefixlen,
323			ifindex);
324
325	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
326	spin_lock(&ip6addrlbl_table.lock);
327	ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex);
328	spin_unlock(&ip6addrlbl_table.lock);
329	return ret;
330}
331
332/* add default label */
333static __init int ip6addrlbl_init(void)
334{
335	int err = 0;
336	int i;
337
338	ADDRLABEL(KERN_DEBUG "%s()\n", __func__);
339
340	for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
341		int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix,
342					 ip6addrlbl_init_table[i].prefixlen,
343					 0,
344					 ip6addrlbl_init_table[i].label, 0);
345		/* XXX: should we free all rules when we catch an error? */
346		if (ret && (!err || err != -ENOMEM))
347			err = ret;
348	}
349	return err;
350}
351
352int __init ipv6_addr_label_init(void)
353{
354	spin_lock_init(&ip6addrlbl_table.lock);
355
356	return ip6addrlbl_init();
357}
358
359static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
360	[IFAL_ADDRESS]		= { .len = sizeof(struct in6_addr), },
361	[IFAL_LABEL]		= { .len = sizeof(u32), },
362};
363
364static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
365			     void *arg)
366{
367	struct net *net = skb->sk->sk_net;
368	struct ifaddrlblmsg *ifal;
369	struct nlattr *tb[IFAL_MAX+1];
370	struct in6_addr *pfx;
371	u32 label;
372	int err = 0;
373
374	if (net != &init_net)
375		return 0;
376
377	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
378	if (err < 0)
379		return err;
380
381	ifal = nlmsg_data(nlh);
382
383	if (ifal->ifal_family != AF_INET6 ||
384	    ifal->ifal_prefixlen > 128)
385		return -EINVAL;
386
387	if (ifal->ifal_index &&
388	    !__dev_get_by_index(&init_net, ifal->ifal_index))
389		return -EINVAL;
390
391	if (!tb[IFAL_ADDRESS])
392		return -EINVAL;
393
394	pfx = nla_data(tb[IFAL_ADDRESS]);
395	if (!pfx)
396		return -EINVAL;
397
398	if (!tb[IFAL_LABEL])
399		return -EINVAL;
400	label = nla_get_u32(tb[IFAL_LABEL]);
401	if (label == IPV6_ADDR_LABEL_DEFAULT)
402		return -EINVAL;
403
404	switch(nlh->nlmsg_type) {
405	case RTM_NEWADDRLABEL:
406		err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen,
407				     ifal->ifal_index, label,
408				     nlh->nlmsg_flags & NLM_F_REPLACE);
409		break;
410	case RTM_DELADDRLABEL:
411		err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen,
412				     ifal->ifal_index);
413		break;
414	default:
415		err = -EOPNOTSUPP;
416	}
417	return err;
418}
419
420static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
421				     int prefixlen, int ifindex, u32 lseq)
422{
423	struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
424	ifal->ifal_family = AF_INET6;
425	ifal->ifal_prefixlen = prefixlen;
426	ifal->ifal_flags = 0;
427	ifal->ifal_index = ifindex;
428	ifal->ifal_seq = lseq;
429};
430
431static int ip6addrlbl_fill(struct sk_buff *skb,
432			   struct ip6addrlbl_entry *p,
433			   u32 lseq,
434			   u32 pid, u32 seq, int event,
435			   unsigned int flags)
436{
437	struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
438					 sizeof(struct ifaddrlblmsg), flags);
439	if (!nlh)
440		return -EMSGSIZE;
441
442	ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
443
444	if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
445	    nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
446		nlmsg_cancel(skb, nlh);
447		return -EMSGSIZE;
448	}
449
450	return nlmsg_end(skb, nlh);
451}
452
453static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
454{
455	struct net *net = skb->sk->sk_net;
456	struct ip6addrlbl_entry *p;
457	struct hlist_node *pos;
458	int idx = 0, s_idx = cb->args[0];
459	int err;
460
461	if (net != &init_net)
462		return 0;
463
464	rcu_read_lock();
465	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
466		if (idx >= s_idx) {
467			if ((err = ip6addrlbl_fill(skb, p,
468						   ip6addrlbl_table.seq,
469						   NETLINK_CB(cb->skb).pid,
470						   cb->nlh->nlmsg_seq,
471						   RTM_NEWADDRLABEL,
472						   NLM_F_MULTI)) <= 0)
473				break;
474		}
475		idx++;
476	}
477	rcu_read_unlock();
478	cb->args[0] = idx;
479	return skb->len;
480}
481
482static inline int ip6addrlbl_msgsize(void)
483{
484	return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
485		+ nla_total_size(16)	/* IFAL_ADDRESS */
486		+ nla_total_size(4)	/* IFAL_LABEL */
487	);
488}
489
490static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
491			  void *arg)
492{
493	struct net *net = in_skb->sk->sk_net;
494	struct ifaddrlblmsg *ifal;
495	struct nlattr *tb[IFAL_MAX+1];
496	struct in6_addr *addr;
497	u32 lseq;
498	int err = 0;
499	struct ip6addrlbl_entry *p;
500	struct sk_buff *skb;
501
502	if (net != &init_net)
503		return 0;
504
505	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
506	if (err < 0)
507		return err;
508
509	ifal = nlmsg_data(nlh);
510
511	if (ifal->ifal_family != AF_INET6 ||
512	    ifal->ifal_prefixlen != 128)
513		return -EINVAL;
514
515	if (ifal->ifal_index &&
516	    !__dev_get_by_index(&init_net, ifal->ifal_index))
517		return -EINVAL;
518
519	if (!tb[IFAL_ADDRESS])
520		return -EINVAL;
521
522	addr = nla_data(tb[IFAL_ADDRESS]);
523	if (!addr)
524		return -EINVAL;
525
526	rcu_read_lock();
527	p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index);
528	if (p && ip6addrlbl_hold(p))
529		p = NULL;
530	lseq = ip6addrlbl_table.seq;
531	rcu_read_unlock();
532
533	if (!p) {
534		err = -ESRCH;
535		goto out;
536	}
537
538	if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
539		ip6addrlbl_put(p);
540		return -ENOBUFS;
541	}
542
543	err = ip6addrlbl_fill(skb, p, lseq,
544			      NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
545			      RTM_NEWADDRLABEL, 0);
546
547	ip6addrlbl_put(p);
548
549	if (err < 0) {
550		WARN_ON(err == -EMSGSIZE);
551		kfree_skb(skb);
552		goto out;
553	}
554
555	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
556out:
557	return err;
558}
559
560void __init ipv6_addr_label_rtnl_register(void)
561{
562	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL);
563	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL);
564	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump);
565}
566
567