/* addrlabel.c revision dfc47ef8639facd77210e74be831943c2fdd9c74 */
1/* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7/* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12#include <linux/kernel.h> 13#include <linux/list.h> 14#include <linux/rcupdate.h> 15#include <linux/in6.h> 16#include <linux/slab.h> 17#include <net/addrconf.h> 18#include <linux/if_addrlabel.h> 19#include <linux/netlink.h> 20#include <linux/rtnetlink.h> 21 22#if 0 23#define ADDRLABEL(x...) printk(x) 24#else 25#define ADDRLABEL(x...) do { ; } while(0) 26#endif 27 28/* 29 * Policy Table 30 */ 31struct ip6addrlbl_entry 32{ 33#ifdef CONFIG_NET_NS 34 struct net *lbl_net; 35#endif 36 struct in6_addr prefix; 37 int prefixlen; 38 int ifindex; 39 int addrtype; 40 u32 label; 41 struct hlist_node list; 42 atomic_t refcnt; 43 struct rcu_head rcu; 44}; 45 46static struct ip6addrlbl_table 47{ 48 struct hlist_head head; 49 spinlock_t lock; 50 u32 seq; 51} ip6addrlbl_table; 52 53static inline 54struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 55{ 56 return read_pnet(&lbl->lbl_net); 57} 58 59/* 60 * Default policy table (RFC6724 + extensions) 61 * 62 * prefix addr_type label 63 * ------------------------------------------------------------------------- 64 * ::1/128 LOOPBACK 0 65 * ::/0 N/A 1 66 * 2002::/16 N/A 2 67 * ::/96 COMPATv4 3 68 * ::ffff:0:0/96 V4MAPPED 4 69 * fc00::/7 N/A 5 ULA (RFC 4193) 70 * 2001::/32 N/A 6 Teredo (RFC 4380) 71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 72 * fec0::/10 N/A 11 Site-local 73 * (deprecated by RFC3879) 74 * 3ffe::/16 N/A 12 6bone 75 * 76 * Note: 0xffffffff is used if we do not have any policies. 77 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 
78 */ 79 80#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 81 82static const __net_initconst struct ip6addrlbl_init_table 83{ 84 const struct in6_addr *prefix; 85 int prefixlen; 86 u32 label; 87} ip6addrlbl_init_table[] = { 88 { /* ::/0 */ 89 .prefix = &in6addr_any, 90 .label = 1, 91 },{ /* fc00::/7 */ 92 .prefix = &(struct in6_addr){{{ 0xfc }}}, 93 .prefixlen = 7, 94 .label = 5, 95 },{ /* fec0::/10 */ 96 .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, 97 .prefixlen = 10, 98 .label = 11, 99 },{ /* 2002::/16 */ 100 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 101 .prefixlen = 16, 102 .label = 2, 103 },{ /* 3ffe::/16 */ 104 .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, 105 .prefixlen = 16, 106 .label = 12, 107 },{ /* 2001::/32 */ 108 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 109 .prefixlen = 32, 110 .label = 6, 111 },{ /* 2001:10::/28 */ 112 .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, 113 .prefixlen = 28, 114 .label = 7, 115 },{ /* ::ffff:0:0 */ 116 .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, 117 .prefixlen = 96, 118 .label = 4, 119 },{ /* ::/96 */ 120 .prefix = &in6addr_any, 121 .prefixlen = 96, 122 .label = 3, 123 },{ /* ::1/128 */ 124 .prefix = &in6addr_loopback, 125 .prefixlen = 128, 126 .label = 0, 127 } 128}; 129 130/* Object management */ 131static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 132{ 133#ifdef CONFIG_NET_NS 134 release_net(p->lbl_net); 135#endif 136 kfree(p); 137} 138 139static void ip6addrlbl_free_rcu(struct rcu_head *h) 140{ 141 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 142} 143 144static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 145{ 146 return atomic_inc_not_zero(&p->refcnt); 147} 148 149static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 150{ 151 if (atomic_dec_and_test(&p->refcnt)) 152 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 153} 154 155/* Find label */ 156static bool __ip6addrlbl_match(struct net *net, 157 const struct ip6addrlbl_entry *p, 158 const 
struct in6_addr *addr, 159 int addrtype, int ifindex) 160{ 161 if (!net_eq(ip6addrlbl_net(p), net)) 162 return false; 163 if (p->ifindex && p->ifindex != ifindex) 164 return false; 165 if (p->addrtype && p->addrtype != addrtype) 166 return false; 167 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 168 return false; 169 return true; 170} 171 172static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 173 const struct in6_addr *addr, 174 int type, int ifindex) 175{ 176 struct hlist_node *pos; 177 struct ip6addrlbl_entry *p; 178 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 179 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 180 return p; 181 } 182 return NULL; 183} 184 185u32 ipv6_addr_label(struct net *net, 186 const struct in6_addr *addr, int type, int ifindex) 187{ 188 u32 label; 189 struct ip6addrlbl_entry *p; 190 191 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 192 193 rcu_read_lock(); 194 p = __ipv6_addr_label(net, addr, type, ifindex); 195 label = p ? 
p->label : IPV6_ADDR_LABEL_DEFAULT; 196 rcu_read_unlock(); 197 198 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 199 __func__, addr, type, ifindex, label); 200 201 return label; 202} 203 204/* allocate one entry */ 205static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 206 const struct in6_addr *prefix, 207 int prefixlen, int ifindex, 208 u32 label) 209{ 210 struct ip6addrlbl_entry *newp; 211 int addrtype; 212 213 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 214 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 215 216 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 217 218 switch (addrtype) { 219 case IPV6_ADDR_MAPPED: 220 if (prefixlen > 96) 221 return ERR_PTR(-EINVAL); 222 if (prefixlen < 96) 223 addrtype = 0; 224 break; 225 case IPV6_ADDR_COMPATv4: 226 if (prefixlen != 96) 227 addrtype = 0; 228 break; 229 case IPV6_ADDR_LOOPBACK: 230 if (prefixlen != 128) 231 addrtype = 0; 232 break; 233 } 234 235 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 236 if (!newp) 237 return ERR_PTR(-ENOMEM); 238 239 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 240 newp->prefixlen = prefixlen; 241 newp->ifindex = ifindex; 242 newp->addrtype = addrtype; 243 newp->label = label; 244 INIT_HLIST_NODE(&newp->list); 245#ifdef CONFIG_NET_NS 246 newp->lbl_net = hold_net(net); 247#endif 248 atomic_set(&newp->refcnt, 1); 249 return newp; 250} 251 252/* add a label */ 253static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 254{ 255 int ret = 0; 256 257 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", 258 __func__, 259 newp, replace); 260 261 if (hlist_empty(&ip6addrlbl_table.head)) { 262 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 263 } else { 264 struct hlist_node *pos, *n; 265 struct ip6addrlbl_entry *p = NULL; 266 hlist_for_each_entry_safe(p, pos, n, 267 &ip6addrlbl_table.head, list) { 268 if (p->prefixlen == newp->prefixlen && 
269 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 270 p->ifindex == newp->ifindex && 271 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 272 if (!replace) { 273 ret = -EEXIST; 274 goto out; 275 } 276 hlist_replace_rcu(&p->list, &newp->list); 277 ip6addrlbl_put(p); 278 goto out; 279 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 280 (p->prefixlen < newp->prefixlen)) { 281 hlist_add_before_rcu(&newp->list, &p->list); 282 goto out; 283 } 284 } 285 hlist_add_after_rcu(&p->list, &newp->list); 286 } 287out: 288 if (!ret) 289 ip6addrlbl_table.seq++; 290 return ret; 291} 292 293/* add a label */ 294static int ip6addrlbl_add(struct net *net, 295 const struct in6_addr *prefix, int prefixlen, 296 int ifindex, u32 label, int replace) 297{ 298 struct ip6addrlbl_entry *newp; 299 int ret = 0; 300 301 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 302 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 303 replace); 304 305 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 306 if (IS_ERR(newp)) 307 return PTR_ERR(newp); 308 spin_lock(&ip6addrlbl_table.lock); 309 ret = __ip6addrlbl_add(newp, replace); 310 spin_unlock(&ip6addrlbl_table.lock); 311 if (ret) 312 ip6addrlbl_free(newp); 313 return ret; 314} 315 316/* remove a label */ 317static int __ip6addrlbl_del(struct net *net, 318 const struct in6_addr *prefix, int prefixlen, 319 int ifindex) 320{ 321 struct ip6addrlbl_entry *p = NULL; 322 struct hlist_node *pos, *n; 323 int ret = -ESRCH; 324 325 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 326 __func__, prefix, prefixlen, ifindex); 327 328 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { 329 if (p->prefixlen == prefixlen && 330 net_eq(ip6addrlbl_net(p), net) && 331 p->ifindex == ifindex && 332 ipv6_addr_equal(&p->prefix, prefix)) { 333 hlist_del_rcu(&p->list); 334 ip6addrlbl_put(p); 335 ret = 0; 336 break; 337 } 338 } 339 return ret; 340} 341 342static int 
ip6addrlbl_del(struct net *net, 343 const struct in6_addr *prefix, int prefixlen, 344 int ifindex) 345{ 346 struct in6_addr prefix_buf; 347 int ret; 348 349 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 350 __func__, prefix, prefixlen, ifindex); 351 352 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 353 spin_lock(&ip6addrlbl_table.lock); 354 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 355 spin_unlock(&ip6addrlbl_table.lock); 356 return ret; 357} 358 359/* add default label */ 360static int __net_init ip6addrlbl_net_init(struct net *net) 361{ 362 int err = 0; 363 int i; 364 365 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 366 367 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 368 int ret = ip6addrlbl_add(net, 369 ip6addrlbl_init_table[i].prefix, 370 ip6addrlbl_init_table[i].prefixlen, 371 0, 372 ip6addrlbl_init_table[i].label, 0); 373 /* XXX: should we free all rules when we catch an error? */ 374 if (ret && (!err || err != -ENOMEM)) 375 err = ret; 376 } 377 return err; 378} 379 380static void __net_exit ip6addrlbl_net_exit(struct net *net) 381{ 382 struct ip6addrlbl_entry *p = NULL; 383 struct hlist_node *pos, *n; 384 385 /* Remove all labels belonging to the exiting net */ 386 spin_lock(&ip6addrlbl_table.lock); 387 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { 388 if (net_eq(ip6addrlbl_net(p), net)) { 389 hlist_del_rcu(&p->list); 390 ip6addrlbl_put(p); 391 } 392 } 393 spin_unlock(&ip6addrlbl_table.lock); 394} 395 396static struct pernet_operations ipv6_addr_label_ops = { 397 .init = ip6addrlbl_net_init, 398 .exit = ip6addrlbl_net_exit, 399}; 400 401int __init ipv6_addr_label_init(void) 402{ 403 spin_lock_init(&ip6addrlbl_table.lock); 404 405 return register_pernet_subsys(&ipv6_addr_label_ops); 406} 407 408void ipv6_addr_label_cleanup(void) 409{ 410 unregister_pernet_subsys(&ipv6_addr_label_ops); 411} 412 413static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 414 [IFAL_ADDRESS] = { .len 
= sizeof(struct in6_addr), }, 415 [IFAL_LABEL] = { .len = sizeof(u32), }, 416}; 417 418static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, 419 void *arg) 420{ 421 struct net *net = sock_net(skb->sk); 422 struct ifaddrlblmsg *ifal; 423 struct nlattr *tb[IFAL_MAX+1]; 424 struct in6_addr *pfx; 425 u32 label; 426 int err = 0; 427 428 if (!capable(CAP_NET_ADMIN)) 429 return -EPERM; 430 431 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 432 if (err < 0) 433 return err; 434 435 ifal = nlmsg_data(nlh); 436 437 if (ifal->ifal_family != AF_INET6 || 438 ifal->ifal_prefixlen > 128) 439 return -EINVAL; 440 441 if (!tb[IFAL_ADDRESS]) 442 return -EINVAL; 443 444 pfx = nla_data(tb[IFAL_ADDRESS]); 445 if (!pfx) 446 return -EINVAL; 447 448 if (!tb[IFAL_LABEL]) 449 return -EINVAL; 450 label = nla_get_u32(tb[IFAL_LABEL]); 451 if (label == IPV6_ADDR_LABEL_DEFAULT) 452 return -EINVAL; 453 454 switch(nlh->nlmsg_type) { 455 case RTM_NEWADDRLABEL: 456 if (ifal->ifal_index && 457 !__dev_get_by_index(net, ifal->ifal_index)) 458 return -EINVAL; 459 460 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 461 ifal->ifal_index, label, 462 nlh->nlmsg_flags & NLM_F_REPLACE); 463 break; 464 case RTM_DELADDRLABEL: 465 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 466 ifal->ifal_index); 467 break; 468 default: 469 err = -EOPNOTSUPP; 470 } 471 return err; 472} 473 474static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 475 int prefixlen, int ifindex, u32 lseq) 476{ 477 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 478 ifal->ifal_family = AF_INET6; 479 ifal->ifal_prefixlen = prefixlen; 480 ifal->ifal_flags = 0; 481 ifal->ifal_index = ifindex; 482 ifal->ifal_seq = lseq; 483}; 484 485static int ip6addrlbl_fill(struct sk_buff *skb, 486 struct ip6addrlbl_entry *p, 487 u32 lseq, 488 u32 portid, u32 seq, int event, 489 unsigned int flags) 490{ 491 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 492 sizeof(struct ifaddrlblmsg), flags); 493 if (!nlh) 494 
return -EMSGSIZE; 495 496 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 497 498 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || 499 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 500 nlmsg_cancel(skb, nlh); 501 return -EMSGSIZE; 502 } 503 504 return nlmsg_end(skb, nlh); 505} 506 507static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 508{ 509 struct net *net = sock_net(skb->sk); 510 struct ip6addrlbl_entry *p; 511 struct hlist_node *pos; 512 int idx = 0, s_idx = cb->args[0]; 513 int err; 514 515 rcu_read_lock(); 516 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 517 if (idx >= s_idx && 518 net_eq(ip6addrlbl_net(p), net)) { 519 if ((err = ip6addrlbl_fill(skb, p, 520 ip6addrlbl_table.seq, 521 NETLINK_CB(cb->skb).portid, 522 cb->nlh->nlmsg_seq, 523 RTM_NEWADDRLABEL, 524 NLM_F_MULTI)) <= 0) 525 break; 526 } 527 idx++; 528 } 529 rcu_read_unlock(); 530 cb->args[0] = idx; 531 return skb->len; 532} 533 534static inline int ip6addrlbl_msgsize(void) 535{ 536 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 537 + nla_total_size(16) /* IFAL_ADDRESS */ 538 + nla_total_size(4); /* IFAL_LABEL */ 539} 540 541static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, 542 void *arg) 543{ 544 struct net *net = sock_net(in_skb->sk); 545 struct ifaddrlblmsg *ifal; 546 struct nlattr *tb[IFAL_MAX+1]; 547 struct in6_addr *addr; 548 u32 lseq; 549 int err = 0; 550 struct ip6addrlbl_entry *p; 551 struct sk_buff *skb; 552 553 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 554 if (err < 0) 555 return err; 556 557 ifal = nlmsg_data(nlh); 558 559 if (ifal->ifal_family != AF_INET6 || 560 ifal->ifal_prefixlen != 128) 561 return -EINVAL; 562 563 if (ifal->ifal_index && 564 !__dev_get_by_index(net, ifal->ifal_index)) 565 return -EINVAL; 566 567 if (!tb[IFAL_ADDRESS]) 568 return -EINVAL; 569 570 addr = nla_data(tb[IFAL_ADDRESS]); 571 if (!addr) 572 return -EINVAL; 573 574 rcu_read_lock(); 575 p = 
__ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 576 if (p && ip6addrlbl_hold(p)) 577 p = NULL; 578 lseq = ip6addrlbl_table.seq; 579 rcu_read_unlock(); 580 581 if (!p) { 582 err = -ESRCH; 583 goto out; 584 } 585 586 if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { 587 ip6addrlbl_put(p); 588 return -ENOBUFS; 589 } 590 591 err = ip6addrlbl_fill(skb, p, lseq, 592 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 593 RTM_NEWADDRLABEL, 0); 594 595 ip6addrlbl_put(p); 596 597 if (err < 0) { 598 WARN_ON(err == -EMSGSIZE); 599 kfree_skb(skb); 600 goto out; 601 } 602 603 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 604out: 605 return err; 606} 607 608void __init ipv6_addr_label_rtnl_register(void) 609{ 610 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 611 NULL, NULL); 612 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 613 NULL, NULL); 614 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 615 ip6addrlbl_dump, NULL); 616} 617 618