addrlabel.c revision 3b1e0a655f8eba44ab1ee2a1068d169ccfb853b9
1/* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7/* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12#include <linux/kernel.h> 13#include <linux/list.h> 14#include <linux/rcupdate.h> 15#include <linux/in6.h> 16#include <net/addrconf.h> 17#include <linux/if_addrlabel.h> 18#include <linux/netlink.h> 19#include <linux/rtnetlink.h> 20 21#if 0 22#define ADDRLABEL(x...) printk(x) 23#else 24#define ADDRLABEL(x...) do { ; } while(0) 25#endif 26 27/* 28 * Policy Table 29 */ 30struct ip6addrlbl_entry 31{ 32 struct in6_addr prefix; 33 int prefixlen; 34 int ifindex; 35 int addrtype; 36 u32 label; 37 struct hlist_node list; 38 atomic_t refcnt; 39 struct rcu_head rcu; 40}; 41 42static struct ip6addrlbl_table 43{ 44 struct hlist_head head; 45 spinlock_t lock; 46 u32 seq; 47} ip6addrlbl_table; 48 49/* 50 * Default policy table (RFC3484 + extensions) 51 * 52 * prefix addr_type label 53 * ------------------------------------------------------------------------- 54 * ::1/128 LOOPBACK 0 55 * ::/0 N/A 1 56 * 2002::/16 N/A 2 57 * ::/96 COMPATv4 3 58 * ::ffff:0:0/96 V4MAPPED 4 59 * fc00::/7 N/A 5 ULA (RFC 4193) 60 * 2001::/32 N/A 6 Teredo (RFC 4380) 61 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 62 * 63 * Note: 0xffffffff is used if we do not have any policies. 64 */ 65 66#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 67 68static const __initdata struct ip6addrlbl_init_table 69{ 70 const struct in6_addr *prefix; 71 int prefixlen; 72 u32 label; 73} ip6addrlbl_init_table[] = { 74 { /* ::/0 */ 75 .prefix = &in6addr_any, 76 .label = 1, 77 },{ /* fc00::/7 */ 78 .prefix = &(struct in6_addr){{{ 0xfc }}}, 79 .prefixlen = 7, 80 .label = 5, 81 },{ /* 2002::/16 */ 82 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 83 .prefixlen = 16, 84 .label = 2, 85 },{ /* 2001::/32 */ 86 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 87 .prefixlen = 32, 88 .label = 6, 89 },{ /* 2001:10::/28 */ 90 .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, 91 .prefixlen = 28, 92 .label = 7, 93 },{ /* ::ffff:0:0 */ 94 .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, 95 .prefixlen = 96, 96 .label = 4, 97 },{ /* ::/96 */ 98 .prefix = &in6addr_any, 99 .prefixlen = 96, 100 .label = 3, 101 },{ /* ::1/128 */ 102 .prefix = &in6addr_loopback, 103 .prefixlen = 128, 104 .label = 0, 105 } 106}; 107 108/* Object management */ 109static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 110{ 111 kfree(p); 112} 113 114static void ip6addrlbl_free_rcu(struct rcu_head *h) 115{ 116 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 117} 118 119static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p) 120{ 121 return atomic_inc_not_zero(&p->refcnt); 122} 123 124static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 125{ 126 if (atomic_dec_and_test(&p->refcnt)) 127 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 128} 129 130/* Find label */ 131static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, 132 const struct in6_addr *addr, 133 int addrtype, int ifindex) 134{ 135 if (p->ifindex && p->ifindex != ifindex) 136 return 0; 137 if (p->addrtype && p->addrtype != addrtype) 138 return 0; 139 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 140 return 0; 141 return 1; 142} 143 144static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr, 145 int type, int ifindex) 146{ 147 struct hlist_node *pos; 148 struct ip6addrlbl_entry *p; 149 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 150 if (__ip6addrlbl_match(p, addr, type, ifindex)) 151 return p; 152 } 153 return NULL; 154} 155 156u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) 157{ 158 u32 label; 159 struct ip6addrlbl_entry *p; 160 161 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 162 163 rcu_read_lock(); 164 p = __ipv6_addr_label(addr, type, ifindex); 165 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 166 rcu_read_unlock(); 167 168 ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", 169 __func__, 170 NIP6(*addr), type, ifindex, 171 label); 172 173 return label; 174} 175 176/* allocate one entry */ 177static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, 178 int prefixlen, int ifindex, 179 u32 label) 180{ 181 struct ip6addrlbl_entry *newp; 182 int addrtype; 183 184 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", 185 __func__, 186 NIP6(*prefix), prefixlen, 187 ifindex, 188 (unsigned int)label); 189 190 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 191 192 switch (addrtype) { 193 case IPV6_ADDR_MAPPED: 194 if (prefixlen > 96) 195 return ERR_PTR(-EINVAL); 196 if (prefixlen < 96) 197 addrtype = 0; 198 break; 199 case IPV6_ADDR_COMPATv4: 200 if (prefixlen != 96) 201 addrtype = 0; 202 break; 203 case IPV6_ADDR_LOOPBACK: 204 if (prefixlen != 128) 205 addrtype = 0; 206 break; 207 } 208 209 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 210 if (!newp) 211 return ERR_PTR(-ENOMEM); 212 213 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 214 newp->prefixlen = prefixlen; 215 newp->ifindex = ifindex; 216 newp->addrtype = addrtype; 217 newp->label = label; 218 INIT_HLIST_NODE(&newp->list); 219 atomic_set(&newp->refcnt, 1); 220 return newp; 221} 222 223/* add a label */ 224static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 225{ 226 int ret = 0; 227 228 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", 229 __func__, 230 newp, replace); 231 232 if (hlist_empty(&ip6addrlbl_table.head)) { 233 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 234 } else { 235 struct hlist_node *pos, *n; 236 struct ip6addrlbl_entry *p = NULL; 237 hlist_for_each_entry_safe(p, pos, n, 238 &ip6addrlbl_table.head, list) { 239 if (p->prefixlen == newp->prefixlen && 240 p->ifindex == newp->ifindex && 241 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 242 if (!replace) { 243 ret = -EEXIST; 244 goto out; 245 } 246 hlist_replace_rcu(&p->list, &newp->list); 247 ip6addrlbl_put(p); 248 goto out; 249 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 250 (p->prefixlen < newp->prefixlen)) { 251 hlist_add_before_rcu(&newp->list, &p->list); 252 goto out; 253 } 254 } 255 hlist_add_after_rcu(&p->list, &newp->list); 256 } 257out: 258 if (!ret) 259 ip6addrlbl_table.seq++; 260 return ret; 261} 262 263/* add a label */ 264static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, 265 int ifindex, u32 label, int replace) 266{ 267 struct ip6addrlbl_entry *newp; 268 int ret = 0; 269 270 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 271 __func__, 272 NIP6(*prefix), prefixlen, 273 ifindex, 274 (unsigned int)label, 275 replace); 276 277 newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); 278 if (IS_ERR(newp)) 279 return PTR_ERR(newp); 280 spin_lock(&ip6addrlbl_table.lock); 281 ret = __ip6addrlbl_add(newp, replace); 282 spin_unlock(&ip6addrlbl_table.lock); 283 if (ret) 284 ip6addrlbl_free(newp); 285 return ret; 286} 287 288/* remove a label */ 289static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, 290 int ifindex) 291{ 292 struct ip6addrlbl_entry *p = NULL; 293 struct hlist_node *pos, *n; 294 int ret = -ESRCH; 295 296 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", 297 __func__, 298 NIP6(*prefix), prefixlen, 299 ifindex); 300 301 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { 302 if (p->prefixlen == prefixlen && 303 p->ifindex == ifindex && 304 ipv6_addr_equal(&p->prefix, prefix)) { 305 hlist_del_rcu(&p->list); 306 ip6addrlbl_put(p); 307 ret = 0; 308 break; 309 } 310 } 311 return ret; 312} 313 314static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, 315 int ifindex) 316{ 317 struct in6_addr prefix_buf; 318 int ret; 319 320 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", 321 __func__, 322 NIP6(*prefix), prefixlen, 323 ifindex); 324 325 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 326 spin_lock(&ip6addrlbl_table.lock); 327 ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex); 328 spin_unlock(&ip6addrlbl_table.lock); 329 return ret; 330} 331 332/* add default label */ 333static __init int ip6addrlbl_init(void) 334{ 335 int err = 0; 336 int i; 337 338 ADDRLABEL(KERN_DEBUG "%s()\n", __func__); 339 340 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 341 int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix, 342 ip6addrlbl_init_table[i].prefixlen, 343 0, 344 ip6addrlbl_init_table[i].label, 0); 345 /* XXX: should we free all rules when we catch an error? */ 346 if (ret && (!err || err != -ENOMEM)) 347 err = ret; 348 } 349 return err; 350} 351 352int __init ipv6_addr_label_init(void) 353{ 354 spin_lock_init(&ip6addrlbl_table.lock); 355 356 return ip6addrlbl_init(); 357} 358 359static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 360 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 361 [IFAL_LABEL] = { .len = sizeof(u32), }, 362}; 363 364static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, 365 void *arg) 366{ 367 struct net *net = sock_net(skb->sk); 368 struct ifaddrlblmsg *ifal; 369 struct nlattr *tb[IFAL_MAX+1]; 370 struct in6_addr *pfx; 371 u32 label; 372 int err = 0; 373 374 if (net != &init_net) 375 return 0; 376 377 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 378 if (err < 0) 379 return err; 380 381 ifal = nlmsg_data(nlh); 382 383 if (ifal->ifal_family != AF_INET6 || 384 ifal->ifal_prefixlen > 128) 385 return -EINVAL; 386 387 if (ifal->ifal_index && 388 !__dev_get_by_index(&init_net, ifal->ifal_index)) 389 return -EINVAL; 390 391 if (!tb[IFAL_ADDRESS]) 392 return -EINVAL; 393 394 pfx = nla_data(tb[IFAL_ADDRESS]); 395 if (!pfx) 396 return -EINVAL; 397 398 if (!tb[IFAL_LABEL]) 399 return -EINVAL; 400 label = nla_get_u32(tb[IFAL_LABEL]); 401 if (label == IPV6_ADDR_LABEL_DEFAULT) 402 return -EINVAL; 403 404 switch(nlh->nlmsg_type) { 405 case RTM_NEWADDRLABEL: 406 err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen, 407 ifal->ifal_index, label, 408 nlh->nlmsg_flags & NLM_F_REPLACE); 409 break; 410 case RTM_DELADDRLABEL: 411 err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen, 412 ifal->ifal_index); 413 break; 414 default: 415 err = -EOPNOTSUPP; 416 } 417 return err; 418} 419 420static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 421 int prefixlen, int ifindex, u32 lseq) 422{ 423 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 424 ifal->ifal_family = AF_INET6; 425 ifal->ifal_prefixlen = prefixlen; 426 ifal->ifal_flags = 0; 427 ifal->ifal_index = ifindex; 428 ifal->ifal_seq = lseq; 429}; 430 431static int ip6addrlbl_fill(struct sk_buff *skb, 432 struct ip6addrlbl_entry *p, 433 u32 lseq, 434 u32 pid, u32 seq, int event, 435 unsigned int flags) 436{ 437 struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, 438 sizeof(struct ifaddrlblmsg), flags); 439 if (!nlh) 440 return -EMSGSIZE; 441 442 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 443 444 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || 445 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 446 nlmsg_cancel(skb, nlh); 447 return -EMSGSIZE; 448 } 449 450 return nlmsg_end(skb, nlh); 451} 452 453static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 454{ 455 struct net *net = sock_net(skb->sk); 456 struct ip6addrlbl_entry *p; 457 struct hlist_node *pos; 458 int idx = 0, s_idx = cb->args[0]; 459 int err; 460 461 if (net != &init_net) 462 return 0; 463 464 rcu_read_lock(); 465 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 466 if (idx >= s_idx) { 467 if ((err = ip6addrlbl_fill(skb, p, 468 ip6addrlbl_table.seq, 469 NETLINK_CB(cb->skb).pid, 470 cb->nlh->nlmsg_seq, 471 RTM_NEWADDRLABEL, 472 NLM_F_MULTI)) <= 0) 473 break; 474 } 475 idx++; 476 } 477 rcu_read_unlock(); 478 cb->args[0] = idx; 479 return skb->len; 480} 481 482static inline int ip6addrlbl_msgsize(void) 483{ 484 return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 485 + nla_total_size(16) /* IFAL_ADDRESS */ 486 + nla_total_size(4) /* IFAL_LABEL */ 487 ); 488} 489 490static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, 491 void *arg) 492{ 493 struct net *net = sock_net(in_skb->sk); 494 struct ifaddrlblmsg *ifal; 495 struct nlattr *tb[IFAL_MAX+1]; 496 struct in6_addr *addr; 497 u32 lseq; 498 int err = 0; 499 struct ip6addrlbl_entry *p; 500 struct sk_buff *skb; 501 502 if (net != &init_net) 503 return 0; 504 505 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 506 if (err < 0) 507 return err; 508 509 ifal = nlmsg_data(nlh); 510 511 if (ifal->ifal_family != AF_INET6 || 512 ifal->ifal_prefixlen != 128) 513 return -EINVAL; 514 515 if (ifal->ifal_index && 516 !__dev_get_by_index(&init_net, ifal->ifal_index)) 517 return -EINVAL; 518 519 if (!tb[IFAL_ADDRESS]) 520 return -EINVAL; 521 522 addr = nla_data(tb[IFAL_ADDRESS]); 523 if (!addr) 524 return -EINVAL; 525 526 rcu_read_lock(); 527 p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index); 528 if (p && ip6addrlbl_hold(p)) 529 p = NULL; 530 lseq = ip6addrlbl_table.seq; 531 rcu_read_unlock(); 532 533 if (!p) { 534 err = -ESRCH; 535 goto out; 536 } 537 538 if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { 539 ip6addrlbl_put(p); 540 return -ENOBUFS; 541 } 542 543 err = ip6addrlbl_fill(skb, p, lseq, 544 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 545 RTM_NEWADDRLABEL, 0); 546 547 ip6addrlbl_put(p); 548 549 if (err < 0) { 550 WARN_ON(err == -EMSGSIZE); 551 kfree_skb(skb); 552 goto out; 553 } 554 555 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); 556out: 557 return err; 558} 559 560void __init ipv6_addr_label_rtnl_register(void) 561{ 562 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL); 563 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL); 564 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump); 565} 566 567