/* fib_frontend.c revision 0c838ff1ade71162775afffd9e5c6478a60bdca6 */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>

#ifndef CONFIG_IP_MULTIPLE_TABLES

/*
 * Single-table build: create only the LOCAL and MAIN hash tables and
 * hook them into this namespace's fib_table_hash at fixed slots.
 * Returns 0 on success or -ENOMEM.
 */
static int __net_init fib4_rules_init(struct net *net)
{
	struct fib_table *local_table, *main_table;

	local_table = fib_hash_table(RT_TABLE_LOCAL);
	if (local_table == NULL)
		return -ENOMEM;

	main_table = fib_hash_table(RT_TABLE_MAIN);
	if (main_table == NULL)
		goto fail;

	hlist_add_head_rcu(&local_table->tb_hlist,
			&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
	hlist_add_head_rcu(&main_table->tb_hlist,
			&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
	return 0;

fail:
	/* MAIN table allocation failed: undo the LOCAL table.  It was
	 * never published, so a plain kfree() is sufficient.
	 */
	kfree(local_table);
	return -ENOMEM;
}
#else

/*
 * Multi-table build: return table @id for namespace @net, creating it
 * on demand.  id == 0 is treated as RT_TABLE_MAIN.  Returns NULL on
 * allocation failure.
 */
struct fib_table *fib_new_table(struct net *net, u32 id)
{
	struct fib_table *tb;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	tb = fib_get_table(net, id);
	if (tb)
		return tb;

	tb = fib_hash_table(id);
	if (!tb)
		return NULL;
	h = id & (FIB_TABLE_HASHSZ - 1);
	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
	return tb;
}

/*
 * Look up table @id in this namespace's table hash.  id == 0 is
 * treated as RT_TABLE_MAIN.  Traversal is under rcu_read_lock();
 * returns NULL if the table does not exist.
 */
struct fib_table *fib_get_table(struct net *net, u32 id)
{
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	h = id & (FIB_TABLE_HASHSZ - 1);

	rcu_read_lock();
	head = &net->ipv4.fib_table_hash[h];
	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
		if (tb->tb_id == id) {
			rcu_read_unlock();
			return tb;
		}
	}
	rcu_read_unlock();
	return NULL;
}
#endif /* CONFIG_IP_MULTIPLE_TABLES */

/*
 * Flush every FIB table in @net; if any entries were actually removed,
 * flush the routing cache as well.
 */
static void fib_flush(struct net *net)
{
	int flushed = 0;
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	unsigned int h;

	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry(tb, node, head, tb_hlist)
			flushed += fib_table_flush(tb);
	}

	if (flushed)
		rt_cache_flush(net, -1);
}

/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	/* Look @addr up as a destination in the LOCAL table: an
	 * RTN_LOCAL answer means it is one of our own addresses.
	 */
	struct flowi fl = {
		.fl4_dst = addr,
	};
	struct fib_result res = { 0 };
	struct net_device *dev = NULL;
	struct fib_table *local_table;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	rcu_read_lock();
	local_table = fib_get_table(net, RT_TABLE_LOCAL);
	if (!local_table ||
	    fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
		rcu_read_unlock();
		return NULL;
	}
	if (res.type != RTN_LOCAL)
		goto out;
	dev = FIB_RES_DEV(res);

	if (dev && devref)
		dev_hold(dev);
out:
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(__ip_dev_find);

/*
 * Find address type as if only "dev" was present in the system. If
 * on_dev is NULL then all interfaces are taken into consideration.
 */
static inline unsigned __inet_dev_addr_type(struct net *net,
					    const struct net_device *dev,
					    __be32 addr)
{
	struct flowi fl = { .fl4_dst = addr };
	struct fib_result res;
	unsigned ret = RTN_BROADCAST;
	struct fib_table *local_table;

	/* Trivially classifiable addresses need no table lookup. */
	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
		return RTN_BROADCAST;
	if (ipv4_is_multicast(addr))
		return RTN_MULTICAST;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	local_table = fib_get_table(net, RT_TABLE_LOCAL);
	if (local_table) {
		ret = RTN_UNICAST;
		rcu_read_lock();
		if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
			/* Honor the lookup result only when no device
			 * filter was given or the result's device matches.
			 */
			if (!dev || dev == res.fi->fib_dev)
				ret = res.type;
		}
		rcu_read_unlock();
	}
	return ret;
}

/* Classify @addr considering all interfaces in @net. */
unsigned int inet_addr_type(struct net *net, __be32 addr)
{
	return __inet_dev_addr_type(net, NULL, addr);
}
EXPORT_SYMBOL(inet_addr_type);

/* Classify @addr as if only @dev were present in the system. */
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
				__be32 addr)
{
	return __inet_dev_addr_type(net, dev, addr);
}
EXPORT_SYMBOL(inet_dev_addr_type);

/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check, that source is valid i.e. not broadcast or our local
 *   address.
 * - figure out what "logical" interface this packet arrived
 *   and calculate "specific destination" address.
 * - check, that packet arrived from expected physical interface.
 * called with rcu_read_lock()
 */
int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst,
			u32 *itag, u32 mark)
{
	struct in_device *in_dev;
	/* Deliberately reversed flow: route towards the packet's
	 * *source* (fl4_dst = src) to perform the reverse-path check.
	 */
	struct flowi fl = {
		.fl4_dst = src,
		.fl4_src = dst,
		.fl4_tos = tos,
		.mark = mark,
		.iif = oif
	};
	struct fib_result res;
	int no_addr, rpf, accept_local;
	bool dev_match;
	int ret;
	struct net *net;

	no_addr = rpf = accept_local = 0;
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
		/* Ignore the mark in the reverse lookup unless the
		 * interface enabled src_valid_mark.
		 */
		if (mark && !IN_DEV_SRC_VMARK(in_dev))
			fl.mark = 0;
	}

	if (in_dev == NULL)
		goto e_inval;

	net = dev_net(dev);
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST) {
		/* A local source address is only acceptable when the
		 * interface opted in via accept_local.
		 */
		if (res.type != RTN_LOCAL || !accept_local)
			goto e_inval;
	}
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	dev_match = false;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* With multipath, any nexthop pointing back at @dev satisfies
	 * the reverse-path check.  (ret is reused as a loop index here.)
	 */
	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
		struct fib_nh *nh = &res.fi->fib_nh[ret];

		if (nh->nh_dev == dev) {
			dev_match = true;
			break;
		}
	}
#else
	if (FIB_RES_DEV(res) == dev)
		dev_match = true;
#endif
	if (dev_match) {
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		return ret;
	}
	if (no_addr)
		goto last_resort;
	if (rpf == 1)
		goto e_rpf;
	/* Loose mode: retry the lookup restricted to this interface. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
	}
	return ret;

last_resort:
	if (rpf)
		goto e_rpf;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval:
	return -EINVAL;
e_rpf:
	return -EXDEV;
}

static
inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	/* Callers guarantee this really is a sockaddr_in (AF_INET). */
	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
}

/*
 * Append one RTAX_* metric attribute at offset @len inside buffer @mx
 * and return the new total length.  The caller sized @mx for at most
 * three 4-byte attributes.
 */
static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
{
	struct nlattr *nla;

	nla = (struct nlattr *) ((char *) mx + len);
	nla->nla_type = type;
	nla->nla_len = nla_attr_size(4);
	*(u32 *) nla_data(nla) = value;

	return len + nla_total_size(4);
}

/*
 * Convert a legacy SIOCADDRT/SIOCDELRT struct rtentry into a
 * struct fib_config.  On success, cfg->fc_mx (if set) was allocated
 * with kzalloc() and must be freed by the caller.
 */
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* Legacy ioctl metric is 1-based; netlink priority is 0-based. */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		/* rt_dev is a userspace pointer to the device name. */
		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		/* "ethX:N" names an address alias: strip the suffix to
		 * find the device, then match the label to pick the
		 * preferred source address.
		 */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(net, addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for up to three u32 metrics; freed by caller. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/* advmss = mtu - 40 — NOTE(review): presumably accounts
		 * for IP+TCP header overhead; legacy route(8) semantics.
		 */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}

/*
 * Handle IP routing ioctl calls.
 * These are used to manipulate the routing tables
 */
int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib_config cfg;
	struct rtentry rt;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;

		if (copy_from_user(&rt, arg, sizeof(rt)))
			return -EFAULT;

		/* Table modifications happen under RTNL. */
		rtnl_lock();
		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
		if (err == 0) {
			struct fib_table *tb;

			if (cmd == SIOCDELRT) {
				/* Delete: the table must already exist. */
				tb = fib_get_table(net, cfg.fc_table);
				if (tb)
					err = fib_table_delete(tb, &cfg);
				else
					err = -ESRCH;
			} else {
				/* Add: create the table on demand. */
				tb = fib_new_table(net, cfg.fc_table);
				if (tb)
					err = fib_table_insert(tb, &cfg);
				else
					err = -ENOBUFS;
			}

			/* allocated by rtentry_to_fib_config() */
			kfree(cfg.fc_mx);
		}
		rtnl_unlock();
		return err;
	}
	return -EINVAL;
}

/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE. */
const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_SRC]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_GATEWAY]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_PREFSRC]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_FLOW]		= { .type = NLA_U32 },
};

/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * struct fib_config.  cfg->fc_mx and cfg->fc_mp point into the
 * message itself — no allocation is done here.
 */
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
			     struct nlmsghdr *nlh, struct fib_config *cfg)
{
	struct nlattr *attr;
	int err, remaining;
	struct rtmsg *rtm;

	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	memset(cfg, 0, sizeof(*cfg));

	rtm = nlmsg_data(nlh);
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_tos = rtm->rtm_tos;
	cfg->fc_table = rtm->rtm_table;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_scope = rtm->rtm_scope;
	cfg->fc_type = rtm->rtm_type;
	cfg->fc_flags = rtm->rtm_flags;
	cfg->fc_nlflags = nlh->nlmsg_flags;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = net;

	if (cfg->fc_type > RTN_MAX) {
		err = -EINVAL;
		goto errout;
	}

	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
		switch (nla_type(attr)) {
		case RTA_DST:
			cfg->fc_dst = nla_get_be32(attr);
			break;
		case RTA_OIF:
			cfg->fc_oif = nla_get_u32(attr);
			break;
		case RTA_GATEWAY:
			cfg->fc_gw = nla_get_be32(attr);
			break;
		case RTA_PRIORITY:
			cfg->fc_priority = nla_get_u32(attr);
			break;
		case RTA_PREFSRC:
			cfg->fc_prefsrc = nla_get_be32(attr);
			break;
		case RTA_METRICS:
			cfg->fc_mx = nla_data(attr);
			cfg->fc_mx_len = nla_len(attr);
			break;
		case RTA_MULTIPATH:
			cfg->fc_mp = nla_data(attr);
			cfg->fc_mp_len = nla_len(attr);
			break;
		case RTA_FLOW:
			cfg->fc_flow = nla_get_u32(attr);
			break;
		case RTA_TABLE:
			/* RTA_TABLE overrides the 8-bit rtm_table field. */
			cfg->fc_table = nla_get_u32(attr);
			break;
		}
	}

	return 0;
errout:
	return err;
}

/* RTM_DELROUTE handler: parse the message and delete the route. */
static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;
	struct fib_table *tb;
	int err;

	err = rtm_to_fib_config(net, skb, nlh, &cfg);
	if (err < 0)
		goto errout;

	tb = fib_get_table(net, cfg.fc_table);
	if (tb == NULL) {
		err = -ESRCH;
		goto errout;
	}

	err = fib_table_delete(tb, &cfg);
errout:
	return err;
}

/* RTM_NEWROUTE handler: parse the message and insert the route. */
static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;
	struct fib_table *tb;
	int err;

	err = rtm_to_fib_config(net, skb, nlh, &cfg);
	if (err < 0)
		goto errout;

	tb = fib_new_table(net, cfg.fc_table);
	if (tb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	err = fib_table_insert(tb, &cfg);
errout:
	return err;
}

/*
 * RTM_GETROUTE dump handler.  Iterates all tables in the namespace;
 * cb->args[0]/args[1] record the hash bucket and chain position so a
 * multi-part dump can resume where it left off.
 */
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	int dumped = 0;

	/* RTM_F_CLONED requests the routing cache, not the FIB. */
	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
		return ip_rt_dump(skb, cb);

	s_h = cb->args[0];
	s_e = cb->args[1];

	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry(tb, node, head, tb_hlist) {
			if (e < s_e)
				goto next;
			/* Reset per-table cursor state (args[2..]) when
			 * moving on to a new table.
			 */
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			if (fib_table_dump(tb, skb, cb) < 0)
				goto out;
			dumped = 1;
next:
			e++;
		}
	}
out:
	cb->args[1] = e;
	cb->args[0] = h;

	return skb->len;
}

/* Prepare and feed intra-kernel routing request.
 * Really, it should be netlink message, but :-( netlink
 * can be not configured, so that we feed it directly
 * to fib engine. It is legal, because all events occur
 * only when netlink is already locked.
 */
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
	struct net *net = dev_net(ifa->ifa_dev->dev);
	struct fib_table *tb;
	struct fib_config cfg = {
		.fc_protocol = RTPROT_KERNEL,
		.fc_type = type,
		.fc_dst = dst,
		.fc_dst_len = dst_len,
		.fc_prefsrc = ifa->ifa_local,
		.fc_oif = ifa->ifa_dev->dev->ifindex,
		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
		.fc_nlinfo = {
			.nl_net = net,
		},
	};

	/* Unicast routes go to MAIN; local/broadcast go to LOCAL. */
	if (type == RTN_UNICAST)
		tb = fib_new_table(net, RT_TABLE_MAIN);
	else
		tb = fib_new_table(net, RT_TABLE_LOCAL);

	if (tb == NULL)
		return;

	cfg.fc_table = tb->tb_id;

	if (type != RTN_LOCAL)
		cfg.fc_scope = RT_SCOPE_LINK;
	else
		cfg.fc_scope = RT_SCOPE_HOST;

	if (cmd == RTM_NEWROUTE)
		fib_table_insert(tb, &cfg);
	else
		fib_table_delete(tb, &cfg);
}

/*
 * Install the routes implied by a newly added interface address: the
 * /32 local route, broadcast routes, and (for primary addresses) the
 * covering network route.
 */
void fib_add_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *prim = ifa;
	__be32 mask = ifa->ifa_mask;
	__be32 addr = ifa->ifa_local;
	__be32 prefix = ifa->ifa_address & mask;

	if (ifa->ifa_flags & IFA_F_SECONDARY) {
		/* Secondary addresses use their primary as prefsrc. */
		prim = inet_ifa_byprefix(in_dev, prefix, mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);

	if (!(dev->flags & IFF_UP))
		return;

	/* Add broadcast address, if it is explicitly assigned. */
	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);

	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
		fib_magic(RTM_NEWROUTE,
			  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
			  prefix, ifa->ifa_prefixlen, prim);

		/* Add network specific broadcasts, when it takes a sense */
		if (ifa->ifa_prefixlen < 31) {
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
				  32, prim);
		}
	}
}

/*
 * Remove the routes implied by a deleted interface address — but only
 * those whose address/broadcast is no longer covered by any remaining
 * address on the interface.
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
	__be32 any = ifa->ifa_address & ifa->ifa_mask;
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE,
			  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
			  any, ifa->ifa_prefixlen, prim);
	else {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than add.
	 * We should take care of not to delete too much :-)
	 *
	 * Scan address list to be sure that addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	/* Delete only what no remaining address still justifies. */
	if (!(ok & BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok & BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok & BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok & LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check, that this local address finally disappeared. */
		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
			/* And the last, but not the least thing.
			 * We must flush stray FIB entries.
			 *
			 * First of all, we scan fib_info list searching
			 * for stray nexthop entries, then ignite fib_flush.
			 */
			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
				fib_flush(dev_net(dev));
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}

/*
 * Perform a FIB lookup on behalf of a NETLINK_FIB_LOOKUP request and
 * fill the result fields of @frn in place.  frn->err carries the
 * lookup status (-ENOENT if @tb is NULL).
 */
static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
{

	struct fib_result res;
	struct flowi fl = {
		.mark = frn->fl_mark,
		.fl4_dst = frn->fl_addr,
		.fl4_tos = frn->fl_tos,
		.fl4_scope = frn->fl_scope,
	};

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	frn->err = -ENOENT;
	if (tb) {
		local_bh_disable();

		frn->tb_id = tb->tb_id;
		rcu_read_lock();
		frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF);

		if (!frn->err) {
			frn->prefixlen = res.prefixlen;
			frn->nh_sel = res.nh_sel;
			frn->type = res.type;
			frn->scope = res.scope;
		}
		rcu_read_unlock();
		local_bh_enable();
	}
}

/*
 * NETLINK_FIB_LOOKUP input callback: validate the request, run the
 * lookup, and unicast the (cloned, rewritten) skb back to the sender.
 */
static void nl_fib_input(struct sk_buff *skb)
{
	struct net *net;
	struct fib_result_nl *frn;
	struct nlmsghdr *nlh;
	struct fib_table *tb;
	u32 pid;

	net = sock_net(skb->sk);
	nlh = nlmsg_hdr(skb);
	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
		return;

	/* Clone so we can modify the payload and reuse it as the reply. */
	skb = skb_clone(skb, GFP_KERNEL);
	if (skb == NULL)
		return;
	nlh = nlmsg_hdr(skb);

	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
	tb = fib_get_table(net, frn->tb_id_in);

	nl_fib_lookup(frn, tb);

	pid = NETLINK_CB(skb).pid;	/* pid of sending process */
	NETLINK_CB(skb).pid = 0;	/* from kernel */
	NETLINK_CB(skb).dst_group = 0;	/* unicast */
	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
}

/* Create the per-namespace NETLINK_FIB_LOOKUP kernel socket. */
static int __net_init nl_fib_lookup_init(struct net *net)
{
	struct sock *sk;
	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
				   nl_fib_input, NULL, THIS_MODULE);
	if (sk == NULL)
		return -EAFNOSUPPORT;
	net->ipv4.fibnl = sk;
	return 0;
}

static void nl_fib_lookup_exit(struct net *net)
{
	netlink_kernel_release(net->ipv4.fibnl);
	net->ipv4.fibnl = NULL;
}

/*
 * Tear down IP state for a device going down/away: sync the FIB,
 * flush caches and drop its ARP entries.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	if (fib_sync_down_dev(dev, force))
		fib_flush(dev_net(dev));
	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}

/* inetaddr notifier: keep the FIB in sync with address add/remove. */
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;

	switch (event) {
	case NETDEV_UP:
		fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev);
#endif
		rt_cache_flush(dev_net(dev), -1);
		break;
	case NETDEV_DOWN:
		fib_del_ifaddr(ifa);
		if (ifa->ifa_dev->ifa_list == NULL) {
			/* Last address was deleted from this interface.
			 * Disable IP.
			 */
			fib_disable_ip(dev, 1, 0);
		} else {
			rt_cache_flush(dev_net(dev), -1);
		}
		break;
	}
	return NOTIFY_DONE;
}

/* netdevice notifier: keep the FIB in sync with device state changes. */
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	if (event == NETDEV_UNREGISTER) {
		fib_disable_ip(dev, 2, -1);
		return NOTIFY_DONE;
	}

	if (!in_dev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		for_ifa(in_dev) {
			fib_add_ifaddr(ifa);
		} endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev);
#endif
		rt_cache_flush(dev_net(dev), -1);
		break;
	case NETDEV_DOWN:
		fib_disable_ip(dev, 0, 0);
		break;
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGE:
		rt_cache_flush(dev_net(dev), 0);
		break;
	case NETDEV_UNREGISTER_BATCH:
		/* The batch unregister is only called on the first
		 * device in the list of devices being unregistered.
		 * Therefore we should not pass dev_net(dev) in here.
		 */
		rt_cache_flush_batch(NULL);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call = fib_inetaddr_event,
};

static struct notifier_block fib_netdev_notifier = {
	.notifier_call = fib_netdev_event,
};

/* Allocate the per-namespace table hash and set up the FIB tables. */
static int __net_init ip_fib_net_init(struct net *net)
{
	int err;
	size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;

	/* Avoid false sharing : Use at least a full cache line */
	size = max_t(size_t, size, L1_CACHE_BYTES);

	net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
	if (net->ipv4.fib_table_hash == NULL)
		return -ENOMEM;

	err = fib4_rules_init(net);
	if (err < 0)
		goto fail;
	return 0;

fail:
	kfree(net->ipv4.fib_table_hash);
	return err;
}

/* Flush and free every table in the namespace, then the hash itself. */
static void ip_fib_net_exit(struct net *net)
{
	unsigned int i;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	fib4_rules_exit(net);
#endif

	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
		struct fib_table *tb;
		struct hlist_head *head;
		struct hlist_node *node, *tmp;

		head = &net->ipv4.fib_table_hash[i];
		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
			hlist_del(node);
			fib_table_flush(tb);
			fib_free_table(tb);
		}
	}
	kfree(net->ipv4.fib_table_hash);
}

/* Per-namespace init: tables, netlink lookup socket, procfs entries. */
static int __net_init fib_net_init(struct net *net)
{
	int error;

	error = ip_fib_net_init(net);
	if (error < 0)
		goto out;
	error = nl_fib_lookup_init(net);
	if (error < 0)
		goto out_nlfl;
	error = fib_proc_init(net);
	if (error < 0)
		goto out_proc;
out:
	return error;

	/* Unwind in reverse order of initialization. */
out_proc:
	nl_fib_lookup_exit(net);
out_nlfl:
	ip_fib_net_exit(net);
	goto out;
}

static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}

static struct pernet_operations fib_net_ops = {
	.init = fib_net_init,
	.exit = fib_net_exit,
};

/* Boot-time setup: rtnetlink handlers, pernet ops, notifiers. */
void __init ip_fib_init(void)
{
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

	register_pernet_subsys(&fib_net_ops);
	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	fib_hash_init();
}