fib_frontend.c revision 5811662b15db018c740c57d037523683fd3e6123
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16#include <linux/module.h> 17#include <asm/uaccess.h> 18#include <asm/system.h> 19#include <linux/bitops.h> 20#include <linux/capability.h> 21#include <linux/types.h> 22#include <linux/kernel.h> 23#include <linux/mm.h> 24#include <linux/string.h> 25#include <linux/socket.h> 26#include <linux/sockios.h> 27#include <linux/errno.h> 28#include <linux/in.h> 29#include <linux/inet.h> 30#include <linux/inetdevice.h> 31#include <linux/netdevice.h> 32#include <linux/if_addr.h> 33#include <linux/if_arp.h> 34#include <linux/skbuff.h> 35#include <linux/init.h> 36#include <linux/list.h> 37#include <linux/slab.h> 38 39#include <net/ip.h> 40#include <net/protocol.h> 41#include <net/route.h> 42#include <net/tcp.h> 43#include <net/sock.h> 44#include <net/arp.h> 45#include <net/ip_fib.h> 46#include <net/rtnetlink.h> 47 48#ifndef CONFIG_IP_MULTIPLE_TABLES 49 50static int __net_init fib4_rules_init(struct net *net) 51{ 52 struct fib_table *local_table, *main_table; 53 54 local_table = fib_hash_table(RT_TABLE_LOCAL); 55 if (local_table == NULL) 56 return -ENOMEM; 57 58 main_table = fib_hash_table(RT_TABLE_MAIN); 59 if (main_table == NULL) 60 goto fail; 61 62 hlist_add_head_rcu(&local_table->tb_hlist, 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); 64 hlist_add_head_rcu(&main_table->tb_hlist, 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); 66 return 0; 67 68fail: 69 kfree(local_table); 70 return -ENOMEM; 71} 72#else 73 74struct fib_table *fib_new_table(struct net *net, u32 id) 75{ 76 struct fib_table *tb; 77 unsigned int h; 78 79 if (id == 0) 80 id = RT_TABLE_MAIN; 81 tb = fib_get_table(net, id); 82 if (tb) 83 return tb; 84 85 tb = fib_hash_table(id); 86 if (!tb) 87 return NULL; 88 h = id & (FIB_TABLE_HASHSZ - 1); 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); 90 return tb; 91} 92 93struct fib_table *fib_get_table(struct net *net, u32 id) 94{ 95 struct fib_table *tb; 96 struct hlist_node *node; 97 struct hlist_head *head; 98 unsigned int h; 99 100 if (id == 0) 101 id = RT_TABLE_MAIN; 102 h = id & (FIB_TABLE_HASHSZ - 1); 103 104 rcu_read_lock(); 105 head = &net->ipv4.fib_table_hash[h]; 106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 107 if (tb->tb_id == id) { 108 rcu_read_unlock(); 109 return tb; 110 } 111 } 112 rcu_read_unlock(); 113 return NULL; 114} 115#endif /* CONFIG_IP_MULTIPLE_TABLES */ 116 117void fib_select_default(struct net *net, 118 const struct flowi *flp, struct fib_result *res) 119{ 120 struct fib_table *tb; 121 int table = RT_TABLE_MAIN; 122#ifdef CONFIG_IP_MULTIPLE_TABLES 123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) 124 return; 125 table = res->r->table; 126#endif 127 tb = fib_get_table(net, table); 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 129 fib_table_select_default(tb, flp, res); 130} 131 132static void fib_flush(struct net *net) 133{ 134 int flushed = 0; 135 struct fib_table *tb; 136 struct hlist_node *node; 137 struct hlist_head *head; 138 unsigned int h; 139 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 141 head = &net->ipv4.fib_table_hash[h]; 142 hlist_for_each_entry(tb, node, head, tb_hlist) 143 flushed += fib_table_flush(tb); 144 } 145 146 if (flushed) 147 rt_cache_flush(net, -1); 148} 149 150/** 151 * __ip_dev_find - find the first device with a given source address. 152 * @net: the net namespace 153 * @addr: the source address 154 * @devref: if true, take a reference on the found device 155 * 156 * If a caller uses devref=false, it should be protected by RCU, or RTNL 157 */ 158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 159{ 160 struct flowi fl = { 161 .fl4_dst = addr, 162 .flags = FLOWI_FLAG_MATCH_ANY_IIF 163 }; 164 struct fib_result res = { 0 }; 165 struct net_device *dev = NULL; 166 167 rcu_read_lock(); 168 if (fib_lookup(net, &fl, &res)) { 169 rcu_read_unlock(); 170 return NULL; 171 } 172 if (res.type != RTN_LOCAL) 173 goto out; 174 dev = FIB_RES_DEV(res); 175 176 if (dev && devref) 177 dev_hold(dev); 178out: 179 rcu_read_unlock(); 180 return dev; 181} 182EXPORT_SYMBOL(__ip_dev_find); 183 184/* 185 * Find address type as if only "dev" was present in the system. If 186 * on_dev is NULL then all interfaces are taken into consideration. 187 */ 188static inline unsigned __inet_dev_addr_type(struct net *net, 189 const struct net_device *dev, 190 __be32 addr) 191{ 192 struct flowi fl = { .fl4_dst = addr }; 193 struct fib_result res; 194 unsigned ret = RTN_BROADCAST; 195 struct fib_table *local_table; 196 197 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) 198 return RTN_BROADCAST; 199 if (ipv4_is_multicast(addr)) 200 return RTN_MULTICAST; 201 202#ifdef CONFIG_IP_MULTIPLE_TABLES 203 res.r = NULL; 204#endif 205 206 local_table = fib_get_table(net, RT_TABLE_LOCAL); 207 if (local_table) { 208 ret = RTN_UNICAST; 209 rcu_read_lock(); 210 if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { 211 if (!dev || dev == res.fi->fib_dev) 212 ret = res.type; 213 } 214 rcu_read_unlock(); 215 } 216 return ret; 217} 218 219unsigned int inet_addr_type(struct net *net, __be32 addr) 220{ 221 return __inet_dev_addr_type(net, NULL, addr); 222} 223EXPORT_SYMBOL(inet_addr_type); 224 225unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 226 __be32 addr) 227{ 228 return __inet_dev_addr_type(net, dev, addr); 229} 230EXPORT_SYMBOL(inet_dev_addr_type); 231 232/* Given (packet source, input interface) and optional (dst, oif, tos): 233 * - (main) check, that source is valid i.e. not broadcast or our local 234 * address. 235 * - figure out what "logical" interface this packet arrived 236 * and calculate "specific destination" address. 237 * - check, that packet arrived from expected physical interface. 238 * called with rcu_read_lock() 239 */ 240int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 241 struct net_device *dev, __be32 *spec_dst, 242 u32 *itag, u32 mark) 243{ 244 struct in_device *in_dev; 245 struct flowi fl = { 246 .fl4_dst = src, 247 .fl4_src = dst, 248 .fl4_tos = tos, 249 .mark = mark, 250 .iif = oif 251 }; 252 struct fib_result res; 253 int no_addr, rpf, accept_local; 254 bool dev_match; 255 int ret; 256 struct net *net; 257 258 no_addr = rpf = accept_local = 0; 259 in_dev = __in_dev_get_rcu(dev); 260 if (in_dev) { 261 no_addr = in_dev->ifa_list == NULL; 262 rpf = IN_DEV_RPFILTER(in_dev); 263 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 264 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 265 fl.mark = 0; 266 } 267 268 if (in_dev == NULL) 269 goto e_inval; 270 271 net = dev_net(dev); 272 if (fib_lookup(net, &fl, &res)) 273 goto last_resort; 274 if (res.type != RTN_UNICAST) { 275 if (res.type != RTN_LOCAL || !accept_local) 276 goto e_inval; 277 } 278 *spec_dst = FIB_RES_PREFSRC(res); 279 fib_combine_itag(itag, &res); 280 dev_match = false; 281 282#ifdef CONFIG_IP_ROUTE_MULTIPATH 283 for (ret = 0; ret < res.fi->fib_nhs; ret++) { 284 struct fib_nh *nh = &res.fi->fib_nh[ret]; 285 286 if (nh->nh_dev == dev) { 287 dev_match = true; 288 break; 289 } 290 } 291#else 292 if (FIB_RES_DEV(res) == dev) 293 dev_match = true; 294#endif 295 if (dev_match) { 296 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 297 return ret; 298 } 299 if (no_addr) 300 goto last_resort; 301 if (rpf == 1) 302 goto e_rpf; 303 fl.oif = dev->ifindex; 304 305 ret = 0; 306 if (fib_lookup(net, &fl, &res) == 0) { 307 if (res.type == RTN_UNICAST) { 308 *spec_dst = FIB_RES_PREFSRC(res); 309 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 310 } 311 } 312 return ret; 313 314last_resort: 315 if (rpf) 316 goto e_rpf; 317 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 318 *itag = 0; 319 return 0; 320 321e_inval: 322 return -EINVAL; 323e_rpf: 324 return -EXDEV; 325} 326 327static inline __be32 sk_extract_addr(struct sockaddr *addr) 328{ 329 return ((struct sockaddr_in *) addr)->sin_addr.s_addr; 330} 331 332static int put_rtax(struct nlattr *mx, int len, int type, u32 value) 333{ 334 struct nlattr *nla; 335 336 nla = (struct nlattr *) ((char *) mx + len); 337 nla->nla_type = type; 338 nla->nla_len = nla_attr_size(4); 339 *(u32 *) nla_data(nla) = value; 340 341 return len + nla_total_size(4); 342} 343 344static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, 345 struct fib_config *cfg) 346{ 347 __be32 addr; 348 int plen; 349 350 memset(cfg, 0, sizeof(*cfg)); 351 cfg->fc_nlinfo.nl_net = net; 352 353 if (rt->rt_dst.sa_family != AF_INET) 354 return -EAFNOSUPPORT; 355 356 /* 357 * Check mask for validity: 358 * a) it must be contiguous. 359 * b) destination must have all host bits clear. 360 * c) if application forgot to set correct family (AF_INET), 361 * reject request unless it is absolutely clear i.e. 362 * both family and mask are zero. 363 */ 364 plen = 32; 365 addr = sk_extract_addr(&rt->rt_dst); 366 if (!(rt->rt_flags & RTF_HOST)) { 367 __be32 mask = sk_extract_addr(&rt->rt_genmask); 368 369 if (rt->rt_genmask.sa_family != AF_INET) { 370 if (mask || rt->rt_genmask.sa_family) 371 return -EAFNOSUPPORT; 372 } 373 374 if (bad_mask(mask, addr)) 375 return -EINVAL; 376 377 plen = inet_mask_len(mask); 378 } 379 380 cfg->fc_dst_len = plen; 381 cfg->fc_dst = addr; 382 383 if (cmd != SIOCDELRT) { 384 cfg->fc_nlflags = NLM_F_CREATE; 385 cfg->fc_protocol = RTPROT_BOOT; 386 } 387 388 if (rt->rt_metric) 389 cfg->fc_priority = rt->rt_metric - 1; 390 391 if (rt->rt_flags & RTF_REJECT) { 392 cfg->fc_scope = RT_SCOPE_HOST; 393 cfg->fc_type = RTN_UNREACHABLE; 394 return 0; 395 } 396 397 cfg->fc_scope = RT_SCOPE_NOWHERE; 398 cfg->fc_type = RTN_UNICAST; 399 400 if (rt->rt_dev) { 401 char *colon; 402 struct net_device *dev; 403 char devname[IFNAMSIZ]; 404 405 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) 406 return -EFAULT; 407 408 devname[IFNAMSIZ-1] = 0; 409 colon = strchr(devname, ':'); 410 if (colon) 411 *colon = 0; 412 dev = __dev_get_by_name(net, devname); 413 if (!dev) 414 return -ENODEV; 415 cfg->fc_oif = dev->ifindex; 416 if (colon) { 417 struct in_ifaddr *ifa; 418 struct in_device *in_dev = __in_dev_get_rtnl(dev); 419 if (!in_dev) 420 return -ENODEV; 421 *colon = ':'; 422 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 423 if (strcmp(ifa->ifa_label, devname) == 0) 424 break; 425 if (ifa == NULL) 426 return -ENODEV; 427 cfg->fc_prefsrc = ifa->ifa_local; 428 } 429 } 430 431 addr = sk_extract_addr(&rt->rt_gateway); 432 if (rt->rt_gateway.sa_family == AF_INET && addr) { 433 cfg->fc_gw = addr; 434 if (rt->rt_flags & RTF_GATEWAY && 435 inet_addr_type(net, addr) == RTN_UNICAST) 436 cfg->fc_scope = RT_SCOPE_UNIVERSE; 437 } 438 439 if (cmd == SIOCDELRT) 440 return 0; 441 442 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) 443 return -EINVAL; 444 445 if (cfg->fc_scope == RT_SCOPE_NOWHERE) 446 cfg->fc_scope = RT_SCOPE_LINK; 447 448 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { 449 struct nlattr *mx; 450 int len = 0; 451 452 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 453 if (mx == NULL) 454 return -ENOMEM; 455 456 if (rt->rt_flags & RTF_MTU) 457 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); 458 459 if (rt->rt_flags & RTF_WINDOW) 460 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); 461 462 if (rt->rt_flags & RTF_IRTT) 463 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); 464 465 cfg->fc_mx = mx; 466 cfg->fc_mx_len = len; 467 } 468 469 return 0; 470} 471 472/* 473 * Handle IP routing ioctl calls. 474 * These are used to manipulate the routing tables 475 */ 476int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 477{ 478 struct fib_config cfg; 479 struct rtentry rt; 480 int err; 481 482 switch (cmd) { 483 case SIOCADDRT: /* Add a route */ 484 case SIOCDELRT: /* Delete a route */ 485 if (!capable(CAP_NET_ADMIN)) 486 return -EPERM; 487 488 if (copy_from_user(&rt, arg, sizeof(rt))) 489 return -EFAULT; 490 491 rtnl_lock(); 492 err = rtentry_to_fib_config(net, cmd, &rt, &cfg); 493 if (err == 0) { 494 struct fib_table *tb; 495 496 if (cmd == SIOCDELRT) { 497 tb = fib_get_table(net, cfg.fc_table); 498 if (tb) 499 err = fib_table_delete(tb, &cfg); 500 else 501 err = -ESRCH; 502 } else { 503 tb = fib_new_table(net, cfg.fc_table); 504 if (tb) 505 err = fib_table_insert(tb, &cfg); 506 else 507 err = -ENOBUFS; 508 } 509 510 /* allocated by rtentry_to_fib_config() */ 511 kfree(cfg.fc_mx); 512 } 513 rtnl_unlock(); 514 return err; 515 } 516 return -EINVAL; 517} 518 519const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { 520 [RTA_DST] = { .type = NLA_U32 }, 521 [RTA_SRC] = { .type = NLA_U32 }, 522 [RTA_IIF] = { .type = NLA_U32 }, 523 [RTA_OIF] = { .type = NLA_U32 }, 524 [RTA_GATEWAY] = { .type = NLA_U32 }, 525 [RTA_PRIORITY] = { .type = NLA_U32 }, 526 [RTA_PREFSRC] = { .type = NLA_U32 }, 527 [RTA_METRICS] = { .type = NLA_NESTED }, 528 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 529 [RTA_FLOW] = { .type = NLA_U32 }, 530}; 531 532static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 533 struct nlmsghdr *nlh, struct fib_config *cfg) 534{ 535 struct nlattr *attr; 536 int err, remaining; 537 struct rtmsg *rtm; 538 539 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); 540 if (err < 0) 541 goto errout; 542 543 memset(cfg, 0, sizeof(*cfg)); 544 545 rtm = nlmsg_data(nlh); 546 cfg->fc_dst_len = rtm->rtm_dst_len; 547 cfg->fc_tos = rtm->rtm_tos; 548 cfg->fc_table = rtm->rtm_table; 549 cfg->fc_protocol = rtm->rtm_protocol; 550 cfg->fc_scope = rtm->rtm_scope; 551 cfg->fc_type = rtm->rtm_type; 552 cfg->fc_flags = rtm->rtm_flags; 553 cfg->fc_nlflags = nlh->nlmsg_flags; 554 555 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 556 cfg->fc_nlinfo.nlh = nlh; 557 cfg->fc_nlinfo.nl_net = net; 558 559 if (cfg->fc_type > RTN_MAX) { 560 err = -EINVAL; 561 goto errout; 562 } 563 564 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { 565 switch (nla_type(attr)) { 566 case RTA_DST: 567 cfg->fc_dst = nla_get_be32(attr); 568 break; 569 case RTA_OIF: 570 cfg->fc_oif = nla_get_u32(attr); 571 break; 572 case RTA_GATEWAY: 573 cfg->fc_gw = nla_get_be32(attr); 574 break; 575 case RTA_PRIORITY: 576 cfg->fc_priority = nla_get_u32(attr); 577 break; 578 case RTA_PREFSRC: 579 cfg->fc_prefsrc = nla_get_be32(attr); 580 break; 581 case RTA_METRICS: 582 cfg->fc_mx = nla_data(attr); 583 cfg->fc_mx_len = nla_len(attr); 584 break; 585 case RTA_MULTIPATH: 586 cfg->fc_mp = nla_data(attr); 587 cfg->fc_mp_len = nla_len(attr); 588 break; 589 case RTA_FLOW: 590 cfg->fc_flow = nla_get_u32(attr); 591 break; 592 case RTA_TABLE: 593 cfg->fc_table = nla_get_u32(attr); 594 break; 595 } 596 } 597 598 return 0; 599errout: 600 return err; 601} 602 603static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 604{ 605 struct net *net = sock_net(skb->sk); 606 struct fib_config cfg; 607 struct fib_table *tb; 608 int err; 609 610 err = rtm_to_fib_config(net, skb, nlh, &cfg); 611 if (err < 0) 612 goto errout; 613 614 tb = fib_get_table(net, cfg.fc_table); 615 if (tb == NULL) { 616 err = -ESRCH; 617 goto errout; 618 } 619 620 err = fib_table_delete(tb, &cfg); 621errout: 622 return err; 623} 624 625static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 626{ 627 struct net *net = sock_net(skb->sk); 628 struct fib_config cfg; 629 struct fib_table *tb; 630 int err; 631 632 err = rtm_to_fib_config(net, skb, nlh, &cfg); 633 if (err < 0) 634 goto errout; 635 636 tb = fib_new_table(net, cfg.fc_table); 637 if (tb == NULL) { 638 err = -ENOBUFS; 639 goto errout; 640 } 641 642 err = fib_table_insert(tb, &cfg); 643errout: 644 return err; 645} 646 647static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 648{ 649 struct net *net = sock_net(skb->sk); 650 unsigned int h, s_h; 651 unsigned int e = 0, s_e; 652 struct fib_table *tb; 653 struct hlist_node *node; 654 struct hlist_head *head; 655 int dumped = 0; 656 657 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 658 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) 659 return ip_rt_dump(skb, cb); 660 661 s_h = cb->args[0]; 662 s_e = cb->args[1]; 663 664 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 665 e = 0; 666 head = &net->ipv4.fib_table_hash[h]; 667 hlist_for_each_entry(tb, node, head, tb_hlist) { 668 if (e < s_e) 669 goto next; 670 if (dumped) 671 memset(&cb->args[2], 0, sizeof(cb->args) - 672 2 * sizeof(cb->args[0])); 673 if (fib_table_dump(tb, skb, cb) < 0) 674 goto out; 675 dumped = 1; 676next: 677 e++; 678 } 679 } 680out: 681 cb->args[1] = e; 682 cb->args[0] = h; 683 684 return skb->len; 685} 686 687/* Prepare and feed intra-kernel routing request. 688 * Really, it should be netlink message, but :-( netlink 689 * can be not configured, so that we feed it directly 690 * to fib engine. It is legal, because all events occur 691 * only when netlink is already locked. 692 */ 693static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 694{ 695 struct net *net = dev_net(ifa->ifa_dev->dev); 696 struct fib_table *tb; 697 struct fib_config cfg = { 698 .fc_protocol = RTPROT_KERNEL, 699 .fc_type = type, 700 .fc_dst = dst, 701 .fc_dst_len = dst_len, 702 .fc_prefsrc = ifa->ifa_local, 703 .fc_oif = ifa->ifa_dev->dev->ifindex, 704 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 705 .fc_nlinfo = { 706 .nl_net = net, 707 }, 708 }; 709 710 if (type == RTN_UNICAST) 711 tb = fib_new_table(net, RT_TABLE_MAIN); 712 else 713 tb = fib_new_table(net, RT_TABLE_LOCAL); 714 715 if (tb == NULL) 716 return; 717 718 cfg.fc_table = tb->tb_id; 719 720 if (type != RTN_LOCAL) 721 cfg.fc_scope = RT_SCOPE_LINK; 722 else 723 cfg.fc_scope = RT_SCOPE_HOST; 724 725 if (cmd == RTM_NEWROUTE) 726 fib_table_insert(tb, &cfg); 727 else 728 fib_table_delete(tb, &cfg); 729} 730 731void fib_add_ifaddr(struct in_ifaddr *ifa) 732{ 733 struct in_device *in_dev = ifa->ifa_dev; 734 struct net_device *dev = in_dev->dev; 735 struct in_ifaddr *prim = ifa; 736 __be32 mask = ifa->ifa_mask; 737 __be32 addr = ifa->ifa_local; 738 __be32 prefix = ifa->ifa_address & mask; 739 740 if (ifa->ifa_flags & IFA_F_SECONDARY) { 741 prim = inet_ifa_byprefix(in_dev, prefix, mask); 742 if (prim == NULL) { 743 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 744 return; 745 } 746 } 747 748 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 749 750 if (!(dev->flags & IFF_UP)) 751 return; 752 753 /* Add broadcast address, if it is explicitly assigned. */ 754 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 755 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 756 757 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && 758 (prefix != addr || ifa->ifa_prefixlen < 32)) { 759 fib_magic(RTM_NEWROUTE, 760 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, 761 prefix, ifa->ifa_prefixlen, prim); 762 763 /* Add network specific broadcasts, when it takes a sense */ 764 if (ifa->ifa_prefixlen < 31) { 765 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 766 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 767 32, prim); 768 } 769 } 770} 771 772static void fib_del_ifaddr(struct in_ifaddr *ifa) 773{ 774 struct in_device *in_dev = ifa->ifa_dev; 775 struct net_device *dev = in_dev->dev; 776 struct in_ifaddr *ifa1; 777 struct in_ifaddr *prim = ifa; 778 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; 779 __be32 any = ifa->ifa_address & ifa->ifa_mask; 780#define LOCAL_OK 1 781#define BRD_OK 2 782#define BRD0_OK 4 783#define BRD1_OK 8 784 unsigned ok = 0; 785 786 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) 787 fib_magic(RTM_DELROUTE, 788 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, 789 any, ifa->ifa_prefixlen, prim); 790 else { 791 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 792 if (prim == NULL) { 793 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 794 return; 795 } 796 } 797 798 /* Deletion is more complicated than add. 799 * We should take care of not to delete too much :-) 800 * 801 * Scan address list to be sure that addresses are really gone. 802 */ 803 804 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 805 if (ifa->ifa_local == ifa1->ifa_local) 806 ok |= LOCAL_OK; 807 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 808 ok |= BRD_OK; 809 if (brd == ifa1->ifa_broadcast) 810 ok |= BRD1_OK; 811 if (any == ifa1->ifa_broadcast) 812 ok |= BRD0_OK; 813 } 814 815 if (!(ok & BRD_OK)) 816 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 817 if (!(ok & BRD1_OK)) 818 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 819 if (!(ok & BRD0_OK)) 820 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 821 if (!(ok & LOCAL_OK)) { 822 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 823 824 /* Check, that this local address finally disappeared. */ 825 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 826 /* And the last, but not the least thing. 827 * We must flush stray FIB entries. 828 * 829 * First of all, we scan fib_info list searching 830 * for stray nexthop entries, then ignite fib_flush. 831 */ 832 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) 833 fib_flush(dev_net(dev)); 834 } 835 } 836#undef LOCAL_OK 837#undef BRD_OK 838#undef BRD0_OK 839#undef BRD1_OK 840} 841 842static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) 843{ 844 845 struct fib_result res; 846 struct flowi fl = { 847 .mark = frn->fl_mark, 848 .fl4_dst = frn->fl_addr, 849 .fl4_tos = frn->fl_tos, 850 .fl4_scope = frn->fl_scope, 851 }; 852 853#ifdef CONFIG_IP_MULTIPLE_TABLES 854 res.r = NULL; 855#endif 856 857 frn->err = -ENOENT; 858 if (tb) { 859 local_bh_disable(); 860 861 frn->tb_id = tb->tb_id; 862 rcu_read_lock(); 863 frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); 864 865 if (!frn->err) { 866 frn->prefixlen = res.prefixlen; 867 frn->nh_sel = res.nh_sel; 868 frn->type = res.type; 869 frn->scope = res.scope; 870 } 871 rcu_read_unlock(); 872 local_bh_enable(); 873 } 874} 875 876static void nl_fib_input(struct sk_buff *skb) 877{ 878 struct net *net; 879 struct fib_result_nl *frn; 880 struct nlmsghdr *nlh; 881 struct fib_table *tb; 882 u32 pid; 883 884 net = sock_net(skb->sk); 885 nlh = nlmsg_hdr(skb); 886 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 887 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 888 return; 889 890 skb = skb_clone(skb, GFP_KERNEL); 891 if (skb == NULL) 892 return; 893 nlh = nlmsg_hdr(skb); 894 895 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 896 tb = fib_get_table(net, frn->tb_id_in); 897 898 nl_fib_lookup(frn, tb); 899 900 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 901 NETLINK_CB(skb).pid = 0; /* from kernel */ 902 NETLINK_CB(skb).dst_group = 0; /* unicast */ 903 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 904} 905 906static int __net_init nl_fib_lookup_init(struct net *net) 907{ 908 struct sock *sk; 909 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, 910 nl_fib_input, NULL, THIS_MODULE); 911 if (sk == NULL) 912 return -EAFNOSUPPORT; 913 net->ipv4.fibnl = sk; 914 return 0; 915} 916 917static void nl_fib_lookup_exit(struct net *net) 918{ 919 netlink_kernel_release(net->ipv4.fibnl); 920 net->ipv4.fibnl = NULL; 921} 922 923static void fib_disable_ip(struct net_device *dev, int force, int delay) 924{ 925 if (fib_sync_down_dev(dev, force)) 926 fib_flush(dev_net(dev)); 927 rt_cache_flush(dev_net(dev), delay); 928 arp_ifdown(dev); 929} 930 931static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 932{ 933 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 934 struct net_device *dev = ifa->ifa_dev->dev; 935 936 switch (event) { 937 case NETDEV_UP: 938 fib_add_ifaddr(ifa); 939#ifdef CONFIG_IP_ROUTE_MULTIPATH 940 fib_sync_up(dev); 941#endif 942 rt_cache_flush(dev_net(dev), -1); 943 break; 944 case NETDEV_DOWN: 945 fib_del_ifaddr(ifa); 946 if (ifa->ifa_dev->ifa_list == NULL) { 947 /* Last address was deleted from this interface. 948 * Disable IP. 949 */ 950 fib_disable_ip(dev, 1, 0); 951 } else { 952 rt_cache_flush(dev_net(dev), -1); 953 } 954 break; 955 } 956 return NOTIFY_DONE; 957} 958 959static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 960{ 961 struct net_device *dev = ptr; 962 struct in_device *in_dev = __in_dev_get_rtnl(dev); 963 964 if (event == NETDEV_UNREGISTER) { 965 fib_disable_ip(dev, 2, -1); 966 return NOTIFY_DONE; 967 } 968 969 if (!in_dev) 970 return NOTIFY_DONE; 971 972 switch (event) { 973 case NETDEV_UP: 974 for_ifa(in_dev) { 975 fib_add_ifaddr(ifa); 976 } endfor_ifa(in_dev); 977#ifdef CONFIG_IP_ROUTE_MULTIPATH 978 fib_sync_up(dev); 979#endif 980 rt_cache_flush(dev_net(dev), -1); 981 break; 982 case NETDEV_DOWN: 983 fib_disable_ip(dev, 0, 0); 984 break; 985 case NETDEV_CHANGEMTU: 986 case NETDEV_CHANGE: 987 rt_cache_flush(dev_net(dev), 0); 988 break; 989 case NETDEV_UNREGISTER_BATCH: 990 rt_cache_flush_batch(); 991 break; 992 } 993 return NOTIFY_DONE; 994} 995 996static struct notifier_block fib_inetaddr_notifier = { 997 .notifier_call = fib_inetaddr_event, 998}; 999 1000static struct notifier_block fib_netdev_notifier = { 1001 .notifier_call = fib_netdev_event, 1002}; 1003 1004static int __net_init ip_fib_net_init(struct net *net) 1005{ 1006 int err; 1007 size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ; 1008 1009 /* Avoid false sharing : Use at least a full cache line */ 1010 size = max_t(size_t, size, L1_CACHE_BYTES); 1011 1012 net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL); 1013 if (net->ipv4.fib_table_hash == NULL) 1014 return -ENOMEM; 1015 1016 err = fib4_rules_init(net); 1017 if (err < 0) 1018 goto fail; 1019 return 0; 1020 1021fail: 1022 kfree(net->ipv4.fib_table_hash); 1023 return err; 1024} 1025 1026static void ip_fib_net_exit(struct net *net) 1027{ 1028 unsigned int i; 1029 1030#ifdef CONFIG_IP_MULTIPLE_TABLES 1031 fib4_rules_exit(net); 1032#endif 1033 1034 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1035 struct fib_table *tb; 1036 struct hlist_head *head; 1037 struct hlist_node *node, *tmp; 1038 1039 head = &net->ipv4.fib_table_hash[i]; 1040 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1041 hlist_del(node); 1042 fib_table_flush(tb); 1043 fib_free_table(tb); 1044 } 1045 } 1046 kfree(net->ipv4.fib_table_hash); 1047} 1048 1049static int __net_init fib_net_init(struct net *net) 1050{ 1051 int error; 1052 1053 error = ip_fib_net_init(net); 1054 if (error < 0) 1055 goto out; 1056 error = nl_fib_lookup_init(net); 1057 if (error < 0) 1058 goto out_nlfl; 1059 error = fib_proc_init(net); 1060 if (error < 0) 1061 goto out_proc; 1062out: 1063 return error; 1064 1065out_proc: 1066 nl_fib_lookup_exit(net); 1067out_nlfl: 1068 ip_fib_net_exit(net); 1069 goto out; 1070} 1071 1072static void __net_exit fib_net_exit(struct net *net) 1073{ 1074 fib_proc_exit(net); 1075 nl_fib_lookup_exit(net); 1076 ip_fib_net_exit(net); 1077} 1078 1079static struct pernet_operations fib_net_ops = { 1080 .init = fib_net_init, 1081 .exit = fib_net_exit, 1082}; 1083 1084void __init ip_fib_init(void) 1085{ 1086 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1087 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1088 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1089 1090 register_pernet_subsys(&fib_net_ops); 1091 register_netdevice_notifier(&fib_netdev_notifier); 1092 register_inetaddr_notifier(&fib_inetaddr_notifier); 1093 1094 fib_hash_init(); 1095} 1096