fib_frontend.c revision 4f84d82f7a623f8641af2574425c329431ff158f
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $ 9 * 10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 */ 17 18#include <linux/module.h> 19#include <asm/uaccess.h> 20#include <asm/system.h> 21#include <linux/bitops.h> 22#include <linux/capability.h> 23#include <linux/types.h> 24#include <linux/kernel.h> 25#include <linux/mm.h> 26#include <linux/string.h> 27#include <linux/socket.h> 28#include <linux/sockios.h> 29#include <linux/errno.h> 30#include <linux/in.h> 31#include <linux/inet.h> 32#include <linux/inetdevice.h> 33#include <linux/netdevice.h> 34#include <linux/if_addr.h> 35#include <linux/if_arp.h> 36#include <linux/skbuff.h> 37#include <linux/init.h> 38#include <linux/list.h> 39 40#include <net/ip.h> 41#include <net/protocol.h> 42#include <net/route.h> 43#include <net/tcp.h> 44#include <net/sock.h> 45#include <net/icmp.h> 46#include <net/arp.h> 47#include <net/ip_fib.h> 48#include <net/rtnetlink.h> 49 50#ifndef CONFIG_IP_MULTIPLE_TABLES 51 52static int __net_init fib4_rules_init(struct net *net) 53{ 54 struct fib_table *local_table, *main_table; 55 56 local_table = fib_hash_table(RT_TABLE_LOCAL); 57 if (local_table == NULL) 58 return -ENOMEM; 59 60 main_table = fib_hash_table(RT_TABLE_MAIN); 61 if (main_table == NULL) 62 goto fail; 63 64 hlist_add_head_rcu(&local_table->tb_hlist, 65 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); 66 hlist_add_head_rcu(&main_table->tb_hlist, 67 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); 68 return 0; 69 70fail: 71 kfree(local_table); 72 return -ENOMEM; 73} 74#else 75 76struct fib_table *fib_new_table(struct net *net, u32 id) 77{ 78 struct fib_table *tb; 79 unsigned int h; 80 81 if (id == 0) 82 id = RT_TABLE_MAIN; 83 tb = fib_get_table(net, id); 84 if (tb) 85 return tb; 86 87 tb = fib_hash_table(id); 88 if (!tb) 89 return NULL; 90 h = id & (FIB_TABLE_HASHSZ - 1); 91 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); 92 return tb; 93} 94 95struct fib_table *fib_get_table(struct net *net, u32 id) 96{ 97 struct fib_table *tb; 98 struct hlist_node *node; 99 struct hlist_head *head; 100 unsigned int h; 101 102 if (id == 0) 103 id = RT_TABLE_MAIN; 104 h = id & (FIB_TABLE_HASHSZ - 1); 105 106 rcu_read_lock(); 107 head = &net->ipv4.fib_table_hash[h]; 108 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 109 if (tb->tb_id == id) { 110 rcu_read_unlock(); 111 return tb; 112 } 113 } 114 rcu_read_unlock(); 115 return NULL; 116} 117#endif /* CONFIG_IP_MULTIPLE_TABLES */ 118 119static void fib_flush(struct net *net) 120{ 121 int flushed = 0; 122 struct fib_table *tb; 123 struct hlist_node *node; 124 struct hlist_head *head; 125 unsigned int h; 126 127 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 128 head = &net->ipv4.fib_table_hash[h]; 129 hlist_for_each_entry(tb, node, head, tb_hlist) 130 flushed += tb->tb_flush(tb); 131 } 132 133 if (flushed) 134 rt_cache_flush(-1); 135} 136 137/* 138 * Find the first device with a given source address. 139 */ 140 141struct net_device * ip_dev_find(__be32 addr) 142{ 143 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 144 struct fib_result res; 145 struct net_device *dev = NULL; 146 struct fib_table *local_table; 147 148#ifdef CONFIG_IP_MULTIPLE_TABLES 149 res.r = NULL; 150#endif 151 152 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL); 153 if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) 154 return NULL; 155 if (res.type != RTN_LOCAL) 156 goto out; 157 dev = FIB_RES_DEV(res); 158 159 if (dev) 160 dev_hold(dev); 161out: 162 fib_res_put(&res); 163 return dev; 164} 165 166/* 167 * Find address type as if only "dev" was present in the system. If 168 * on_dev is NULL then all interfaces are taken into consideration. 169 */ 170static inline unsigned __inet_dev_addr_type(struct net *net, 171 const struct net_device *dev, 172 __be32 addr) 173{ 174 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 175 struct fib_result res; 176 unsigned ret = RTN_BROADCAST; 177 struct fib_table *local_table; 178 179 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr)) 180 return RTN_BROADCAST; 181 if (ipv4_is_multicast(addr)) 182 return RTN_MULTICAST; 183 184#ifdef CONFIG_IP_MULTIPLE_TABLES 185 res.r = NULL; 186#endif 187 188 local_table = fib_get_table(net, RT_TABLE_LOCAL); 189 if (local_table) { 190 ret = RTN_UNICAST; 191 if (!local_table->tb_lookup(local_table, &fl, &res)) { 192 if (!dev || dev == res.fi->fib_dev) 193 ret = res.type; 194 fib_res_put(&res); 195 } 196 } 197 return ret; 198} 199 200unsigned int inet_addr_type(struct net *net, __be32 addr) 201{ 202 return __inet_dev_addr_type(net, NULL, addr); 203} 204 205unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 206 __be32 addr) 207{ 208 return __inet_dev_addr_type(net, dev, addr); 209} 210 211/* Given (packet source, input interface) and optional (dst, oif, tos): 212 - (main) check, that source is valid i.e. not broadcast or our local 213 address. 214 - figure out what "logical" interface this packet arrived 215 and calculate "specific destination" address. 216 - check, that packet arrived from expected physical interface. 217 */ 218 219int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 220 struct net_device *dev, __be32 *spec_dst, u32 *itag) 221{ 222 struct in_device *in_dev; 223 struct flowi fl = { .nl_u = { .ip4_u = 224 { .daddr = src, 225 .saddr = dst, 226 .tos = tos } }, 227 .iif = oif }; 228 struct fib_result res; 229 int no_addr, rpf; 230 int ret; 231 232 no_addr = rpf = 0; 233 rcu_read_lock(); 234 in_dev = __in_dev_get_rcu(dev); 235 if (in_dev) { 236 no_addr = in_dev->ifa_list == NULL; 237 rpf = IN_DEV_RPFILTER(in_dev); 238 } 239 rcu_read_unlock(); 240 241 if (in_dev == NULL) 242 goto e_inval; 243 244 if (fib_lookup(&fl, &res)) 245 goto last_resort; 246 if (res.type != RTN_UNICAST) 247 goto e_inval_res; 248 *spec_dst = FIB_RES_PREFSRC(res); 249 fib_combine_itag(itag, &res); 250#ifdef CONFIG_IP_ROUTE_MULTIPATH 251 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) 252#else 253 if (FIB_RES_DEV(res) == dev) 254#endif 255 { 256 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 257 fib_res_put(&res); 258 return ret; 259 } 260 fib_res_put(&res); 261 if (no_addr) 262 goto last_resort; 263 if (rpf) 264 goto e_inval; 265 fl.oif = dev->ifindex; 266 267 ret = 0; 268 if (fib_lookup(&fl, &res) == 0) { 269 if (res.type == RTN_UNICAST) { 270 *spec_dst = FIB_RES_PREFSRC(res); 271 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 272 } 273 fib_res_put(&res); 274 } 275 return ret; 276 277last_resort: 278 if (rpf) 279 goto e_inval; 280 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 281 *itag = 0; 282 return 0; 283 284e_inval_res: 285 fib_res_put(&res); 286e_inval: 287 return -EINVAL; 288} 289 290static inline __be32 sk_extract_addr(struct sockaddr *addr) 291{ 292 return ((struct sockaddr_in *) addr)->sin_addr.s_addr; 293} 294 295static int put_rtax(struct nlattr *mx, int len, int type, u32 value) 296{ 297 struct nlattr *nla; 298 299 nla = (struct nlattr *) ((char *) mx + len); 300 nla->nla_type = type; 301 nla->nla_len = nla_attr_size(4); 302 *(u32 *) nla_data(nla) = value; 303 304 return len + nla_total_size(4); 305} 306 307static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, 308 struct fib_config *cfg) 309{ 310 __be32 addr; 311 int plen; 312 313 memset(cfg, 0, sizeof(*cfg)); 314 cfg->fc_nlinfo.nl_net = net; 315 316 if (rt->rt_dst.sa_family != AF_INET) 317 return -EAFNOSUPPORT; 318 319 /* 320 * Check mask for validity: 321 * a) it must be contiguous. 322 * b) destination must have all host bits clear. 323 * c) if application forgot to set correct family (AF_INET), 324 * reject request unless it is absolutely clear i.e. 325 * both family and mask are zero. 326 */ 327 plen = 32; 328 addr = sk_extract_addr(&rt->rt_dst); 329 if (!(rt->rt_flags & RTF_HOST)) { 330 __be32 mask = sk_extract_addr(&rt->rt_genmask); 331 332 if (rt->rt_genmask.sa_family != AF_INET) { 333 if (mask || rt->rt_genmask.sa_family) 334 return -EAFNOSUPPORT; 335 } 336 337 if (bad_mask(mask, addr)) 338 return -EINVAL; 339 340 plen = inet_mask_len(mask); 341 } 342 343 cfg->fc_dst_len = plen; 344 cfg->fc_dst = addr; 345 346 if (cmd != SIOCDELRT) { 347 cfg->fc_nlflags = NLM_F_CREATE; 348 cfg->fc_protocol = RTPROT_BOOT; 349 } 350 351 if (rt->rt_metric) 352 cfg->fc_priority = rt->rt_metric - 1; 353 354 if (rt->rt_flags & RTF_REJECT) { 355 cfg->fc_scope = RT_SCOPE_HOST; 356 cfg->fc_type = RTN_UNREACHABLE; 357 return 0; 358 } 359 360 cfg->fc_scope = RT_SCOPE_NOWHERE; 361 cfg->fc_type = RTN_UNICAST; 362 363 if (rt->rt_dev) { 364 char *colon; 365 struct net_device *dev; 366 char devname[IFNAMSIZ]; 367 368 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) 369 return -EFAULT; 370 371 devname[IFNAMSIZ-1] = 0; 372 colon = strchr(devname, ':'); 373 if (colon) 374 *colon = 0; 375 dev = __dev_get_by_name(net, devname); 376 if (!dev) 377 return -ENODEV; 378 cfg->fc_oif = dev->ifindex; 379 if (colon) { 380 struct in_ifaddr *ifa; 381 struct in_device *in_dev = __in_dev_get_rtnl(dev); 382 if (!in_dev) 383 return -ENODEV; 384 *colon = ':'; 385 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 386 if (strcmp(ifa->ifa_label, devname) == 0) 387 break; 388 if (ifa == NULL) 389 return -ENODEV; 390 cfg->fc_prefsrc = ifa->ifa_local; 391 } 392 } 393 394 addr = sk_extract_addr(&rt->rt_gateway); 395 if (rt->rt_gateway.sa_family == AF_INET && addr) { 396 cfg->fc_gw = addr; 397 if (rt->rt_flags & RTF_GATEWAY && 398 inet_addr_type(net, addr) == RTN_UNICAST) 399 cfg->fc_scope = RT_SCOPE_UNIVERSE; 400 } 401 402 if (cmd == SIOCDELRT) 403 return 0; 404 405 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) 406 return -EINVAL; 407 408 if (cfg->fc_scope == RT_SCOPE_NOWHERE) 409 cfg->fc_scope = RT_SCOPE_LINK; 410 411 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { 412 struct nlattr *mx; 413 int len = 0; 414 415 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 416 if (mx == NULL) 417 return -ENOMEM; 418 419 if (rt->rt_flags & RTF_MTU) 420 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); 421 422 if (rt->rt_flags & RTF_WINDOW) 423 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); 424 425 if (rt->rt_flags & RTF_IRTT) 426 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); 427 428 cfg->fc_mx = mx; 429 cfg->fc_mx_len = len; 430 } 431 432 return 0; 433} 434 435/* 436 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 437 */ 438 439int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 440{ 441 struct fib_config cfg; 442 struct rtentry rt; 443 int err; 444 445 switch (cmd) { 446 case SIOCADDRT: /* Add a route */ 447 case SIOCDELRT: /* Delete a route */ 448 if (!capable(CAP_NET_ADMIN)) 449 return -EPERM; 450 451 if (copy_from_user(&rt, arg, sizeof(rt))) 452 return -EFAULT; 453 454 rtnl_lock(); 455 err = rtentry_to_fib_config(net, cmd, &rt, &cfg); 456 if (err == 0) { 457 struct fib_table *tb; 458 459 if (cmd == SIOCDELRT) { 460 tb = fib_get_table(net, cfg.fc_table); 461 if (tb) 462 err = tb->tb_delete(tb, &cfg); 463 else 464 err = -ESRCH; 465 } else { 466 tb = fib_new_table(net, cfg.fc_table); 467 if (tb) 468 err = tb->tb_insert(tb, &cfg); 469 else 470 err = -ENOBUFS; 471 } 472 473 /* allocated by rtentry_to_fib_config() */ 474 kfree(cfg.fc_mx); 475 } 476 rtnl_unlock(); 477 return err; 478 } 479 return -EINVAL; 480} 481 482const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 483 [RTA_DST] = { .type = NLA_U32 }, 484 [RTA_SRC] = { .type = NLA_U32 }, 485 [RTA_IIF] = { .type = NLA_U32 }, 486 [RTA_OIF] = { .type = NLA_U32 }, 487 [RTA_GATEWAY] = { .type = NLA_U32 }, 488 [RTA_PRIORITY] = { .type = NLA_U32 }, 489 [RTA_PREFSRC] = { .type = NLA_U32 }, 490 [RTA_METRICS] = { .type = NLA_NESTED }, 491 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 492 [RTA_PROTOINFO] = { .type = NLA_U32 }, 493 [RTA_FLOW] = { .type = NLA_U32 }, 494}; 495 496static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 497 struct nlmsghdr *nlh, struct fib_config *cfg) 498{ 499 struct nlattr *attr; 500 int err, remaining; 501 struct rtmsg *rtm; 502 503 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); 504 if (err < 0) 505 goto errout; 506 507 memset(cfg, 0, sizeof(*cfg)); 508 509 rtm = nlmsg_data(nlh); 510 cfg->fc_dst_len = rtm->rtm_dst_len; 511 cfg->fc_tos = rtm->rtm_tos; 512 cfg->fc_table = rtm->rtm_table; 513 cfg->fc_protocol = rtm->rtm_protocol; 514 cfg->fc_scope = rtm->rtm_scope; 515 cfg->fc_type = rtm->rtm_type; 516 cfg->fc_flags = rtm->rtm_flags; 517 cfg->fc_nlflags = nlh->nlmsg_flags; 518 519 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 520 cfg->fc_nlinfo.nlh = nlh; 521 cfg->fc_nlinfo.nl_net = net; 522 523 if (cfg->fc_type > RTN_MAX) { 524 err = -EINVAL; 525 goto errout; 526 } 527 528 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { 529 switch (nla_type(attr)) { 530 case RTA_DST: 531 cfg->fc_dst = nla_get_be32(attr); 532 break; 533 case RTA_OIF: 534 cfg->fc_oif = nla_get_u32(attr); 535 break; 536 case RTA_GATEWAY: 537 cfg->fc_gw = nla_get_be32(attr); 538 break; 539 case RTA_PRIORITY: 540 cfg->fc_priority = nla_get_u32(attr); 541 break; 542 case RTA_PREFSRC: 543 cfg->fc_prefsrc = nla_get_be32(attr); 544 break; 545 case RTA_METRICS: 546 cfg->fc_mx = nla_data(attr); 547 cfg->fc_mx_len = nla_len(attr); 548 break; 549 case RTA_MULTIPATH: 550 cfg->fc_mp = nla_data(attr); 551 cfg->fc_mp_len = nla_len(attr); 552 break; 553 case RTA_FLOW: 554 cfg->fc_flow = nla_get_u32(attr); 555 break; 556 case RTA_TABLE: 557 cfg->fc_table = nla_get_u32(attr); 558 break; 559 } 560 } 561 562 return 0; 563errout: 564 return err; 565} 566 567static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 568{ 569 struct net *net = skb->sk->sk_net; 570 struct fib_config cfg; 571 struct fib_table *tb; 572 int err; 573 574 err = rtm_to_fib_config(net, skb, nlh, &cfg); 575 if (err < 0) 576 goto errout; 577 578 tb = fib_get_table(net, cfg.fc_table); 579 if (tb == NULL) { 580 err = -ESRCH; 581 goto errout; 582 } 583 584 err = tb->tb_delete(tb, &cfg); 585errout: 586 return err; 587} 588 589static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 590{ 591 struct net *net = skb->sk->sk_net; 592 struct fib_config cfg; 593 struct fib_table *tb; 594 int err; 595 596 err = rtm_to_fib_config(net, skb, nlh, &cfg); 597 if (err < 0) 598 goto errout; 599 600 tb = fib_new_table(net, cfg.fc_table); 601 if (tb == NULL) { 602 err = -ENOBUFS; 603 goto errout; 604 } 605 606 err = tb->tb_insert(tb, &cfg); 607errout: 608 return err; 609} 610 611static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 612{ 613 struct net *net = skb->sk->sk_net; 614 unsigned int h, s_h; 615 unsigned int e = 0, s_e; 616 struct fib_table *tb; 617 struct hlist_node *node; 618 struct hlist_head *head; 619 int dumped = 0; 620 621 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 622 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) 623 return ip_rt_dump(skb, cb); 624 625 s_h = cb->args[0]; 626 s_e = cb->args[1]; 627 628 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 629 e = 0; 630 head = &net->ipv4.fib_table_hash[h]; 631 hlist_for_each_entry(tb, node, head, tb_hlist) { 632 if (e < s_e) 633 goto next; 634 if (dumped) 635 memset(&cb->args[2], 0, sizeof(cb->args) - 636 2 * sizeof(cb->args[0])); 637 if (tb->tb_dump(tb, skb, cb) < 0) 638 goto out; 639 dumped = 1; 640next: 641 e++; 642 } 643 } 644out: 645 cb->args[1] = e; 646 cb->args[0] = h; 647 648 return skb->len; 649} 650 651/* Prepare and feed intra-kernel routing request. 652 Really, it should be netlink message, but :-( netlink 653 can be not configured, so that we feed it directly 654 to fib engine. It is legal, because all events occur 655 only when netlink is already locked. 656 */ 657 658static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 659{ 660 struct net *net = ifa->ifa_dev->dev->nd_net; 661 struct fib_table *tb; 662 struct fib_config cfg = { 663 .fc_protocol = RTPROT_KERNEL, 664 .fc_type = type, 665 .fc_dst = dst, 666 .fc_dst_len = dst_len, 667 .fc_prefsrc = ifa->ifa_local, 668 .fc_oif = ifa->ifa_dev->dev->ifindex, 669 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 670 .fc_nlinfo = { 671 .nl_net = net, 672 }, 673 }; 674 675 if (type == RTN_UNICAST) 676 tb = fib_new_table(net, RT_TABLE_MAIN); 677 else 678 tb = fib_new_table(net, RT_TABLE_LOCAL); 679 680 if (tb == NULL) 681 return; 682 683 cfg.fc_table = tb->tb_id; 684 685 if (type != RTN_LOCAL) 686 cfg.fc_scope = RT_SCOPE_LINK; 687 else 688 cfg.fc_scope = RT_SCOPE_HOST; 689 690 if (cmd == RTM_NEWROUTE) 691 tb->tb_insert(tb, &cfg); 692 else 693 tb->tb_delete(tb, &cfg); 694} 695 696void fib_add_ifaddr(struct in_ifaddr *ifa) 697{ 698 struct in_device *in_dev = ifa->ifa_dev; 699 struct net_device *dev = in_dev->dev; 700 struct in_ifaddr *prim = ifa; 701 __be32 mask = ifa->ifa_mask; 702 __be32 addr = ifa->ifa_local; 703 __be32 prefix = ifa->ifa_address&mask; 704 705 if (ifa->ifa_flags&IFA_F_SECONDARY) { 706 prim = inet_ifa_byprefix(in_dev, prefix, mask); 707 if (prim == NULL) { 708 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 709 return; 710 } 711 } 712 713 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 714 715 if (!(dev->flags&IFF_UP)) 716 return; 717 718 /* Add broadcast address, if it is explicitly assigned. */ 719 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 720 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 721 722 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 723 (prefix != addr || ifa->ifa_prefixlen < 32)) { 724 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 725 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 726 727 /* Add network specific broadcasts, when it takes a sense */ 728 if (ifa->ifa_prefixlen < 31) { 729 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 730 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 731 } 732 } 733} 734 735static void fib_del_ifaddr(struct in_ifaddr *ifa) 736{ 737 struct in_device *in_dev = ifa->ifa_dev; 738 struct net_device *dev = in_dev->dev; 739 struct in_ifaddr *ifa1; 740 struct in_ifaddr *prim = ifa; 741 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 742 __be32 any = ifa->ifa_address&ifa->ifa_mask; 743#define LOCAL_OK 1 744#define BRD_OK 2 745#define BRD0_OK 4 746#define BRD1_OK 8 747 unsigned ok = 0; 748 749 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 750 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 751 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 752 else { 753 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 754 if (prim == NULL) { 755 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 756 return; 757 } 758 } 759 760 /* Deletion is more complicated than add. 761 We should take care of not to delete too much :-) 762 763 Scan address list to be sure that addresses are really gone. 764 */ 765 766 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 767 if (ifa->ifa_local == ifa1->ifa_local) 768 ok |= LOCAL_OK; 769 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 770 ok |= BRD_OK; 771 if (brd == ifa1->ifa_broadcast) 772 ok |= BRD1_OK; 773 if (any == ifa1->ifa_broadcast) 774 ok |= BRD0_OK; 775 } 776 777 if (!(ok&BRD_OK)) 778 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 779 if (!(ok&BRD1_OK)) 780 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 781 if (!(ok&BRD0_OK)) 782 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 783 if (!(ok&LOCAL_OK)) { 784 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 785 786 /* Check, that this local address finally disappeared. */ 787 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) { 788 /* And the last, but not the least thing. 789 We must flush stray FIB entries. 790 791 First of all, we scan fib_info list searching 792 for stray nexthop entries, then ignite fib_flush. 793 */ 794 if (fib_sync_down(ifa->ifa_local, NULL, 0)) 795 fib_flush(dev->nd_net); 796 } 797 } 798#undef LOCAL_OK 799#undef BRD_OK 800#undef BRD0_OK 801#undef BRD1_OK 802} 803 804static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 805{ 806 807 struct fib_result res; 808 struct flowi fl = { .mark = frn->fl_mark, 809 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 810 .tos = frn->fl_tos, 811 .scope = frn->fl_scope } } }; 812 813#ifdef CONFIG_IP_MULTIPLE_TABLES 814 res.r = NULL; 815#endif 816 817 frn->err = -ENOENT; 818 if (tb) { 819 local_bh_disable(); 820 821 frn->tb_id = tb->tb_id; 822 frn->err = tb->tb_lookup(tb, &fl, &res); 823 824 if (!frn->err) { 825 frn->prefixlen = res.prefixlen; 826 frn->nh_sel = res.nh_sel; 827 frn->type = res.type; 828 frn->scope = res.scope; 829 fib_res_put(&res); 830 } 831 local_bh_enable(); 832 } 833} 834 835static void nl_fib_input(struct sk_buff *skb) 836{ 837 struct net *net; 838 struct fib_result_nl *frn; 839 struct nlmsghdr *nlh; 840 struct fib_table *tb; 841 u32 pid; 842 843 net = skb->sk->sk_net; 844 nlh = nlmsg_hdr(skb); 845 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 846 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 847 return; 848 849 skb = skb_clone(skb, GFP_KERNEL); 850 if (skb == NULL) 851 return; 852 nlh = nlmsg_hdr(skb); 853 854 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 855 tb = fib_get_table(net, frn->tb_id_in); 856 857 nl_fib_lookup(frn, tb); 858 859 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 860 NETLINK_CB(skb).pid = 0; /* from kernel */ 861 NETLINK_CB(skb).dst_group = 0; /* unicast */ 862 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 863} 864 865static int nl_fib_lookup_init(struct net *net) 866{ 867 struct sock *sk; 868 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, 869 nl_fib_input, NULL, THIS_MODULE); 870 if (sk == NULL) 871 return -EAFNOSUPPORT; 872 /* Don't hold an extra reference on the namespace */ 873 put_net(sk->sk_net); 874 net->ipv4.fibnl = sk; 875 return 0; 876} 877 878static void nl_fib_lookup_exit(struct net *net) 879{ 880 /* At the last minute lie and say this is a socket for the 881 * initial network namespace. So the socket will be safe to free. 882 */ 883 net->ipv4.fibnl->sk_net = get_net(&init_net); 884 sock_release(net->ipv4.fibnl->sk_socket); 885} 886 887static void fib_disable_ip(struct net_device *dev, int force) 888{ 889 if (fib_sync_down(0, dev, force)) 890 fib_flush(dev->nd_net); 891 rt_cache_flush(0); 892 arp_ifdown(dev); 893} 894 895static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 896{ 897 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; 898 899 switch (event) { 900 case NETDEV_UP: 901 fib_add_ifaddr(ifa); 902#ifdef CONFIG_IP_ROUTE_MULTIPATH 903 fib_sync_up(ifa->ifa_dev->dev); 904#endif 905 rt_cache_flush(-1); 906 break; 907 case NETDEV_DOWN: 908 fib_del_ifaddr(ifa); 909 if (ifa->ifa_dev->ifa_list == NULL) { 910 /* Last address was deleted from this interface. 911 Disable IP. 912 */ 913 fib_disable_ip(ifa->ifa_dev->dev, 1); 914 } else { 915 rt_cache_flush(-1); 916 } 917 break; 918 } 919 return NOTIFY_DONE; 920} 921 922static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 923{ 924 struct net_device *dev = ptr; 925 struct in_device *in_dev = __in_dev_get_rtnl(dev); 926 927 if (event == NETDEV_UNREGISTER) { 928 fib_disable_ip(dev, 2); 929 return NOTIFY_DONE; 930 } 931 932 if (!in_dev) 933 return NOTIFY_DONE; 934 935 switch (event) { 936 case NETDEV_UP: 937 for_ifa(in_dev) { 938 fib_add_ifaddr(ifa); 939 } endfor_ifa(in_dev); 940#ifdef CONFIG_IP_ROUTE_MULTIPATH 941 fib_sync_up(dev); 942#endif 943 rt_cache_flush(-1); 944 break; 945 case NETDEV_DOWN: 946 fib_disable_ip(dev, 0); 947 break; 948 case NETDEV_CHANGEMTU: 949 case NETDEV_CHANGE: 950 rt_cache_flush(0); 951 break; 952 } 953 return NOTIFY_DONE; 954} 955 956static struct notifier_block fib_inetaddr_notifier = { 957 .notifier_call =fib_inetaddr_event, 958}; 959 960static struct notifier_block fib_netdev_notifier = { 961 .notifier_call =fib_netdev_event, 962}; 963 964static int __net_init ip_fib_net_init(struct net *net) 965{ 966 unsigned int i; 967 968 net->ipv4.fib_table_hash = kzalloc( 969 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); 970 if (net->ipv4.fib_table_hash == NULL) 971 return -ENOMEM; 972 973 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 974 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); 975 976 return fib4_rules_init(net); 977} 978 979static void __net_exit ip_fib_net_exit(struct net *net) 980{ 981 unsigned int i; 982 983#ifdef CONFIG_IP_MULTIPLE_TABLES 984 fib4_rules_exit(net); 985#endif 986 987 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 988 struct fib_table *tb; 989 struct hlist_head *head; 990 struct hlist_node *node, *tmp; 991 992 head = &net->ipv4.fib_table_hash[i]; 993 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 994 hlist_del(node); 995 tb->tb_flush(tb); 996 kfree(tb); 997 } 998 } 999 kfree(net->ipv4.fib_table_hash); 1000} 1001 1002static int __net_init fib_net_init(struct net *net) 1003{ 1004 int error; 1005 1006 error = ip_fib_net_init(net); 1007 if (error < 0) 1008 goto out; 1009 error = nl_fib_lookup_init(net); 1010 if (error < 0) 1011 goto out_nlfl; 1012 error = fib_proc_init(net); 1013 if (error < 0) 1014 goto out_proc; 1015out: 1016 return error; 1017 1018out_proc: 1019 nl_fib_lookup_exit(net); 1020out_nlfl: 1021 ip_fib_net_exit(net); 1022 goto out; 1023} 1024 1025static void __net_exit fib_net_exit(struct net *net) 1026{ 1027 fib_proc_exit(net); 1028 nl_fib_lookup_exit(net); 1029 ip_fib_net_exit(net); 1030} 1031 1032static struct pernet_operations fib_net_ops = { 1033 .init = fib_net_init, 1034 .exit = fib_net_exit, 1035}; 1036 1037void __init ip_fib_init(void) 1038{ 1039 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1040 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1041 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1042 1043 register_pernet_subsys(&fib_net_ops); 1044 register_netdevice_notifier(&fib_netdev_notifier); 1045 register_inetaddr_notifier(&fib_inetaddr_notifier); 1046 1047 fib_hash_init(); 1048} 1049 1050EXPORT_SYMBOL(inet_addr_type); 1051EXPORT_SYMBOL(inet_dev_addr_type); 1052EXPORT_SYMBOL(ip_dev_find); 1053