fib_frontend.c revision a6db9010922f2c02db2bbea8c17c50e451be38d9
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $ 9 * 10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 */ 17 18#include <linux/module.h> 19#include <asm/uaccess.h> 20#include <asm/system.h> 21#include <linux/bitops.h> 22#include <linux/capability.h> 23#include <linux/types.h> 24#include <linux/kernel.h> 25#include <linux/mm.h> 26#include <linux/string.h> 27#include <linux/socket.h> 28#include <linux/sockios.h> 29#include <linux/errno.h> 30#include <linux/in.h> 31#include <linux/inet.h> 32#include <linux/inetdevice.h> 33#include <linux/netdevice.h> 34#include <linux/if_addr.h> 35#include <linux/if_arp.h> 36#include <linux/skbuff.h> 37#include <linux/init.h> 38#include <linux/list.h> 39 40#include <net/ip.h> 41#include <net/protocol.h> 42#include <net/route.h> 43#include <net/tcp.h> 44#include <net/sock.h> 45#include <net/icmp.h> 46#include <net/arp.h> 47#include <net/ip_fib.h> 48#include <net/rtnetlink.h> 49 50#ifndef CONFIG_IP_MULTIPLE_TABLES 51 52static int __net_init fib4_rules_init(struct net *net) 53{ 54 struct fib_table *local_table, *main_table; 55 56 local_table = fib_hash_init(RT_TABLE_LOCAL); 57 if (local_table == NULL) 58 return -ENOMEM; 59 60 main_table = fib_hash_init(RT_TABLE_MAIN); 61 if (main_table == NULL) 62 goto fail; 63 64 hlist_add_head_rcu(&local_table->tb_hlist, 65 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); 66 hlist_add_head_rcu(&main_table->tb_hlist, 67 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); 68 return 0; 69 70fail: 71 kfree(local_table); 72 return -ENOMEM; 73} 74#else 75 76struct fib_table *fib_new_table(struct net *net, u32 id) 77{ 78 struct fib_table *tb; 79 unsigned int h; 80 81 if (id == 0) 82 id = RT_TABLE_MAIN; 83 tb = fib_get_table(net, id); 84 if (tb) 85 return tb; 86 tb = fib_hash_init(id); 87 if (!tb) 88 return NULL; 89 h = id & (FIB_TABLE_HASHSZ - 1); 90 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); 91 return tb; 92} 93 94struct fib_table *fib_get_table(struct net *net, u32 id) 95{ 96 struct fib_table *tb; 97 struct hlist_node *node; 98 struct hlist_head *head; 99 unsigned int h; 100 101 if (id == 0) 102 id = RT_TABLE_MAIN; 103 h = id & (FIB_TABLE_HASHSZ - 1); 104 105 rcu_read_lock(); 106 head = &net->ipv4.fib_table_hash[h]; 107 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 108 if (tb->tb_id == id) { 109 rcu_read_unlock(); 110 return tb; 111 } 112 } 113 rcu_read_unlock(); 114 return NULL; 115} 116#endif /* CONFIG_IP_MULTIPLE_TABLES */ 117 118static void fib_flush(struct net *net) 119{ 120 int flushed = 0; 121 struct fib_table *tb; 122 struct hlist_node *node; 123 struct hlist_head *head; 124 unsigned int h; 125 126 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 127 head = &net->ipv4.fib_table_hash[h]; 128 hlist_for_each_entry(tb, node, head, tb_hlist) 129 flushed += tb->tb_flush(tb); 130 } 131 132 if (flushed) 133 rt_cache_flush(-1); 134} 135 136/* 137 * Find the first device with a given source address. 138 */ 139 140struct net_device * ip_dev_find(__be32 addr) 141{ 142 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 143 struct fib_result res; 144 struct net_device *dev = NULL; 145 struct fib_table *local_table; 146 147#ifdef CONFIG_IP_MULTIPLE_TABLES 148 res.r = NULL; 149#endif 150 151 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL); 152 if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) 153 return NULL; 154 if (res.type != RTN_LOCAL) 155 goto out; 156 dev = FIB_RES_DEV(res); 157 158 if (dev) 159 dev_hold(dev); 160out: 161 fib_res_put(&res); 162 return dev; 163} 164 165/* 166 * Find address type as if only "dev" was present in the system. If 167 * on_dev is NULL then all interfaces are taken into consideration. 168 */ 169static inline unsigned __inet_dev_addr_type(struct net *net, 170 const struct net_device *dev, 171 __be32 addr) 172{ 173 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 174 struct fib_result res; 175 unsigned ret = RTN_BROADCAST; 176 struct fib_table *local_table; 177 178 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr)) 179 return RTN_BROADCAST; 180 if (ipv4_is_multicast(addr)) 181 return RTN_MULTICAST; 182 183#ifdef CONFIG_IP_MULTIPLE_TABLES 184 res.r = NULL; 185#endif 186 187 local_table = fib_get_table(net, RT_TABLE_LOCAL); 188 if (local_table) { 189 ret = RTN_UNICAST; 190 if (!local_table->tb_lookup(local_table, &fl, &res)) { 191 if (!dev || dev == res.fi->fib_dev) 192 ret = res.type; 193 fib_res_put(&res); 194 } 195 } 196 return ret; 197} 198 199unsigned int inet_addr_type(struct net *net, __be32 addr) 200{ 201 return __inet_dev_addr_type(net, NULL, addr); 202} 203 204unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 205 __be32 addr) 206{ 207 return __inet_dev_addr_type(net, dev, addr); 208} 209 210/* Given (packet source, input interface) and optional (dst, oif, tos): 211 - (main) check, that source is valid i.e. not broadcast or our local 212 address. 213 - figure out what "logical" interface this packet arrived 214 and calculate "specific destination" address. 215 - check, that packet arrived from expected physical interface. 216 */ 217 218int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 219 struct net_device *dev, __be32 *spec_dst, u32 *itag) 220{ 221 struct in_device *in_dev; 222 struct flowi fl = { .nl_u = { .ip4_u = 223 { .daddr = src, 224 .saddr = dst, 225 .tos = tos } }, 226 .iif = oif }; 227 struct fib_result res; 228 int no_addr, rpf; 229 int ret; 230 231 no_addr = rpf = 0; 232 rcu_read_lock(); 233 in_dev = __in_dev_get_rcu(dev); 234 if (in_dev) { 235 no_addr = in_dev->ifa_list == NULL; 236 rpf = IN_DEV_RPFILTER(in_dev); 237 } 238 rcu_read_unlock(); 239 240 if (in_dev == NULL) 241 goto e_inval; 242 243 if (fib_lookup(&fl, &res)) 244 goto last_resort; 245 if (res.type != RTN_UNICAST) 246 goto e_inval_res; 247 *spec_dst = FIB_RES_PREFSRC(res); 248 fib_combine_itag(itag, &res); 249#ifdef CONFIG_IP_ROUTE_MULTIPATH 250 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) 251#else 252 if (FIB_RES_DEV(res) == dev) 253#endif 254 { 255 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 256 fib_res_put(&res); 257 return ret; 258 } 259 fib_res_put(&res); 260 if (no_addr) 261 goto last_resort; 262 if (rpf) 263 goto e_inval; 264 fl.oif = dev->ifindex; 265 266 ret = 0; 267 if (fib_lookup(&fl, &res) == 0) { 268 if (res.type == RTN_UNICAST) { 269 *spec_dst = FIB_RES_PREFSRC(res); 270 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 271 } 272 fib_res_put(&res); 273 } 274 return ret; 275 276last_resort: 277 if (rpf) 278 goto e_inval; 279 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 280 *itag = 0; 281 return 0; 282 283e_inval_res: 284 fib_res_put(&res); 285e_inval: 286 return -EINVAL; 287} 288 289static inline __be32 sk_extract_addr(struct sockaddr *addr) 290{ 291 return ((struct sockaddr_in *) addr)->sin_addr.s_addr; 292} 293 294static int put_rtax(struct nlattr *mx, int len, int type, u32 value) 295{ 296 struct nlattr *nla; 297 298 nla = (struct nlattr *) ((char *) mx + len); 299 nla->nla_type = type; 300 nla->nla_len = nla_attr_size(4); 301 *(u32 *) nla_data(nla) = value; 302 303 return len + nla_total_size(4); 304} 305 306static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, 307 struct fib_config *cfg) 308{ 309 __be32 addr; 310 int plen; 311 312 memset(cfg, 0, sizeof(*cfg)); 313 cfg->fc_nlinfo.nl_net = net; 314 315 if (rt->rt_dst.sa_family != AF_INET) 316 return -EAFNOSUPPORT; 317 318 /* 319 * Check mask for validity: 320 * a) it must be contiguous. 321 * b) destination must have all host bits clear. 322 * c) if application forgot to set correct family (AF_INET), 323 * reject request unless it is absolutely clear i.e. 324 * both family and mask are zero. 325 */ 326 plen = 32; 327 addr = sk_extract_addr(&rt->rt_dst); 328 if (!(rt->rt_flags & RTF_HOST)) { 329 __be32 mask = sk_extract_addr(&rt->rt_genmask); 330 331 if (rt->rt_genmask.sa_family != AF_INET) { 332 if (mask || rt->rt_genmask.sa_family) 333 return -EAFNOSUPPORT; 334 } 335 336 if (bad_mask(mask, addr)) 337 return -EINVAL; 338 339 plen = inet_mask_len(mask); 340 } 341 342 cfg->fc_dst_len = plen; 343 cfg->fc_dst = addr; 344 345 if (cmd != SIOCDELRT) { 346 cfg->fc_nlflags = NLM_F_CREATE; 347 cfg->fc_protocol = RTPROT_BOOT; 348 } 349 350 if (rt->rt_metric) 351 cfg->fc_priority = rt->rt_metric - 1; 352 353 if (rt->rt_flags & RTF_REJECT) { 354 cfg->fc_scope = RT_SCOPE_HOST; 355 cfg->fc_type = RTN_UNREACHABLE; 356 return 0; 357 } 358 359 cfg->fc_scope = RT_SCOPE_NOWHERE; 360 cfg->fc_type = RTN_UNICAST; 361 362 if (rt->rt_dev) { 363 char *colon; 364 struct net_device *dev; 365 char devname[IFNAMSIZ]; 366 367 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) 368 return -EFAULT; 369 370 devname[IFNAMSIZ-1] = 0; 371 colon = strchr(devname, ':'); 372 if (colon) 373 *colon = 0; 374 dev = __dev_get_by_name(net, devname); 375 if (!dev) 376 return -ENODEV; 377 cfg->fc_oif = dev->ifindex; 378 if (colon) { 379 struct in_ifaddr *ifa; 380 struct in_device *in_dev = __in_dev_get_rtnl(dev); 381 if (!in_dev) 382 return -ENODEV; 383 *colon = ':'; 384 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 385 if (strcmp(ifa->ifa_label, devname) == 0) 386 break; 387 if (ifa == NULL) 388 return -ENODEV; 389 cfg->fc_prefsrc = ifa->ifa_local; 390 } 391 } 392 393 addr = sk_extract_addr(&rt->rt_gateway); 394 if (rt->rt_gateway.sa_family == AF_INET && addr) { 395 cfg->fc_gw = addr; 396 if (rt->rt_flags & RTF_GATEWAY && 397 inet_addr_type(net, addr) == RTN_UNICAST) 398 cfg->fc_scope = RT_SCOPE_UNIVERSE; 399 } 400 401 if (cmd == SIOCDELRT) 402 return 0; 403 404 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) 405 return -EINVAL; 406 407 if (cfg->fc_scope == RT_SCOPE_NOWHERE) 408 cfg->fc_scope = RT_SCOPE_LINK; 409 410 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { 411 struct nlattr *mx; 412 int len = 0; 413 414 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 415 if (mx == NULL) 416 return -ENOMEM; 417 418 if (rt->rt_flags & RTF_MTU) 419 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); 420 421 if (rt->rt_flags & RTF_WINDOW) 422 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); 423 424 if (rt->rt_flags & RTF_IRTT) 425 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); 426 427 cfg->fc_mx = mx; 428 cfg->fc_mx_len = len; 429 } 430 431 return 0; 432} 433 434/* 435 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 436 */ 437 438int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 439{ 440 struct fib_config cfg; 441 struct rtentry rt; 442 int err; 443 444 switch (cmd) { 445 case SIOCADDRT: /* Add a route */ 446 case SIOCDELRT: /* Delete a route */ 447 if (!capable(CAP_NET_ADMIN)) 448 return -EPERM; 449 450 if (copy_from_user(&rt, arg, sizeof(rt))) 451 return -EFAULT; 452 453 rtnl_lock(); 454 err = rtentry_to_fib_config(net, cmd, &rt, &cfg); 455 if (err == 0) { 456 struct fib_table *tb; 457 458 if (cmd == SIOCDELRT) { 459 tb = fib_get_table(net, cfg.fc_table); 460 if (tb) 461 err = tb->tb_delete(tb, &cfg); 462 else 463 err = -ESRCH; 464 } else { 465 tb = fib_new_table(net, cfg.fc_table); 466 if (tb) 467 err = tb->tb_insert(tb, &cfg); 468 else 469 err = -ENOBUFS; 470 } 471 472 /* allocated by rtentry_to_fib_config() */ 473 kfree(cfg.fc_mx); 474 } 475 rtnl_unlock(); 476 return err; 477 } 478 return -EINVAL; 479} 480 481const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 482 [RTA_DST] = { .type = NLA_U32 }, 483 [RTA_SRC] = { .type = NLA_U32 }, 484 [RTA_IIF] = { .type = NLA_U32 }, 485 [RTA_OIF] = { .type = NLA_U32 }, 486 [RTA_GATEWAY] = { .type = NLA_U32 }, 487 [RTA_PRIORITY] = { .type = NLA_U32 }, 488 [RTA_PREFSRC] = { .type = NLA_U32 }, 489 [RTA_METRICS] = { .type = NLA_NESTED }, 490 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 491 [RTA_PROTOINFO] = { .type = NLA_U32 }, 492 [RTA_FLOW] = { .type = NLA_U32 }, 493}; 494 495static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 496 struct nlmsghdr *nlh, struct fib_config *cfg) 497{ 498 struct nlattr *attr; 499 int err, remaining; 500 struct rtmsg *rtm; 501 502 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); 503 if (err < 0) 504 goto errout; 505 506 memset(cfg, 0, sizeof(*cfg)); 507 508 rtm = nlmsg_data(nlh); 509 cfg->fc_dst_len = rtm->rtm_dst_len; 510 cfg->fc_tos = rtm->rtm_tos; 511 cfg->fc_table = rtm->rtm_table; 512 cfg->fc_protocol = rtm->rtm_protocol; 513 cfg->fc_scope = rtm->rtm_scope; 514 cfg->fc_type = rtm->rtm_type; 515 cfg->fc_flags = rtm->rtm_flags; 516 cfg->fc_nlflags = nlh->nlmsg_flags; 517 518 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 519 cfg->fc_nlinfo.nlh = nlh; 520 cfg->fc_nlinfo.nl_net = net; 521 522 if (cfg->fc_type > RTN_MAX) { 523 err = -EINVAL; 524 goto errout; 525 } 526 527 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { 528 switch (nla_type(attr)) { 529 case RTA_DST: 530 cfg->fc_dst = nla_get_be32(attr); 531 break; 532 case RTA_OIF: 533 cfg->fc_oif = nla_get_u32(attr); 534 break; 535 case RTA_GATEWAY: 536 cfg->fc_gw = nla_get_be32(attr); 537 break; 538 case RTA_PRIORITY: 539 cfg->fc_priority = nla_get_u32(attr); 540 break; 541 case RTA_PREFSRC: 542 cfg->fc_prefsrc = nla_get_be32(attr); 543 break; 544 case RTA_METRICS: 545 cfg->fc_mx = nla_data(attr); 546 cfg->fc_mx_len = nla_len(attr); 547 break; 548 case RTA_MULTIPATH: 549 cfg->fc_mp = nla_data(attr); 550 cfg->fc_mp_len = nla_len(attr); 551 break; 552 case RTA_FLOW: 553 cfg->fc_flow = nla_get_u32(attr); 554 break; 555 case RTA_TABLE: 556 cfg->fc_table = nla_get_u32(attr); 557 break; 558 } 559 } 560 561 return 0; 562errout: 563 return err; 564} 565 566static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 567{ 568 struct net *net = skb->sk->sk_net; 569 struct fib_config cfg; 570 struct fib_table *tb; 571 int err; 572 573 err = rtm_to_fib_config(net, skb, nlh, &cfg); 574 if (err < 0) 575 goto errout; 576 577 tb = fib_get_table(net, cfg.fc_table); 578 if (tb == NULL) { 579 err = -ESRCH; 580 goto errout; 581 } 582 583 err = tb->tb_delete(tb, &cfg); 584errout: 585 return err; 586} 587 588static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 589{ 590 struct net *net = skb->sk->sk_net; 591 struct fib_config cfg; 592 struct fib_table *tb; 593 int err; 594 595 err = rtm_to_fib_config(net, skb, nlh, &cfg); 596 if (err < 0) 597 goto errout; 598 599 tb = fib_new_table(net, cfg.fc_table); 600 if (tb == NULL) { 601 err = -ENOBUFS; 602 goto errout; 603 } 604 605 err = tb->tb_insert(tb, &cfg); 606errout: 607 return err; 608} 609 610static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 611{ 612 struct net *net = skb->sk->sk_net; 613 unsigned int h, s_h; 614 unsigned int e = 0, s_e; 615 struct fib_table *tb; 616 struct hlist_node *node; 617 struct hlist_head *head; 618 int dumped = 0; 619 620 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 621 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) 622 return ip_rt_dump(skb, cb); 623 624 s_h = cb->args[0]; 625 s_e = cb->args[1]; 626 627 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 628 e = 0; 629 head = &net->ipv4.fib_table_hash[h]; 630 hlist_for_each_entry(tb, node, head, tb_hlist) { 631 if (e < s_e) 632 goto next; 633 if (dumped) 634 memset(&cb->args[2], 0, sizeof(cb->args) - 635 2 * sizeof(cb->args[0])); 636 if (tb->tb_dump(tb, skb, cb) < 0) 637 goto out; 638 dumped = 1; 639next: 640 e++; 641 } 642 } 643out: 644 cb->args[1] = e; 645 cb->args[0] = h; 646 647 return skb->len; 648} 649 650/* Prepare and feed intra-kernel routing request. 651 Really, it should be netlink message, but :-( netlink 652 can be not configured, so that we feed it directly 653 to fib engine. It is legal, because all events occur 654 only when netlink is already locked. 655 */ 656 657static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 658{ 659 struct net *net = ifa->ifa_dev->dev->nd_net; 660 struct fib_table *tb; 661 struct fib_config cfg = { 662 .fc_protocol = RTPROT_KERNEL, 663 .fc_type = type, 664 .fc_dst = dst, 665 .fc_dst_len = dst_len, 666 .fc_prefsrc = ifa->ifa_local, 667 .fc_oif = ifa->ifa_dev->dev->ifindex, 668 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 669 .fc_nlinfo = { 670 .nl_net = net, 671 }, 672 }; 673 674 if (type == RTN_UNICAST) 675 tb = fib_new_table(net, RT_TABLE_MAIN); 676 else 677 tb = fib_new_table(net, RT_TABLE_LOCAL); 678 679 if (tb == NULL) 680 return; 681 682 cfg.fc_table = tb->tb_id; 683 684 if (type != RTN_LOCAL) 685 cfg.fc_scope = RT_SCOPE_LINK; 686 else 687 cfg.fc_scope = RT_SCOPE_HOST; 688 689 if (cmd == RTM_NEWROUTE) 690 tb->tb_insert(tb, &cfg); 691 else 692 tb->tb_delete(tb, &cfg); 693} 694 695void fib_add_ifaddr(struct in_ifaddr *ifa) 696{ 697 struct in_device *in_dev = ifa->ifa_dev; 698 struct net_device *dev = in_dev->dev; 699 struct in_ifaddr *prim = ifa; 700 __be32 mask = ifa->ifa_mask; 701 __be32 addr = ifa->ifa_local; 702 __be32 prefix = ifa->ifa_address&mask; 703 704 if (ifa->ifa_flags&IFA_F_SECONDARY) { 705 prim = inet_ifa_byprefix(in_dev, prefix, mask); 706 if (prim == NULL) { 707 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 708 return; 709 } 710 } 711 712 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 713 714 if (!(dev->flags&IFF_UP)) 715 return; 716 717 /* Add broadcast address, if it is explicitly assigned. */ 718 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 719 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 720 721 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 722 (prefix != addr || ifa->ifa_prefixlen < 32)) { 723 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 724 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 725 726 /* Add network specific broadcasts, when it takes a sense */ 727 if (ifa->ifa_prefixlen < 31) { 728 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 729 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 730 } 731 } 732} 733 734static void fib_del_ifaddr(struct in_ifaddr *ifa) 735{ 736 struct in_device *in_dev = ifa->ifa_dev; 737 struct net_device *dev = in_dev->dev; 738 struct in_ifaddr *ifa1; 739 struct in_ifaddr *prim = ifa; 740 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 741 __be32 any = ifa->ifa_address&ifa->ifa_mask; 742#define LOCAL_OK 1 743#define BRD_OK 2 744#define BRD0_OK 4 745#define BRD1_OK 8 746 unsigned ok = 0; 747 748 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 749 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 750 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 751 else { 752 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 753 if (prim == NULL) { 754 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 755 return; 756 } 757 } 758 759 /* Deletion is more complicated than add. 760 We should take care of not to delete too much :-) 761 762 Scan address list to be sure that addresses are really gone. 763 */ 764 765 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 766 if (ifa->ifa_local == ifa1->ifa_local) 767 ok |= LOCAL_OK; 768 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 769 ok |= BRD_OK; 770 if (brd == ifa1->ifa_broadcast) 771 ok |= BRD1_OK; 772 if (any == ifa1->ifa_broadcast) 773 ok |= BRD0_OK; 774 } 775 776 if (!(ok&BRD_OK)) 777 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 778 if (!(ok&BRD1_OK)) 779 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 780 if (!(ok&BRD0_OK)) 781 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 782 if (!(ok&LOCAL_OK)) { 783 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 784 785 /* Check, that this local address finally disappeared. */ 786 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) { 787 /* And the last, but not the least thing. 788 We must flush stray FIB entries. 789 790 First of all, we scan fib_info list searching 791 for stray nexthop entries, then ignite fib_flush. 792 */ 793 if (fib_sync_down(ifa->ifa_local, NULL, 0)) 794 fib_flush(dev->nd_net); 795 } 796 } 797#undef LOCAL_OK 798#undef BRD_OK 799#undef BRD0_OK 800#undef BRD1_OK 801} 802 803static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 804{ 805 806 struct fib_result res; 807 struct flowi fl = { .mark = frn->fl_mark, 808 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 809 .tos = frn->fl_tos, 810 .scope = frn->fl_scope } } }; 811 812#ifdef CONFIG_IP_MULTIPLE_TABLES 813 res.r = NULL; 814#endif 815 816 frn->err = -ENOENT; 817 if (tb) { 818 local_bh_disable(); 819 820 frn->tb_id = tb->tb_id; 821 frn->err = tb->tb_lookup(tb, &fl, &res); 822 823 if (!frn->err) { 824 frn->prefixlen = res.prefixlen; 825 frn->nh_sel = res.nh_sel; 826 frn->type = res.type; 827 frn->scope = res.scope; 828 fib_res_put(&res); 829 } 830 local_bh_enable(); 831 } 832} 833 834static void nl_fib_input(struct sk_buff *skb) 835{ 836 struct net *net; 837 struct fib_result_nl *frn; 838 struct nlmsghdr *nlh; 839 struct fib_table *tb; 840 u32 pid; 841 842 net = skb->sk->sk_net; 843 nlh = nlmsg_hdr(skb); 844 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 845 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 846 return; 847 848 skb = skb_clone(skb, GFP_KERNEL); 849 if (skb == NULL) 850 return; 851 nlh = nlmsg_hdr(skb); 852 853 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 854 tb = fib_get_table(net, frn->tb_id_in); 855 856 nl_fib_lookup(frn, tb); 857 858 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 859 NETLINK_CB(skb).pid = 0; /* from kernel */ 860 NETLINK_CB(skb).dst_group = 0; /* unicast */ 861 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 862} 863 864static int nl_fib_lookup_init(struct net *net) 865{ 866 struct sock *sk; 867 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, 868 nl_fib_input, NULL, THIS_MODULE); 869 if (sk == NULL) 870 return -EAFNOSUPPORT; 871 /* Don't hold an extra reference on the namespace */ 872 put_net(sk->sk_net); 873 net->ipv4.fibnl = sk; 874 return 0; 875} 876 877static void nl_fib_lookup_exit(struct net *net) 878{ 879 /* At the last minute lie and say this is a socket for the 880 * initial network namespace. So the socket will be safe to free. 881 */ 882 net->ipv4.fibnl->sk_net = get_net(&init_net); 883 sock_put(net->ipv4.fibnl); 884} 885 886static void fib_disable_ip(struct net_device *dev, int force) 887{ 888 if (fib_sync_down(0, dev, force)) 889 fib_flush(dev->nd_net); 890 rt_cache_flush(0); 891 arp_ifdown(dev); 892} 893 894static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 895{ 896 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; 897 898 switch (event) { 899 case NETDEV_UP: 900 fib_add_ifaddr(ifa); 901#ifdef CONFIG_IP_ROUTE_MULTIPATH 902 fib_sync_up(ifa->ifa_dev->dev); 903#endif 904 rt_cache_flush(-1); 905 break; 906 case NETDEV_DOWN: 907 fib_del_ifaddr(ifa); 908 if (ifa->ifa_dev->ifa_list == NULL) { 909 /* Last address was deleted from this interface. 910 Disable IP. 911 */ 912 fib_disable_ip(ifa->ifa_dev->dev, 1); 913 } else { 914 rt_cache_flush(-1); 915 } 916 break; 917 } 918 return NOTIFY_DONE; 919} 920 921static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 922{ 923 struct net_device *dev = ptr; 924 struct in_device *in_dev = __in_dev_get_rtnl(dev); 925 926 if (event == NETDEV_UNREGISTER) { 927 fib_disable_ip(dev, 2); 928 return NOTIFY_DONE; 929 } 930 931 if (!in_dev) 932 return NOTIFY_DONE; 933 934 switch (event) { 935 case NETDEV_UP: 936 for_ifa(in_dev) { 937 fib_add_ifaddr(ifa); 938 } endfor_ifa(in_dev); 939#ifdef CONFIG_IP_ROUTE_MULTIPATH 940 fib_sync_up(dev); 941#endif 942 rt_cache_flush(-1); 943 break; 944 case NETDEV_DOWN: 945 fib_disable_ip(dev, 0); 946 break; 947 case NETDEV_CHANGEMTU: 948 case NETDEV_CHANGE: 949 rt_cache_flush(0); 950 break; 951 } 952 return NOTIFY_DONE; 953} 954 955static struct notifier_block fib_inetaddr_notifier = { 956 .notifier_call =fib_inetaddr_event, 957}; 958 959static struct notifier_block fib_netdev_notifier = { 960 .notifier_call =fib_netdev_event, 961}; 962 963static int __net_init ip_fib_net_init(struct net *net) 964{ 965 unsigned int i; 966 967 net->ipv4.fib_table_hash = kzalloc( 968 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); 969 if (net->ipv4.fib_table_hash == NULL) 970 return -ENOMEM; 971 972 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 973 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); 974 975 return fib4_rules_init(net); 976} 977 978static void __net_exit ip_fib_net_exit(struct net *net) 979{ 980 unsigned int i; 981 982#ifdef CONFIG_IP_MULTIPLE_TABLES 983 fib4_rules_exit(net); 984#endif 985 986 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 987 struct fib_table *tb; 988 struct hlist_head *head; 989 struct hlist_node *node, *tmp; 990 991 head = &net->ipv4.fib_table_hash[i]; 992 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 993 hlist_del(node); 994 tb->tb_flush(tb); 995 kfree(tb); 996 } 997 } 998 kfree(net->ipv4.fib_table_hash); 999} 1000 1001static int __net_init fib_net_init(struct net *net) 1002{ 1003 int error; 1004 1005 error = ip_fib_net_init(net); 1006 if (error < 0) 1007 goto out; 1008 error = nl_fib_lookup_init(net); 1009 if (error < 0) 1010 goto out_nlfl; 1011 error = fib_proc_init(net); 1012 if (error < 0) 1013 goto out_proc; 1014out: 1015 return error; 1016 1017out_proc: 1018 nl_fib_lookup_exit(net); 1019out_nlfl: 1020 ip_fib_net_exit(net); 1021 goto out; 1022} 1023 1024static void __net_exit fib_net_exit(struct net *net) 1025{ 1026 fib_proc_exit(net); 1027 nl_fib_lookup_exit(net); 1028 ip_fib_net_exit(net); 1029} 1030 1031static struct pernet_operations fib_net_ops = { 1032 .init = fib_net_init, 1033 .exit = fib_net_exit, 1034}; 1035 1036void __init ip_fib_init(void) 1037{ 1038 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1039 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1040 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1041 1042 register_pernet_subsys(&fib_net_ops); 1043 register_netdevice_notifier(&fib_netdev_notifier); 1044 register_inetaddr_notifier(&fib_inetaddr_notifier); 1045} 1046 1047EXPORT_SYMBOL(inet_addr_type); 1048EXPORT_SYMBOL(inet_dev_addr_type); 1049EXPORT_SYMBOL(ip_dev_find); 1050