/* fib_frontend.c revision dbb50165b512f6c9b7aae10af73ae5b6d811f4d0 */
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $ 9 * 10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 */ 17 18#include <linux/module.h> 19#include <asm/uaccess.h> 20#include <asm/system.h> 21#include <linux/bitops.h> 22#include <linux/capability.h> 23#include <linux/types.h> 24#include <linux/kernel.h> 25#include <linux/mm.h> 26#include <linux/string.h> 27#include <linux/socket.h> 28#include <linux/sockios.h> 29#include <linux/errno.h> 30#include <linux/in.h> 31#include <linux/inet.h> 32#include <linux/inetdevice.h> 33#include <linux/netdevice.h> 34#include <linux/if_addr.h> 35#include <linux/if_arp.h> 36#include <linux/skbuff.h> 37#include <linux/init.h> 38#include <linux/list.h> 39 40#include <net/ip.h> 41#include <net/protocol.h> 42#include <net/route.h> 43#include <net/tcp.h> 44#include <net/sock.h> 45#include <net/icmp.h> 46#include <net/arp.h> 47#include <net/ip_fib.h> 48#include <net/rtnetlink.h> 49 50#define FFprint(a...) 
printk(KERN_DEBUG a) 51 52static struct sock *fibnl; 53 54#ifndef CONFIG_IP_MULTIPLE_TABLES 55 56struct fib_table *ip_fib_local_table; 57struct fib_table *ip_fib_main_table; 58 59#define FIB_TABLE_HASHSZ 1 60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; 61 62static int __init fib4_rules_init(void) 63{ 64 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); 65 if (ip_fib_local_table == NULL) 66 return -ENOMEM; 67 68 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); 69 if (ip_fib_main_table == NULL) 70 goto fail; 71 72 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); 73 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); 74 return 0; 75 76fail: 77 kfree(ip_fib_local_table); 78 ip_fib_local_table = NULL; 79 return -ENOMEM; 80} 81#else 82 83#define FIB_TABLE_HASHSZ 256 84static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; 85 86struct fib_table *fib_new_table(u32 id) 87{ 88 struct fib_table *tb; 89 unsigned int h; 90 91 if (id == 0) 92 id = RT_TABLE_MAIN; 93 tb = fib_get_table(id); 94 if (tb) 95 return tb; 96 tb = fib_hash_init(id); 97 if (!tb) 98 return NULL; 99 h = id & (FIB_TABLE_HASHSZ - 1); 100 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); 101 return tb; 102} 103 104struct fib_table *fib_get_table(u32 id) 105{ 106 struct fib_table *tb; 107 struct hlist_node *node; 108 unsigned int h; 109 110 if (id == 0) 111 id = RT_TABLE_MAIN; 112 h = id & (FIB_TABLE_HASHSZ - 1); 113 rcu_read_lock(); 114 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { 115 if (tb->tb_id == id) { 116 rcu_read_unlock(); 117 return tb; 118 } 119 } 120 rcu_read_unlock(); 121 return NULL; 122} 123#endif /* CONFIG_IP_MULTIPLE_TABLES */ 124 125static void fib_flush(void) 126{ 127 int flushed = 0; 128 struct fib_table *tb; 129 struct hlist_node *node; 130 unsigned int h; 131 132 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 133 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) 134 flushed += tb->tb_flush(tb); 135 } 
136 137 if (flushed) 138 rt_cache_flush(-1); 139} 140 141/* 142 * Find the first device with a given source address. 143 */ 144 145struct net_device * ip_dev_find(__be32 addr) 146{ 147 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 148 struct fib_result res; 149 struct net_device *dev = NULL; 150 struct fib_table *local_table; 151 152#ifdef CONFIG_IP_MULTIPLE_TABLES 153 res.r = NULL; 154#endif 155 156 local_table = fib_get_table(RT_TABLE_LOCAL); 157 if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) 158 return NULL; 159 if (res.type != RTN_LOCAL) 160 goto out; 161 dev = FIB_RES_DEV(res); 162 163 if (dev) 164 dev_hold(dev); 165out: 166 fib_res_put(&res); 167 return dev; 168} 169 170/* 171 * Find address type as if only "dev" was present in the system. If 172 * on_dev is NULL then all interfaces are taken into consideration. 173 */ 174static inline unsigned __inet_dev_addr_type(const struct net_device *dev, 175 __be32 addr) 176{ 177 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 178 struct fib_result res; 179 unsigned ret = RTN_BROADCAST; 180 struct fib_table *local_table; 181 182 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr)) 183 return RTN_BROADCAST; 184 if (ipv4_is_multicast(addr)) 185 return RTN_MULTICAST; 186 187#ifdef CONFIG_IP_MULTIPLE_TABLES 188 res.r = NULL; 189#endif 190 191 local_table = fib_get_table(RT_TABLE_LOCAL); 192 if (local_table) { 193 ret = RTN_UNICAST; 194 if (!local_table->tb_lookup(local_table, &fl, &res)) { 195 if (!dev || dev == res.fi->fib_dev) 196 ret = res.type; 197 fib_res_put(&res); 198 } 199 } 200 return ret; 201} 202 203unsigned int inet_addr_type(__be32 addr) 204{ 205 return __inet_dev_addr_type(NULL, addr); 206} 207 208unsigned int inet_dev_addr_type(const struct net_device *dev, __be32 addr) 209{ 210 return __inet_dev_addr_type(dev, addr); 211} 212 213/* Given (packet source, input interface) and optional (dst, oif, tos): 214 - (main) check, that source is valid i.e. 
not broadcast or our local 215 address. 216 - figure out what "logical" interface this packet arrived 217 and calculate "specific destination" address. 218 - check, that packet arrived from expected physical interface. 219 */ 220 221int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 222 struct net_device *dev, __be32 *spec_dst, u32 *itag) 223{ 224 struct in_device *in_dev; 225 struct flowi fl = { .nl_u = { .ip4_u = 226 { .daddr = src, 227 .saddr = dst, 228 .tos = tos } }, 229 .iif = oif }; 230 struct fib_result res; 231 int no_addr, rpf; 232 int ret; 233 234 no_addr = rpf = 0; 235 rcu_read_lock(); 236 in_dev = __in_dev_get_rcu(dev); 237 if (in_dev) { 238 no_addr = in_dev->ifa_list == NULL; 239 rpf = IN_DEV_RPFILTER(in_dev); 240 } 241 rcu_read_unlock(); 242 243 if (in_dev == NULL) 244 goto e_inval; 245 246 if (fib_lookup(&fl, &res)) 247 goto last_resort; 248 if (res.type != RTN_UNICAST) 249 goto e_inval_res; 250 *spec_dst = FIB_RES_PREFSRC(res); 251 fib_combine_itag(itag, &res); 252#ifdef CONFIG_IP_ROUTE_MULTIPATH 253 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) 254#else 255 if (FIB_RES_DEV(res) == dev) 256#endif 257 { 258 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 259 fib_res_put(&res); 260 return ret; 261 } 262 fib_res_put(&res); 263 if (no_addr) 264 goto last_resort; 265 if (rpf) 266 goto e_inval; 267 fl.oif = dev->ifindex; 268 269 ret = 0; 270 if (fib_lookup(&fl, &res) == 0) { 271 if (res.type == RTN_UNICAST) { 272 *spec_dst = FIB_RES_PREFSRC(res); 273 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 274 } 275 fib_res_put(&res); 276 } 277 return ret; 278 279last_resort: 280 if (rpf) 281 goto e_inval; 282 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 283 *itag = 0; 284 return 0; 285 286e_inval_res: 287 fib_res_put(&res); 288e_inval: 289 return -EINVAL; 290} 291 292static inline __be32 sk_extract_addr(struct sockaddr *addr) 293{ 294 return ((struct sockaddr_in *) addr)->sin_addr.s_addr; 295} 296 297static int put_rtax(struct 
nlattr *mx, int len, int type, u32 value) 298{ 299 struct nlattr *nla; 300 301 nla = (struct nlattr *) ((char *) mx + len); 302 nla->nla_type = type; 303 nla->nla_len = nla_attr_size(4); 304 *(u32 *) nla_data(nla) = value; 305 306 return len + nla_total_size(4); 307} 308 309static int rtentry_to_fib_config(int cmd, struct rtentry *rt, 310 struct fib_config *cfg) 311{ 312 __be32 addr; 313 int plen; 314 315 memset(cfg, 0, sizeof(*cfg)); 316 317 if (rt->rt_dst.sa_family != AF_INET) 318 return -EAFNOSUPPORT; 319 320 /* 321 * Check mask for validity: 322 * a) it must be contiguous. 323 * b) destination must have all host bits clear. 324 * c) if application forgot to set correct family (AF_INET), 325 * reject request unless it is absolutely clear i.e. 326 * both family and mask are zero. 327 */ 328 plen = 32; 329 addr = sk_extract_addr(&rt->rt_dst); 330 if (!(rt->rt_flags & RTF_HOST)) { 331 __be32 mask = sk_extract_addr(&rt->rt_genmask); 332 333 if (rt->rt_genmask.sa_family != AF_INET) { 334 if (mask || rt->rt_genmask.sa_family) 335 return -EAFNOSUPPORT; 336 } 337 338 if (bad_mask(mask, addr)) 339 return -EINVAL; 340 341 plen = inet_mask_len(mask); 342 } 343 344 cfg->fc_dst_len = plen; 345 cfg->fc_dst = addr; 346 347 if (cmd != SIOCDELRT) { 348 cfg->fc_nlflags = NLM_F_CREATE; 349 cfg->fc_protocol = RTPROT_BOOT; 350 } 351 352 if (rt->rt_metric) 353 cfg->fc_priority = rt->rt_metric - 1; 354 355 if (rt->rt_flags & RTF_REJECT) { 356 cfg->fc_scope = RT_SCOPE_HOST; 357 cfg->fc_type = RTN_UNREACHABLE; 358 return 0; 359 } 360 361 cfg->fc_scope = RT_SCOPE_NOWHERE; 362 cfg->fc_type = RTN_UNICAST; 363 364 if (rt->rt_dev) { 365 char *colon; 366 struct net_device *dev; 367 char devname[IFNAMSIZ]; 368 369 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) 370 return -EFAULT; 371 372 devname[IFNAMSIZ-1] = 0; 373 colon = strchr(devname, ':'); 374 if (colon) 375 *colon = 0; 376 dev = __dev_get_by_name(&init_net, devname); 377 if (!dev) 378 return -ENODEV; 379 cfg->fc_oif = 
dev->ifindex; 380 if (colon) { 381 struct in_ifaddr *ifa; 382 struct in_device *in_dev = __in_dev_get_rtnl(dev); 383 if (!in_dev) 384 return -ENODEV; 385 *colon = ':'; 386 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 387 if (strcmp(ifa->ifa_label, devname) == 0) 388 break; 389 if (ifa == NULL) 390 return -ENODEV; 391 cfg->fc_prefsrc = ifa->ifa_local; 392 } 393 } 394 395 addr = sk_extract_addr(&rt->rt_gateway); 396 if (rt->rt_gateway.sa_family == AF_INET && addr) { 397 cfg->fc_gw = addr; 398 if (rt->rt_flags & RTF_GATEWAY && 399 inet_addr_type(addr) == RTN_UNICAST) 400 cfg->fc_scope = RT_SCOPE_UNIVERSE; 401 } 402 403 if (cmd == SIOCDELRT) 404 return 0; 405 406 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) 407 return -EINVAL; 408 409 if (cfg->fc_scope == RT_SCOPE_NOWHERE) 410 cfg->fc_scope = RT_SCOPE_LINK; 411 412 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { 413 struct nlattr *mx; 414 int len = 0; 415 416 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 417 if (mx == NULL) 418 return -ENOMEM; 419 420 if (rt->rt_flags & RTF_MTU) 421 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); 422 423 if (rt->rt_flags & RTF_WINDOW) 424 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); 425 426 if (rt->rt_flags & RTF_IRTT) 427 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); 428 429 cfg->fc_mx = mx; 430 cfg->fc_mx_len = len; 431 } 432 433 return 0; 434} 435 436/* 437 * Handle IP routing ioctl calls. 
These are used to manipulate the routing tables 438 */ 439 440int ip_rt_ioctl(unsigned int cmd, void __user *arg) 441{ 442 struct fib_config cfg; 443 struct rtentry rt; 444 int err; 445 446 switch (cmd) { 447 case SIOCADDRT: /* Add a route */ 448 case SIOCDELRT: /* Delete a route */ 449 if (!capable(CAP_NET_ADMIN)) 450 return -EPERM; 451 452 if (copy_from_user(&rt, arg, sizeof(rt))) 453 return -EFAULT; 454 455 rtnl_lock(); 456 err = rtentry_to_fib_config(cmd, &rt, &cfg); 457 if (err == 0) { 458 struct fib_table *tb; 459 460 if (cmd == SIOCDELRT) { 461 tb = fib_get_table(cfg.fc_table); 462 if (tb) 463 err = tb->tb_delete(tb, &cfg); 464 else 465 err = -ESRCH; 466 } else { 467 tb = fib_new_table(cfg.fc_table); 468 if (tb) 469 err = tb->tb_insert(tb, &cfg); 470 else 471 err = -ENOBUFS; 472 } 473 474 /* allocated by rtentry_to_fib_config() */ 475 kfree(cfg.fc_mx); 476 } 477 rtnl_unlock(); 478 return err; 479 } 480 return -EINVAL; 481} 482 483const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 484 [RTA_DST] = { .type = NLA_U32 }, 485 [RTA_SRC] = { .type = NLA_U32 }, 486 [RTA_IIF] = { .type = NLA_U32 }, 487 [RTA_OIF] = { .type = NLA_U32 }, 488 [RTA_GATEWAY] = { .type = NLA_U32 }, 489 [RTA_PRIORITY] = { .type = NLA_U32 }, 490 [RTA_PREFSRC] = { .type = NLA_U32 }, 491 [RTA_METRICS] = { .type = NLA_NESTED }, 492 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 493 [RTA_PROTOINFO] = { .type = NLA_U32 }, 494 [RTA_FLOW] = { .type = NLA_U32 }, 495}; 496 497static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, 498 struct fib_config *cfg) 499{ 500 struct nlattr *attr; 501 int err, remaining; 502 struct rtmsg *rtm; 503 504 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); 505 if (err < 0) 506 goto errout; 507 508 memset(cfg, 0, sizeof(*cfg)); 509 510 rtm = nlmsg_data(nlh); 511 cfg->fc_dst_len = rtm->rtm_dst_len; 512 cfg->fc_tos = rtm->rtm_tos; 513 cfg->fc_table = rtm->rtm_table; 514 cfg->fc_protocol = rtm->rtm_protocol; 515 cfg->fc_scope = 
rtm->rtm_scope; 516 cfg->fc_type = rtm->rtm_type; 517 cfg->fc_flags = rtm->rtm_flags; 518 cfg->fc_nlflags = nlh->nlmsg_flags; 519 520 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 521 cfg->fc_nlinfo.nlh = nlh; 522 523 if (cfg->fc_type > RTN_MAX) { 524 err = -EINVAL; 525 goto errout; 526 } 527 528 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { 529 switch (nla_type(attr)) { 530 case RTA_DST: 531 cfg->fc_dst = nla_get_be32(attr); 532 break; 533 case RTA_OIF: 534 cfg->fc_oif = nla_get_u32(attr); 535 break; 536 case RTA_GATEWAY: 537 cfg->fc_gw = nla_get_be32(attr); 538 break; 539 case RTA_PRIORITY: 540 cfg->fc_priority = nla_get_u32(attr); 541 break; 542 case RTA_PREFSRC: 543 cfg->fc_prefsrc = nla_get_be32(attr); 544 break; 545 case RTA_METRICS: 546 cfg->fc_mx = nla_data(attr); 547 cfg->fc_mx_len = nla_len(attr); 548 break; 549 case RTA_MULTIPATH: 550 cfg->fc_mp = nla_data(attr); 551 cfg->fc_mp_len = nla_len(attr); 552 break; 553 case RTA_FLOW: 554 cfg->fc_flow = nla_get_u32(attr); 555 break; 556 case RTA_TABLE: 557 cfg->fc_table = nla_get_u32(attr); 558 break; 559 } 560 } 561 562 return 0; 563errout: 564 return err; 565} 566 567static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 568{ 569 struct net *net = skb->sk->sk_net; 570 struct fib_config cfg; 571 struct fib_table *tb; 572 int err; 573 574 if (net != &init_net) 575 return -EINVAL; 576 577 err = rtm_to_fib_config(skb, nlh, &cfg); 578 if (err < 0) 579 goto errout; 580 581 tb = fib_get_table(cfg.fc_table); 582 if (tb == NULL) { 583 err = -ESRCH; 584 goto errout; 585 } 586 587 err = tb->tb_delete(tb, &cfg); 588errout: 589 return err; 590} 591 592static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 593{ 594 struct net *net = skb->sk->sk_net; 595 struct fib_config cfg; 596 struct fib_table *tb; 597 int err; 598 599 if (net != &init_net) 600 return -EINVAL; 601 602 err = rtm_to_fib_config(skb, nlh, &cfg); 603 if (err < 0) 604 goto errout; 605 606 
tb = fib_new_table(cfg.fc_table); 607 if (tb == NULL) { 608 err = -ENOBUFS; 609 goto errout; 610 } 611 612 err = tb->tb_insert(tb, &cfg); 613errout: 614 return err; 615} 616 617static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 618{ 619 struct net *net = skb->sk->sk_net; 620 unsigned int h, s_h; 621 unsigned int e = 0, s_e; 622 struct fib_table *tb; 623 struct hlist_node *node; 624 int dumped = 0; 625 626 if (net != &init_net) 627 return 0; 628 629 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 630 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) 631 return ip_rt_dump(skb, cb); 632 633 s_h = cb->args[0]; 634 s_e = cb->args[1]; 635 636 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 637 e = 0; 638 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { 639 if (e < s_e) 640 goto next; 641 if (dumped) 642 memset(&cb->args[2], 0, sizeof(cb->args) - 643 2 * sizeof(cb->args[0])); 644 if (tb->tb_dump(tb, skb, cb) < 0) 645 goto out; 646 dumped = 1; 647next: 648 e++; 649 } 650 } 651out: 652 cb->args[1] = e; 653 cb->args[0] = h; 654 655 return skb->len; 656} 657 658/* Prepare and feed intra-kernel routing request. 659 Really, it should be netlink message, but :-( netlink 660 can be not configured, so that we feed it directly 661 to fib engine. It is legal, because all events occur 662 only when netlink is already locked. 
663 */ 664 665static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 666{ 667 struct fib_table *tb; 668 struct fib_config cfg = { 669 .fc_protocol = RTPROT_KERNEL, 670 .fc_type = type, 671 .fc_dst = dst, 672 .fc_dst_len = dst_len, 673 .fc_prefsrc = ifa->ifa_local, 674 .fc_oif = ifa->ifa_dev->dev->ifindex, 675 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 676 }; 677 678 if (type == RTN_UNICAST) 679 tb = fib_new_table(RT_TABLE_MAIN); 680 else 681 tb = fib_new_table(RT_TABLE_LOCAL); 682 683 if (tb == NULL) 684 return; 685 686 cfg.fc_table = tb->tb_id; 687 688 if (type != RTN_LOCAL) 689 cfg.fc_scope = RT_SCOPE_LINK; 690 else 691 cfg.fc_scope = RT_SCOPE_HOST; 692 693 if (cmd == RTM_NEWROUTE) 694 tb->tb_insert(tb, &cfg); 695 else 696 tb->tb_delete(tb, &cfg); 697} 698 699void fib_add_ifaddr(struct in_ifaddr *ifa) 700{ 701 struct in_device *in_dev = ifa->ifa_dev; 702 struct net_device *dev = in_dev->dev; 703 struct in_ifaddr *prim = ifa; 704 __be32 mask = ifa->ifa_mask; 705 __be32 addr = ifa->ifa_local; 706 __be32 prefix = ifa->ifa_address&mask; 707 708 if (ifa->ifa_flags&IFA_F_SECONDARY) { 709 prim = inet_ifa_byprefix(in_dev, prefix, mask); 710 if (prim == NULL) { 711 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n"); 712 return; 713 } 714 } 715 716 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 717 718 if (!(dev->flags&IFF_UP)) 719 return; 720 721 /* Add broadcast address, if it is explicitly assigned. */ 722 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 723 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 724 725 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 726 (prefix != addr || ifa->ifa_prefixlen < 32)) { 727 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? 
RTN_LOCAL : 728 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 729 730 /* Add network specific broadcasts, when it takes a sense */ 731 if (ifa->ifa_prefixlen < 31) { 732 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 733 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 734 } 735 } 736} 737 738static void fib_del_ifaddr(struct in_ifaddr *ifa) 739{ 740 struct in_device *in_dev = ifa->ifa_dev; 741 struct net_device *dev = in_dev->dev; 742 struct in_ifaddr *ifa1; 743 struct in_ifaddr *prim = ifa; 744 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 745 __be32 any = ifa->ifa_address&ifa->ifa_mask; 746#define LOCAL_OK 1 747#define BRD_OK 2 748#define BRD0_OK 4 749#define BRD1_OK 8 750 unsigned ok = 0; 751 752 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 753 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 754 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 755 else { 756 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 757 if (prim == NULL) { 758 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n"); 759 return; 760 } 761 } 762 763 /* Deletion is more complicated than add. 764 We should take care of not to delete too much :-) 765 766 Scan address list to be sure that addresses are really gone. 767 */ 768 769 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 770 if (ifa->ifa_local == ifa1->ifa_local) 771 ok |= LOCAL_OK; 772 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 773 ok |= BRD_OK; 774 if (brd == ifa1->ifa_broadcast) 775 ok |= BRD1_OK; 776 if (any == ifa1->ifa_broadcast) 777 ok |= BRD0_OK; 778 } 779 780 if (!(ok&BRD_OK)) 781 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 782 if (!(ok&BRD1_OK)) 783 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 784 if (!(ok&BRD0_OK)) 785 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 786 if (!(ok&LOCAL_OK)) { 787 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 788 789 /* Check, that this local address finally disappeared. 
*/ 790 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { 791 /* And the last, but not the least thing. 792 We must flush stray FIB entries. 793 794 First of all, we scan fib_info list searching 795 for stray nexthop entries, then ignite fib_flush. 796 */ 797 if (fib_sync_down(ifa->ifa_local, NULL, 0)) 798 fib_flush(); 799 } 800 } 801#undef LOCAL_OK 802#undef BRD_OK 803#undef BRD0_OK 804#undef BRD1_OK 805} 806 807static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 808{ 809 810 struct fib_result res; 811 struct flowi fl = { .mark = frn->fl_mark, 812 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 813 .tos = frn->fl_tos, 814 .scope = frn->fl_scope } } }; 815 816#ifdef CONFIG_IP_MULTIPLE_TABLES 817 res.r = NULL; 818#endif 819 820 frn->err = -ENOENT; 821 if (tb) { 822 local_bh_disable(); 823 824 frn->tb_id = tb->tb_id; 825 frn->err = tb->tb_lookup(tb, &fl, &res); 826 827 if (!frn->err) { 828 frn->prefixlen = res.prefixlen; 829 frn->nh_sel = res.nh_sel; 830 frn->type = res.type; 831 frn->scope = res.scope; 832 fib_res_put(&res); 833 } 834 local_bh_enable(); 835 } 836} 837 838static void nl_fib_input(struct sk_buff *skb) 839{ 840 struct fib_result_nl *frn; 841 struct nlmsghdr *nlh; 842 struct fib_table *tb; 843 u32 pid; 844 845 nlh = nlmsg_hdr(skb); 846 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 847 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 848 return; 849 850 skb = skb_clone(skb, GFP_KERNEL); 851 if (skb == NULL) 852 return; 853 nlh = nlmsg_hdr(skb); 854 855 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 856 tb = fib_get_table(frn->tb_id_in); 857 858 nl_fib_lookup(frn, tb); 859 860 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 861 NETLINK_CB(skb).pid = 0; /* from kernel */ 862 NETLINK_CB(skb).dst_group = 0; /* unicast */ 863 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT); 864} 865 866static void nl_fib_lookup_init(void) 867{ 868 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, 869 nl_fib_input, NULL, 
THIS_MODULE); 870} 871 872static void fib_disable_ip(struct net_device *dev, int force) 873{ 874 if (fib_sync_down(0, dev, force)) 875 fib_flush(); 876 rt_cache_flush(0); 877 arp_ifdown(dev); 878} 879 880static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 881{ 882 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; 883 884 switch (event) { 885 case NETDEV_UP: 886 fib_add_ifaddr(ifa); 887#ifdef CONFIG_IP_ROUTE_MULTIPATH 888 fib_sync_up(ifa->ifa_dev->dev); 889#endif 890 rt_cache_flush(-1); 891 break; 892 case NETDEV_DOWN: 893 fib_del_ifaddr(ifa); 894 if (ifa->ifa_dev->ifa_list == NULL) { 895 /* Last address was deleted from this interface. 896 Disable IP. 897 */ 898 fib_disable_ip(ifa->ifa_dev->dev, 1); 899 } else { 900 rt_cache_flush(-1); 901 } 902 break; 903 } 904 return NOTIFY_DONE; 905} 906 907static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 908{ 909 struct net_device *dev = ptr; 910 struct in_device *in_dev = __in_dev_get_rtnl(dev); 911 912 if (dev->nd_net != &init_net) 913 return NOTIFY_DONE; 914 915 if (event == NETDEV_UNREGISTER) { 916 fib_disable_ip(dev, 2); 917 return NOTIFY_DONE; 918 } 919 920 if (!in_dev) 921 return NOTIFY_DONE; 922 923 switch (event) { 924 case NETDEV_UP: 925 for_ifa(in_dev) { 926 fib_add_ifaddr(ifa); 927 } endfor_ifa(in_dev); 928#ifdef CONFIG_IP_ROUTE_MULTIPATH 929 fib_sync_up(dev); 930#endif 931 rt_cache_flush(-1); 932 break; 933 case NETDEV_DOWN: 934 fib_disable_ip(dev, 0); 935 break; 936 case NETDEV_CHANGEMTU: 937 case NETDEV_CHANGE: 938 rt_cache_flush(0); 939 break; 940 } 941 return NOTIFY_DONE; 942} 943 944static struct notifier_block fib_inetaddr_notifier = { 945 .notifier_call =fib_inetaddr_event, 946}; 947 948static struct notifier_block fib_netdev_notifier = { 949 .notifier_call =fib_netdev_event, 950}; 951 952void __init ip_fib_init(void) 953{ 954 unsigned int i; 955 956 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 957 INIT_HLIST_HEAD(&fib_table_hash[i]); 958 
959 BUG_ON(fib4_rules_init()); 960 961 register_netdevice_notifier(&fib_netdev_notifier); 962 register_inetaddr_notifier(&fib_inetaddr_notifier); 963 nl_fib_lookup_init(); 964 965 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 966 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 967 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 968} 969 970EXPORT_SYMBOL(inet_addr_type); 971EXPORT_SYMBOL(inet_dev_addr_type); 972EXPORT_SYMBOL(ip_dev_find); 973