fib_frontend.c revision 5b707aaae4ca7b7204eb4a472721c84866d85f0f
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $ 9 * 10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 */ 17 18#include <linux/module.h> 19#include <asm/uaccess.h> 20#include <asm/system.h> 21#include <linux/bitops.h> 22#include <linux/capability.h> 23#include <linux/types.h> 24#include <linux/kernel.h> 25#include <linux/mm.h> 26#include <linux/string.h> 27#include <linux/socket.h> 28#include <linux/sockios.h> 29#include <linux/errno.h> 30#include <linux/in.h> 31#include <linux/inet.h> 32#include <linux/inetdevice.h> 33#include <linux/netdevice.h> 34#include <linux/if_addr.h> 35#include <linux/if_arp.h> 36#include <linux/skbuff.h> 37#include <linux/init.h> 38#include <linux/list.h> 39 40#include <net/ip.h> 41#include <net/protocol.h> 42#include <net/route.h> 43#include <net/tcp.h> 44#include <net/sock.h> 45#include <net/icmp.h> 46#include <net/arp.h> 47#include <net/ip_fib.h> 48#include <net/rtnetlink.h> 49 50#ifndef CONFIG_IP_MULTIPLE_TABLES 51 52static int __net_init fib4_rules_init(struct net *net) 53{ 54 struct fib_table *local_table, *main_table; 55 56 local_table = fib_hash_table(RT_TABLE_LOCAL); 57 if (local_table == NULL) 58 return -ENOMEM; 59 60 main_table = fib_hash_table(RT_TABLE_MAIN); 61 if (main_table == NULL) 62 goto fail; 63 64 hlist_add_head_rcu(&local_table->tb_hlist, 65 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); 66 hlist_add_head_rcu(&main_table->tb_hlist, 67 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); 68 return 0; 69 70fail: 71 kfree(local_table); 72 return -ENOMEM; 73} 74#else 75 76struct fib_table *fib_new_table(struct net *net, u32 id) 77{ 78 struct fib_table *tb; 79 unsigned int h; 80 81 if (id == 0) 82 id = RT_TABLE_MAIN; 83 tb = fib_get_table(net, id); 84 if (tb) 85 return tb; 86 87 tb = fib_hash_table(id); 88 if (!tb) 89 return NULL; 90 h = id & (FIB_TABLE_HASHSZ - 1); 91 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); 92 return tb; 93} 94 95struct fib_table *fib_get_table(struct net *net, u32 id) 96{ 97 struct fib_table *tb; 98 struct hlist_node *node; 99 struct hlist_head *head; 100 unsigned int h; 101 102 if (id == 0) 103 id = RT_TABLE_MAIN; 104 h = id & (FIB_TABLE_HASHSZ - 1); 105 106 rcu_read_lock(); 107 head = &net->ipv4.fib_table_hash[h]; 108 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 109 if (tb->tb_id == id) { 110 rcu_read_unlock(); 111 return tb; 112 } 113 } 114 rcu_read_unlock(); 115 return NULL; 116} 117#endif /* CONFIG_IP_MULTIPLE_TABLES */ 118 119static void fib_flush(struct net *net) 120{ 121 int flushed = 0; 122 struct fib_table *tb; 123 struct hlist_node *node; 124 struct hlist_head *head; 125 unsigned int h; 126 127 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 128 head = &net->ipv4.fib_table_hash[h]; 129 hlist_for_each_entry(tb, node, head, tb_hlist) 130 flushed += tb->tb_flush(tb); 131 } 132 133 if (flushed) 134 rt_cache_flush(-1); 135} 136 137/* 138 * Find the first device with a given source address. 139 */ 140 141struct net_device * ip_dev_find(__be32 addr) 142{ 143 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 144 struct fib_result res; 145 struct net_device *dev = NULL; 146 struct fib_table *local_table; 147 148#ifdef CONFIG_IP_MULTIPLE_TABLES 149 res.r = NULL; 150#endif 151 152 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL); 153 if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) 154 return NULL; 155 if (res.type != RTN_LOCAL) 156 goto out; 157 dev = FIB_RES_DEV(res); 158 159 if (dev) 160 dev_hold(dev); 161out: 162 fib_res_put(&res); 163 return dev; 164} 165 166/* 167 * Find address type as if only "dev" was present in the system. If 168 * on_dev is NULL then all interfaces are taken into consideration. 169 */ 170static inline unsigned __inet_dev_addr_type(struct net *net, 171 const struct net_device *dev, 172 __be32 addr) 173{ 174 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 175 struct fib_result res; 176 unsigned ret = RTN_BROADCAST; 177 struct fib_table *local_table; 178 179 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) 180 return RTN_BROADCAST; 181 if (ipv4_is_multicast(addr)) 182 return RTN_MULTICAST; 183 184#ifdef CONFIG_IP_MULTIPLE_TABLES 185 res.r = NULL; 186#endif 187 188 local_table = fib_get_table(net, RT_TABLE_LOCAL); 189 if (local_table) { 190 ret = RTN_UNICAST; 191 if (!local_table->tb_lookup(local_table, &fl, &res)) { 192 if (!dev || dev == res.fi->fib_dev) 193 ret = res.type; 194 fib_res_put(&res); 195 } 196 } 197 return ret; 198} 199 200unsigned int inet_addr_type(struct net *net, __be32 addr) 201{ 202 return __inet_dev_addr_type(net, NULL, addr); 203} 204 205unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 206 __be32 addr) 207{ 208 return __inet_dev_addr_type(net, dev, addr); 209} 210 211/* Given (packet source, input interface) and optional (dst, oif, tos): 212 - (main) check, that source is valid i.e. not broadcast or our local 213 address. 214 - figure out what "logical" interface this packet arrived 215 and calculate "specific destination" address. 216 - check, that packet arrived from expected physical interface. 217 */ 218 219int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 220 struct net_device *dev, __be32 *spec_dst, u32 *itag) 221{ 222 struct in_device *in_dev; 223 struct flowi fl = { .nl_u = { .ip4_u = 224 { .daddr = src, 225 .saddr = dst, 226 .tos = tos } }, 227 .iif = oif }; 228 struct fib_result res; 229 int no_addr, rpf; 230 int ret; 231 struct net *net; 232 233 no_addr = rpf = 0; 234 rcu_read_lock(); 235 in_dev = __in_dev_get_rcu(dev); 236 if (in_dev) { 237 no_addr = in_dev->ifa_list == NULL; 238 rpf = IN_DEV_RPFILTER(in_dev); 239 } 240 rcu_read_unlock(); 241 242 if (in_dev == NULL) 243 goto e_inval; 244 245 net = dev->nd_net; 246 if (fib_lookup(net, &fl, &res)) 247 goto last_resort; 248 if (res.type != RTN_UNICAST) 249 goto e_inval_res; 250 *spec_dst = FIB_RES_PREFSRC(res); 251 fib_combine_itag(itag, &res); 252#ifdef CONFIG_IP_ROUTE_MULTIPATH 253 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) 254#else 255 if (FIB_RES_DEV(res) == dev) 256#endif 257 { 258 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 259 fib_res_put(&res); 260 return ret; 261 } 262 fib_res_put(&res); 263 if (no_addr) 264 goto last_resort; 265 if (rpf) 266 goto e_inval; 267 fl.oif = dev->ifindex; 268 269 ret = 0; 270 if (fib_lookup(net, &fl, &res) == 0) { 271 if (res.type == RTN_UNICAST) { 272 *spec_dst = FIB_RES_PREFSRC(res); 273 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 274 } 275 fib_res_put(&res); 276 } 277 return ret; 278 279last_resort: 280 if (rpf) 281 goto e_inval; 282 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 283 *itag = 0; 284 return 0; 285 286e_inval_res: 287 fib_res_put(&res); 288e_inval: 289 return -EINVAL; 290} 291 292static inline __be32 sk_extract_addr(struct sockaddr *addr) 293{ 294 return ((struct sockaddr_in *) addr)->sin_addr.s_addr; 295} 296 297static int put_rtax(struct nlattr *mx, int len, int type, u32 value) 298{ 299 struct nlattr *nla; 300 301 nla = (struct nlattr *) ((char *) mx + len); 302 nla->nla_type = type; 303 nla->nla_len = nla_attr_size(4); 304 *(u32 *) nla_data(nla) = value; 305 306 return len + nla_total_size(4); 307} 308 309static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, 310 struct fib_config *cfg) 311{ 312 __be32 addr; 313 int plen; 314 315 memset(cfg, 0, sizeof(*cfg)); 316 cfg->fc_nlinfo.nl_net = net; 317 318 if (rt->rt_dst.sa_family != AF_INET) 319 return -EAFNOSUPPORT; 320 321 /* 322 * Check mask for validity: 323 * a) it must be contiguous. 324 * b) destination must have all host bits clear. 325 * c) if application forgot to set correct family (AF_INET), 326 * reject request unless it is absolutely clear i.e. 327 * both family and mask are zero. 328 */ 329 plen = 32; 330 addr = sk_extract_addr(&rt->rt_dst); 331 if (!(rt->rt_flags & RTF_HOST)) { 332 __be32 mask = sk_extract_addr(&rt->rt_genmask); 333 334 if (rt->rt_genmask.sa_family != AF_INET) { 335 if (mask || rt->rt_genmask.sa_family) 336 return -EAFNOSUPPORT; 337 } 338 339 if (bad_mask(mask, addr)) 340 return -EINVAL; 341 342 plen = inet_mask_len(mask); 343 } 344 345 cfg->fc_dst_len = plen; 346 cfg->fc_dst = addr; 347 348 if (cmd != SIOCDELRT) { 349 cfg->fc_nlflags = NLM_F_CREATE; 350 cfg->fc_protocol = RTPROT_BOOT; 351 } 352 353 if (rt->rt_metric) 354 cfg->fc_priority = rt->rt_metric - 1; 355 356 if (rt->rt_flags & RTF_REJECT) { 357 cfg->fc_scope = RT_SCOPE_HOST; 358 cfg->fc_type = RTN_UNREACHABLE; 359 return 0; 360 } 361 362 cfg->fc_scope = RT_SCOPE_NOWHERE; 363 cfg->fc_type = RTN_UNICAST; 364 365 if (rt->rt_dev) { 366 char *colon; 367 struct net_device *dev; 368 char devname[IFNAMSIZ]; 369 370 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) 371 return -EFAULT; 372 373 devname[IFNAMSIZ-1] = 0; 374 colon = strchr(devname, ':'); 375 if (colon) 376 *colon = 0; 377 dev = __dev_get_by_name(net, devname); 378 if (!dev) 379 return -ENODEV; 380 cfg->fc_oif = dev->ifindex; 381 if (colon) { 382 struct in_ifaddr *ifa; 383 struct in_device *in_dev = __in_dev_get_rtnl(dev); 384 if (!in_dev) 385 return -ENODEV; 386 *colon = ':'; 387 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 388 if (strcmp(ifa->ifa_label, devname) == 0) 389 break; 390 if (ifa == NULL) 391 return -ENODEV; 392 cfg->fc_prefsrc = ifa->ifa_local; 393 } 394 } 395 396 addr = sk_extract_addr(&rt->rt_gateway); 397 if (rt->rt_gateway.sa_family == AF_INET && addr) { 398 cfg->fc_gw = addr; 399 if (rt->rt_flags & RTF_GATEWAY && 400 inet_addr_type(net, addr) == RTN_UNICAST) 401 cfg->fc_scope = RT_SCOPE_UNIVERSE; 402 } 403 404 if (cmd == SIOCDELRT) 405 return 0; 406 407 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) 408 return -EINVAL; 409 410 if (cfg->fc_scope == RT_SCOPE_NOWHERE) 411 cfg->fc_scope = RT_SCOPE_LINK; 412 413 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { 414 struct nlattr *mx; 415 int len = 0; 416 417 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 418 if (mx == NULL) 419 return -ENOMEM; 420 421 if (rt->rt_flags & RTF_MTU) 422 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); 423 424 if (rt->rt_flags & RTF_WINDOW) 425 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); 426 427 if (rt->rt_flags & RTF_IRTT) 428 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); 429 430 cfg->fc_mx = mx; 431 cfg->fc_mx_len = len; 432 } 433 434 return 0; 435} 436 437/* 438 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 439 */ 440 441int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 442{ 443 struct fib_config cfg; 444 struct rtentry rt; 445 int err; 446 447 switch (cmd) { 448 case SIOCADDRT: /* Add a route */ 449 case SIOCDELRT: /* Delete a route */ 450 if (!capable(CAP_NET_ADMIN)) 451 return -EPERM; 452 453 if (copy_from_user(&rt, arg, sizeof(rt))) 454 return -EFAULT; 455 456 rtnl_lock(); 457 err = rtentry_to_fib_config(net, cmd, &rt, &cfg); 458 if (err == 0) { 459 struct fib_table *tb; 460 461 if (cmd == SIOCDELRT) { 462 tb = fib_get_table(net, cfg.fc_table); 463 if (tb) 464 err = tb->tb_delete(tb, &cfg); 465 else 466 err = -ESRCH; 467 } else { 468 tb = fib_new_table(net, cfg.fc_table); 469 if (tb) 470 err = tb->tb_insert(tb, &cfg); 471 else 472 err = -ENOBUFS; 473 } 474 475 /* allocated by rtentry_to_fib_config() */ 476 kfree(cfg.fc_mx); 477 } 478 rtnl_unlock(); 479 return err; 480 } 481 return -EINVAL; 482} 483 484const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 485 [RTA_DST] = { .type = NLA_U32 }, 486 [RTA_SRC] = { .type = NLA_U32 }, 487 [RTA_IIF] = { .type = NLA_U32 }, 488 [RTA_OIF] = { .type = NLA_U32 }, 489 [RTA_GATEWAY] = { .type = NLA_U32 }, 490 [RTA_PRIORITY] = { .type = NLA_U32 }, 491 [RTA_PREFSRC] = { .type = NLA_U32 }, 492 [RTA_METRICS] = { .type = NLA_NESTED }, 493 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 494 [RTA_PROTOINFO] = { .type = NLA_U32 }, 495 [RTA_FLOW] = { .type = NLA_U32 }, 496}; 497 498static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 499 struct nlmsghdr *nlh, struct fib_config *cfg) 500{ 501 struct nlattr *attr; 502 int err, remaining; 503 struct rtmsg *rtm; 504 505 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); 506 if (err < 0) 507 goto errout; 508 509 memset(cfg, 0, sizeof(*cfg)); 510 511 rtm = nlmsg_data(nlh); 512 cfg->fc_dst_len = rtm->rtm_dst_len; 513 cfg->fc_tos = rtm->rtm_tos; 514 cfg->fc_table = rtm->rtm_table; 515 cfg->fc_protocol = rtm->rtm_protocol; 516 cfg->fc_scope = rtm->rtm_scope; 517 cfg->fc_type = rtm->rtm_type; 518 cfg->fc_flags = rtm->rtm_flags; 519 cfg->fc_nlflags = nlh->nlmsg_flags; 520 521 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 522 cfg->fc_nlinfo.nlh = nlh; 523 cfg->fc_nlinfo.nl_net = net; 524 525 if (cfg->fc_type > RTN_MAX) { 526 err = -EINVAL; 527 goto errout; 528 } 529 530 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { 531 switch (nla_type(attr)) { 532 case RTA_DST: 533 cfg->fc_dst = nla_get_be32(attr); 534 break; 535 case RTA_OIF: 536 cfg->fc_oif = nla_get_u32(attr); 537 break; 538 case RTA_GATEWAY: 539 cfg->fc_gw = nla_get_be32(attr); 540 break; 541 case RTA_PRIORITY: 542 cfg->fc_priority = nla_get_u32(attr); 543 break; 544 case RTA_PREFSRC: 545 cfg->fc_prefsrc = nla_get_be32(attr); 546 break; 547 case RTA_METRICS: 548 cfg->fc_mx = nla_data(attr); 549 cfg->fc_mx_len = nla_len(attr); 550 break; 551 case RTA_MULTIPATH: 552 cfg->fc_mp = nla_data(attr); 553 cfg->fc_mp_len = nla_len(attr); 554 break; 555 case RTA_FLOW: 556 cfg->fc_flow = nla_get_u32(attr); 557 break; 558 case RTA_TABLE: 559 cfg->fc_table = nla_get_u32(attr); 560 break; 561 } 562 } 563 564 return 0; 565errout: 566 return err; 567} 568 569static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 570{ 571 struct net *net = skb->sk->sk_net; 572 struct fib_config cfg; 573 struct fib_table *tb; 574 int err; 575 576 err = rtm_to_fib_config(net, skb, nlh, &cfg); 577 if (err < 0) 578 goto errout; 579 580 tb = fib_get_table(net, cfg.fc_table); 581 if (tb == NULL) { 582 err = -ESRCH; 583 goto errout; 584 } 585 586 err = tb->tb_delete(tb, &cfg); 587errout: 588 return err; 589} 590 591static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 592{ 593 struct net *net = skb->sk->sk_net; 594 struct fib_config cfg; 595 struct fib_table *tb; 596 int err; 597 598 err = rtm_to_fib_config(net, skb, nlh, &cfg); 599 if (err < 0) 600 goto errout; 601 602 tb = fib_new_table(net, cfg.fc_table); 603 if (tb == NULL) { 604 err = -ENOBUFS; 605 goto errout; 606 } 607 608 err = tb->tb_insert(tb, &cfg); 609errout: 610 return err; 611} 612 613static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 614{ 615 struct net *net = skb->sk->sk_net; 616 unsigned int h, s_h; 617 unsigned int e = 0, s_e; 618 struct fib_table *tb; 619 struct hlist_node *node; 620 struct hlist_head *head; 621 int dumped = 0; 622 623 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 624 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) 625 return ip_rt_dump(skb, cb); 626 627 s_h = cb->args[0]; 628 s_e = cb->args[1]; 629 630 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 631 e = 0; 632 head = &net->ipv4.fib_table_hash[h]; 633 hlist_for_each_entry(tb, node, head, tb_hlist) { 634 if (e < s_e) 635 goto next; 636 if (dumped) 637 memset(&cb->args[2], 0, sizeof(cb->args) - 638 2 * sizeof(cb->args[0])); 639 if (tb->tb_dump(tb, skb, cb) < 0) 640 goto out; 641 dumped = 1; 642next: 643 e++; 644 } 645 } 646out: 647 cb->args[1] = e; 648 cb->args[0] = h; 649 650 return skb->len; 651} 652 653/* Prepare and feed intra-kernel routing request. 654 Really, it should be netlink message, but :-( netlink 655 can be not configured, so that we feed it directly 656 to fib engine. It is legal, because all events occur 657 only when netlink is already locked. 658 */ 659 660static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 661{ 662 struct net *net = ifa->ifa_dev->dev->nd_net; 663 struct fib_table *tb; 664 struct fib_config cfg = { 665 .fc_protocol = RTPROT_KERNEL, 666 .fc_type = type, 667 .fc_dst = dst, 668 .fc_dst_len = dst_len, 669 .fc_prefsrc = ifa->ifa_local, 670 .fc_oif = ifa->ifa_dev->dev->ifindex, 671 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 672 .fc_nlinfo = { 673 .nl_net = net, 674 }, 675 }; 676 677 if (type == RTN_UNICAST) 678 tb = fib_new_table(net, RT_TABLE_MAIN); 679 else 680 tb = fib_new_table(net, RT_TABLE_LOCAL); 681 682 if (tb == NULL) 683 return; 684 685 cfg.fc_table = tb->tb_id; 686 687 if (type != RTN_LOCAL) 688 cfg.fc_scope = RT_SCOPE_LINK; 689 else 690 cfg.fc_scope = RT_SCOPE_HOST; 691 692 if (cmd == RTM_NEWROUTE) 693 tb->tb_insert(tb, &cfg); 694 else 695 tb->tb_delete(tb, &cfg); 696} 697 698void fib_add_ifaddr(struct in_ifaddr *ifa) 699{ 700 struct in_device *in_dev = ifa->ifa_dev; 701 struct net_device *dev = in_dev->dev; 702 struct in_ifaddr *prim = ifa; 703 __be32 mask = ifa->ifa_mask; 704 __be32 addr = ifa->ifa_local; 705 __be32 prefix = ifa->ifa_address&mask; 706 707 if (ifa->ifa_flags&IFA_F_SECONDARY) { 708 prim = inet_ifa_byprefix(in_dev, prefix, mask); 709 if (prim == NULL) { 710 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 711 return; 712 } 713 } 714 715 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 716 717 if (!(dev->flags&IFF_UP)) 718 return; 719 720 /* Add broadcast address, if it is explicitly assigned. */ 721 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 722 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 723 724 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 725 (prefix != addr || ifa->ifa_prefixlen < 32)) { 726 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 727 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 728 729 /* Add network specific broadcasts, when it takes a sense */ 730 if (ifa->ifa_prefixlen < 31) { 731 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 732 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 733 } 734 } 735} 736 737static void fib_del_ifaddr(struct in_ifaddr *ifa) 738{ 739 struct in_device *in_dev = ifa->ifa_dev; 740 struct net_device *dev = in_dev->dev; 741 struct in_ifaddr *ifa1; 742 struct in_ifaddr *prim = ifa; 743 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 744 __be32 any = ifa->ifa_address&ifa->ifa_mask; 745#define LOCAL_OK 1 746#define BRD_OK 2 747#define BRD0_OK 4 748#define BRD1_OK 8 749 unsigned ok = 0; 750 751 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 752 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 753 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 754 else { 755 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 756 if (prim == NULL) { 757 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 758 return; 759 } 760 } 761 762 /* Deletion is more complicated than add. 763 We should take care of not to delete too much :-) 764 765 Scan address list to be sure that addresses are really gone. 766 */ 767 768 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 769 if (ifa->ifa_local == ifa1->ifa_local) 770 ok |= LOCAL_OK; 771 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 772 ok |= BRD_OK; 773 if (brd == ifa1->ifa_broadcast) 774 ok |= BRD1_OK; 775 if (any == ifa1->ifa_broadcast) 776 ok |= BRD0_OK; 777 } 778 779 if (!(ok&BRD_OK)) 780 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 781 if (!(ok&BRD1_OK)) 782 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 783 if (!(ok&BRD0_OK)) 784 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 785 if (!(ok&LOCAL_OK)) { 786 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 787 788 /* Check, that this local address finally disappeared. */ 789 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) { 790 /* And the last, but not the least thing. 791 We must flush stray FIB entries. 792 793 First of all, we scan fib_info list searching 794 for stray nexthop entries, then ignite fib_flush. 795 */ 796 if (fib_sync_down(ifa->ifa_local, NULL, 0)) 797 fib_flush(dev->nd_net); 798 } 799 } 800#undef LOCAL_OK 801#undef BRD_OK 802#undef BRD0_OK 803#undef BRD1_OK 804} 805 806static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 807{ 808 809 struct fib_result res; 810 struct flowi fl = { .mark = frn->fl_mark, 811 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 812 .tos = frn->fl_tos, 813 .scope = frn->fl_scope } } }; 814 815#ifdef CONFIG_IP_MULTIPLE_TABLES 816 res.r = NULL; 817#endif 818 819 frn->err = -ENOENT; 820 if (tb) { 821 local_bh_disable(); 822 823 frn->tb_id = tb->tb_id; 824 frn->err = tb->tb_lookup(tb, &fl, &res); 825 826 if (!frn->err) { 827 frn->prefixlen = res.prefixlen; 828 frn->nh_sel = res.nh_sel; 829 frn->type = res.type; 830 frn->scope = res.scope; 831 fib_res_put(&res); 832 } 833 local_bh_enable(); 834 } 835} 836 837static void nl_fib_input(struct sk_buff *skb) 838{ 839 struct net *net; 840 struct fib_result_nl *frn; 841 struct nlmsghdr *nlh; 842 struct fib_table *tb; 843 u32 pid; 844 845 net = skb->sk->sk_net; 846 nlh = nlmsg_hdr(skb); 847 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 848 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 849 return; 850 851 skb = skb_clone(skb, GFP_KERNEL); 852 if (skb == NULL) 853 return; 854 nlh = nlmsg_hdr(skb); 855 856 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 857 tb = fib_get_table(net, frn->tb_id_in); 858 859 nl_fib_lookup(frn, tb); 860 861 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 862 NETLINK_CB(skb).pid = 0; /* from kernel */ 863 NETLINK_CB(skb).dst_group = 0; /* unicast */ 864 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 865} 866 867static int nl_fib_lookup_init(struct net *net) 868{ 869 struct sock *sk; 870 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, 871 nl_fib_input, NULL, THIS_MODULE); 872 if (sk == NULL) 873 return -EAFNOSUPPORT; 874 net->ipv4.fibnl = sk; 875 return 0; 876} 877 878static void nl_fib_lookup_exit(struct net *net) 879{ 880 netlink_kernel_release(net->ipv4.fibnl); 881 net->ipv4.fibnl = NULL; 882} 883 884static void fib_disable_ip(struct net_device *dev, int force) 885{ 886 if (fib_sync_down(0, dev, force)) 887 fib_flush(dev->nd_net); 888 rt_cache_flush(0); 889 arp_ifdown(dev); 890} 891 892static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 893{ 894 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; 895 896 switch (event) { 897 case NETDEV_UP: 898 fib_add_ifaddr(ifa); 899#ifdef CONFIG_IP_ROUTE_MULTIPATH 900 fib_sync_up(ifa->ifa_dev->dev); 901#endif 902 rt_cache_flush(-1); 903 break; 904 case NETDEV_DOWN: 905 fib_del_ifaddr(ifa); 906 if (ifa->ifa_dev->ifa_list == NULL) { 907 /* Last address was deleted from this interface. 908 Disable IP. 909 */ 910 fib_disable_ip(ifa->ifa_dev->dev, 1); 911 } else { 912 rt_cache_flush(-1); 913 } 914 break; 915 } 916 return NOTIFY_DONE; 917} 918 919static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 920{ 921 struct net_device *dev = ptr; 922 struct in_device *in_dev = __in_dev_get_rtnl(dev); 923 924 if (event == NETDEV_UNREGISTER) { 925 fib_disable_ip(dev, 2); 926 return NOTIFY_DONE; 927 } 928 929 if (!in_dev) 930 return NOTIFY_DONE; 931 932 switch (event) { 933 case NETDEV_UP: 934 for_ifa(in_dev) { 935 fib_add_ifaddr(ifa); 936 } endfor_ifa(in_dev); 937#ifdef CONFIG_IP_ROUTE_MULTIPATH 938 fib_sync_up(dev); 939#endif 940 rt_cache_flush(-1); 941 break; 942 case NETDEV_DOWN: 943 fib_disable_ip(dev, 0); 944 break; 945 case NETDEV_CHANGEMTU: 946 case NETDEV_CHANGE: 947 rt_cache_flush(0); 948 break; 949 } 950 return NOTIFY_DONE; 951} 952 953static struct notifier_block fib_inetaddr_notifier = { 954 .notifier_call =fib_inetaddr_event, 955}; 956 957static struct notifier_block fib_netdev_notifier = { 958 .notifier_call =fib_netdev_event, 959}; 960 961static int __net_init ip_fib_net_init(struct net *net) 962{ 963 unsigned int i; 964 965 net->ipv4.fib_table_hash = kzalloc( 966 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); 967 if (net->ipv4.fib_table_hash == NULL) 968 return -ENOMEM; 969 970 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 971 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); 972 973 return fib4_rules_init(net); 974} 975 976static void __net_exit ip_fib_net_exit(struct net *net) 977{ 978 unsigned int i; 979 980#ifdef CONFIG_IP_MULTIPLE_TABLES 981 fib4_rules_exit(net); 982#endif 983 984 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 985 struct fib_table *tb; 986 struct hlist_head *head; 987 struct hlist_node *node, *tmp; 988 989 head = &net->ipv4.fib_table_hash[i]; 990 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 991 hlist_del(node); 992 tb->tb_flush(tb); 993 kfree(tb); 994 } 995 } 996 kfree(net->ipv4.fib_table_hash); 997} 998 999static int __net_init fib_net_init(struct net *net) 1000{ 1001 int error; 1002 1003 error = ip_fib_net_init(net); 1004 if (error < 0) 1005 goto out; 1006 error = nl_fib_lookup_init(net); 1007 if (error < 0) 1008 goto out_nlfl; 1009 error = fib_proc_init(net); 1010 if (error < 0) 1011 goto out_proc; 1012out: 1013 return error; 1014 1015out_proc: 1016 nl_fib_lookup_exit(net); 1017out_nlfl: 1018 ip_fib_net_exit(net); 1019 goto out; 1020} 1021 1022static void __net_exit fib_net_exit(struct net *net) 1023{ 1024 fib_proc_exit(net); 1025 nl_fib_lookup_exit(net); 1026 ip_fib_net_exit(net); 1027} 1028 1029static struct pernet_operations fib_net_ops = { 1030 .init = fib_net_init, 1031 .exit = fib_net_exit, 1032}; 1033 1034void __init ip_fib_init(void) 1035{ 1036 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1037 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1038 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1039 1040 register_pernet_subsys(&fib_net_ops); 1041 register_netdevice_notifier(&fib_netdev_notifier); 1042 register_inetaddr_notifier(&fib_inetaddr_notifier); 1043 1044 fib_hash_init(); 1045} 1046 1047EXPORT_SYMBOL(inet_addr_type); 1048EXPORT_SYMBOL(inet_dev_addr_type); 1049EXPORT_SYMBOL(ip_dev_find); 1050