fib_frontend.c revision b5f7e7554753e2cc3ef3bef0271fdb32027df2ba
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16#include <linux/module.h> 17#include <asm/uaccess.h> 18#include <asm/system.h> 19#include <linux/bitops.h> 20#include <linux/capability.h> 21#include <linux/types.h> 22#include <linux/kernel.h> 23#include <linux/mm.h> 24#include <linux/string.h> 25#include <linux/socket.h> 26#include <linux/sockios.h> 27#include <linux/errno.h> 28#include <linux/in.h> 29#include <linux/inet.h> 30#include <linux/inetdevice.h> 31#include <linux/netdevice.h> 32#include <linux/if_addr.h> 33#include <linux/if_arp.h> 34#include <linux/skbuff.h> 35#include <linux/init.h> 36#include <linux/list.h> 37#include <linux/slab.h> 38 39#include <net/ip.h> 40#include <net/protocol.h> 41#include <net/route.h> 42#include <net/tcp.h> 43#include <net/sock.h> 44#include <net/arp.h> 45#include <net/ip_fib.h> 46#include <net/rtnetlink.h> 47 48#ifndef CONFIG_IP_MULTIPLE_TABLES 49 50static int __net_init fib4_rules_init(struct net *net) 51{ 52 struct fib_table *local_table, *main_table; 53 54 local_table = fib_hash_table(RT_TABLE_LOCAL); 55 if (local_table == NULL) 56 return -ENOMEM; 57 58 main_table = fib_hash_table(RT_TABLE_MAIN); 59 if (main_table == NULL) 60 goto fail; 61 62 hlist_add_head_rcu(&local_table->tb_hlist, 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); 64 hlist_add_head_rcu(&main_table->tb_hlist, 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); 66 return 0; 67 68fail: 69 kfree(local_table); 70 return -ENOMEM; 71} 72#else 73 74struct fib_table *fib_new_table(struct net *net, u32 id) 75{ 76 struct fib_table *tb; 77 unsigned int h; 78 79 if (id == 0) 80 id = RT_TABLE_MAIN; 81 tb = fib_get_table(net, id); 82 if (tb) 83 return tb; 84 85 tb = fib_hash_table(id); 86 if (!tb) 87 return NULL; 88 h = id & (FIB_TABLE_HASHSZ - 1); 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); 90 return tb; 91} 92 93struct fib_table *fib_get_table(struct net *net, u32 id) 94{ 95 struct fib_table *tb; 96 struct hlist_node *node; 97 struct hlist_head *head; 98 unsigned int h; 99 100 if (id == 0) 101 id = RT_TABLE_MAIN; 102 h = id & (FIB_TABLE_HASHSZ - 1); 103 104 rcu_read_lock(); 105 head = &net->ipv4.fib_table_hash[h]; 106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 107 if (tb->tb_id == id) { 108 rcu_read_unlock(); 109 return tb; 110 } 111 } 112 rcu_read_unlock(); 113 return NULL; 114} 115#endif /* CONFIG_IP_MULTIPLE_TABLES */ 116 117void fib_select_default(struct net *net, 118 const struct flowi *flp, struct fib_result *res) 119{ 120 struct fib_table *tb; 121 int table = RT_TABLE_MAIN; 122#ifdef CONFIG_IP_MULTIPLE_TABLES 123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) 124 return; 125 table = res->r->table; 126#endif 127 tb = fib_get_table(net, table); 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 129 fib_table_select_default(tb, flp, res); 130} 131 132static void fib_flush(struct net *net) 133{ 134 int flushed = 0; 135 struct fib_table *tb; 136 struct hlist_node *node; 137 struct hlist_head *head; 138 unsigned int h; 139 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 141 head = &net->ipv4.fib_table_hash[h]; 142 hlist_for_each_entry(tb, node, head, tb_hlist) 143 flushed += fib_table_flush(tb); 144 } 145 146 if (flushed) 147 rt_cache_flush(net, -1); 148} 149 150/* 151 * Find the first device with a given source address. 152 */ 153 154struct net_device * ip_dev_find(struct net *net, __be32 addr) 155{ 156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 157 struct fib_result res; 158 struct net_device *dev = NULL; 159 struct fib_table *local_table; 160 161#ifdef CONFIG_IP_MULTIPLE_TABLES 162 res.r = NULL; 163#endif 164 165 local_table = fib_get_table(net, RT_TABLE_LOCAL); 166 if (!local_table || fib_table_lookup(local_table, &fl, &res)) 167 return NULL; 168 if (res.type != RTN_LOCAL) 169 goto out; 170 dev = FIB_RES_DEV(res); 171 172 if (dev) 173 dev_hold(dev); 174out: 175 fib_res_put(&res); 176 return dev; 177} 178 179/* 180 * Find address type as if only "dev" was present in the system. If 181 * on_dev is NULL then all interfaces are taken into consideration. 182 */ 183static inline unsigned __inet_dev_addr_type(struct net *net, 184 const struct net_device *dev, 185 __be32 addr) 186{ 187 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 188 struct fib_result res; 189 unsigned ret = RTN_BROADCAST; 190 struct fib_table *local_table; 191 192 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) 193 return RTN_BROADCAST; 194 if (ipv4_is_multicast(addr)) 195 return RTN_MULTICAST; 196 197#ifdef CONFIG_IP_MULTIPLE_TABLES 198 res.r = NULL; 199#endif 200 201 local_table = fib_get_table(net, RT_TABLE_LOCAL); 202 if (local_table) { 203 ret = RTN_UNICAST; 204 if (!fib_table_lookup(local_table, &fl, &res)) { 205 if (!dev || dev == res.fi->fib_dev) 206 ret = res.type; 207 fib_res_put(&res); 208 } 209 } 210 return ret; 211} 212 213unsigned int inet_addr_type(struct net *net, __be32 addr) 214{ 215 return __inet_dev_addr_type(net, NULL, addr); 216} 217 218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 219 __be32 addr) 220{ 221 return __inet_dev_addr_type(net, dev, addr); 222} 223 224/* Given (packet source, input interface) and optional (dst, oif, tos): 225 - (main) check, that source is valid i.e. not broadcast or our local 226 address. 227 - figure out what "logical" interface this packet arrived 228 and calculate "specific destination" address. 229 - check, that packet arrived from expected physical interface. 230 */ 231 232int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 233 struct net_device *dev, __be32 *spec_dst, 234 u32 *itag, u32 mark) 235{ 236 struct in_device *in_dev; 237 struct flowi fl = { .nl_u = { .ip4_u = 238 { .daddr = src, 239 .saddr = dst, 240 .tos = tos } }, 241 .mark = mark, 242 .iif = oif }; 243 244 struct fib_result res; 245 int no_addr, rpf, accept_local; 246 int ret; 247 struct net *net; 248 249 no_addr = rpf = accept_local = 0; 250 rcu_read_lock(); 251 in_dev = __in_dev_get_rcu(dev); 252 if (in_dev) { 253 no_addr = in_dev->ifa_list == NULL; 254 rpf = IN_DEV_RPFILTER(in_dev); 255 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 256 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 257 fl.mark = 0; 258 } 259 rcu_read_unlock(); 260 261 if (in_dev == NULL) 262 goto e_inval; 263 264 net = dev_net(dev); 265 if (fib_lookup(net, &fl, &res)) 266 goto last_resort; 267 if (res.type != RTN_UNICAST) { 268 if (res.type != RTN_LOCAL || !accept_local) 269 goto e_inval_res; 270 } 271 *spec_dst = FIB_RES_PREFSRC(res); 272 fib_combine_itag(itag, &res); 273#ifdef CONFIG_IP_ROUTE_MULTIPATH 274 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) 275#else 276 if (FIB_RES_DEV(res) == dev) 277#endif 278 { 279 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 280 fib_res_put(&res); 281 return ret; 282 } 283 fib_res_put(&res); 284 if (no_addr) 285 goto last_resort; 286 if (rpf == 1) 287 goto e_rpf; 288 fl.oif = dev->ifindex; 289 290 ret = 0; 291 if (fib_lookup(net, &fl, &res) == 0) { 292 if (res.type == RTN_UNICAST) { 293 *spec_dst = FIB_RES_PREFSRC(res); 294 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 295 } 296 fib_res_put(&res); 297 } 298 return ret; 299 300last_resort: 301 if (rpf) 302 goto e_rpf; 303 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 304 *itag = 0; 305 return 0; 306 307e_inval_res: 308 fib_res_put(&res); 309e_inval: 310 return -EINVAL; 311e_rpf: 312 return -EXDEV; 313} 314 315static inline __be32 sk_extract_addr(struct sockaddr *addr) 316{ 317 return ((struct sockaddr_in *) addr)->sin_addr.s_addr; 318} 319 320static int put_rtax(struct nlattr *mx, int len, int type, u32 value) 321{ 322 struct nlattr *nla; 323 324 nla = (struct nlattr *) ((char *) mx + len); 325 nla->nla_type = type; 326 nla->nla_len = nla_attr_size(4); 327 *(u32 *) nla_data(nla) = value; 328 329 return len + nla_total_size(4); 330} 331 332static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, 333 struct fib_config *cfg) 334{ 335 __be32 addr; 336 int plen; 337 338 memset(cfg, 0, sizeof(*cfg)); 339 cfg->fc_nlinfo.nl_net = net; 340 341 if (rt->rt_dst.sa_family != AF_INET) 342 return -EAFNOSUPPORT; 343 344 /* 345 * Check mask for validity: 346 * a) it must be contiguous. 347 * b) destination must have all host bits clear. 348 * c) if application forgot to set correct family (AF_INET), 349 * reject request unless it is absolutely clear i.e. 350 * both family and mask are zero. 351 */ 352 plen = 32; 353 addr = sk_extract_addr(&rt->rt_dst); 354 if (!(rt->rt_flags & RTF_HOST)) { 355 __be32 mask = sk_extract_addr(&rt->rt_genmask); 356 357 if (rt->rt_genmask.sa_family != AF_INET) { 358 if (mask || rt->rt_genmask.sa_family) 359 return -EAFNOSUPPORT; 360 } 361 362 if (bad_mask(mask, addr)) 363 return -EINVAL; 364 365 plen = inet_mask_len(mask); 366 } 367 368 cfg->fc_dst_len = plen; 369 cfg->fc_dst = addr; 370 371 if (cmd != SIOCDELRT) { 372 cfg->fc_nlflags = NLM_F_CREATE; 373 cfg->fc_protocol = RTPROT_BOOT; 374 } 375 376 if (rt->rt_metric) 377 cfg->fc_priority = rt->rt_metric - 1; 378 379 if (rt->rt_flags & RTF_REJECT) { 380 cfg->fc_scope = RT_SCOPE_HOST; 381 cfg->fc_type = RTN_UNREACHABLE; 382 return 0; 383 } 384 385 cfg->fc_scope = RT_SCOPE_NOWHERE; 386 cfg->fc_type = RTN_UNICAST; 387 388 if (rt->rt_dev) { 389 char *colon; 390 struct net_device *dev; 391 char devname[IFNAMSIZ]; 392 393 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) 394 return -EFAULT; 395 396 devname[IFNAMSIZ-1] = 0; 397 colon = strchr(devname, ':'); 398 if (colon) 399 *colon = 0; 400 dev = __dev_get_by_name(net, devname); 401 if (!dev) 402 return -ENODEV; 403 cfg->fc_oif = dev->ifindex; 404 if (colon) { 405 struct in_ifaddr *ifa; 406 struct in_device *in_dev = __in_dev_get_rtnl(dev); 407 if (!in_dev) 408 return -ENODEV; 409 *colon = ':'; 410 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 411 if (strcmp(ifa->ifa_label, devname) == 0) 412 break; 413 if (ifa == NULL) 414 return -ENODEV; 415 cfg->fc_prefsrc = ifa->ifa_local; 416 } 417 } 418 419 addr = sk_extract_addr(&rt->rt_gateway); 420 if (rt->rt_gateway.sa_family == AF_INET && addr) { 421 cfg->fc_gw = addr; 422 if (rt->rt_flags & RTF_GATEWAY && 423 inet_addr_type(net, addr) == RTN_UNICAST) 424 cfg->fc_scope = RT_SCOPE_UNIVERSE; 425 } 426 427 if (cmd == SIOCDELRT) 428 return 0; 429 430 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) 431 return -EINVAL; 432 433 if (cfg->fc_scope == RT_SCOPE_NOWHERE) 434 cfg->fc_scope = RT_SCOPE_LINK; 435 436 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { 437 struct nlattr *mx; 438 int len = 0; 439 440 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 441 if (mx == NULL) 442 return -ENOMEM; 443 444 if (rt->rt_flags & RTF_MTU) 445 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); 446 447 if (rt->rt_flags & RTF_WINDOW) 448 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); 449 450 if (rt->rt_flags & RTF_IRTT) 451 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); 452 453 cfg->fc_mx = mx; 454 cfg->fc_mx_len = len; 455 } 456 457 return 0; 458} 459 460/* 461 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 462 */ 463 464int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 465{ 466 struct fib_config cfg; 467 struct rtentry rt; 468 int err; 469 470 switch (cmd) { 471 case SIOCADDRT: /* Add a route */ 472 case SIOCDELRT: /* Delete a route */ 473 if (!capable(CAP_NET_ADMIN)) 474 return -EPERM; 475 476 if (copy_from_user(&rt, arg, sizeof(rt))) 477 return -EFAULT; 478 479 rtnl_lock(); 480 err = rtentry_to_fib_config(net, cmd, &rt, &cfg); 481 if (err == 0) { 482 struct fib_table *tb; 483 484 if (cmd == SIOCDELRT) { 485 tb = fib_get_table(net, cfg.fc_table); 486 if (tb) 487 err = fib_table_delete(tb, &cfg); 488 else 489 err = -ESRCH; 490 } else { 491 tb = fib_new_table(net, cfg.fc_table); 492 if (tb) 493 err = fib_table_insert(tb, &cfg); 494 else 495 err = -ENOBUFS; 496 } 497 498 /* allocated by rtentry_to_fib_config() */ 499 kfree(cfg.fc_mx); 500 } 501 rtnl_unlock(); 502 return err; 503 } 504 return -EINVAL; 505} 506 507const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 508 [RTA_DST] = { .type = NLA_U32 }, 509 [RTA_SRC] = { .type = NLA_U32 }, 510 [RTA_IIF] = { .type = NLA_U32 }, 511 [RTA_OIF] = { .type = NLA_U32 }, 512 [RTA_GATEWAY] = { .type = NLA_U32 }, 513 [RTA_PRIORITY] = { .type = NLA_U32 }, 514 [RTA_PREFSRC] = { .type = NLA_U32 }, 515 [RTA_METRICS] = { .type = NLA_NESTED }, 516 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 517 [RTA_FLOW] = { .type = NLA_U32 }, 518}; 519 520static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 521 struct nlmsghdr *nlh, struct fib_config *cfg) 522{ 523 struct nlattr *attr; 524 int err, remaining; 525 struct rtmsg *rtm; 526 527 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); 528 if (err < 0) 529 goto errout; 530 531 memset(cfg, 0, sizeof(*cfg)); 532 533 rtm = nlmsg_data(nlh); 534 cfg->fc_dst_len = rtm->rtm_dst_len; 535 cfg->fc_tos = rtm->rtm_tos; 536 cfg->fc_table = rtm->rtm_table; 537 cfg->fc_protocol = rtm->rtm_protocol; 538 cfg->fc_scope = rtm->rtm_scope; 539 cfg->fc_type = rtm->rtm_type; 540 cfg->fc_flags = rtm->rtm_flags; 541 cfg->fc_nlflags = nlh->nlmsg_flags; 542 543 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 544 cfg->fc_nlinfo.nlh = nlh; 545 cfg->fc_nlinfo.nl_net = net; 546 547 if (cfg->fc_type > RTN_MAX) { 548 err = -EINVAL; 549 goto errout; 550 } 551 552 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { 553 switch (nla_type(attr)) { 554 case RTA_DST: 555 cfg->fc_dst = nla_get_be32(attr); 556 break; 557 case RTA_OIF: 558 cfg->fc_oif = nla_get_u32(attr); 559 break; 560 case RTA_GATEWAY: 561 cfg->fc_gw = nla_get_be32(attr); 562 break; 563 case RTA_PRIORITY: 564 cfg->fc_priority = nla_get_u32(attr); 565 break; 566 case RTA_PREFSRC: 567 cfg->fc_prefsrc = nla_get_be32(attr); 568 break; 569 case RTA_METRICS: 570 cfg->fc_mx = nla_data(attr); 571 cfg->fc_mx_len = nla_len(attr); 572 break; 573 case RTA_MULTIPATH: 574 cfg->fc_mp = nla_data(attr); 575 cfg->fc_mp_len = nla_len(attr); 576 break; 577 case RTA_FLOW: 578 cfg->fc_flow = nla_get_u32(attr); 579 break; 580 case RTA_TABLE: 581 cfg->fc_table = nla_get_u32(attr); 582 break; 583 } 584 } 585 586 return 0; 587errout: 588 return err; 589} 590 591static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 592{ 593 struct net *net = sock_net(skb->sk); 594 struct fib_config cfg; 595 struct fib_table *tb; 596 int err; 597 598 err = rtm_to_fib_config(net, skb, nlh, &cfg); 599 if (err < 0) 600 goto errout; 601 602 tb = fib_get_table(net, cfg.fc_table); 603 if (tb == NULL) { 604 err = -ESRCH; 605 goto errout; 606 } 607 608 err = fib_table_delete(tb, &cfg); 609errout: 610 return err; 611} 612 613static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 614{ 615 struct net *net = sock_net(skb->sk); 616 struct fib_config cfg; 617 struct fib_table *tb; 618 int err; 619 620 err = rtm_to_fib_config(net, skb, nlh, &cfg); 621 if (err < 0) 622 goto errout; 623 624 tb = fib_new_table(net, cfg.fc_table); 625 if (tb == NULL) { 626 err = -ENOBUFS; 627 goto errout; 628 } 629 630 err = fib_table_insert(tb, &cfg); 631errout: 632 return err; 633} 634 635static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 636{ 637 struct net *net = sock_net(skb->sk); 638 unsigned int h, s_h; 639 unsigned int e = 0, s_e; 640 struct fib_table *tb; 641 struct hlist_node *node; 642 struct hlist_head *head; 643 int dumped = 0; 644 645 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 646 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) 647 return ip_rt_dump(skb, cb); 648 649 s_h = cb->args[0]; 650 s_e = cb->args[1]; 651 652 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 653 e = 0; 654 head = &net->ipv4.fib_table_hash[h]; 655 hlist_for_each_entry(tb, node, head, tb_hlist) { 656 if (e < s_e) 657 goto next; 658 if (dumped) 659 memset(&cb->args[2], 0, sizeof(cb->args) - 660 2 * sizeof(cb->args[0])); 661 if (fib_table_dump(tb, skb, cb) < 0) 662 goto out; 663 dumped = 1; 664next: 665 e++; 666 } 667 } 668out: 669 cb->args[1] = e; 670 cb->args[0] = h; 671 672 return skb->len; 673} 674 675/* Prepare and feed intra-kernel routing request. 676 Really, it should be netlink message, but :-( netlink 677 can be not configured, so that we feed it directly 678 to fib engine. It is legal, because all events occur 679 only when netlink is already locked. 680 */ 681 682static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 683{ 684 struct net *net = dev_net(ifa->ifa_dev->dev); 685 struct fib_table *tb; 686 struct fib_config cfg = { 687 .fc_protocol = RTPROT_KERNEL, 688 .fc_type = type, 689 .fc_dst = dst, 690 .fc_dst_len = dst_len, 691 .fc_prefsrc = ifa->ifa_local, 692 .fc_oif = ifa->ifa_dev->dev->ifindex, 693 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 694 .fc_nlinfo = { 695 .nl_net = net, 696 }, 697 }; 698 699 if (type == RTN_UNICAST) 700 tb = fib_new_table(net, RT_TABLE_MAIN); 701 else 702 tb = fib_new_table(net, RT_TABLE_LOCAL); 703 704 if (tb == NULL) 705 return; 706 707 cfg.fc_table = tb->tb_id; 708 709 if (type != RTN_LOCAL) 710 cfg.fc_scope = RT_SCOPE_LINK; 711 else 712 cfg.fc_scope = RT_SCOPE_HOST; 713 714 if (cmd == RTM_NEWROUTE) 715 fib_table_insert(tb, &cfg); 716 else 717 fib_table_delete(tb, &cfg); 718} 719 720void fib_add_ifaddr(struct in_ifaddr *ifa) 721{ 722 struct in_device *in_dev = ifa->ifa_dev; 723 struct net_device *dev = in_dev->dev; 724 struct in_ifaddr *prim = ifa; 725 __be32 mask = ifa->ifa_mask; 726 __be32 addr = ifa->ifa_local; 727 __be32 prefix = ifa->ifa_address&mask; 728 729 if (ifa->ifa_flags&IFA_F_SECONDARY) { 730 prim = inet_ifa_byprefix(in_dev, prefix, mask); 731 if (prim == NULL) { 732 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 733 return; 734 } 735 } 736 737 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 738 739 if (!(dev->flags&IFF_UP)) 740 return; 741 742 /* Add broadcast address, if it is explicitly assigned. */ 743 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 744 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 745 746 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 747 (prefix != addr || ifa->ifa_prefixlen < 32)) { 748 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 749 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 750 751 /* Add network specific broadcasts, when it takes a sense */ 752 if (ifa->ifa_prefixlen < 31) { 753 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 754 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 755 } 756 } 757} 758 759static void fib_del_ifaddr(struct in_ifaddr *ifa) 760{ 761 struct in_device *in_dev = ifa->ifa_dev; 762 struct net_device *dev = in_dev->dev; 763 struct in_ifaddr *ifa1; 764 struct in_ifaddr *prim = ifa; 765 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 766 __be32 any = ifa->ifa_address&ifa->ifa_mask; 767#define LOCAL_OK 1 768#define BRD_OK 2 769#define BRD0_OK 4 770#define BRD1_OK 8 771 unsigned ok = 0; 772 773 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 774 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 775 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 776 else { 777 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 778 if (prim == NULL) { 779 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 780 return; 781 } 782 } 783 784 /* Deletion is more complicated than add. 785 We should take care of not to delete too much :-) 786 787 Scan address list to be sure that addresses are really gone. 788 */ 789 790 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 791 if (ifa->ifa_local == ifa1->ifa_local) 792 ok |= LOCAL_OK; 793 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 794 ok |= BRD_OK; 795 if (brd == ifa1->ifa_broadcast) 796 ok |= BRD1_OK; 797 if (any == ifa1->ifa_broadcast) 798 ok |= BRD0_OK; 799 } 800 801 if (!(ok&BRD_OK)) 802 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 803 if (!(ok&BRD1_OK)) 804 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 805 if (!(ok&BRD0_OK)) 806 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 807 if (!(ok&LOCAL_OK)) { 808 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 809 810 /* Check, that this local address finally disappeared. */ 811 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 812 /* And the last, but not the least thing. 813 We must flush stray FIB entries. 814 815 First of all, we scan fib_info list searching 816 for stray nexthop entries, then ignite fib_flush. 817 */ 818 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) 819 fib_flush(dev_net(dev)); 820 } 821 } 822#undef LOCAL_OK 823#undef BRD_OK 824#undef BRD0_OK 825#undef BRD1_OK 826} 827 828static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 829{ 830 831 struct fib_result res; 832 struct flowi fl = { .mark = frn->fl_mark, 833 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 834 .tos = frn->fl_tos, 835 .scope = frn->fl_scope } } }; 836 837#ifdef CONFIG_IP_MULTIPLE_TABLES 838 res.r = NULL; 839#endif 840 841 frn->err = -ENOENT; 842 if (tb) { 843 local_bh_disable(); 844 845 frn->tb_id = tb->tb_id; 846 frn->err = fib_table_lookup(tb, &fl, &res); 847 848 if (!frn->err) { 849 frn->prefixlen = res.prefixlen; 850 frn->nh_sel = res.nh_sel; 851 frn->type = res.type; 852 frn->scope = res.scope; 853 fib_res_put(&res); 854 } 855 local_bh_enable(); 856 } 857} 858 859static void nl_fib_input(struct sk_buff *skb) 860{ 861 struct net *net; 862 struct fib_result_nl *frn; 863 struct nlmsghdr *nlh; 864 struct fib_table *tb; 865 u32 pid; 866 867 net = sock_net(skb->sk); 868 nlh = nlmsg_hdr(skb); 869 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 870 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 871 return; 872 873 skb = skb_clone(skb, GFP_KERNEL); 874 if (skb == NULL) 875 return; 876 nlh = nlmsg_hdr(skb); 877 878 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 879 tb = fib_get_table(net, frn->tb_id_in); 880 881 nl_fib_lookup(frn, tb); 882 883 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 884 NETLINK_CB(skb).pid = 0; /* from kernel */ 885 NETLINK_CB(skb).dst_group = 0; /* unicast */ 886 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 887} 888 889static int __net_init nl_fib_lookup_init(struct net *net) 890{ 891 struct sock *sk; 892 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, 893 nl_fib_input, NULL, THIS_MODULE); 894 if (sk == NULL) 895 return -EAFNOSUPPORT; 896 net->ipv4.fibnl = sk; 897 return 0; 898} 899 900static void nl_fib_lookup_exit(struct net *net) 901{ 902 netlink_kernel_release(net->ipv4.fibnl); 903 net->ipv4.fibnl = NULL; 904} 905 906static void fib_disable_ip(struct net_device *dev, int force, int delay) 907{ 908 if (fib_sync_down_dev(dev, force)) 909 fib_flush(dev_net(dev)); 910 rt_cache_flush(dev_net(dev), delay); 911 arp_ifdown(dev); 912} 913 914static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 915{ 916 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 917 struct net_device *dev = ifa->ifa_dev->dev; 918 919 switch (event) { 920 case NETDEV_UP: 921 fib_add_ifaddr(ifa); 922#ifdef CONFIG_IP_ROUTE_MULTIPATH 923 fib_sync_up(dev); 924#endif 925 rt_cache_flush(dev_net(dev), -1); 926 break; 927 case NETDEV_DOWN: 928 fib_del_ifaddr(ifa); 929 if (ifa->ifa_dev->ifa_list == NULL) { 930 /* Last address was deleted from this interface. 931 Disable IP. 932 */ 933 fib_disable_ip(dev, 1, 0); 934 } else { 935 rt_cache_flush(dev_net(dev), -1); 936 } 937 break; 938 } 939 return NOTIFY_DONE; 940} 941 942static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 943{ 944 struct net_device *dev = ptr; 945 struct in_device *in_dev = __in_dev_get_rtnl(dev); 946 947 if (event == NETDEV_UNREGISTER) { 948 fib_disable_ip(dev, 2, -1); 949 return NOTIFY_DONE; 950 } 951 952 if (!in_dev) 953 return NOTIFY_DONE; 954 955 switch (event) { 956 case NETDEV_UP: 957 for_ifa(in_dev) { 958 fib_add_ifaddr(ifa); 959 } endfor_ifa(in_dev); 960#ifdef CONFIG_IP_ROUTE_MULTIPATH 961 fib_sync_up(dev); 962#endif 963 rt_cache_flush(dev_net(dev), -1); 964 break; 965 case NETDEV_DOWN: 966 fib_disable_ip(dev, 0, 0); 967 break; 968 case NETDEV_CHANGEMTU: 969 case NETDEV_CHANGE: 970 rt_cache_flush(dev_net(dev), 0); 971 break; 972 case NETDEV_UNREGISTER_BATCH: 973 rt_cache_flush_batch(); 974 break; 975 } 976 return NOTIFY_DONE; 977} 978 979static struct notifier_block fib_inetaddr_notifier = { 980 .notifier_call = fib_inetaddr_event, 981}; 982 983static struct notifier_block fib_netdev_notifier = { 984 .notifier_call = fib_netdev_event, 985}; 986 987static int __net_init ip_fib_net_init(struct net *net) 988{ 989 int err; 990 unsigned int i; 991 992 net->ipv4.fib_table_hash = kzalloc( 993 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); 994 if (net->ipv4.fib_table_hash == NULL) 995 return -ENOMEM; 996 997 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 998 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); 999 1000 err = fib4_rules_init(net); 1001 if (err < 0) 1002 goto fail; 1003 return 0; 1004 1005fail: 1006 kfree(net->ipv4.fib_table_hash); 1007 return err; 1008} 1009 1010static void ip_fib_net_exit(struct net *net) 1011{ 1012 unsigned int i; 1013 1014#ifdef CONFIG_IP_MULTIPLE_TABLES 1015 fib4_rules_exit(net); 1016#endif 1017 1018 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1019 struct fib_table *tb; 1020 struct hlist_head *head; 1021 struct hlist_node *node, *tmp; 1022 1023 head = &net->ipv4.fib_table_hash[i]; 1024 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1025 hlist_del(node); 1026 fib_table_flush(tb); 1027 kfree(tb); 1028 } 1029 } 1030 kfree(net->ipv4.fib_table_hash); 1031} 1032 1033static int __net_init fib_net_init(struct net *net) 1034{ 1035 int error; 1036 1037 error = ip_fib_net_init(net); 1038 if (error < 0) 1039 goto out; 1040 error = nl_fib_lookup_init(net); 1041 if (error < 0) 1042 goto out_nlfl; 1043 error = fib_proc_init(net); 1044 if (error < 0) 1045 goto out_proc; 1046out: 1047 return error; 1048 1049out_proc: 1050 nl_fib_lookup_exit(net); 1051out_nlfl: 1052 ip_fib_net_exit(net); 1053 goto out; 1054} 1055 1056static void __net_exit fib_net_exit(struct net *net) 1057{ 1058 fib_proc_exit(net); 1059 nl_fib_lookup_exit(net); 1060 ip_fib_net_exit(net); 1061} 1062 1063static struct pernet_operations fib_net_ops = { 1064 .init = fib_net_init, 1065 .exit = fib_net_exit, 1066}; 1067 1068void __init ip_fib_init(void) 1069{ 1070 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1071 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1072 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1073 1074 register_pernet_subsys(&fib_net_ops); 1075 register_netdevice_notifier(&fib_netdev_notifier); 1076 register_inetaddr_notifier(&fib_inetaddr_notifier); 1077 1078 fib_hash_init(); 1079} 1080 1081EXPORT_SYMBOL(inet_addr_type); 1082EXPORT_SYMBOL(inet_dev_addr_type); 1083EXPORT_SYMBOL(ip_dev_find); 1084