fib_frontend.c revision 9e762a4a89b302cb3b26a1f9bb33eff459eaeca9
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $ 9 * 10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 */ 17 18#include <linux/module.h> 19#include <asm/uaccess.h> 20#include <asm/system.h> 21#include <linux/bitops.h> 22#include <linux/capability.h> 23#include <linux/types.h> 24#include <linux/kernel.h> 25#include <linux/sched.h> 26#include <linux/mm.h> 27#include <linux/string.h> 28#include <linux/socket.h> 29#include <linux/sockios.h> 30#include <linux/errno.h> 31#include <linux/in.h> 32#include <linux/inet.h> 33#include <linux/inetdevice.h> 34#include <linux/netdevice.h> 35#include <linux/if_addr.h> 36#include <linux/if_arp.h> 37#include <linux/skbuff.h> 38#include <linux/netlink.h> 39#include <linux/init.h> 40 41#include <net/ip.h> 42#include <net/protocol.h> 43#include <net/route.h> 44#include <net/tcp.h> 45#include <net/sock.h> 46#include <net/icmp.h> 47#include <net/arp.h> 48#include <net/ip_fib.h> 49 50#define FFprint(a...) printk(KERN_DEBUG a) 51 52#ifndef CONFIG_IP_MULTIPLE_TABLES 53 54#define RT_TABLE_MIN RT_TABLE_MAIN 55 56struct fib_table *ip_fib_local_table; 57struct fib_table *ip_fib_main_table; 58 59#else 60 61#define RT_TABLE_MIN 1 62 63struct fib_table *fib_tables[RT_TABLE_MAX+1]; 64 65struct fib_table *__fib_new_table(u32 id) 66{ 67 struct fib_table *tb; 68 69 tb = fib_hash_init(id); 70 if (!tb) 71 return NULL; 72 fib_tables[id] = tb; 73 return tb; 74} 75 76 77#endif /* CONFIG_IP_MULTIPLE_TABLES */ 78 79 80static void fib_flush(void) 81{ 82 int flushed = 0; 83#ifdef CONFIG_IP_MULTIPLE_TABLES 84 struct fib_table *tb; 85 u32 id; 86 87 for (id = RT_TABLE_MAX; id>0; id--) { 88 if ((tb = fib_get_table(id))==NULL) 89 continue; 90 flushed += tb->tb_flush(tb); 91 } 92#else /* CONFIG_IP_MULTIPLE_TABLES */ 93 flushed += ip_fib_main_table->tb_flush(ip_fib_main_table); 94 flushed += ip_fib_local_table->tb_flush(ip_fib_local_table); 95#endif /* CONFIG_IP_MULTIPLE_TABLES */ 96 97 if (flushed) 98 rt_cache_flush(-1); 99} 100 101/* 102 * Find the first device with a given source address. 103 */ 104 105struct net_device * ip_dev_find(u32 addr) 106{ 107 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 108 struct fib_result res; 109 struct net_device *dev = NULL; 110 111#ifdef CONFIG_IP_MULTIPLE_TABLES 112 res.r = NULL; 113#endif 114 115 if (!ip_fib_local_table || 116 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res)) 117 return NULL; 118 if (res.type != RTN_LOCAL) 119 goto out; 120 dev = FIB_RES_DEV(res); 121 122 if (dev) 123 dev_hold(dev); 124out: 125 fib_res_put(&res); 126 return dev; 127} 128 129unsigned inet_addr_type(u32 addr) 130{ 131 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 132 struct fib_result res; 133 unsigned ret = RTN_BROADCAST; 134 135 if (ZERONET(addr) || BADCLASS(addr)) 136 return RTN_BROADCAST; 137 if (MULTICAST(addr)) 138 return RTN_MULTICAST; 139 140#ifdef CONFIG_IP_MULTIPLE_TABLES 141 res.r = NULL; 142#endif 143 144 if (ip_fib_local_table) { 145 ret = RTN_UNICAST; 146 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table, 147 &fl, &res)) { 148 ret = res.type; 149 fib_res_put(&res); 150 } 151 } 152 return ret; 153} 154 155/* Given (packet source, input interface) and optional (dst, oif, tos): 156 - (main) check, that source is valid i.e. not broadcast or our local 157 address. 158 - figure out what "logical" interface this packet arrived 159 and calculate "specific destination" address. 160 - check, that packet arrived from expected physical interface. 161 */ 162 163int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, 164 struct net_device *dev, u32 *spec_dst, u32 *itag) 165{ 166 struct in_device *in_dev; 167 struct flowi fl = { .nl_u = { .ip4_u = 168 { .daddr = src, 169 .saddr = dst, 170 .tos = tos } }, 171 .iif = oif }; 172 struct fib_result res; 173 int no_addr, rpf; 174 int ret; 175 176 no_addr = rpf = 0; 177 rcu_read_lock(); 178 in_dev = __in_dev_get_rcu(dev); 179 if (in_dev) { 180 no_addr = in_dev->ifa_list == NULL; 181 rpf = IN_DEV_RPFILTER(in_dev); 182 } 183 rcu_read_unlock(); 184 185 if (in_dev == NULL) 186 goto e_inval; 187 188 if (fib_lookup(&fl, &res)) 189 goto last_resort; 190 if (res.type != RTN_UNICAST) 191 goto e_inval_res; 192 *spec_dst = FIB_RES_PREFSRC(res); 193 fib_combine_itag(itag, &res); 194#ifdef CONFIG_IP_ROUTE_MULTIPATH 195 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) 196#else 197 if (FIB_RES_DEV(res) == dev) 198#endif 199 { 200 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 201 fib_res_put(&res); 202 return ret; 203 } 204 fib_res_put(&res); 205 if (no_addr) 206 goto last_resort; 207 if (rpf) 208 goto e_inval; 209 fl.oif = dev->ifindex; 210 211 ret = 0; 212 if (fib_lookup(&fl, &res) == 0) { 213 if (res.type == RTN_UNICAST) { 214 *spec_dst = FIB_RES_PREFSRC(res); 215 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 216 } 217 fib_res_put(&res); 218 } 219 return ret; 220 221last_resort: 222 if (rpf) 223 goto e_inval; 224 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 225 *itag = 0; 226 return 0; 227 228e_inval_res: 229 fib_res_put(&res); 230e_inval: 231 return -EINVAL; 232} 233 234#ifndef CONFIG_IP_NOSIOCRT 235 236/* 237 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 238 */ 239 240int ip_rt_ioctl(unsigned int cmd, void __user *arg) 241{ 242 int err; 243 struct kern_rta rta; 244 struct rtentry r; 245 struct { 246 struct nlmsghdr nlh; 247 struct rtmsg rtm; 248 } req; 249 250 switch (cmd) { 251 case SIOCADDRT: /* Add a route */ 252 case SIOCDELRT: /* Delete a route */ 253 if (!capable(CAP_NET_ADMIN)) 254 return -EPERM; 255 if (copy_from_user(&r, arg, sizeof(struct rtentry))) 256 return -EFAULT; 257 rtnl_lock(); 258 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); 259 if (err == 0) { 260 if (cmd == SIOCDELRT) { 261 struct fib_table *tb = fib_get_table(req.rtm.rtm_table); 262 err = -ESRCH; 263 if (tb) 264 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); 265 } else { 266 struct fib_table *tb = fib_new_table(req.rtm.rtm_table); 267 err = -ENOBUFS; 268 if (tb) 269 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); 270 } 271 kfree(rta.rta_mx); 272 } 273 rtnl_unlock(); 274 return err; 275 } 276 return -EINVAL; 277} 278 279#else 280 281int ip_rt_ioctl(unsigned int cmd, void *arg) 282{ 283 return -EINVAL; 284} 285 286#endif 287 288static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) 289{ 290 int i; 291 292 for (i=1; i<=RTA_MAX; i++, rta++) { 293 struct rtattr *attr = *rta; 294 if (attr) { 295 if (RTA_PAYLOAD(attr) < 4) 296 return -EINVAL; 297 if (i != RTA_MULTIPATH && i != RTA_METRICS && 298 i != RTA_TABLE) 299 *rta = (struct rtattr*)RTA_DATA(attr); 300 } 301 } 302 return 0; 303} 304 305int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 306{ 307 struct fib_table * tb; 308 struct rtattr **rta = arg; 309 struct rtmsg *r = NLMSG_DATA(nlh); 310 311 if (inet_check_attr(r, rta)) 312 return -EINVAL; 313 314 tb = fib_get_table(rtm_get_table(rta, r->rtm_table)); 315 if (tb) 316 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); 317 return -ESRCH; 318} 319 320int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 321{ 322 struct fib_table * tb; 323 struct rtattr **rta = arg; 324 struct rtmsg *r = NLMSG_DATA(nlh); 325 326 if (inet_check_attr(r, rta)) 327 return -EINVAL; 328 329 tb = fib_new_table(rtm_get_table(rta, r->rtm_table)); 330 if (tb) 331 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); 332 return -ENOBUFS; 333} 334 335int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 336{ 337 u32 t; 338 u32 s_t; 339 struct fib_table *tb; 340 341 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && 342 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) 343 return ip_rt_dump(skb, cb); 344 345 s_t = cb->args[0]; 346 if (s_t == 0) 347 s_t = cb->args[0] = RT_TABLE_MIN; 348 349 for (t=s_t; t<=RT_TABLE_MAX; t++) { 350 if (t < s_t) continue; 351 if (t > s_t) 352 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 353 if ((tb = fib_get_table(t))==NULL) 354 continue; 355 if (tb->tb_dump(tb, skb, cb) < 0) 356 break; 357 } 358 359 cb->args[0] = t; 360 361 return skb->len; 362} 363 364/* Prepare and feed intra-kernel routing request. 365 Really, it should be netlink message, but :-( netlink 366 can be not configured, so that we feed it directly 367 to fib engine. It is legal, because all events occur 368 only when netlink is already locked. 369 */ 370 371static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) 372{ 373 struct fib_table * tb; 374 struct { 375 struct nlmsghdr nlh; 376 struct rtmsg rtm; 377 } req; 378 struct kern_rta rta; 379 380 memset(&req.rtm, 0, sizeof(req.rtm)); 381 memset(&rta, 0, sizeof(rta)); 382 383 if (type == RTN_UNICAST) 384 tb = fib_new_table(RT_TABLE_MAIN); 385 else 386 tb = fib_new_table(RT_TABLE_LOCAL); 387 388 if (tb == NULL) 389 return; 390 391 req.nlh.nlmsg_len = sizeof(req); 392 req.nlh.nlmsg_type = cmd; 393 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; 394 req.nlh.nlmsg_pid = 0; 395 req.nlh.nlmsg_seq = 0; 396 397 req.rtm.rtm_dst_len = dst_len; 398 req.rtm.rtm_table = tb->tb_id; 399 req.rtm.rtm_protocol = RTPROT_KERNEL; 400 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); 401 req.rtm.rtm_type = type; 402 403 rta.rta_dst = &dst; 404 rta.rta_prefsrc = &ifa->ifa_local; 405 rta.rta_oif = &ifa->ifa_dev->dev->ifindex; 406 407 if (cmd == RTM_NEWROUTE) 408 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); 409 else 410 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); 411} 412 413void fib_add_ifaddr(struct in_ifaddr *ifa) 414{ 415 struct in_device *in_dev = ifa->ifa_dev; 416 struct net_device *dev = in_dev->dev; 417 struct in_ifaddr *prim = ifa; 418 u32 mask = ifa->ifa_mask; 419 u32 addr = ifa->ifa_local; 420 u32 prefix = ifa->ifa_address&mask; 421 422 if (ifa->ifa_flags&IFA_F_SECONDARY) { 423 prim = inet_ifa_byprefix(in_dev, prefix, mask); 424 if (prim == NULL) { 425 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n"); 426 return; 427 } 428 } 429 430 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 431 432 if (!(dev->flags&IFF_UP)) 433 return; 434 435 /* Add broadcast address, if it is explicitly assigned. */ 436 if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF) 437 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 438 439 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 440 (prefix != addr || ifa->ifa_prefixlen < 32)) { 441 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 442 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 443 444 /* Add network specific broadcasts, when it takes a sense */ 445 if (ifa->ifa_prefixlen < 31) { 446 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 447 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 448 } 449 } 450} 451 452static void fib_del_ifaddr(struct in_ifaddr *ifa) 453{ 454 struct in_device *in_dev = ifa->ifa_dev; 455 struct net_device *dev = in_dev->dev; 456 struct in_ifaddr *ifa1; 457 struct in_ifaddr *prim = ifa; 458 u32 brd = ifa->ifa_address|~ifa->ifa_mask; 459 u32 any = ifa->ifa_address&ifa->ifa_mask; 460#define LOCAL_OK 1 461#define BRD_OK 2 462#define BRD0_OK 4 463#define BRD1_OK 8 464 unsigned ok = 0; 465 466 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 467 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 468 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 469 else { 470 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 471 if (prim == NULL) { 472 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n"); 473 return; 474 } 475 } 476 477 /* Deletion is more complicated than add. 478 We should take care of not to delete too much :-) 479 480 Scan address list to be sure that addresses are really gone. 481 */ 482 483 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 484 if (ifa->ifa_local == ifa1->ifa_local) 485 ok |= LOCAL_OK; 486 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 487 ok |= BRD_OK; 488 if (brd == ifa1->ifa_broadcast) 489 ok |= BRD1_OK; 490 if (any == ifa1->ifa_broadcast) 491 ok |= BRD0_OK; 492 } 493 494 if (!(ok&BRD_OK)) 495 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 496 if (!(ok&BRD1_OK)) 497 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 498 if (!(ok&BRD0_OK)) 499 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 500 if (!(ok&LOCAL_OK)) { 501 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 502 503 /* Check, that this local address finally disappeared. */ 504 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { 505 /* And the last, but not the least thing. 506 We must flush stray FIB entries. 507 508 First of all, we scan fib_info list searching 509 for stray nexthop entries, then ignite fib_flush. 510 */ 511 if (fib_sync_down(ifa->ifa_local, NULL, 0)) 512 fib_flush(); 513 } 514 } 515#undef LOCAL_OK 516#undef BRD_OK 517#undef BRD0_OK 518#undef BRD1_OK 519} 520 521static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 522{ 523 524 struct fib_result res; 525 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 526 .fwmark = frn->fl_fwmark, 527 .tos = frn->fl_tos, 528 .scope = frn->fl_scope } } }; 529 if (tb) { 530 local_bh_disable(); 531 532 frn->tb_id = tb->tb_id; 533 frn->err = tb->tb_lookup(tb, &fl, &res); 534 535 if (!frn->err) { 536 frn->prefixlen = res.prefixlen; 537 frn->nh_sel = res.nh_sel; 538 frn->type = res.type; 539 frn->scope = res.scope; 540 } 541 local_bh_enable(); 542 } 543} 544 545static void nl_fib_input(struct sock *sk, int len) 546{ 547 struct sk_buff *skb = NULL; 548 struct nlmsghdr *nlh = NULL; 549 struct fib_result_nl *frn; 550 u32 pid; 551 struct fib_table *tb; 552 553 skb = skb_dequeue(&sk->sk_receive_queue); 554 nlh = (struct nlmsghdr *)skb->data; 555 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 556 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) { 557 kfree_skb(skb); 558 return; 559 } 560 561 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 562 tb = fib_get_table(frn->tb_id_in); 563 564 nl_fib_lookup(frn, tb); 565 566 pid = nlh->nlmsg_pid; /*pid of sending process */ 567 NETLINK_CB(skb).pid = 0; /* from kernel */ 568 NETLINK_CB(skb).dst_pid = pid; 569 NETLINK_CB(skb).dst_group = 0; /* unicast */ 570 netlink_unicast(sk, skb, pid, MSG_DONTWAIT); 571} 572 573static void nl_fib_lookup_init(void) 574{ 575 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); 576} 577 578static void fib_disable_ip(struct net_device *dev, int force) 579{ 580 if (fib_sync_down(0, dev, force)) 581 fib_flush(); 582 rt_cache_flush(0); 583 arp_ifdown(dev); 584} 585 586static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 587{ 588 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; 589 590 switch (event) { 591 case NETDEV_UP: 592 fib_add_ifaddr(ifa); 593#ifdef CONFIG_IP_ROUTE_MULTIPATH 594 fib_sync_up(ifa->ifa_dev->dev); 595#endif 596 rt_cache_flush(-1); 597 break; 598 case NETDEV_DOWN: 599 fib_del_ifaddr(ifa); 600 if (ifa->ifa_dev->ifa_list == NULL) { 601 /* Last address was deleted from this interface. 602 Disable IP. 603 */ 604 fib_disable_ip(ifa->ifa_dev->dev, 1); 605 } else { 606 rt_cache_flush(-1); 607 } 608 break; 609 } 610 return NOTIFY_DONE; 611} 612 613static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 614{ 615 struct net_device *dev = ptr; 616 struct in_device *in_dev = __in_dev_get_rtnl(dev); 617 618 if (event == NETDEV_UNREGISTER) { 619 fib_disable_ip(dev, 2); 620 return NOTIFY_DONE; 621 } 622 623 if (!in_dev) 624 return NOTIFY_DONE; 625 626 switch (event) { 627 case NETDEV_UP: 628 for_ifa(in_dev) { 629 fib_add_ifaddr(ifa); 630 } endfor_ifa(in_dev); 631#ifdef CONFIG_IP_ROUTE_MULTIPATH 632 fib_sync_up(dev); 633#endif 634 rt_cache_flush(-1); 635 break; 636 case NETDEV_DOWN: 637 fib_disable_ip(dev, 0); 638 break; 639 case NETDEV_CHANGEMTU: 640 case NETDEV_CHANGE: 641 rt_cache_flush(0); 642 break; 643 } 644 return NOTIFY_DONE; 645} 646 647static struct notifier_block fib_inetaddr_notifier = { 648 .notifier_call =fib_inetaddr_event, 649}; 650 651static struct notifier_block fib_netdev_notifier = { 652 .notifier_call =fib_netdev_event, 653}; 654 655void __init ip_fib_init(void) 656{ 657#ifndef CONFIG_IP_MULTIPLE_TABLES 658 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); 659 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); 660#else 661 fib4_rules_init(); 662#endif 663 664 register_netdevice_notifier(&fib_netdev_notifier); 665 register_inetaddr_notifier(&fib_inetaddr_notifier); 666 nl_fib_lookup_init(); 667} 668 669EXPORT_SYMBOL(inet_addr_type); 670EXPORT_SYMBOL(ip_dev_find); 671