devinet.c revision 9435eb1cf0b76b323019cebf8d16762a50a12a19
1/* 2 * NET3 IP device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Alan Cox, <gw4pts@gw4pts.ampr.org> 16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 17 * 18 * Changes: 19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr 20 * lists. 21 * Cyrus Durgin: updated for kmod 22 * Matthias Andree: in devinet_ioctl, compare label and 23 * address (4.4BSD alias style support), 24 * fall back to comparing just the label 25 * if no match found. 26 */ 27 28 29#include <asm/uaccess.h> 30#include <asm/system.h> 31#include <linux/bitops.h> 32#include <linux/capability.h> 33#include <linux/module.h> 34#include <linux/types.h> 35#include <linux/kernel.h> 36#include <linux/string.h> 37#include <linux/mm.h> 38#include <linux/socket.h> 39#include <linux/sockios.h> 40#include <linux/in.h> 41#include <linux/errno.h> 42#include <linux/interrupt.h> 43#include <linux/if_addr.h> 44#include <linux/if_ether.h> 45#include <linux/inet.h> 46#include <linux/netdevice.h> 47#include <linux/etherdevice.h> 48#include <linux/skbuff.h> 49#include <linux/init.h> 50#include <linux/notifier.h> 51#include <linux/inetdevice.h> 52#include <linux/igmp.h> 53#include <linux/slab.h> 54#include <linux/hash.h> 55#ifdef CONFIG_SYSCTL 56#include <linux/sysctl.h> 57#endif 58#include <linux/kmod.h> 59 60#include <net/arp.h> 61#include <net/ip.h> 62#include <net/route.h> 63#include <net/ip_fib.h> 64#include <net/rtnetlink.h> 65#include <net/net_namespace.h> 66 67static struct ipv4_devconf ipv4_devconf = { 68 .data = { 69 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 70 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 71 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 72 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 73 }, 74}; 75 76static struct ipv4_devconf ipv4_devconf_dflt = { 77 .data = { 78 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 79 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 80 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 81 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 82 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 83 }, 84}; 85 86#define IPV4_DEVCONF_DFLT(net, attr) \ 87 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr) 88 89static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { 90 [IFA_LOCAL] = { .type = NLA_U32 }, 91 [IFA_ADDRESS] = { .type = NLA_U32 }, 92 [IFA_BROADCAST] = { .type = NLA_U32 }, 93 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 94}; 95 96/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE 97 * value. So if you change this define, make appropriate changes to 98 * inet_addr_hash as well. 99 */ 100#define IN4_ADDR_HSIZE 256 101static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; 102static DEFINE_SPINLOCK(inet_addr_hash_lock); 103 104static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) 105{ 106 u32 val = (__force u32) addr ^ hash_ptr(net, 8); 107 108 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & 109 (IN4_ADDR_HSIZE - 1)); 110} 111 112static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) 113{ 114 unsigned int hash = inet_addr_hash(net, ifa->ifa_address); 115 116 spin_lock(&inet_addr_hash_lock); 117 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); 118 spin_unlock(&inet_addr_hash_lock); 119} 120 121static void inet_hash_remove(struct in_ifaddr *ifa) 122{ 123 spin_lock(&inet_addr_hash_lock); 124 hlist_del_init_rcu(&ifa->hash); 125 spin_unlock(&inet_addr_hash_lock); 126} 127 128/** 129 * __ip_dev_find - find the first device with a given source address. 130 * @net: the net namespace 131 * @addr: the source address 132 * @devref: if true, take a reference on the found device 133 * 134 * If a caller uses devref=false, it should be protected by RCU, or RTNL 135 */ 136struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 137{ 138 unsigned int hash = inet_addr_hash(net, addr); 139 struct net_device *result = NULL; 140 struct in_ifaddr *ifa; 141 struct hlist_node *node; 142 143 rcu_read_lock(); 144 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { 145 struct net_device *dev = ifa->ifa_dev->dev; 146 147 if (!net_eq(dev_net(dev), net)) 148 continue; 149 if (ifa->ifa_address == addr) { 150 result = dev; 151 break; 152 } 153 } 154 if (result && devref) 155 dev_hold(result); 156 rcu_read_unlock(); 157 return result; 158} 159EXPORT_SYMBOL(__ip_dev_find); 160 161static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 162 163static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 164static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 165 int destroy); 166#ifdef CONFIG_SYSCTL 167static void devinet_sysctl_register(struct in_device *idev); 168static void devinet_sysctl_unregister(struct in_device *idev); 169#else 170static inline void devinet_sysctl_register(struct in_device *idev) 171{ 172} 173static inline void devinet_sysctl_unregister(struct in_device *idev) 174{ 175} 176#endif 177 178/* Locks all the inet devices. */ 179 180static struct in_ifaddr *inet_alloc_ifa(void) 181{ 182 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); 183} 184 185static void inet_rcu_free_ifa(struct rcu_head *head) 186{ 187 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); 188 if (ifa->ifa_dev) 189 in_dev_put(ifa->ifa_dev); 190 kfree(ifa); 191} 192 193static inline void inet_free_ifa(struct in_ifaddr *ifa) 194{ 195 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); 196} 197 198void in_dev_finish_destroy(struct in_device *idev) 199{ 200 struct net_device *dev = idev->dev; 201 202 WARN_ON(idev->ifa_list); 203 WARN_ON(idev->mc_list); 204#ifdef NET_REFCNT_DEBUG 205 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", 206 idev, dev ? dev->name : "NIL"); 207#endif 208 dev_put(dev); 209 if (!idev->dead) 210 pr_err("Freeing alive in_device %p\n", idev); 211 else 212 kfree(idev); 213} 214EXPORT_SYMBOL(in_dev_finish_destroy); 215 216static struct in_device *inetdev_init(struct net_device *dev) 217{ 218 struct in_device *in_dev; 219 220 ASSERT_RTNL(); 221 222 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); 223 if (!in_dev) 224 goto out; 225 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, 226 sizeof(in_dev->cnf)); 227 in_dev->cnf.sysctl = NULL; 228 in_dev->dev = dev; 229 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); 230 if (!in_dev->arp_parms) 231 goto out_kfree; 232 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) 233 dev_disable_lro(dev); 234 /* Reference in_dev->dev */ 235 dev_hold(dev); 236 /* Account for reference dev->ip_ptr (below) */ 237 in_dev_hold(in_dev); 238 239 devinet_sysctl_register(in_dev); 240 ip_mc_init_dev(in_dev); 241 if (dev->flags & IFF_UP) 242 ip_mc_up(in_dev); 243 244 /* we can receive as soon as ip_ptr is set -- do this last */ 245 rcu_assign_pointer(dev->ip_ptr, in_dev); 246out: 247 return in_dev; 248out_kfree: 249 kfree(in_dev); 250 in_dev = NULL; 251 goto out; 252} 253 254static void in_dev_rcu_put(struct rcu_head *head) 255{ 256 struct in_device *idev = container_of(head, struct in_device, rcu_head); 257 in_dev_put(idev); 258} 259 260static void inetdev_destroy(struct in_device *in_dev) 261{ 262 struct in_ifaddr *ifa; 263 struct net_device *dev; 264 265 ASSERT_RTNL(); 266 267 dev = in_dev->dev; 268 269 in_dev->dead = 1; 270 271 ip_mc_destroy_dev(in_dev); 272 273 while ((ifa = in_dev->ifa_list) != NULL) { 274 inet_del_ifa(in_dev, &in_dev->ifa_list, 0); 275 inet_free_ifa(ifa); 276 } 277 278 rcu_assign_pointer(dev->ip_ptr, NULL); 279 280 devinet_sysctl_unregister(in_dev); 281 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 282 arp_ifdown(dev); 283 284 call_rcu(&in_dev->rcu_head, in_dev_rcu_put); 285} 286 287int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) 288{ 289 rcu_read_lock(); 290 for_primary_ifa(in_dev) { 291 if (inet_ifa_match(a, ifa)) { 292 if (!b || inet_ifa_match(b, ifa)) { 293 rcu_read_unlock(); 294 return 1; 295 } 296 } 297 } endfor_ifa(in_dev); 298 rcu_read_unlock(); 299 return 0; 300} 301 302static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 303 int destroy, struct nlmsghdr *nlh, u32 pid) 304{ 305 struct in_ifaddr *promote = NULL; 306 struct in_ifaddr *ifa, *ifa1 = *ifap; 307 struct in_ifaddr *last_prim = in_dev->ifa_list; 308 struct in_ifaddr *prev_prom = NULL; 309 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); 310 311 ASSERT_RTNL(); 312 313 /* 1. Deleting primary ifaddr forces deletion all secondaries 314 * unless alias promotion is set 315 **/ 316 317 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { 318 struct in_ifaddr **ifap1 = &ifa1->ifa_next; 319 320 while ((ifa = *ifap1) != NULL) { 321 if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 322 ifa1->ifa_scope <= ifa->ifa_scope) 323 last_prim = ifa; 324 325 if (!(ifa->ifa_flags & IFA_F_SECONDARY) || 326 ifa1->ifa_mask != ifa->ifa_mask || 327 !inet_ifa_match(ifa1->ifa_address, ifa)) { 328 ifap1 = &ifa->ifa_next; 329 prev_prom = ifa; 330 continue; 331 } 332 333 if (!do_promote) { 334 inet_hash_remove(ifa); 335 *ifap1 = ifa->ifa_next; 336 337 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); 338 blocking_notifier_call_chain(&inetaddr_chain, 339 NETDEV_DOWN, ifa); 340 inet_free_ifa(ifa); 341 } else { 342 promote = ifa; 343 break; 344 } 345 } 346 } 347 348 /* 2. Unlink it */ 349 350 *ifap = ifa1->ifa_next; 351 inet_hash_remove(ifa1); 352 353 /* 3. Announce address deletion */ 354 355 /* Send message first, then call notifier. 356 At first sight, FIB update triggered by notifier 357 will refer to already deleted ifaddr, that could confuse 358 netlink listeners. It is not true: look, gated sees 359 that route deleted and if it still thinks that ifaddr 360 is valid, it will try to restore deleted routes... Grr. 361 So that, this order is correct. 362 */ 363 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); 364 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 365 366 if (promote) { 367 368 if (prev_prom) { 369 prev_prom->ifa_next = promote->ifa_next; 370 promote->ifa_next = last_prim->ifa_next; 371 last_prim->ifa_next = promote; 372 } 373 374 promote->ifa_flags &= ~IFA_F_SECONDARY; 375 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); 376 blocking_notifier_call_chain(&inetaddr_chain, 377 NETDEV_UP, promote); 378 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { 379 if (ifa1->ifa_mask != ifa->ifa_mask || 380 !inet_ifa_match(ifa1->ifa_address, ifa)) 381 continue; 382 fib_add_ifaddr(ifa); 383 } 384 385 } 386 if (destroy) 387 inet_free_ifa(ifa1); 388} 389 390static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 391 int destroy) 392{ 393 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); 394} 395 396static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 397 u32 pid) 398{ 399 struct in_device *in_dev = ifa->ifa_dev; 400 struct in_ifaddr *ifa1, **ifap, **last_primary; 401 402 ASSERT_RTNL(); 403 404 if (!ifa->ifa_local) { 405 inet_free_ifa(ifa); 406 return 0; 407 } 408 409 ifa->ifa_flags &= ~IFA_F_SECONDARY; 410 last_primary = &in_dev->ifa_list; 411 412 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 413 ifap = &ifa1->ifa_next) { 414 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && 415 ifa->ifa_scope <= ifa1->ifa_scope) 416 last_primary = &ifa1->ifa_next; 417 if (ifa1->ifa_mask == ifa->ifa_mask && 418 inet_ifa_match(ifa1->ifa_address, ifa)) { 419 if (ifa1->ifa_local == ifa->ifa_local) { 420 inet_free_ifa(ifa); 421 return -EEXIST; 422 } 423 if (ifa1->ifa_scope != ifa->ifa_scope) { 424 inet_free_ifa(ifa); 425 return -EINVAL; 426 } 427 ifa->ifa_flags |= IFA_F_SECONDARY; 428 } 429 } 430 431 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { 432 net_srandom(ifa->ifa_local); 433 ifap = last_primary; 434 } 435 436 ifa->ifa_next = *ifap; 437 *ifap = ifa; 438 439 inet_hash_insert(dev_net(in_dev->dev), ifa); 440 441 /* Send message first, then call notifier. 442 Notifier will trigger FIB update, so that 443 listeners of netlink will know about new ifaddr */ 444 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); 445 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 446 447 return 0; 448} 449 450static int inet_insert_ifa(struct in_ifaddr *ifa) 451{ 452 return __inet_insert_ifa(ifa, NULL, 0); 453} 454 455static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 456{ 457 struct in_device *in_dev = __in_dev_get_rtnl(dev); 458 459 ASSERT_RTNL(); 460 461 if (!in_dev) { 462 inet_free_ifa(ifa); 463 return -ENOBUFS; 464 } 465 ipv4_devconf_setall(in_dev); 466 if (ifa->ifa_dev != in_dev) { 467 WARN_ON(ifa->ifa_dev); 468 in_dev_hold(in_dev); 469 ifa->ifa_dev = in_dev; 470 } 471 if (ipv4_is_loopback(ifa->ifa_local)) 472 ifa->ifa_scope = RT_SCOPE_HOST; 473 return inet_insert_ifa(ifa); 474} 475 476/* Caller must hold RCU or RTNL : 477 * We dont take a reference on found in_device 478 */ 479struct in_device *inetdev_by_index(struct net *net, int ifindex) 480{ 481 struct net_device *dev; 482 struct in_device *in_dev = NULL; 483 484 rcu_read_lock(); 485 dev = dev_get_by_index_rcu(net, ifindex); 486 if (dev) 487 in_dev = rcu_dereference_rtnl(dev->ip_ptr); 488 rcu_read_unlock(); 489 return in_dev; 490} 491EXPORT_SYMBOL(inetdev_by_index); 492 493/* Called only from RTNL semaphored context. No locks. */ 494 495struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, 496 __be32 mask) 497{ 498 ASSERT_RTNL(); 499 500 for_primary_ifa(in_dev) { 501 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) 502 return ifa; 503 } endfor_ifa(in_dev); 504 return NULL; 505} 506 507static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 508{ 509 struct net *net = sock_net(skb->sk); 510 struct nlattr *tb[IFA_MAX+1]; 511 struct in_device *in_dev; 512 struct ifaddrmsg *ifm; 513 struct in_ifaddr *ifa, **ifap; 514 int err = -EINVAL; 515 516 ASSERT_RTNL(); 517 518 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 519 if (err < 0) 520 goto errout; 521 522 ifm = nlmsg_data(nlh); 523 in_dev = inetdev_by_index(net, ifm->ifa_index); 524 if (in_dev == NULL) { 525 err = -ENODEV; 526 goto errout; 527 } 528 529 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 530 ifap = &ifa->ifa_next) { 531 if (tb[IFA_LOCAL] && 532 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL])) 533 continue; 534 535 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) 536 continue; 537 538 if (tb[IFA_ADDRESS] && 539 (ifm->ifa_prefixlen != ifa->ifa_prefixlen || 540 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) 541 continue; 542 543 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); 544 return 0; 545 } 546 547 err = -EADDRNOTAVAIL; 548errout: 549 return err; 550} 551 552static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) 553{ 554 struct nlattr *tb[IFA_MAX+1]; 555 struct in_ifaddr *ifa; 556 struct ifaddrmsg *ifm; 557 struct net_device *dev; 558 struct in_device *in_dev; 559 int err; 560 561 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 562 if (err < 0) 563 goto errout; 564 565 ifm = nlmsg_data(nlh); 566 err = -EINVAL; 567 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) 568 goto errout; 569 570 dev = __dev_get_by_index(net, ifm->ifa_index); 571 err = -ENODEV; 572 if (dev == NULL) 573 goto errout; 574 575 in_dev = __in_dev_get_rtnl(dev); 576 err = -ENOBUFS; 577 if (in_dev == NULL) 578 goto errout; 579 580 ifa = inet_alloc_ifa(); 581 if (ifa == NULL) 582 /* 583 * A potential indev allocation can be left alive, it stays 584 * assigned to its device and is destroy with it. 585 */ 586 goto errout; 587 588 ipv4_devconf_setall(in_dev); 589 in_dev_hold(in_dev); 590 591 if (tb[IFA_ADDRESS] == NULL) 592 tb[IFA_ADDRESS] = tb[IFA_LOCAL]; 593 594 INIT_HLIST_NODE(&ifa->hash); 595 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 596 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 597 ifa->ifa_flags = ifm->ifa_flags; 598 ifa->ifa_scope = ifm->ifa_scope; 599 ifa->ifa_dev = in_dev; 600 601 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]); 602 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]); 603 604 if (tb[IFA_BROADCAST]) 605 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); 606 607 if (tb[IFA_LABEL]) 608 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); 609 else 610 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 611 612 return ifa; 613 614errout: 615 return ERR_PTR(err); 616} 617 618static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 619{ 620 struct net *net = sock_net(skb->sk); 621 struct in_ifaddr *ifa; 622 623 ASSERT_RTNL(); 624 625 ifa = rtm_to_ifaddr(net, nlh); 626 if (IS_ERR(ifa)) 627 return PTR_ERR(ifa); 628 629 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); 630} 631 632/* 633 * Determine a default network mask, based on the IP address. 634 */ 635 636static inline int inet_abc_len(__be32 addr) 637{ 638 int rc = -1; /* Something else, probably a multicast. */ 639 640 if (ipv4_is_zeronet(addr)) 641 rc = 0; 642 else { 643 __u32 haddr = ntohl(addr); 644 645 if (IN_CLASSA(haddr)) 646 rc = 8; 647 else if (IN_CLASSB(haddr)) 648 rc = 16; 649 else if (IN_CLASSC(haddr)) 650 rc = 24; 651 } 652 653 return rc; 654} 655 656 657int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) 658{ 659 struct ifreq ifr; 660 struct sockaddr_in sin_orig; 661 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 662 struct in_device *in_dev; 663 struct in_ifaddr **ifap = NULL; 664 struct in_ifaddr *ifa = NULL; 665 struct net_device *dev; 666 char *colon; 667 int ret = -EFAULT; 668 int tryaddrmatch = 0; 669 670 /* 671 * Fetch the caller's info block into kernel space 672 */ 673 674 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 675 goto out; 676 ifr.ifr_name[IFNAMSIZ - 1] = 0; 677 678 /* save original address for comparison */ 679 memcpy(&sin_orig, sin, sizeof(*sin)); 680 681 colon = strchr(ifr.ifr_name, ':'); 682 if (colon) 683 *colon = 0; 684 685 dev_load(net, ifr.ifr_name); 686 687 switch (cmd) { 688 case SIOCGIFADDR: /* Get interface address */ 689 case SIOCGIFBRDADDR: /* Get the broadcast address */ 690 case SIOCGIFDSTADDR: /* Get the destination address */ 691 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 692 /* Note that these ioctls will not sleep, 693 so that we do not impose a lock. 694 One day we will be forced to put shlock here (I mean SMP) 695 */ 696 tryaddrmatch = (sin_orig.sin_family == AF_INET); 697 memset(sin, 0, sizeof(*sin)); 698 sin->sin_family = AF_INET; 699 break; 700 701 case SIOCSIFFLAGS: 702 ret = -EACCES; 703 if (!capable(CAP_NET_ADMIN)) 704 goto out; 705 break; 706 case SIOCSIFADDR: /* Set interface address (and family) */ 707 case SIOCSIFBRDADDR: /* Set the broadcast address */ 708 case SIOCSIFDSTADDR: /* Set the destination address */ 709 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 710 ret = -EACCES; 711 if (!capable(CAP_NET_ADMIN)) 712 goto out; 713 ret = -EINVAL; 714 if (sin->sin_family != AF_INET) 715 goto out; 716 break; 717 default: 718 ret = -EINVAL; 719 goto out; 720 } 721 722 rtnl_lock(); 723 724 ret = -ENODEV; 725 dev = __dev_get_by_name(net, ifr.ifr_name); 726 if (!dev) 727 goto done; 728 729 if (colon) 730 *colon = ':'; 731 732 in_dev = __in_dev_get_rtnl(dev); 733 if (in_dev) { 734 if (tryaddrmatch) { 735 /* Matthias Andree */ 736 /* compare label and address (4.4BSD style) */ 737 /* note: we only do this for a limited set of ioctls 738 and only if the original address family was AF_INET. 739 This is checked above. */ 740 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 741 ifap = &ifa->ifa_next) { 742 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 743 sin_orig.sin_addr.s_addr == 744 ifa->ifa_address) { 745 break; /* found */ 746 } 747 } 748 } 749 /* we didn't get a match, maybe the application is 750 4.3BSD-style and passed in junk so we fall back to 751 comparing just the label */ 752 if (!ifa) { 753 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 754 ifap = &ifa->ifa_next) 755 if (!strcmp(ifr.ifr_name, ifa->ifa_label)) 756 break; 757 } 758 } 759 760 ret = -EADDRNOTAVAIL; 761 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) 762 goto done; 763 764 switch (cmd) { 765 case SIOCGIFADDR: /* Get interface address */ 766 sin->sin_addr.s_addr = ifa->ifa_local; 767 goto rarok; 768 769 case SIOCGIFBRDADDR: /* Get the broadcast address */ 770 sin->sin_addr.s_addr = ifa->ifa_broadcast; 771 goto rarok; 772 773 case SIOCGIFDSTADDR: /* Get the destination address */ 774 sin->sin_addr.s_addr = ifa->ifa_address; 775 goto rarok; 776 777 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 778 sin->sin_addr.s_addr = ifa->ifa_mask; 779 goto rarok; 780 781 case SIOCSIFFLAGS: 782 if (colon) { 783 ret = -EADDRNOTAVAIL; 784 if (!ifa) 785 break; 786 ret = 0; 787 if (!(ifr.ifr_flags & IFF_UP)) 788 inet_del_ifa(in_dev, ifap, 1); 789 break; 790 } 791 ret = dev_change_flags(dev, ifr.ifr_flags); 792 break; 793 794 case SIOCSIFADDR: /* Set interface address (and family) */ 795 ret = -EINVAL; 796 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 797 break; 798 799 if (!ifa) { 800 ret = -ENOBUFS; 801 ifa = inet_alloc_ifa(); 802 INIT_HLIST_NODE(&ifa->hash); 803 if (!ifa) 804 break; 805 if (colon) 806 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 807 else 808 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 809 } else { 810 ret = 0; 811 if (ifa->ifa_local == sin->sin_addr.s_addr) 812 break; 813 inet_del_ifa(in_dev, ifap, 0); 814 ifa->ifa_broadcast = 0; 815 ifa->ifa_scope = 0; 816 } 817 818 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; 819 820 if (!(dev->flags & IFF_POINTOPOINT)) { 821 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 822 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 823 if ((dev->flags & IFF_BROADCAST) && 824 ifa->ifa_prefixlen < 31) 825 ifa->ifa_broadcast = ifa->ifa_address | 826 ~ifa->ifa_mask; 827 } else { 828 ifa->ifa_prefixlen = 32; 829 ifa->ifa_mask = inet_make_mask(32); 830 } 831 ret = inet_set_ifa(dev, ifa); 832 break; 833 834 case SIOCSIFBRDADDR: /* Set the broadcast address */ 835 ret = 0; 836 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { 837 inet_del_ifa(in_dev, ifap, 0); 838 ifa->ifa_broadcast = sin->sin_addr.s_addr; 839 inet_insert_ifa(ifa); 840 } 841 break; 842 843 case SIOCSIFDSTADDR: /* Set the destination address */ 844 ret = 0; 845 if (ifa->ifa_address == sin->sin_addr.s_addr) 846 break; 847 ret = -EINVAL; 848 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 849 break; 850 ret = 0; 851 inet_del_ifa(in_dev, ifap, 0); 852 ifa->ifa_address = sin->sin_addr.s_addr; 853 inet_insert_ifa(ifa); 854 break; 855 856 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 857 858 /* 859 * The mask we set must be legal. 860 */ 861 ret = -EINVAL; 862 if (bad_mask(sin->sin_addr.s_addr, 0)) 863 break; 864 ret = 0; 865 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 866 __be32 old_mask = ifa->ifa_mask; 867 inet_del_ifa(in_dev, ifap, 0); 868 ifa->ifa_mask = sin->sin_addr.s_addr; 869 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 870 871 /* See if current broadcast address matches 872 * with current netmask, then recalculate 873 * the broadcast address. Otherwise it's a 874 * funny address, so don't touch it since 875 * the user seems to know what (s)he's doing... 876 */ 877 if ((dev->flags & IFF_BROADCAST) && 878 (ifa->ifa_prefixlen < 31) && 879 (ifa->ifa_broadcast == 880 (ifa->ifa_local|~old_mask))) { 881 ifa->ifa_broadcast = (ifa->ifa_local | 882 ~sin->sin_addr.s_addr); 883 } 884 inet_insert_ifa(ifa); 885 } 886 break; 887 } 888done: 889 rtnl_unlock(); 890out: 891 return ret; 892rarok: 893 rtnl_unlock(); 894 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; 895 goto out; 896} 897 898static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 899{ 900 struct in_device *in_dev = __in_dev_get_rtnl(dev); 901 struct in_ifaddr *ifa; 902 struct ifreq ifr; 903 int done = 0; 904 905 if (!in_dev) 906 goto out; 907 908 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 909 if (!buf) { 910 done += sizeof(ifr); 911 continue; 912 } 913 if (len < (int) sizeof(ifr)) 914 break; 915 memset(&ifr, 0, sizeof(struct ifreq)); 916 if (ifa->ifa_label) 917 strcpy(ifr.ifr_name, ifa->ifa_label); 918 else 919 strcpy(ifr.ifr_name, dev->name); 920 921 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 922 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 923 ifa->ifa_local; 924 925 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { 926 done = -EFAULT; 927 break; 928 } 929 buf += sizeof(struct ifreq); 930 len -= sizeof(struct ifreq); 931 done += sizeof(struct ifreq); 932 } 933out: 934 return done; 935} 936 937__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) 938{ 939 __be32 addr = 0; 940 struct in_device *in_dev; 941 struct net *net = dev_net(dev); 942 943 rcu_read_lock(); 944 in_dev = __in_dev_get_rcu(dev); 945 if (!in_dev) 946 goto no_in_dev; 947 948 for_primary_ifa(in_dev) { 949 if (ifa->ifa_scope > scope) 950 continue; 951 if (!dst || inet_ifa_match(dst, ifa)) { 952 addr = ifa->ifa_local; 953 break; 954 } 955 if (!addr) 956 addr = ifa->ifa_local; 957 } endfor_ifa(in_dev); 958 959 if (addr) 960 goto out_unlock; 961no_in_dev: 962 963 /* Not loopback addresses on loopback should be preferred 964 in this case. It is importnat that lo is the first interface 965 in dev_base list. 966 */ 967 for_each_netdev_rcu(net, dev) { 968 in_dev = __in_dev_get_rcu(dev); 969 if (!in_dev) 970 continue; 971 972 for_primary_ifa(in_dev) { 973 if (ifa->ifa_scope != RT_SCOPE_LINK && 974 ifa->ifa_scope <= scope) { 975 addr = ifa->ifa_local; 976 goto out_unlock; 977 } 978 } endfor_ifa(in_dev); 979 } 980out_unlock: 981 rcu_read_unlock(); 982 return addr; 983} 984EXPORT_SYMBOL(inet_select_addr); 985 986static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, 987 __be32 local, int scope) 988{ 989 int same = 0; 990 __be32 addr = 0; 991 992 for_ifa(in_dev) { 993 if (!addr && 994 (local == ifa->ifa_local || !local) && 995 ifa->ifa_scope <= scope) { 996 addr = ifa->ifa_local; 997 if (same) 998 break; 999 } 1000 if (!same) { 1001 same = (!local || inet_ifa_match(local, ifa)) && 1002 (!dst || inet_ifa_match(dst, ifa)); 1003 if (same && addr) { 1004 if (local || !dst) 1005 break; 1006 /* Is the selected addr into dst subnet? */ 1007 if (inet_ifa_match(addr, ifa)) 1008 break; 1009 /* No, then can we use new local src? */ 1010 if (ifa->ifa_scope <= scope) { 1011 addr = ifa->ifa_local; 1012 break; 1013 } 1014 /* search for large dst subnet for addr */ 1015 same = 0; 1016 } 1017 } 1018 } endfor_ifa(in_dev); 1019 1020 return same ? addr : 0; 1021} 1022 1023/* 1024 * Confirm that local IP address exists using wildcards: 1025 * - in_dev: only on this interface, 0=any interface 1026 * - dst: only in the same subnet as dst, 0=any dst 1027 * - local: address, 0=autoselect the local address 1028 * - scope: maximum allowed scope value for the local address 1029 */ 1030__be32 inet_confirm_addr(struct in_device *in_dev, 1031 __be32 dst, __be32 local, int scope) 1032{ 1033 __be32 addr = 0; 1034 struct net_device *dev; 1035 struct net *net; 1036 1037 if (scope != RT_SCOPE_LINK) 1038 return confirm_addr_indev(in_dev, dst, local, scope); 1039 1040 net = dev_net(in_dev->dev); 1041 rcu_read_lock(); 1042 for_each_netdev_rcu(net, dev) { 1043 in_dev = __in_dev_get_rcu(dev); 1044 if (in_dev) { 1045 addr = confirm_addr_indev(in_dev, dst, local, scope); 1046 if (addr) 1047 break; 1048 } 1049 } 1050 rcu_read_unlock(); 1051 1052 return addr; 1053} 1054 1055/* 1056 * Device notifier 1057 */ 1058 1059int register_inetaddr_notifier(struct notifier_block *nb) 1060{ 1061 return blocking_notifier_chain_register(&inetaddr_chain, nb); 1062} 1063EXPORT_SYMBOL(register_inetaddr_notifier); 1064 1065int unregister_inetaddr_notifier(struct notifier_block *nb) 1066{ 1067 return blocking_notifier_chain_unregister(&inetaddr_chain, nb); 1068} 1069EXPORT_SYMBOL(unregister_inetaddr_notifier); 1070 1071/* Rename ifa_labels for a device name change. Make some effort to preserve 1072 * existing alias numbering and to create unique labels if possible. 1073*/ 1074static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) 1075{ 1076 struct in_ifaddr *ifa; 1077 int named = 0; 1078 1079 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1080 char old[IFNAMSIZ], *dot; 1081 1082 memcpy(old, ifa->ifa_label, IFNAMSIZ); 1083 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1084 if (named++ == 0) 1085 goto skip; 1086 dot = strchr(old, ':'); 1087 if (dot == NULL) { 1088 sprintf(old, ":%d", named); 1089 dot = old; 1090 } 1091 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) 1092 strcat(ifa->ifa_label, dot); 1093 else 1094 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 1095skip: 1096 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 1097 } 1098} 1099 1100static inline bool inetdev_valid_mtu(unsigned mtu) 1101{ 1102 return mtu >= 68; 1103} 1104 1105/* Called only under RTNL semaphore */ 1106 1107static int inetdev_event(struct notifier_block *this, unsigned long event, 1108 void *ptr) 1109{ 1110 struct net_device *dev = ptr; 1111 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1112 1113 ASSERT_RTNL(); 1114 1115 if (!in_dev) { 1116 if (event == NETDEV_REGISTER) { 1117 in_dev = inetdev_init(dev); 1118 if (!in_dev) 1119 return notifier_from_errno(-ENOMEM); 1120 if (dev->flags & IFF_LOOPBACK) { 1121 IN_DEV_CONF_SET(in_dev, NOXFRM, 1); 1122 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); 1123 } 1124 } else if (event == NETDEV_CHANGEMTU) { 1125 /* Re-enabling IP */ 1126 if (inetdev_valid_mtu(dev->mtu)) 1127 in_dev = inetdev_init(dev); 1128 } 1129 goto out; 1130 } 1131 1132 switch (event) { 1133 case NETDEV_REGISTER: 1134 printk(KERN_DEBUG "inetdev_event: bug\n"); 1135 rcu_assign_pointer(dev->ip_ptr, NULL); 1136 break; 1137 case NETDEV_UP: 1138 if (!inetdev_valid_mtu(dev->mtu)) 1139 break; 1140 if (dev->flags & IFF_LOOPBACK) { 1141 struct in_ifaddr *ifa = inet_alloc_ifa(); 1142 1143 if (ifa) { 1144 INIT_HLIST_NODE(&ifa->hash); 1145 ifa->ifa_local = 1146 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1147 ifa->ifa_prefixlen = 8; 1148 ifa->ifa_mask = inet_make_mask(8); 1149 in_dev_hold(in_dev); 1150 ifa->ifa_dev = in_dev; 1151 ifa->ifa_scope = RT_SCOPE_HOST; 1152 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1153 inet_insert_ifa(ifa); 1154 } 1155 } 1156 ip_mc_up(in_dev); 1157 /* fall through */ 1158 case NETDEV_NOTIFY_PEERS: 1159 case NETDEV_CHANGEADDR: 1160 /* Send gratuitous ARP to notify of link change */ 1161 if (IN_DEV_ARP_NOTIFY(in_dev)) { 1162 struct in_ifaddr *ifa = in_dev->ifa_list; 1163 1164 if (ifa) 1165 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1166 ifa->ifa_address, dev, 1167 ifa->ifa_address, NULL, 1168 dev->dev_addr, NULL); 1169 } 1170 break; 1171 case NETDEV_DOWN: 1172 ip_mc_down(in_dev); 1173 break; 1174 case NETDEV_PRE_TYPE_CHANGE: 1175 ip_mc_unmap(in_dev); 1176 break; 1177 case NETDEV_POST_TYPE_CHANGE: 1178 ip_mc_remap(in_dev); 1179 break; 1180 case NETDEV_CHANGEMTU: 1181 if (inetdev_valid_mtu(dev->mtu)) 1182 break; 1183 /* disable IP when MTU is not enough */ 1184 case NETDEV_UNREGISTER: 1185 inetdev_destroy(in_dev); 1186 break; 1187 case NETDEV_CHANGENAME: 1188 /* Do not notify about label change, this event is 1189 * not interesting to applications using netlink. 1190 */ 1191 inetdev_changename(dev, in_dev); 1192 1193 devinet_sysctl_unregister(in_dev); 1194 devinet_sysctl_register(in_dev); 1195 break; 1196 } 1197out: 1198 return NOTIFY_DONE; 1199} 1200 1201static struct notifier_block ip_netdev_notifier = { 1202 .notifier_call = inetdev_event, 1203}; 1204 1205static inline size_t inet_nlmsg_size(void) 1206{ 1207 return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) 1208 + nla_total_size(4) /* IFA_ADDRESS */ 1209 + nla_total_size(4) /* IFA_LOCAL */ 1210 + nla_total_size(4) /* IFA_BROADCAST */ 1211 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ 1212} 1213 1214static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1215 u32 pid, u32 seq, int event, unsigned int flags) 1216{ 1217 struct ifaddrmsg *ifm; 1218 struct nlmsghdr *nlh; 1219 1220 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); 1221 if (nlh == NULL) 1222 return -EMSGSIZE; 1223 1224 ifm = nlmsg_data(nlh); 1225 ifm->ifa_family = AF_INET; 1226 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1227 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; 1228 ifm->ifa_scope = ifa->ifa_scope; 1229 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1230 1231 if (ifa->ifa_address) 1232 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address); 1233 1234 if (ifa->ifa_local) 1235 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local); 1236 1237 if (ifa->ifa_broadcast) 1238 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); 1239 1240 if (ifa->ifa_label[0]) 1241 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); 1242 1243 return nlmsg_end(skb, nlh); 1244 1245nla_put_failure: 1246 nlmsg_cancel(skb, nlh); 1247 return -EMSGSIZE; 1248} 1249 1250static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1251{ 1252 struct net *net = sock_net(skb->sk); 1253 int h, s_h; 1254 int idx, s_idx; 1255 int ip_idx, s_ip_idx; 1256 struct net_device *dev; 1257 struct in_device *in_dev; 1258 struct in_ifaddr *ifa; 1259 struct hlist_head *head; 1260 struct hlist_node *node; 1261 1262 s_h = cb->args[0]; 1263 s_idx = idx = cb->args[1]; 1264 s_ip_idx = ip_idx = cb->args[2]; 1265 1266 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1267 idx = 0; 1268 head = &net->dev_index_head[h]; 1269 rcu_read_lock(); 1270 hlist_for_each_entry_rcu(dev, node, head, index_hlist) { 1271 if (idx < s_idx) 1272 goto cont; 1273 if (h > s_h || idx > s_idx) 1274 s_ip_idx = 0; 1275 in_dev = __in_dev_get_rcu(dev); 1276 if (!in_dev) 1277 goto cont; 1278 1279 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1280 ifa = ifa->ifa_next, ip_idx++) { 1281 if (ip_idx < s_ip_idx) 1282 continue; 1283 if (inet_fill_ifaddr(skb, ifa, 1284 NETLINK_CB(cb->skb).pid, 1285 cb->nlh->nlmsg_seq, 1286 RTM_NEWADDR, NLM_F_MULTI) <= 0) { 1287 rcu_read_unlock(); 1288 goto done; 1289 } 1290 } 1291cont: 1292 idx++; 1293 } 1294 rcu_read_unlock(); 1295 } 1296 1297done: 1298 cb->args[0] = h; 1299 cb->args[1] = idx; 1300 cb->args[2] = ip_idx; 1301 1302 return skb->len; 1303} 1304 1305static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, 1306 u32 pid) 1307{ 1308 struct sk_buff *skb; 1309 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1310 int err = -ENOBUFS; 1311 struct net *net; 1312 1313 net = dev_net(ifa->ifa_dev->dev); 1314 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1315 if (skb == NULL) 1316 goto errout; 1317 1318 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); 1319 if (err < 0) { 1320 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ 1321 WARN_ON(err == -EMSGSIZE); 1322 kfree_skb(skb); 1323 goto errout; 1324 } 1325 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1326 return; 1327errout: 1328 if (err < 0) 1329 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1330} 1331 1332static size_t inet_get_link_af_size(const struct net_device *dev) 1333{ 1334 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1335 1336 if (!in_dev) 1337 return 0; 1338 1339 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ 1340} 1341 1342static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) 1343{ 1344 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1345 struct nlattr *nla; 1346 int i; 1347 1348 if (!in_dev) 1349 return -ENODATA; 1350 1351 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); 1352 if (nla == NULL) 1353 return -EMSGSIZE; 1354 1355 for (i = 0; i < IPV4_DEVCONF_MAX; i++) 1356 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; 1357 1358 return 0; 1359} 1360 1361static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { 1362 [IFLA_INET_CONF] = { .type = NLA_NESTED }, 1363}; 1364 1365static int inet_validate_link_af(const struct net_device *dev, 1366 const struct nlattr *nla) 1367{ 1368 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1369 int err, rem; 1370 1371 if (dev && !__in_dev_get_rtnl(dev)) 1372 return -EAFNOSUPPORT; 1373 1374 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); 1375 if (err < 0) 1376 return err; 1377 1378 if (tb[IFLA_INET_CONF]) { 1379 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { 1380 int cfgid = nla_type(a); 1381 1382 if (nla_len(a) < 4) 1383 return -EINVAL; 1384 1385 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) 1386 return -EINVAL; 1387 } 1388 } 1389 1390 return 0; 1391} 1392 1393static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1394{ 1395 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1396 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1397 int rem; 1398 1399 if (!in_dev) 1400 return -EAFNOSUPPORT; 1401 1402 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) 1403 BUG(); 1404 1405 if (tb[IFLA_INET_CONF]) { 1406 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) 1407 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); 1408 } 1409 1410 return 0; 1411} 1412 1413#ifdef CONFIG_SYSCTL 1414 1415static void devinet_copy_dflt_conf(struct net *net, int i) 1416{ 1417 struct net_device *dev; 1418 1419 rcu_read_lock(); 1420 for_each_netdev_rcu(net, dev) { 1421 struct in_device *in_dev; 1422 1423 in_dev = __in_dev_get_rcu(dev); 1424 if (in_dev && !test_bit(i, in_dev->cnf.state)) 1425 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 1426 } 1427 rcu_read_unlock(); 1428} 1429 1430/* called with RTNL locked */ 1431static void inet_forward_change(struct net *net) 1432{ 1433 struct net_device *dev; 1434 int on = IPV4_DEVCONF_ALL(net, FORWARDING); 1435 1436 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1437 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1438 1439 for_each_netdev(net, dev) { 1440 struct in_device *in_dev; 1441 if (on) 1442 dev_disable_lro(dev); 1443 rcu_read_lock(); 1444 in_dev = __in_dev_get_rcu(dev); 1445 if (in_dev) 1446 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 1447 rcu_read_unlock(); 1448 } 1449} 1450 1451static int devinet_conf_proc(ctl_table *ctl, int write, 1452 void __user *buffer, 1453 size_t *lenp, loff_t *ppos) 1454{ 1455 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1456 1457 if (write) { 1458 struct ipv4_devconf *cnf = ctl->extra1; 1459 struct net *net = ctl->extra2; 1460 int i = (int *)ctl->data - cnf->data; 1461 1462 set_bit(i, cnf->state); 1463 1464 if (cnf == net->ipv4.devconf_dflt) 1465 devinet_copy_dflt_conf(net, i); 1466 } 1467 1468 return ret; 1469} 1470 1471static int devinet_sysctl_forward(ctl_table *ctl, int write, 1472 void __user *buffer, 1473 size_t *lenp, loff_t *ppos) 1474{ 1475 int *valp = ctl->data; 1476 int val = *valp; 1477 loff_t pos = *ppos; 1478 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1479 1480 if (write && *valp != val) { 1481 struct net *net = ctl->extra2; 1482 1483 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { 1484 if (!rtnl_trylock()) { 1485 /* Restore the original values before restarting */ 1486 *valp = val; 1487 *ppos = pos; 1488 return restart_syscall(); 1489 } 1490 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 1491 inet_forward_change(net); 1492 } else if (*valp) { 1493 struct ipv4_devconf *cnf = ctl->extra1; 1494 struct in_device *idev = 1495 container_of(cnf, struct in_device, cnf); 1496 dev_disable_lro(idev->dev); 1497 } 1498 rtnl_unlock(); 1499 rt_cache_flush(net, 0); 1500 } 1501 } 1502 1503 return ret; 1504} 1505 1506static int ipv4_doint_and_flush(ctl_table *ctl, int write, 1507 void __user *buffer, 1508 size_t *lenp, loff_t *ppos) 1509{ 1510 int *valp = ctl->data; 1511 int val = *valp; 1512 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1513 struct net *net = ctl->extra2; 1514 1515 if (write && *valp != val) 1516 rt_cache_flush(net, 0); 1517 1518 return ret; 1519} 1520 1521#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ 1522 { \ 1523 .procname = name, \ 1524 .data = ipv4_devconf.data + \ 1525 IPV4_DEVCONF_ ## attr - 1, \ 1526 .maxlen = sizeof(int), \ 1527 .mode = mval, \ 1528 .proc_handler = proc, \ 1529 .extra1 = &ipv4_devconf, \ 1530 } 1531 1532#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ 1533 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) 1534 1535#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ 1536 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) 1537 1538#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ 1539 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) 1540 1541#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ 1542 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) 1543 1544static struct devinet_sysctl_table { 1545 struct ctl_table_header *sysctl_header; 1546 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; 1547 char *dev_name; 1548} devinet_sysctl = { 1549 .devinet_vars = { 1550 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", 1551 devinet_sysctl_forward), 1552 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), 1553 1554 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), 1555 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), 1556 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), 1557 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), 1558 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), 1559 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, 1560 "accept_source_route"), 1561 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), 1562 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), 1563 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), 1564 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), 1565 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), 1566 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), 1567 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), 1568 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), 1569 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 1570 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 1571 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 1572 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 1573 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 1574 1575 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 1576 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 1577 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION, 1578 "force_igmp_version"), 1579 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 1580 "promote_secondaries"), 1581 }, 1582}; 1583 1584static int __devinet_sysctl_register(struct net *net, char *dev_name, 1585 struct ipv4_devconf *p) 1586{ 1587 int i; 1588 struct devinet_sysctl_table *t; 1589 1590#define DEVINET_CTL_PATH_DEV 3 1591 1592 struct ctl_path devinet_ctl_path[] = { 1593 { .procname = "net", }, 1594 { .procname = "ipv4", }, 1595 { .procname = "conf", }, 1596 { /* to be set */ }, 1597 { }, 1598 }; 1599 1600 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); 1601 if (!t) 1602 goto out; 1603 1604 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 1605 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 1606 t->devinet_vars[i].extra1 = p; 1607 t->devinet_vars[i].extra2 = net; 1608 } 1609 1610 /* 1611 * Make a copy of dev_name, because '.procname' is regarded as const 1612 * by sysctl and we wouldn't want anyone to change it under our feet 1613 * (see SIOCSIFNAME). 1614 */ 1615 t->dev_name = kstrdup(dev_name, GFP_KERNEL); 1616 if (!t->dev_name) 1617 goto free; 1618 1619 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; 1620 1621 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, 1622 t->devinet_vars); 1623 if (!t->sysctl_header) 1624 goto free_procname; 1625 1626 p->sysctl = t; 1627 return 0; 1628 1629free_procname: 1630 kfree(t->dev_name); 1631free: 1632 kfree(t); 1633out: 1634 return -ENOBUFS; 1635} 1636 1637static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) 1638{ 1639 struct devinet_sysctl_table *t = cnf->sysctl; 1640 1641 if (t == NULL) 1642 return; 1643 1644 cnf->sysctl = NULL; 1645 unregister_sysctl_table(t->sysctl_header); 1646 kfree(t->dev_name); 1647 kfree(t); 1648} 1649 1650static void devinet_sysctl_register(struct in_device *idev) 1651{ 1652 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); 1653 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 1654 &idev->cnf); 1655} 1656 1657static void devinet_sysctl_unregister(struct in_device *idev) 1658{ 1659 __devinet_sysctl_unregister(&idev->cnf); 1660 neigh_sysctl_unregister(idev->arp_parms); 1661} 1662 1663static struct ctl_table ctl_forward_entry[] = { 1664 { 1665 .procname = "ip_forward", 1666 .data = &ipv4_devconf.data[ 1667 IPV4_DEVCONF_FORWARDING - 1], 1668 .maxlen = sizeof(int), 1669 .mode = 0644, 1670 .proc_handler = devinet_sysctl_forward, 1671 .extra1 = &ipv4_devconf, 1672 .extra2 = &init_net, 1673 }, 1674 { }, 1675}; 1676 1677static __net_initdata struct ctl_path net_ipv4_path[] = { 1678 { .procname = "net", }, 1679 { .procname = "ipv4", }, 1680 { }, 1681}; 1682#endif 1683 1684static __net_init int devinet_init_net(struct net *net) 1685{ 1686 int err; 1687 struct ipv4_devconf *all, *dflt; 1688#ifdef CONFIG_SYSCTL 1689 struct ctl_table *tbl = ctl_forward_entry; 1690 struct ctl_table_header *forw_hdr; 1691#endif 1692 1693 err = -ENOMEM; 1694 all = &ipv4_devconf; 1695 dflt = &ipv4_devconf_dflt; 1696 1697 if (!net_eq(net, &init_net)) { 1698 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); 1699 if (all == NULL) 1700 goto err_alloc_all; 1701 1702 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); 1703 if (dflt == NULL) 1704 goto err_alloc_dflt; 1705 1706#ifdef CONFIG_SYSCTL 1707 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); 1708 if (tbl == NULL) 1709 goto err_alloc_ctl; 1710 1711 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; 1712 tbl[0].extra1 = all; 1713 tbl[0].extra2 = net; 1714#endif 1715 } 1716 1717#ifdef CONFIG_SYSCTL 1718 err = __devinet_sysctl_register(net, "all", all); 1719 if (err < 0) 1720 goto err_reg_all; 1721 1722 err = __devinet_sysctl_register(net, "default", dflt); 1723 if (err < 0) 1724 goto err_reg_dflt; 1725 1726 err = -ENOMEM; 1727 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); 1728 if (forw_hdr == NULL) 1729 goto err_reg_ctl; 1730 net->ipv4.forw_hdr = forw_hdr; 1731#endif 1732 1733 net->ipv4.devconf_all = all; 1734 net->ipv4.devconf_dflt = dflt; 1735 return 0; 1736 1737#ifdef CONFIG_SYSCTL 1738err_reg_ctl: 1739 __devinet_sysctl_unregister(dflt); 1740err_reg_dflt: 1741 __devinet_sysctl_unregister(all); 1742err_reg_all: 1743 if (tbl != ctl_forward_entry) 1744 kfree(tbl); 1745err_alloc_ctl: 1746#endif 1747 if (dflt != &ipv4_devconf_dflt) 1748 kfree(dflt); 1749err_alloc_dflt: 1750 if (all != &ipv4_devconf) 1751 kfree(all); 1752err_alloc_all: 1753 return err; 1754} 1755 1756static __net_exit void devinet_exit_net(struct net *net) 1757{ 1758#ifdef CONFIG_SYSCTL 1759 struct ctl_table *tbl; 1760 1761 tbl = net->ipv4.forw_hdr->ctl_table_arg; 1762 unregister_net_sysctl_table(net->ipv4.forw_hdr); 1763 __devinet_sysctl_unregister(net->ipv4.devconf_dflt); 1764 __devinet_sysctl_unregister(net->ipv4.devconf_all); 1765 kfree(tbl); 1766#endif 1767 kfree(net->ipv4.devconf_dflt); 1768 kfree(net->ipv4.devconf_all); 1769} 1770 1771static __net_initdata struct pernet_operations devinet_ops = { 1772 .init = devinet_init_net, 1773 .exit = devinet_exit_net, 1774}; 1775 1776static struct rtnl_af_ops inet_af_ops = { 1777 .family = AF_INET, 1778 .fill_link_af = inet_fill_link_af, 1779 .get_link_af_size = inet_get_link_af_size, 1780 .validate_link_af = inet_validate_link_af, 1781 .set_link_af = inet_set_link_af, 1782}; 1783 1784void __init devinet_init(void) 1785{ 1786 int i; 1787 1788 for (i = 0; i < IN4_ADDR_HSIZE; i++) 1789 INIT_HLIST_HEAD(&inet_addr_lst[i]); 1790 1791 register_pernet_subsys(&devinet_ops); 1792 1793 register_gifconf(PF_INET, inet_gifconf); 1794 register_netdevice_notifier(&ip_netdev_notifier); 1795 1796 rtnl_af_register(&inet_af_ops); 1797 1798 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); 1799 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); 1800 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); 1801} 1802 1803