devinet.c revision 2d230e2b2c3111cf4a11619f60dcd158ae84e3ab
1/* 2 * NET3 IP device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Alan Cox, <gw4pts@gw4pts.ampr.org> 16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 17 * 18 * Changes: 19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr 20 * lists. 21 * Cyrus Durgin: updated for kmod 22 * Matthias Andree: in devinet_ioctl, compare label and 23 * address (4.4BSD alias style support), 24 * fall back to comparing just the label 25 * if no match found. 26 */ 27 28 29#include <asm/uaccess.h> 30#include <asm/system.h> 31#include <linux/bitops.h> 32#include <linux/capability.h> 33#include <linux/module.h> 34#include <linux/types.h> 35#include <linux/kernel.h> 36#include <linux/string.h> 37#include <linux/mm.h> 38#include <linux/socket.h> 39#include <linux/sockios.h> 40#include <linux/in.h> 41#include <linux/errno.h> 42#include <linux/interrupt.h> 43#include <linux/if_addr.h> 44#include <linux/if_ether.h> 45#include <linux/inet.h> 46#include <linux/netdevice.h> 47#include <linux/etherdevice.h> 48#include <linux/skbuff.h> 49#include <linux/init.h> 50#include <linux/notifier.h> 51#include <linux/inetdevice.h> 52#include <linux/igmp.h> 53#include <linux/slab.h> 54#include <linux/hash.h> 55#ifdef CONFIG_SYSCTL 56#include <linux/sysctl.h> 57#endif 58#include <linux/kmod.h> 59 60#include <net/arp.h> 61#include <net/ip.h> 62#include <net/route.h> 63#include <net/ip_fib.h> 64#include <net/rtnetlink.h> 65#include <net/net_namespace.h> 66 67static struct ipv4_devconf ipv4_devconf = { 68 .data = { 69 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 70 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 71 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 72 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 73 }, 74}; 75 76static struct ipv4_devconf ipv4_devconf_dflt = { 77 .data = { 78 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 79 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 80 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 81 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 82 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 83 }, 84}; 85 86#define IPV4_DEVCONF_DFLT(net, attr) \ 87 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr) 88 89static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { 90 [IFA_LOCAL] = { .type = NLA_U32 }, 91 [IFA_ADDRESS] = { .type = NLA_U32 }, 92 [IFA_BROADCAST] = { .type = NLA_U32 }, 93 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 94}; 95 96/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE 97 * value. So if you change this define, make appropriate changes to 98 * inet_addr_hash as well. 99 */ 100#define IN4_ADDR_HSIZE 256 101static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; 102static DEFINE_SPINLOCK(inet_addr_hash_lock); 103 104static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) 105{ 106 u32 val = (__force u32) addr ^ hash_ptr(net, 8); 107 108 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & 109 (IN4_ADDR_HSIZE - 1)); 110} 111 112static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) 113{ 114 unsigned int hash = inet_addr_hash(net, ifa->ifa_local); 115 116 spin_lock(&inet_addr_hash_lock); 117 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); 118 spin_unlock(&inet_addr_hash_lock); 119} 120 121static void inet_hash_remove(struct in_ifaddr *ifa) 122{ 123 spin_lock(&inet_addr_hash_lock); 124 hlist_del_init_rcu(&ifa->hash); 125 spin_unlock(&inet_addr_hash_lock); 126} 127 128/** 129 * __ip_dev_find - find the first device with a given source address. 130 * @net: the net namespace 131 * @addr: the source address 132 * @devref: if true, take a reference on the found device 133 * 134 * If a caller uses devref=false, it should be protected by RCU, or RTNL 135 */ 136struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 137{ 138 unsigned int hash = inet_addr_hash(net, addr); 139 struct net_device *result = NULL; 140 struct in_ifaddr *ifa; 141 struct hlist_node *node; 142 143 rcu_read_lock(); 144 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { 145 struct net_device *dev = ifa->ifa_dev->dev; 146 147 if (!net_eq(dev_net(dev), net)) 148 continue; 149 if (ifa->ifa_local == addr) { 150 result = dev; 151 break; 152 } 153 } 154 if (result && devref) 155 dev_hold(result); 156 rcu_read_unlock(); 157 return result; 158} 159EXPORT_SYMBOL(__ip_dev_find); 160 161static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 162 163static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 164static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 165 int destroy); 166#ifdef CONFIG_SYSCTL 167static void devinet_sysctl_register(struct in_device *idev); 168static void devinet_sysctl_unregister(struct in_device *idev); 169#else 170static inline void devinet_sysctl_register(struct in_device *idev) 171{ 172} 173static inline void devinet_sysctl_unregister(struct in_device *idev) 174{ 175} 176#endif 177 178/* Locks all the inet devices. */ 179 180static struct in_ifaddr *inet_alloc_ifa(void) 181{ 182 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); 183} 184 185static void inet_rcu_free_ifa(struct rcu_head *head) 186{ 187 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); 188 if (ifa->ifa_dev) 189 in_dev_put(ifa->ifa_dev); 190 kfree(ifa); 191} 192 193static inline void inet_free_ifa(struct in_ifaddr *ifa) 194{ 195 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); 196} 197 198void in_dev_finish_destroy(struct in_device *idev) 199{ 200 struct net_device *dev = idev->dev; 201 202 WARN_ON(idev->ifa_list); 203 WARN_ON(idev->mc_list); 204#ifdef NET_REFCNT_DEBUG 205 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", 206 idev, dev ? dev->name : "NIL"); 207#endif 208 dev_put(dev); 209 if (!idev->dead) 210 pr_err("Freeing alive in_device %p\n", idev); 211 else 212 kfree(idev); 213} 214EXPORT_SYMBOL(in_dev_finish_destroy); 215 216static struct in_device *inetdev_init(struct net_device *dev) 217{ 218 struct in_device *in_dev; 219 220 ASSERT_RTNL(); 221 222 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); 223 if (!in_dev) 224 goto out; 225 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, 226 sizeof(in_dev->cnf)); 227 in_dev->cnf.sysctl = NULL; 228 in_dev->dev = dev; 229 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); 230 if (!in_dev->arp_parms) 231 goto out_kfree; 232 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) 233 dev_disable_lro(dev); 234 /* Reference in_dev->dev */ 235 dev_hold(dev); 236 /* Account for reference dev->ip_ptr (below) */ 237 in_dev_hold(in_dev); 238 239 devinet_sysctl_register(in_dev); 240 ip_mc_init_dev(in_dev); 241 if (dev->flags & IFF_UP) 242 ip_mc_up(in_dev); 243 244 /* we can receive as soon as ip_ptr is set -- do this last */ 245 rcu_assign_pointer(dev->ip_ptr, in_dev); 246out: 247 return in_dev; 248out_kfree: 249 kfree(in_dev); 250 in_dev = NULL; 251 goto out; 252} 253 254static void in_dev_rcu_put(struct rcu_head *head) 255{ 256 struct in_device *idev = container_of(head, struct in_device, rcu_head); 257 in_dev_put(idev); 258} 259 260static void inetdev_destroy(struct in_device *in_dev) 261{ 262 struct in_ifaddr *ifa; 263 struct net_device *dev; 264 265 ASSERT_RTNL(); 266 267 dev = in_dev->dev; 268 269 in_dev->dead = 1; 270 271 ip_mc_destroy_dev(in_dev); 272 273 while ((ifa = in_dev->ifa_list) != NULL) { 274 inet_del_ifa(in_dev, &in_dev->ifa_list, 0); 275 inet_free_ifa(ifa); 276 } 277 278 rcu_assign_pointer(dev->ip_ptr, NULL); 279 280 devinet_sysctl_unregister(in_dev); 281 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 282 arp_ifdown(dev); 283 284 call_rcu(&in_dev->rcu_head, in_dev_rcu_put); 285} 286 287int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) 288{ 289 rcu_read_lock(); 290 for_primary_ifa(in_dev) { 291 if (inet_ifa_match(a, ifa)) { 292 if (!b || inet_ifa_match(b, ifa)) { 293 rcu_read_unlock(); 294 return 1; 295 } 296 } 297 } endfor_ifa(in_dev); 298 rcu_read_unlock(); 299 return 0; 300} 301 302static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 303 int destroy, struct nlmsghdr *nlh, u32 pid) 304{ 305 struct in_ifaddr *promote = NULL; 306 struct in_ifaddr *ifa, *ifa1 = *ifap; 307 struct in_ifaddr *last_prim = in_dev->ifa_list; 308 struct in_ifaddr *prev_prom = NULL; 309 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); 310 311 ASSERT_RTNL(); 312 313 /* 1. Deleting primary ifaddr forces deletion all secondaries 314 * unless alias promotion is set 315 **/ 316 317 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { 318 struct in_ifaddr **ifap1 = &ifa1->ifa_next; 319 320 while ((ifa = *ifap1) != NULL) { 321 if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 322 ifa1->ifa_scope <= ifa->ifa_scope) 323 last_prim = ifa; 324 325 if (!(ifa->ifa_flags & IFA_F_SECONDARY) || 326 ifa1->ifa_mask != ifa->ifa_mask || 327 !inet_ifa_match(ifa1->ifa_address, ifa)) { 328 ifap1 = &ifa->ifa_next; 329 prev_prom = ifa; 330 continue; 331 } 332 333 if (!do_promote) { 334 inet_hash_remove(ifa); 335 *ifap1 = ifa->ifa_next; 336 337 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); 338 blocking_notifier_call_chain(&inetaddr_chain, 339 NETDEV_DOWN, ifa); 340 inet_free_ifa(ifa); 341 } else { 342 promote = ifa; 343 break; 344 } 345 } 346 } 347 348 /* On promotion all secondaries from subnet are changing 349 * the primary IP, we must remove all their routes silently 350 * and later to add them back with new prefsrc. Do this 351 * while all addresses are on the device list. 352 */ 353 for (ifa = promote; ifa; ifa = ifa->ifa_next) { 354 if (ifa1->ifa_mask == ifa->ifa_mask && 355 inet_ifa_match(ifa1->ifa_address, ifa)) 356 fib_del_ifaddr(ifa, ifa1); 357 } 358 359 /* 2. Unlink it */ 360 361 *ifap = ifa1->ifa_next; 362 inet_hash_remove(ifa1); 363 364 /* 3. Announce address deletion */ 365 366 /* Send message first, then call notifier. 367 At first sight, FIB update triggered by notifier 368 will refer to already deleted ifaddr, that could confuse 369 netlink listeners. It is not true: look, gated sees 370 that route deleted and if it still thinks that ifaddr 371 is valid, it will try to restore deleted routes... Grr. 372 So that, this order is correct. 373 */ 374 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); 375 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 376 377 if (promote) { 378 379 if (prev_prom) { 380 prev_prom->ifa_next = promote->ifa_next; 381 promote->ifa_next = last_prim->ifa_next; 382 last_prim->ifa_next = promote; 383 } 384 385 promote->ifa_flags &= ~IFA_F_SECONDARY; 386 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); 387 blocking_notifier_call_chain(&inetaddr_chain, 388 NETDEV_UP, promote); 389 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { 390 if (ifa1->ifa_mask != ifa->ifa_mask || 391 !inet_ifa_match(ifa1->ifa_address, ifa)) 392 continue; 393 fib_add_ifaddr(ifa); 394 } 395 396 } 397 if (destroy) 398 inet_free_ifa(ifa1); 399} 400 401static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 402 int destroy) 403{ 404 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); 405} 406 407static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 408 u32 pid) 409{ 410 struct in_device *in_dev = ifa->ifa_dev; 411 struct in_ifaddr *ifa1, **ifap, **last_primary; 412 413 ASSERT_RTNL(); 414 415 if (!ifa->ifa_local) { 416 inet_free_ifa(ifa); 417 return 0; 418 } 419 420 ifa->ifa_flags &= ~IFA_F_SECONDARY; 421 last_primary = &in_dev->ifa_list; 422 423 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 424 ifap = &ifa1->ifa_next) { 425 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && 426 ifa->ifa_scope <= ifa1->ifa_scope) 427 last_primary = &ifa1->ifa_next; 428 if (ifa1->ifa_mask == ifa->ifa_mask && 429 inet_ifa_match(ifa1->ifa_address, ifa)) { 430 if (ifa1->ifa_local == ifa->ifa_local) { 431 inet_free_ifa(ifa); 432 return -EEXIST; 433 } 434 if (ifa1->ifa_scope != ifa->ifa_scope) { 435 inet_free_ifa(ifa); 436 return -EINVAL; 437 } 438 ifa->ifa_flags |= IFA_F_SECONDARY; 439 } 440 } 441 442 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { 443 net_srandom(ifa->ifa_local); 444 ifap = last_primary; 445 } 446 447 ifa->ifa_next = *ifap; 448 *ifap = ifa; 449 450 inet_hash_insert(dev_net(in_dev->dev), ifa); 451 452 /* Send message first, then call notifier. 453 Notifier will trigger FIB update, so that 454 listeners of netlink will know about new ifaddr */ 455 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); 456 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 457 458 return 0; 459} 460 461static int inet_insert_ifa(struct in_ifaddr *ifa) 462{ 463 return __inet_insert_ifa(ifa, NULL, 0); 464} 465 466static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 467{ 468 struct in_device *in_dev = __in_dev_get_rtnl(dev); 469 470 ASSERT_RTNL(); 471 472 if (!in_dev) { 473 inet_free_ifa(ifa); 474 return -ENOBUFS; 475 } 476 ipv4_devconf_setall(in_dev); 477 if (ifa->ifa_dev != in_dev) { 478 WARN_ON(ifa->ifa_dev); 479 in_dev_hold(in_dev); 480 ifa->ifa_dev = in_dev; 481 } 482 if (ipv4_is_loopback(ifa->ifa_local)) 483 ifa->ifa_scope = RT_SCOPE_HOST; 484 return inet_insert_ifa(ifa); 485} 486 487/* Caller must hold RCU or RTNL : 488 * We dont take a reference on found in_device 489 */ 490struct in_device *inetdev_by_index(struct net *net, int ifindex) 491{ 492 struct net_device *dev; 493 struct in_device *in_dev = NULL; 494 495 rcu_read_lock(); 496 dev = dev_get_by_index_rcu(net, ifindex); 497 if (dev) 498 in_dev = rcu_dereference_rtnl(dev->ip_ptr); 499 rcu_read_unlock(); 500 return in_dev; 501} 502EXPORT_SYMBOL(inetdev_by_index); 503 504/* Called only from RTNL semaphored context. No locks. */ 505 506struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, 507 __be32 mask) 508{ 509 ASSERT_RTNL(); 510 511 for_primary_ifa(in_dev) { 512 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) 513 return ifa; 514 } endfor_ifa(in_dev); 515 return NULL; 516} 517 518static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 519{ 520 struct net *net = sock_net(skb->sk); 521 struct nlattr *tb[IFA_MAX+1]; 522 struct in_device *in_dev; 523 struct ifaddrmsg *ifm; 524 struct in_ifaddr *ifa, **ifap; 525 int err = -EINVAL; 526 527 ASSERT_RTNL(); 528 529 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 530 if (err < 0) 531 goto errout; 532 533 ifm = nlmsg_data(nlh); 534 in_dev = inetdev_by_index(net, ifm->ifa_index); 535 if (in_dev == NULL) { 536 err = -ENODEV; 537 goto errout; 538 } 539 540 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 541 ifap = &ifa->ifa_next) { 542 if (tb[IFA_LOCAL] && 543 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL])) 544 continue; 545 546 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) 547 continue; 548 549 if (tb[IFA_ADDRESS] && 550 (ifm->ifa_prefixlen != ifa->ifa_prefixlen || 551 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) 552 continue; 553 554 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); 555 return 0; 556 } 557 558 err = -EADDRNOTAVAIL; 559errout: 560 return err; 561} 562 563static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) 564{ 565 struct nlattr *tb[IFA_MAX+1]; 566 struct in_ifaddr *ifa; 567 struct ifaddrmsg *ifm; 568 struct net_device *dev; 569 struct in_device *in_dev; 570 int err; 571 572 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 573 if (err < 0) 574 goto errout; 575 576 ifm = nlmsg_data(nlh); 577 err = -EINVAL; 578 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) 579 goto errout; 580 581 dev = __dev_get_by_index(net, ifm->ifa_index); 582 err = -ENODEV; 583 if (dev == NULL) 584 goto errout; 585 586 in_dev = __in_dev_get_rtnl(dev); 587 err = -ENOBUFS; 588 if (in_dev == NULL) 589 goto errout; 590 591 ifa = inet_alloc_ifa(); 592 if (ifa == NULL) 593 /* 594 * A potential indev allocation can be left alive, it stays 595 * assigned to its device and is destroy with it. 596 */ 597 goto errout; 598 599 ipv4_devconf_setall(in_dev); 600 in_dev_hold(in_dev); 601 602 if (tb[IFA_ADDRESS] == NULL) 603 tb[IFA_ADDRESS] = tb[IFA_LOCAL]; 604 605 INIT_HLIST_NODE(&ifa->hash); 606 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 607 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 608 ifa->ifa_flags = ifm->ifa_flags; 609 ifa->ifa_scope = ifm->ifa_scope; 610 ifa->ifa_dev = in_dev; 611 612 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]); 613 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]); 614 615 if (tb[IFA_BROADCAST]) 616 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); 617 618 if (tb[IFA_LABEL]) 619 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); 620 else 621 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 622 623 return ifa; 624 625errout: 626 return ERR_PTR(err); 627} 628 629static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 630{ 631 struct net *net = sock_net(skb->sk); 632 struct in_ifaddr *ifa; 633 634 ASSERT_RTNL(); 635 636 ifa = rtm_to_ifaddr(net, nlh); 637 if (IS_ERR(ifa)) 638 return PTR_ERR(ifa); 639 640 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); 641} 642 643/* 644 * Determine a default network mask, based on the IP address. 645 */ 646 647static inline int inet_abc_len(__be32 addr) 648{ 649 int rc = -1; /* Something else, probably a multicast. */ 650 651 if (ipv4_is_zeronet(addr)) 652 rc = 0; 653 else { 654 __u32 haddr = ntohl(addr); 655 656 if (IN_CLASSA(haddr)) 657 rc = 8; 658 else if (IN_CLASSB(haddr)) 659 rc = 16; 660 else if (IN_CLASSC(haddr)) 661 rc = 24; 662 } 663 664 return rc; 665} 666 667 668int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) 669{ 670 struct ifreq ifr; 671 struct sockaddr_in sin_orig; 672 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 673 struct in_device *in_dev; 674 struct in_ifaddr **ifap = NULL; 675 struct in_ifaddr *ifa = NULL; 676 struct net_device *dev; 677 char *colon; 678 int ret = -EFAULT; 679 int tryaddrmatch = 0; 680 681 /* 682 * Fetch the caller's info block into kernel space 683 */ 684 685 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 686 goto out; 687 ifr.ifr_name[IFNAMSIZ - 1] = 0; 688 689 /* save original address for comparison */ 690 memcpy(&sin_orig, sin, sizeof(*sin)); 691 692 colon = strchr(ifr.ifr_name, ':'); 693 if (colon) 694 *colon = 0; 695 696 dev_load(net, ifr.ifr_name); 697 698 switch (cmd) { 699 case SIOCGIFADDR: /* Get interface address */ 700 case SIOCGIFBRDADDR: /* Get the broadcast address */ 701 case SIOCGIFDSTADDR: /* Get the destination address */ 702 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 703 /* Note that these ioctls will not sleep, 704 so that we do not impose a lock. 705 One day we will be forced to put shlock here (I mean SMP) 706 */ 707 tryaddrmatch = (sin_orig.sin_family == AF_INET); 708 memset(sin, 0, sizeof(*sin)); 709 sin->sin_family = AF_INET; 710 break; 711 712 case SIOCSIFFLAGS: 713 ret = -EACCES; 714 if (!capable(CAP_NET_ADMIN)) 715 goto out; 716 break; 717 case SIOCSIFADDR: /* Set interface address (and family) */ 718 case SIOCSIFBRDADDR: /* Set the broadcast address */ 719 case SIOCSIFDSTADDR: /* Set the destination address */ 720 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 721 ret = -EACCES; 722 if (!capable(CAP_NET_ADMIN)) 723 goto out; 724 ret = -EINVAL; 725 if (sin->sin_family != AF_INET) 726 goto out; 727 break; 728 default: 729 ret = -EINVAL; 730 goto out; 731 } 732 733 rtnl_lock(); 734 735 ret = -ENODEV; 736 dev = __dev_get_by_name(net, ifr.ifr_name); 737 if (!dev) 738 goto done; 739 740 if (colon) 741 *colon = ':'; 742 743 in_dev = __in_dev_get_rtnl(dev); 744 if (in_dev) { 745 if (tryaddrmatch) { 746 /* Matthias Andree */ 747 /* compare label and address (4.4BSD style) */ 748 /* note: we only do this for a limited set of ioctls 749 and only if the original address family was AF_INET. 750 This is checked above. */ 751 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 752 ifap = &ifa->ifa_next) { 753 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 754 sin_orig.sin_addr.s_addr == 755 ifa->ifa_local) { 756 break; /* found */ 757 } 758 } 759 } 760 /* we didn't get a match, maybe the application is 761 4.3BSD-style and passed in junk so we fall back to 762 comparing just the label */ 763 if (!ifa) { 764 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 765 ifap = &ifa->ifa_next) 766 if (!strcmp(ifr.ifr_name, ifa->ifa_label)) 767 break; 768 } 769 } 770 771 ret = -EADDRNOTAVAIL; 772 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) 773 goto done; 774 775 switch (cmd) { 776 case SIOCGIFADDR: /* Get interface address */ 777 sin->sin_addr.s_addr = ifa->ifa_local; 778 goto rarok; 779 780 case SIOCGIFBRDADDR: /* Get the broadcast address */ 781 sin->sin_addr.s_addr = ifa->ifa_broadcast; 782 goto rarok; 783 784 case SIOCGIFDSTADDR: /* Get the destination address */ 785 sin->sin_addr.s_addr = ifa->ifa_address; 786 goto rarok; 787 788 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 789 sin->sin_addr.s_addr = ifa->ifa_mask; 790 goto rarok; 791 792 case SIOCSIFFLAGS: 793 if (colon) { 794 ret = -EADDRNOTAVAIL; 795 if (!ifa) 796 break; 797 ret = 0; 798 if (!(ifr.ifr_flags & IFF_UP)) 799 inet_del_ifa(in_dev, ifap, 1); 800 break; 801 } 802 ret = dev_change_flags(dev, ifr.ifr_flags); 803 break; 804 805 case SIOCSIFADDR: /* Set interface address (and family) */ 806 ret = -EINVAL; 807 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 808 break; 809 810 if (!ifa) { 811 ret = -ENOBUFS; 812 ifa = inet_alloc_ifa(); 813 INIT_HLIST_NODE(&ifa->hash); 814 if (!ifa) 815 break; 816 if (colon) 817 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 818 else 819 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 820 } else { 821 ret = 0; 822 if (ifa->ifa_local == sin->sin_addr.s_addr) 823 break; 824 inet_del_ifa(in_dev, ifap, 0); 825 ifa->ifa_broadcast = 0; 826 ifa->ifa_scope = 0; 827 } 828 829 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; 830 831 if (!(dev->flags & IFF_POINTOPOINT)) { 832 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 833 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 834 if ((dev->flags & IFF_BROADCAST) && 835 ifa->ifa_prefixlen < 31) 836 ifa->ifa_broadcast = ifa->ifa_address | 837 ~ifa->ifa_mask; 838 } else { 839 ifa->ifa_prefixlen = 32; 840 ifa->ifa_mask = inet_make_mask(32); 841 } 842 ret = inet_set_ifa(dev, ifa); 843 break; 844 845 case SIOCSIFBRDADDR: /* Set the broadcast address */ 846 ret = 0; 847 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { 848 inet_del_ifa(in_dev, ifap, 0); 849 ifa->ifa_broadcast = sin->sin_addr.s_addr; 850 inet_insert_ifa(ifa); 851 } 852 break; 853 854 case SIOCSIFDSTADDR: /* Set the destination address */ 855 ret = 0; 856 if (ifa->ifa_address == sin->sin_addr.s_addr) 857 break; 858 ret = -EINVAL; 859 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 860 break; 861 ret = 0; 862 inet_del_ifa(in_dev, ifap, 0); 863 ifa->ifa_address = sin->sin_addr.s_addr; 864 inet_insert_ifa(ifa); 865 break; 866 867 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 868 869 /* 870 * The mask we set must be legal. 871 */ 872 ret = -EINVAL; 873 if (bad_mask(sin->sin_addr.s_addr, 0)) 874 break; 875 ret = 0; 876 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 877 __be32 old_mask = ifa->ifa_mask; 878 inet_del_ifa(in_dev, ifap, 0); 879 ifa->ifa_mask = sin->sin_addr.s_addr; 880 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 881 882 /* See if current broadcast address matches 883 * with current netmask, then recalculate 884 * the broadcast address. Otherwise it's a 885 * funny address, so don't touch it since 886 * the user seems to know what (s)he's doing... 887 */ 888 if ((dev->flags & IFF_BROADCAST) && 889 (ifa->ifa_prefixlen < 31) && 890 (ifa->ifa_broadcast == 891 (ifa->ifa_local|~old_mask))) { 892 ifa->ifa_broadcast = (ifa->ifa_local | 893 ~sin->sin_addr.s_addr); 894 } 895 inet_insert_ifa(ifa); 896 } 897 break; 898 } 899done: 900 rtnl_unlock(); 901out: 902 return ret; 903rarok: 904 rtnl_unlock(); 905 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; 906 goto out; 907} 908 909static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 910{ 911 struct in_device *in_dev = __in_dev_get_rtnl(dev); 912 struct in_ifaddr *ifa; 913 struct ifreq ifr; 914 int done = 0; 915 916 if (!in_dev) 917 goto out; 918 919 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 920 if (!buf) { 921 done += sizeof(ifr); 922 continue; 923 } 924 if (len < (int) sizeof(ifr)) 925 break; 926 memset(&ifr, 0, sizeof(struct ifreq)); 927 if (ifa->ifa_label) 928 strcpy(ifr.ifr_name, ifa->ifa_label); 929 else 930 strcpy(ifr.ifr_name, dev->name); 931 932 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 933 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 934 ifa->ifa_local; 935 936 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { 937 done = -EFAULT; 938 break; 939 } 940 buf += sizeof(struct ifreq); 941 len -= sizeof(struct ifreq); 942 done += sizeof(struct ifreq); 943 } 944out: 945 return done; 946} 947 948__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) 949{ 950 __be32 addr = 0; 951 struct in_device *in_dev; 952 struct net *net = dev_net(dev); 953 954 rcu_read_lock(); 955 in_dev = __in_dev_get_rcu(dev); 956 if (!in_dev) 957 goto no_in_dev; 958 959 for_primary_ifa(in_dev) { 960 if (ifa->ifa_scope > scope) 961 continue; 962 if (!dst || inet_ifa_match(dst, ifa)) { 963 addr = ifa->ifa_local; 964 break; 965 } 966 if (!addr) 967 addr = ifa->ifa_local; 968 } endfor_ifa(in_dev); 969 970 if (addr) 971 goto out_unlock; 972no_in_dev: 973 974 /* Not loopback addresses on loopback should be preferred 975 in this case. It is importnat that lo is the first interface 976 in dev_base list. 977 */ 978 for_each_netdev_rcu(net, dev) { 979 in_dev = __in_dev_get_rcu(dev); 980 if (!in_dev) 981 continue; 982 983 for_primary_ifa(in_dev) { 984 if (ifa->ifa_scope != RT_SCOPE_LINK && 985 ifa->ifa_scope <= scope) { 986 addr = ifa->ifa_local; 987 goto out_unlock; 988 } 989 } endfor_ifa(in_dev); 990 } 991out_unlock: 992 rcu_read_unlock(); 993 return addr; 994} 995EXPORT_SYMBOL(inet_select_addr); 996 997static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, 998 __be32 local, int scope) 999{ 1000 int same = 0; 1001 __be32 addr = 0; 1002 1003 for_ifa(in_dev) { 1004 if (!addr && 1005 (local == ifa->ifa_local || !local) && 1006 ifa->ifa_scope <= scope) { 1007 addr = ifa->ifa_local; 1008 if (same) 1009 break; 1010 } 1011 if (!same) { 1012 same = (!local || inet_ifa_match(local, ifa)) && 1013 (!dst || inet_ifa_match(dst, ifa)); 1014 if (same && addr) { 1015 if (local || !dst) 1016 break; 1017 /* Is the selected addr into dst subnet? */ 1018 if (inet_ifa_match(addr, ifa)) 1019 break; 1020 /* No, then can we use new local src? */ 1021 if (ifa->ifa_scope <= scope) { 1022 addr = ifa->ifa_local; 1023 break; 1024 } 1025 /* search for large dst subnet for addr */ 1026 same = 0; 1027 } 1028 } 1029 } endfor_ifa(in_dev); 1030 1031 return same ? addr : 0; 1032} 1033 1034/* 1035 * Confirm that local IP address exists using wildcards: 1036 * - in_dev: only on this interface, 0=any interface 1037 * - dst: only in the same subnet as dst, 0=any dst 1038 * - local: address, 0=autoselect the local address 1039 * - scope: maximum allowed scope value for the local address 1040 */ 1041__be32 inet_confirm_addr(struct in_device *in_dev, 1042 __be32 dst, __be32 local, int scope) 1043{ 1044 __be32 addr = 0; 1045 struct net_device *dev; 1046 struct net *net; 1047 1048 if (scope != RT_SCOPE_LINK) 1049 return confirm_addr_indev(in_dev, dst, local, scope); 1050 1051 net = dev_net(in_dev->dev); 1052 rcu_read_lock(); 1053 for_each_netdev_rcu(net, dev) { 1054 in_dev = __in_dev_get_rcu(dev); 1055 if (in_dev) { 1056 addr = confirm_addr_indev(in_dev, dst, local, scope); 1057 if (addr) 1058 break; 1059 } 1060 } 1061 rcu_read_unlock(); 1062 1063 return addr; 1064} 1065 1066/* 1067 * Device notifier 1068 */ 1069 1070int register_inetaddr_notifier(struct notifier_block *nb) 1071{ 1072 return blocking_notifier_chain_register(&inetaddr_chain, nb); 1073} 1074EXPORT_SYMBOL(register_inetaddr_notifier); 1075 1076int unregister_inetaddr_notifier(struct notifier_block *nb) 1077{ 1078 return blocking_notifier_chain_unregister(&inetaddr_chain, nb); 1079} 1080EXPORT_SYMBOL(unregister_inetaddr_notifier); 1081 1082/* Rename ifa_labels for a device name change. Make some effort to preserve 1083 * existing alias numbering and to create unique labels if possible. 1084*/ 1085static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) 1086{ 1087 struct in_ifaddr *ifa; 1088 int named = 0; 1089 1090 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1091 char old[IFNAMSIZ], *dot; 1092 1093 memcpy(old, ifa->ifa_label, IFNAMSIZ); 1094 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1095 if (named++ == 0) 1096 goto skip; 1097 dot = strchr(old, ':'); 1098 if (dot == NULL) { 1099 sprintf(old, ":%d", named); 1100 dot = old; 1101 } 1102 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) 1103 strcat(ifa->ifa_label, dot); 1104 else 1105 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 1106skip: 1107 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 1108 } 1109} 1110 1111static inline bool inetdev_valid_mtu(unsigned mtu) 1112{ 1113 return mtu >= 68; 1114} 1115 1116static void inetdev_send_gratuitous_arp(struct net_device *dev, 1117 struct in_device *in_dev) 1118 1119{ 1120 struct in_ifaddr *ifa = in_dev->ifa_list; 1121 1122 if (!ifa) 1123 return; 1124 1125 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1126 ifa->ifa_local, dev, 1127 ifa->ifa_local, NULL, 1128 dev->dev_addr, NULL); 1129} 1130 1131/* Called only under RTNL semaphore */ 1132 1133static int inetdev_event(struct notifier_block *this, unsigned long event, 1134 void *ptr) 1135{ 1136 struct net_device *dev = ptr; 1137 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1138 1139 ASSERT_RTNL(); 1140 1141 if (!in_dev) { 1142 if (event == NETDEV_REGISTER) { 1143 in_dev = inetdev_init(dev); 1144 if (!in_dev) 1145 return notifier_from_errno(-ENOMEM); 1146 if (dev->flags & IFF_LOOPBACK) { 1147 IN_DEV_CONF_SET(in_dev, NOXFRM, 1); 1148 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); 1149 } 1150 } else if (event == NETDEV_CHANGEMTU) { 1151 /* Re-enabling IP */ 1152 if (inetdev_valid_mtu(dev->mtu)) 1153 in_dev = inetdev_init(dev); 1154 } 1155 goto out; 1156 } 1157 1158 switch (event) { 1159 case NETDEV_REGISTER: 1160 printk(KERN_DEBUG "inetdev_event: bug\n"); 1161 rcu_assign_pointer(dev->ip_ptr, NULL); 1162 break; 1163 case NETDEV_UP: 1164 if (!inetdev_valid_mtu(dev->mtu)) 1165 break; 1166 if (dev->flags & IFF_LOOPBACK) { 1167 struct in_ifaddr *ifa = inet_alloc_ifa(); 1168 1169 if (ifa) { 1170 INIT_HLIST_NODE(&ifa->hash); 1171 ifa->ifa_local = 1172 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1173 ifa->ifa_prefixlen = 8; 1174 ifa->ifa_mask = inet_make_mask(8); 1175 in_dev_hold(in_dev); 1176 ifa->ifa_dev = in_dev; 1177 ifa->ifa_scope = RT_SCOPE_HOST; 1178 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1179 inet_insert_ifa(ifa); 1180 } 1181 } 1182 ip_mc_up(in_dev); 1183 /* fall through */ 1184 case NETDEV_CHANGEADDR: 1185 if (!IN_DEV_ARP_NOTIFY(in_dev)) 1186 break; 1187 /* fall through */ 1188 case NETDEV_NOTIFY_PEERS: 1189 /* Send gratuitous ARP to notify of link change */ 1190 inetdev_send_gratuitous_arp(dev, in_dev); 1191 break; 1192 case NETDEV_DOWN: 1193 ip_mc_down(in_dev); 1194 break; 1195 case NETDEV_PRE_TYPE_CHANGE: 1196 ip_mc_unmap(in_dev); 1197 break; 1198 case NETDEV_POST_TYPE_CHANGE: 1199 ip_mc_remap(in_dev); 1200 break; 1201 case NETDEV_CHANGEMTU: 1202 if (inetdev_valid_mtu(dev->mtu)) 1203 break; 1204 /* disable IP when MTU is not enough */ 1205 case NETDEV_UNREGISTER: 1206 inetdev_destroy(in_dev); 1207 break; 1208 case NETDEV_CHANGENAME: 1209 /* Do not notify about label change, this event is 1210 * not interesting to applications using netlink. 1211 */ 1212 inetdev_changename(dev, in_dev); 1213 1214 devinet_sysctl_unregister(in_dev); 1215 devinet_sysctl_register(in_dev); 1216 break; 1217 } 1218out: 1219 return NOTIFY_DONE; 1220} 1221 1222static struct notifier_block ip_netdev_notifier = { 1223 .notifier_call = inetdev_event, 1224}; 1225 1226static inline size_t inet_nlmsg_size(void) 1227{ 1228 return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) 1229 + nla_total_size(4) /* IFA_ADDRESS */ 1230 + nla_total_size(4) /* IFA_LOCAL */ 1231 + nla_total_size(4) /* IFA_BROADCAST */ 1232 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ 1233} 1234 1235static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1236 u32 pid, u32 seq, int event, unsigned int flags) 1237{ 1238 struct ifaddrmsg *ifm; 1239 struct nlmsghdr *nlh; 1240 1241 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); 1242 if (nlh == NULL) 1243 return -EMSGSIZE; 1244 1245 ifm = nlmsg_data(nlh); 1246 ifm->ifa_family = AF_INET; 1247 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1248 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; 1249 ifm->ifa_scope = ifa->ifa_scope; 1250 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1251 1252 if (ifa->ifa_address) 1253 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address); 1254 1255 if (ifa->ifa_local) 1256 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local); 1257 1258 if (ifa->ifa_broadcast) 1259 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); 1260 1261 if (ifa->ifa_label[0]) 1262 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); 1263 1264 return nlmsg_end(skb, nlh); 1265 1266nla_put_failure: 1267 nlmsg_cancel(skb, nlh); 1268 return -EMSGSIZE; 1269} 1270 1271static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1272{ 1273 struct net *net = sock_net(skb->sk); 1274 int h, s_h; 1275 int idx, s_idx; 1276 int ip_idx, s_ip_idx; 1277 struct net_device *dev; 1278 struct in_device *in_dev; 1279 struct in_ifaddr *ifa; 1280 struct hlist_head *head; 1281 struct hlist_node *node; 1282 1283 s_h = cb->args[0]; 1284 s_idx = idx = cb->args[1]; 1285 s_ip_idx = ip_idx = cb->args[2]; 1286 1287 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1288 idx = 0; 1289 head = &net->dev_index_head[h]; 1290 rcu_read_lock(); 1291 hlist_for_each_entry_rcu(dev, node, head, index_hlist) { 1292 if (idx < s_idx) 1293 goto cont; 1294 if (h > s_h || idx > s_idx) 1295 s_ip_idx = 0; 1296 in_dev = __in_dev_get_rcu(dev); 1297 if (!in_dev) 1298 goto cont; 1299 1300 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1301 ifa = ifa->ifa_next, ip_idx++) { 1302 if (ip_idx < s_ip_idx) 1303 continue; 1304 if (inet_fill_ifaddr(skb, ifa, 1305 NETLINK_CB(cb->skb).pid, 1306 cb->nlh->nlmsg_seq, 1307 RTM_NEWADDR, NLM_F_MULTI) <= 0) { 1308 rcu_read_unlock(); 1309 goto done; 1310 } 1311 } 1312cont: 1313 idx++; 1314 } 1315 rcu_read_unlock(); 1316 } 1317 1318done: 1319 cb->args[0] = h; 1320 cb->args[1] = idx; 1321 cb->args[2] = ip_idx; 1322 1323 return skb->len; 1324} 1325 1326static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, 1327 u32 pid) 1328{ 1329 struct sk_buff *skb; 1330 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1331 int err = -ENOBUFS; 1332 struct net *net; 1333 1334 net = dev_net(ifa->ifa_dev->dev); 1335 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1336 if (skb == NULL) 1337 goto errout; 1338 1339 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); 1340 if (err < 0) { 1341 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ 1342 WARN_ON(err == -EMSGSIZE); 1343 kfree_skb(skb); 1344 goto errout; 1345 } 1346 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1347 return; 1348errout: 1349 if (err < 0) 1350 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1351} 1352 1353static size_t inet_get_link_af_size(const struct net_device *dev) 1354{ 1355 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1356 1357 if (!in_dev) 1358 return 0; 1359 1360 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ 1361} 1362 1363static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) 1364{ 1365 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1366 struct nlattr *nla; 1367 int i; 1368 1369 if (!in_dev) 1370 return -ENODATA; 1371 1372 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); 1373 if (nla == NULL) 1374 return -EMSGSIZE; 1375 1376 for (i = 0; i < IPV4_DEVCONF_MAX; i++) 1377 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; 1378 1379 return 0; 1380} 1381 1382static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { 1383 [IFLA_INET_CONF] = { .type = NLA_NESTED }, 1384}; 1385 1386static int inet_validate_link_af(const struct net_device *dev, 1387 const struct nlattr *nla) 1388{ 1389 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1390 int err, rem; 1391 1392 if (dev && !__in_dev_get_rtnl(dev)) 1393 return -EAFNOSUPPORT; 1394 1395 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); 1396 if (err < 0) 1397 return err; 1398 1399 if (tb[IFLA_INET_CONF]) { 1400 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { 1401 int cfgid = nla_type(a); 1402 1403 if (nla_len(a) < 4) 1404 return -EINVAL; 1405 1406 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) 1407 return -EINVAL; 1408 } 1409 } 1410 1411 return 0; 1412} 1413 1414static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1415{ 1416 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1417 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1418 int rem; 1419 1420 if (!in_dev) 1421 return -EAFNOSUPPORT; 1422 1423 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) 1424 BUG(); 1425 1426 if (tb[IFLA_INET_CONF]) { 1427 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) 1428 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); 1429 } 1430 1431 return 0; 1432} 1433 1434#ifdef CONFIG_SYSCTL 1435 1436static void devinet_copy_dflt_conf(struct net *net, int i) 1437{ 1438 struct net_device *dev; 1439 1440 rcu_read_lock(); 1441 for_each_netdev_rcu(net, dev) { 1442 struct in_device *in_dev; 1443 1444 in_dev = __in_dev_get_rcu(dev); 1445 if (in_dev && !test_bit(i, in_dev->cnf.state)) 1446 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 1447 } 1448 rcu_read_unlock(); 1449} 1450 1451/* called with RTNL locked */ 1452static void inet_forward_change(struct net *net) 1453{ 1454 struct net_device *dev; 1455 int on = IPV4_DEVCONF_ALL(net, FORWARDING); 1456 1457 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1458 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1459 1460 for_each_netdev(net, dev) { 1461 struct in_device *in_dev; 1462 if (on) 1463 dev_disable_lro(dev); 1464 rcu_read_lock(); 1465 in_dev = __in_dev_get_rcu(dev); 1466 if (in_dev) 1467 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 1468 rcu_read_unlock(); 1469 } 1470} 1471 1472static int devinet_conf_proc(ctl_table *ctl, int write, 1473 void __user *buffer, 1474 size_t *lenp, loff_t *ppos) 1475{ 1476 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1477 1478 if (write) { 1479 struct ipv4_devconf *cnf = ctl->extra1; 1480 struct net *net = ctl->extra2; 1481 int i = (int *)ctl->data - cnf->data; 1482 1483 set_bit(i, cnf->state); 1484 1485 if (cnf == net->ipv4.devconf_dflt) 1486 devinet_copy_dflt_conf(net, i); 1487 } 1488 1489 return ret; 1490} 1491 1492static int devinet_sysctl_forward(ctl_table *ctl, int write, 1493 void __user *buffer, 1494 size_t *lenp, loff_t *ppos) 1495{ 1496 int *valp = ctl->data; 1497 int val = *valp; 1498 loff_t pos = *ppos; 1499 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1500 1501 if (write && *valp != val) { 1502 struct net *net = ctl->extra2; 1503 1504 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { 1505 if (!rtnl_trylock()) { 1506 /* Restore the original values before restarting */ 1507 *valp = val; 1508 *ppos = pos; 1509 return restart_syscall(); 1510 } 1511 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 1512 inet_forward_change(net); 1513 } else if (*valp) { 1514 struct ipv4_devconf *cnf = ctl->extra1; 1515 struct in_device *idev = 1516 container_of(cnf, struct in_device, cnf); 1517 dev_disable_lro(idev->dev); 1518 } 1519 rtnl_unlock(); 1520 rt_cache_flush(net, 0); 1521 } 1522 } 1523 1524 return ret; 1525} 1526 1527static int ipv4_doint_and_flush(ctl_table *ctl, int write, 1528 void __user *buffer, 1529 size_t *lenp, loff_t *ppos) 1530{ 1531 int *valp = ctl->data; 1532 int val = *valp; 1533 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1534 struct net *net = ctl->extra2; 1535 1536 if (write && *valp != val) 1537 rt_cache_flush(net, 0); 1538 1539 return ret; 1540} 1541 1542#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ 1543 { \ 1544 .procname = name, \ 1545 .data = ipv4_devconf.data + \ 1546 IPV4_DEVCONF_ ## attr - 1, \ 1547 .maxlen = sizeof(int), \ 1548 .mode = mval, \ 1549 .proc_handler = proc, \ 1550 .extra1 = &ipv4_devconf, \ 1551 } 1552 1553#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ 1554 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) 1555 1556#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ 1557 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) 1558 1559#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ 1560 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) 1561 1562#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ 1563 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) 1564 1565static struct devinet_sysctl_table { 1566 struct ctl_table_header *sysctl_header; 1567 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; 1568 char *dev_name; 1569} devinet_sysctl = { 1570 .devinet_vars = { 1571 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", 1572 devinet_sysctl_forward), 1573 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), 1574 1575 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), 1576 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), 1577 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), 1578 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), 1579 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), 1580 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, 1581 "accept_source_route"), 1582 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), 1583 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), 1584 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), 1585 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), 1586 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), 1587 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), 1588 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), 1589 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), 1590 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 1591 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 1592 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 1593 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 1594 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 1595 1596 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 1597 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 1598 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION, 1599 "force_igmp_version"), 1600 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 1601 "promote_secondaries"), 1602 }, 1603}; 1604 1605static int __devinet_sysctl_register(struct net *net, char *dev_name, 1606 struct ipv4_devconf *p) 1607{ 1608 int i; 1609 struct devinet_sysctl_table *t; 1610 1611#define DEVINET_CTL_PATH_DEV 3 1612 1613 struct ctl_path devinet_ctl_path[] = { 1614 { .procname = "net", }, 1615 { .procname = "ipv4", }, 1616 { .procname = "conf", }, 1617 { /* to be set */ }, 1618 { }, 1619 }; 1620 1621 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); 1622 if (!t) 1623 goto out; 1624 1625 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 1626 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 1627 t->devinet_vars[i].extra1 = p; 1628 t->devinet_vars[i].extra2 = net; 1629 } 1630 1631 /* 1632 * Make a copy of dev_name, because '.procname' is regarded as const 1633 * by sysctl and we wouldn't want anyone to change it under our feet 1634 * (see SIOCSIFNAME). 1635 */ 1636 t->dev_name = kstrdup(dev_name, GFP_KERNEL); 1637 if (!t->dev_name) 1638 goto free; 1639 1640 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; 1641 1642 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, 1643 t->devinet_vars); 1644 if (!t->sysctl_header) 1645 goto free_procname; 1646 1647 p->sysctl = t; 1648 return 0; 1649 1650free_procname: 1651 kfree(t->dev_name); 1652free: 1653 kfree(t); 1654out: 1655 return -ENOBUFS; 1656} 1657 1658static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) 1659{ 1660 struct devinet_sysctl_table *t = cnf->sysctl; 1661 1662 if (t == NULL) 1663 return; 1664 1665 cnf->sysctl = NULL; 1666 unregister_sysctl_table(t->sysctl_header); 1667 kfree(t->dev_name); 1668 kfree(t); 1669} 1670 1671static void devinet_sysctl_register(struct in_device *idev) 1672{ 1673 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); 1674 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 1675 &idev->cnf); 1676} 1677 1678static void devinet_sysctl_unregister(struct in_device *idev) 1679{ 1680 __devinet_sysctl_unregister(&idev->cnf); 1681 neigh_sysctl_unregister(idev->arp_parms); 1682} 1683 1684static struct ctl_table ctl_forward_entry[] = { 1685 { 1686 .procname = "ip_forward", 1687 .data = &ipv4_devconf.data[ 1688 IPV4_DEVCONF_FORWARDING - 1], 1689 .maxlen = sizeof(int), 1690 .mode = 0644, 1691 .proc_handler = devinet_sysctl_forward, 1692 .extra1 = &ipv4_devconf, 1693 .extra2 = &init_net, 1694 }, 1695 { }, 1696}; 1697 1698static __net_initdata struct ctl_path net_ipv4_path[] = { 1699 { .procname = "net", }, 1700 { .procname = "ipv4", }, 1701 { }, 1702}; 1703#endif 1704 1705static __net_init int devinet_init_net(struct net *net) 1706{ 1707 int err; 1708 struct ipv4_devconf *all, *dflt; 1709#ifdef CONFIG_SYSCTL 1710 struct ctl_table *tbl = ctl_forward_entry; 1711 struct ctl_table_header *forw_hdr; 1712#endif 1713 1714 err = -ENOMEM; 1715 all = &ipv4_devconf; 1716 dflt = &ipv4_devconf_dflt; 1717 1718 if (!net_eq(net, &init_net)) { 1719 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); 1720 if (all == NULL) 1721 goto err_alloc_all; 1722 1723 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); 1724 if (dflt == NULL) 1725 goto err_alloc_dflt; 1726 1727#ifdef CONFIG_SYSCTL 1728 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); 1729 if (tbl == NULL) 1730 goto err_alloc_ctl; 1731 1732 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; 1733 tbl[0].extra1 = all; 1734 tbl[0].extra2 = net; 1735#endif 1736 } 1737 1738#ifdef CONFIG_SYSCTL 1739 err = __devinet_sysctl_register(net, "all", all); 1740 if (err < 0) 1741 goto err_reg_all; 1742 1743 err = __devinet_sysctl_register(net, "default", dflt); 1744 if (err < 0) 1745 goto err_reg_dflt; 1746 1747 err = -ENOMEM; 1748 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); 1749 if (forw_hdr == NULL) 1750 goto err_reg_ctl; 1751 net->ipv4.forw_hdr = forw_hdr; 1752#endif 1753 1754 net->ipv4.devconf_all = all; 1755 net->ipv4.devconf_dflt = dflt; 1756 return 0; 1757 1758#ifdef CONFIG_SYSCTL 1759err_reg_ctl: 1760 __devinet_sysctl_unregister(dflt); 1761err_reg_dflt: 1762 __devinet_sysctl_unregister(all); 1763err_reg_all: 1764 if (tbl != ctl_forward_entry) 1765 kfree(tbl); 1766err_alloc_ctl: 1767#endif 1768 if (dflt != &ipv4_devconf_dflt) 1769 kfree(dflt); 1770err_alloc_dflt: 1771 if (all != &ipv4_devconf) 1772 kfree(all); 1773err_alloc_all: 1774 return err; 1775} 1776 1777static __net_exit void devinet_exit_net(struct net *net) 1778{ 1779#ifdef CONFIG_SYSCTL 1780 struct ctl_table *tbl; 1781 1782 tbl = net->ipv4.forw_hdr->ctl_table_arg; 1783 unregister_net_sysctl_table(net->ipv4.forw_hdr); 1784 __devinet_sysctl_unregister(net->ipv4.devconf_dflt); 1785 __devinet_sysctl_unregister(net->ipv4.devconf_all); 1786 kfree(tbl); 1787#endif 1788 kfree(net->ipv4.devconf_dflt); 1789 kfree(net->ipv4.devconf_all); 1790} 1791 1792static __net_initdata struct pernet_operations devinet_ops = { 1793 .init = devinet_init_net, 1794 .exit = devinet_exit_net, 1795}; 1796 1797static struct rtnl_af_ops inet_af_ops = { 1798 .family = AF_INET, 1799 .fill_link_af = inet_fill_link_af, 1800 .get_link_af_size = inet_get_link_af_size, 1801 .validate_link_af = inet_validate_link_af, 1802 .set_link_af = inet_set_link_af, 1803}; 1804 1805void __init devinet_init(void) 1806{ 1807 int i; 1808 1809 for (i = 0; i < IN4_ADDR_HSIZE; i++) 1810 INIT_HLIST_HEAD(&inet_addr_lst[i]); 1811 1812 register_pernet_subsys(&devinet_ops); 1813 1814 register_gifconf(PF_INET, inet_gifconf); 1815 register_netdevice_notifier(&ip_netdev_notifier); 1816 1817 rtnl_af_register(&inet_af_ops); 1818 1819 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); 1820 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); 1821 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); 1822} 1823 1824