devinet.c revision 539afedfccb39577c9264b29f11ec9556fd45022
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#ifdef CONFIG_SYSCTL
54#include <linux/sysctl.h>
55#endif
56#include <linux/kmod.h>
57
58#include <net/arp.h>
59#include <net/ip.h>
60#include <net/route.h>
61#include <net/ip_fib.h>
62#include <net/rtnetlink.h>
63#include <net/net_namespace.h>
64
65static struct ipv4_devconf ipv4_devconf = {
66	.data = {
67		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71	},
72};
73
74static struct ipv4_devconf ipv4_devconf_dflt = {
75	.data = {
76		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81	},
82};
83
/*
 * Access configuration slot @attr in the default IPv4 device configuration
 * of network namespace @net.  @net is parenthesized so that any pointer
 * expression (not just a plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88	[IFA_LOCAL]     	= { .type = NLA_U32 },
89	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92};
93
94static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98			 int destroy);
99#ifdef CONFIG_SYSCTL
100static void devinet_sysctl_register(struct in_device *idev);
101static void devinet_sysctl_unregister(struct in_device *idev);
102#else
103static inline void devinet_sysctl_register(struct in_device *idev)
104{
105}
106static inline void devinet_sysctl_unregister(struct in_device *idev)
107{
108}
109#endif
110
111/* Locks all the inet devices. */
112
113static struct in_ifaddr *inet_alloc_ifa(void)
114{
115	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
116}
117
118static void inet_rcu_free_ifa(struct rcu_head *head)
119{
120	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
121	if (ifa->ifa_dev)
122		in_dev_put(ifa->ifa_dev);
123	kfree(ifa);
124}
125
126static inline void inet_free_ifa(struct in_ifaddr *ifa)
127{
128	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
129}
130
131void in_dev_finish_destroy(struct in_device *idev)
132{
133	struct net_device *dev = idev->dev;
134
135	WARN_ON(idev->ifa_list);
136	WARN_ON(idev->mc_list);
137#ifdef NET_REFCNT_DEBUG
138	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139	       idev, dev ? dev->name : "NIL");
140#endif
141	dev_put(dev);
142	if (!idev->dead)
143		printk("Freeing alive in_device %p\n", idev);
144	else {
145		kfree(idev);
146	}
147}
148
149static struct in_device *inetdev_init(struct net_device *dev)
150{
151	struct in_device *in_dev;
152
153	ASSERT_RTNL();
154
155	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
156	if (!in_dev)
157		goto out;
158	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159			sizeof(in_dev->cnf));
160	in_dev->cnf.sysctl = NULL;
161	in_dev->dev = dev;
162	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
163		goto out_kfree;
164	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
165		dev_disable_lro(dev);
166	/* Reference in_dev->dev */
167	dev_hold(dev);
168	/* Account for reference dev->ip_ptr (below) */
169	in_dev_hold(in_dev);
170
171	devinet_sysctl_register(in_dev);
172	ip_mc_init_dev(in_dev);
173	if (dev->flags & IFF_UP)
174		ip_mc_up(in_dev);
175
176	/* we can receive as soon as ip_ptr is set -- do this last */
177	rcu_assign_pointer(dev->ip_ptr, in_dev);
178out:
179	return in_dev;
180out_kfree:
181	kfree(in_dev);
182	in_dev = NULL;
183	goto out;
184}
185
186static void in_dev_rcu_put(struct rcu_head *head)
187{
188	struct in_device *idev = container_of(head, struct in_device, rcu_head);
189	in_dev_put(idev);
190}
191
192static void inetdev_destroy(struct in_device *in_dev)
193{
194	struct in_ifaddr *ifa;
195	struct net_device *dev;
196
197	ASSERT_RTNL();
198
199	dev = in_dev->dev;
200
201	in_dev->dead = 1;
202
203	ip_mc_destroy_dev(in_dev);
204
205	while ((ifa = in_dev->ifa_list) != NULL) {
206		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
207		inet_free_ifa(ifa);
208	}
209
210	dev->ip_ptr = NULL;
211
212	devinet_sysctl_unregister(in_dev);
213	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
214	arp_ifdown(dev);
215
216	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
217}
218
219int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
220{
221	rcu_read_lock();
222	for_primary_ifa(in_dev) {
223		if (inet_ifa_match(a, ifa)) {
224			if (!b || inet_ifa_match(b, ifa)) {
225				rcu_read_unlock();
226				return 1;
227			}
228		}
229	} endfor_ifa(in_dev);
230	rcu_read_unlock();
231	return 0;
232}
233
/*
 * Remove one address from an in_device's address list.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (pointer to the "next" slot) that currently points at the
 *           address to delete
 * @destroy: when non-zero the deleted address is released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh:     netlink request that triggered the deletion (may be NULL),
 *           forwarded to rtmsg_ifa()
 * @pid:     netlink pid forwarded to rtmsg_ifa()
 *
 * Deleting a primary address also affects the secondaries that share its
 * mask and subnet: they are all deleted, unless secondary promotion is
 * enabled for the device, in which case the first such secondary becomes
 * the new primary.
 *
 * Must be called with RTNL held.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary (with a scope value not
			 * below ifa1's) seen so far; a promoted address is
			 * re-inserted right after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1's
			 * subnet; prev_prom trails as the last skipped entry.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary takes over. */
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* If other entries sat between ifa1 and the promoted address,
		 * move the promoted address to just after the last primary.
		 * With prev_prom == NULL it directly followed ifa1 and is
		 * left where it is.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add FIB entries for the remaining addresses of the
		 * deleted primary's subnet that follow the promoted one.
		 */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
319
320static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
321			 int destroy)
322{
323	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
324}
325
326static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
327			     u32 pid)
328{
329	struct in_device *in_dev = ifa->ifa_dev;
330	struct in_ifaddr *ifa1, **ifap, **last_primary;
331
332	ASSERT_RTNL();
333
334	if (!ifa->ifa_local) {
335		inet_free_ifa(ifa);
336		return 0;
337	}
338
339	ifa->ifa_flags &= ~IFA_F_SECONDARY;
340	last_primary = &in_dev->ifa_list;
341
342	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
343	     ifap = &ifa1->ifa_next) {
344		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
345		    ifa->ifa_scope <= ifa1->ifa_scope)
346			last_primary = &ifa1->ifa_next;
347		if (ifa1->ifa_mask == ifa->ifa_mask &&
348		    inet_ifa_match(ifa1->ifa_address, ifa)) {
349			if (ifa1->ifa_local == ifa->ifa_local) {
350				inet_free_ifa(ifa);
351				return -EEXIST;
352			}
353			if (ifa1->ifa_scope != ifa->ifa_scope) {
354				inet_free_ifa(ifa);
355				return -EINVAL;
356			}
357			ifa->ifa_flags |= IFA_F_SECONDARY;
358		}
359	}
360
361	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
362		net_srandom(ifa->ifa_local);
363		ifap = last_primary;
364	}
365
366	ifa->ifa_next = *ifap;
367	*ifap = ifa;
368
369	/* Send message first, then call notifier.
370	   Notifier will trigger FIB update, so that
371	   listeners of netlink will know about new ifaddr */
372	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
373	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
374
375	return 0;
376}
377
378static int inet_insert_ifa(struct in_ifaddr *ifa)
379{
380	return __inet_insert_ifa(ifa, NULL, 0);
381}
382
383static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
384{
385	struct in_device *in_dev = __in_dev_get_rtnl(dev);
386
387	ASSERT_RTNL();
388
389	if (!in_dev) {
390		inet_free_ifa(ifa);
391		return -ENOBUFS;
392	}
393	ipv4_devconf_setall(in_dev);
394	if (ifa->ifa_dev != in_dev) {
395		WARN_ON(ifa->ifa_dev);
396		in_dev_hold(in_dev);
397		ifa->ifa_dev = in_dev;
398	}
399	if (ipv4_is_loopback(ifa->ifa_local))
400		ifa->ifa_scope = RT_SCOPE_HOST;
401	return inet_insert_ifa(ifa);
402}
403
404struct in_device *inetdev_by_index(struct net *net, int ifindex)
405{
406	struct net_device *dev;
407	struct in_device *in_dev = NULL;
408	read_lock(&dev_base_lock);
409	dev = __dev_get_by_index(net, ifindex);
410	if (dev)
411		in_dev = in_dev_get(dev);
412	read_unlock(&dev_base_lock);
413	return in_dev;
414}
415
416/* Called only from RTNL semaphored context. No locks. */
417
418struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
419				    __be32 mask)
420{
421	ASSERT_RTNL();
422
423	for_primary_ifa(in_dev) {
424		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
425			return ifa;
426	} endfor_ifa(in_dev);
427	return NULL;
428}
429
430static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
431{
432	struct net *net = sock_net(skb->sk);
433	struct nlattr *tb[IFA_MAX+1];
434	struct in_device *in_dev;
435	struct ifaddrmsg *ifm;
436	struct in_ifaddr *ifa, **ifap;
437	int err = -EINVAL;
438
439	ASSERT_RTNL();
440
441	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
442	if (err < 0)
443		goto errout;
444
445	ifm = nlmsg_data(nlh);
446	in_dev = inetdev_by_index(net, ifm->ifa_index);
447	if (in_dev == NULL) {
448		err = -ENODEV;
449		goto errout;
450	}
451
452	__in_dev_put(in_dev);
453
454	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
455	     ifap = &ifa->ifa_next) {
456		if (tb[IFA_LOCAL] &&
457		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
458			continue;
459
460		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
461			continue;
462
463		if (tb[IFA_ADDRESS] &&
464		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
465		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
466			continue;
467
468		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
469		return 0;
470	}
471
472	err = -EADDRNOTAVAIL;
473errout:
474	return err;
475}
476
477static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
478{
479	struct nlattr *tb[IFA_MAX+1];
480	struct in_ifaddr *ifa;
481	struct ifaddrmsg *ifm;
482	struct net_device *dev;
483	struct in_device *in_dev;
484	int err;
485
486	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
487	if (err < 0)
488		goto errout;
489
490	ifm = nlmsg_data(nlh);
491	err = -EINVAL;
492	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
493		goto errout;
494
495	dev = __dev_get_by_index(net, ifm->ifa_index);
496	err = -ENODEV;
497	if (dev == NULL)
498		goto errout;
499
500	in_dev = __in_dev_get_rtnl(dev);
501	err = -ENOBUFS;
502	if (in_dev == NULL)
503		goto errout;
504
505	ifa = inet_alloc_ifa();
506	if (ifa == NULL)
507		/*
508		 * A potential indev allocation can be left alive, it stays
509		 * assigned to its device and is destroy with it.
510		 */
511		goto errout;
512
513	ipv4_devconf_setall(in_dev);
514	in_dev_hold(in_dev);
515
516	if (tb[IFA_ADDRESS] == NULL)
517		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
518
519	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
520	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
521	ifa->ifa_flags = ifm->ifa_flags;
522	ifa->ifa_scope = ifm->ifa_scope;
523	ifa->ifa_dev = in_dev;
524
525	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
526	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
527
528	if (tb[IFA_BROADCAST])
529		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
530
531	if (tb[IFA_LABEL])
532		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
533	else
534		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
535
536	return ifa;
537
538errout:
539	return ERR_PTR(err);
540}
541
542static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
543{
544	struct net *net = sock_net(skb->sk);
545	struct in_ifaddr *ifa;
546
547	ASSERT_RTNL();
548
549	ifa = rtm_to_ifaddr(net, nlh);
550	if (IS_ERR(ifa))
551		return PTR_ERR(ifa);
552
553	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
554}
555
556/*
557 *	Determine a default network mask, based on the IP address.
558 */
559
560static __inline__ int inet_abc_len(__be32 addr)
561{
562	int rc = -1;	/* Something else, probably a multicast. */
563
564	if (ipv4_is_zeronet(addr))
565		rc = 0;
566	else {
567		__u32 haddr = ntohl(addr);
568
569		if (IN_CLASSA(haddr))
570			rc = 8;
571		else if (IN_CLASSB(haddr))
572			rc = 16;
573		else if (IN_CLASSC(haddr))
574			rc = 24;
575	}
576
577	return rc;
578}
579
580
/*
 * Handle the IPv4 interface address ioctls.
 *
 * @net: network namespace used for the device lookup
 * @cmd: one of SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK},
 *       SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @arg: user pointer to a struct ifreq; the "get" commands write the
 *       result back through it
 *
 * The interface name may carry an alias suffix ("eth0:1").  The device is
 * looked up by the part before the colon, the address by the full label.
 *
 * Returns 0 on success or a negative errno: -EFAULT for a bad user
 * pointer, -EACCES without CAP_NET_ADMIN on a "set" command, -EINVAL for
 * an unknown command or invalid value, -ENODEV for an unknown device,
 * -EADDRNOTAVAIL when no matching address exists, -ENOBUFS when a new
 * address cannot be allocated.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Never trust user space to have terminated the name. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut off an alias suffix so that the module load and
	 * device lookup below use the base device name.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* The reply is built in place; start from a clean sockaddr. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
		goto done;

	/* Restore the full label for the address comparisons below. */
	if (colon)
		*colon = ':';

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_address) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR (creates an address) and SIOCSIFFLAGS (may act
	 * on the device itself) can proceed without an existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* On an alias only "down" has an effect: it deletes
			 * the alias address.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			if ((ifa = inet_alloc_ifa()) == NULL)
				break;
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; the same structure is
			 * re-inserted below with the new address.
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Derive classful mask and broadcast address. */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: drop RTNL first, then copy the reply to user space. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
817
818static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
819{
820	struct in_device *in_dev = __in_dev_get_rtnl(dev);
821	struct in_ifaddr *ifa;
822	struct ifreq ifr;
823	int done = 0;
824
825	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
826		goto out;
827
828	for (; ifa; ifa = ifa->ifa_next) {
829		if (!buf) {
830			done += sizeof(ifr);
831			continue;
832		}
833		if (len < (int) sizeof(ifr))
834			break;
835		memset(&ifr, 0, sizeof(struct ifreq));
836		if (ifa->ifa_label)
837			strcpy(ifr.ifr_name, ifa->ifa_label);
838		else
839			strcpy(ifr.ifr_name, dev->name);
840
841		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
842		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
843								ifa->ifa_local;
844
845		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
846			done = -EFAULT;
847			break;
848		}
849		buf  += sizeof(struct ifreq);
850		len  -= sizeof(struct ifreq);
851		done += sizeof(struct ifreq);
852	}
853out:
854	return done;
855}
856
857__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
858{
859	__be32 addr = 0;
860	struct in_device *in_dev;
861	struct net *net = dev_net(dev);
862
863	rcu_read_lock();
864	in_dev = __in_dev_get_rcu(dev);
865	if (!in_dev)
866		goto no_in_dev;
867
868	for_primary_ifa(in_dev) {
869		if (ifa->ifa_scope > scope)
870			continue;
871		if (!dst || inet_ifa_match(dst, ifa)) {
872			addr = ifa->ifa_local;
873			break;
874		}
875		if (!addr)
876			addr = ifa->ifa_local;
877	} endfor_ifa(in_dev);
878no_in_dev:
879	rcu_read_unlock();
880
881	if (addr)
882		goto out;
883
884	/* Not loopback addresses on loopback should be preferred
885	   in this case. It is importnat that lo is the first interface
886	   in dev_base list.
887	 */
888	read_lock(&dev_base_lock);
889	rcu_read_lock();
890	for_each_netdev(net, dev) {
891		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
892			continue;
893
894		for_primary_ifa(in_dev) {
895			if (ifa->ifa_scope != RT_SCOPE_LINK &&
896			    ifa->ifa_scope <= scope) {
897				addr = ifa->ifa_local;
898				goto out_unlock_both;
899			}
900		} endfor_ifa(in_dev);
901	}
902out_unlock_both:
903	read_unlock(&dev_base_lock);
904	rcu_read_unlock();
905out:
906	return addr;
907}
908
/*
 * Look on a single in_device for a local address that satisfies the
 * wildcard query (@dst, @local, @scope).
 *
 * @dst:   0, or an address the chosen subnet must contain
 * @local: 0 to auto-select, otherwise the local address to confirm
 * @scope: largest scope value the returned address may have
 *
 * Two facts are tracked while walking the address list: "addr", a
 * candidate local address within @scope, and "same", whether an entry has
 * been found whose subnet contains @local and @dst (each test is skipped
 * for a zero value).  Both must hold for a non-zero result.
 *
 * Returns the confirmed/selected address, or 0 if nothing matches.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Pick the first acceptable candidate address. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* With an explicit local address, or without
				 * a dst constraint, the candidate stands.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same? addr : 0;
}
945
946/*
947 * Confirm that local IP address exists using wildcards:
948 * - in_dev: only on this interface, 0=any interface
949 * - dst: only in the same subnet as dst, 0=any dst
950 * - local: address, 0=autoselect the local address
951 * - scope: maximum allowed scope value for the local address
952 */
953__be32 inet_confirm_addr(struct in_device *in_dev,
954			 __be32 dst, __be32 local, int scope)
955{
956	__be32 addr = 0;
957	struct net_device *dev;
958	struct net *net;
959
960	if (scope != RT_SCOPE_LINK)
961		return confirm_addr_indev(in_dev, dst, local, scope);
962
963	net = dev_net(in_dev->dev);
964	read_lock(&dev_base_lock);
965	rcu_read_lock();
966	for_each_netdev(net, dev) {
967		if ((in_dev = __in_dev_get_rcu(dev))) {
968			addr = confirm_addr_indev(in_dev, dst, local, scope);
969			if (addr)
970				break;
971		}
972	}
973	rcu_read_unlock();
974	read_unlock(&dev_base_lock);
975
976	return addr;
977}
978
979/*
980 *	Device notifier
981 */
982
983int register_inetaddr_notifier(struct notifier_block *nb)
984{
985	return blocking_notifier_chain_register(&inetaddr_chain, nb);
986}
987
988int unregister_inetaddr_notifier(struct notifier_block *nb)
989{
990	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
991}
992
993/* Rename ifa_labels for a device name change. Make some effort to preserve existing
994 * alias numbering and to create unique labels if possible.
995*/
996static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
997{
998	struct in_ifaddr *ifa;
999	int named = 0;
1000
1001	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1002		char old[IFNAMSIZ], *dot;
1003
1004		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1005		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1006		if (named++ == 0)
1007			goto skip;
1008		dot = strchr(old, ':');
1009		if (dot == NULL) {
1010			sprintf(old, ":%d", named);
1011			dot = old;
1012		}
1013		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1014			strcat(ifa->ifa_label, dot);
1015		} else {
1016			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1017		}
1018skip:
1019		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1020	}
1021}
1022
/*
 * Smallest MTU on which IPv4 is kept enabled: RFC 791 requires every
 * internet module to forward a 68-octet datagram without fragmentation.
 */
enum { INETDEV_MIN_MTU = 68 };

/* Return true if @mtu is large enough to run IPv4 over the device. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return mtu >= INETDEV_MIN_MTU;
}
1027
1028/* Called only under RTNL semaphore */
1029
1030static int inetdev_event(struct notifier_block *this, unsigned long event,
1031			 void *ptr)
1032{
1033	struct net_device *dev = ptr;
1034	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1035
1036	ASSERT_RTNL();
1037
1038	if (!in_dev) {
1039		if (event == NETDEV_REGISTER) {
1040			in_dev = inetdev_init(dev);
1041			if (!in_dev)
1042				return notifier_from_errno(-ENOMEM);
1043			if (dev->flags & IFF_LOOPBACK) {
1044				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1045				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1046			}
1047		} else if (event == NETDEV_CHANGEMTU) {
1048			/* Re-enabling IP */
1049			if (inetdev_valid_mtu(dev->mtu))
1050				in_dev = inetdev_init(dev);
1051		}
1052		goto out;
1053	}
1054
1055	switch (event) {
1056	case NETDEV_REGISTER:
1057		printk(KERN_DEBUG "inetdev_event: bug\n");
1058		dev->ip_ptr = NULL;
1059		break;
1060	case NETDEV_UP:
1061		if (!inetdev_valid_mtu(dev->mtu))
1062			break;
1063		if (dev->flags & IFF_LOOPBACK) {
1064			struct in_ifaddr *ifa;
1065			if ((ifa = inet_alloc_ifa()) != NULL) {
1066				ifa->ifa_local =
1067				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1068				ifa->ifa_prefixlen = 8;
1069				ifa->ifa_mask = inet_make_mask(8);
1070				in_dev_hold(in_dev);
1071				ifa->ifa_dev = in_dev;
1072				ifa->ifa_scope = RT_SCOPE_HOST;
1073				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1074				inet_insert_ifa(ifa);
1075			}
1076		}
1077		ip_mc_up(in_dev);
1078		break;
1079	case NETDEV_DOWN:
1080		ip_mc_down(in_dev);
1081		break;
1082	case NETDEV_CHANGEMTU:
1083		if (inetdev_valid_mtu(dev->mtu))
1084			break;
1085		/* disable IP when MTU is not enough */
1086	case NETDEV_UNREGISTER:
1087		inetdev_destroy(in_dev);
1088		break;
1089	case NETDEV_CHANGENAME:
1090		/* Do not notify about label change, this event is
1091		 * not interesting to applications using netlink.
1092		 */
1093		inetdev_changename(dev, in_dev);
1094
1095		devinet_sysctl_unregister(in_dev);
1096		devinet_sysctl_register(in_dev);
1097		break;
1098	}
1099out:
1100	return NOTIFY_DONE;
1101}
1102
/* Notifier block that routes net_device events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1106
1107static inline size_t inet_nlmsg_size(void)
1108{
1109	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1110	       + nla_total_size(4) /* IFA_ADDRESS */
1111	       + nla_total_size(4) /* IFA_LOCAL */
1112	       + nla_total_size(4) /* IFA_BROADCAST */
1113	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1114}
1115
1116static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1117			    u32 pid, u32 seq, int event, unsigned int flags)
1118{
1119	struct ifaddrmsg *ifm;
1120	struct nlmsghdr  *nlh;
1121
1122	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1123	if (nlh == NULL)
1124		return -EMSGSIZE;
1125
1126	ifm = nlmsg_data(nlh);
1127	ifm->ifa_family = AF_INET;
1128	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1129	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1130	ifm->ifa_scope = ifa->ifa_scope;
1131	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1132
1133	if (ifa->ifa_address)
1134		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1135
1136	if (ifa->ifa_local)
1137		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1138
1139	if (ifa->ifa_broadcast)
1140		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1141
1142	if (ifa->ifa_label[0])
1143		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1144
1145	return nlmsg_end(skb, nlh);
1146
1147nla_put_failure:
1148	nlmsg_cancel(skb, nlh);
1149	return -EMSGSIZE;
1150}
1151
1152static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1153{
1154	struct net *net = sock_net(skb->sk);
1155	int idx, ip_idx;
1156	struct net_device *dev;
1157	struct in_device *in_dev;
1158	struct in_ifaddr *ifa;
1159	int s_ip_idx, s_idx = cb->args[0];
1160
1161	s_ip_idx = ip_idx = cb->args[1];
1162	idx = 0;
1163	for_each_netdev(net, dev) {
1164		if (idx < s_idx)
1165			goto cont;
1166		if (idx > s_idx)
1167			s_ip_idx = 0;
1168		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1169			goto cont;
1170
1171		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1172		     ifa = ifa->ifa_next, ip_idx++) {
1173			if (ip_idx < s_ip_idx)
1174				continue;
1175			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1176					     cb->nlh->nlmsg_seq,
1177					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1178				goto done;
1179		}
1180cont:
1181		idx++;
1182	}
1183
1184done:
1185	cb->args[0] = idx;
1186	cb->args[1] = ip_idx;
1187
1188	return skb->len;
1189}
1190
1191static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1192		      u32 pid)
1193{
1194	struct sk_buff *skb;
1195	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1196	int err = -ENOBUFS;
1197	struct net *net;
1198
1199	net = dev_net(ifa->ifa_dev->dev);
1200	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1201	if (skb == NULL)
1202		goto errout;
1203
1204	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1205	if (err < 0) {
1206		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1207		WARN_ON(err == -EMSGSIZE);
1208		kfree_skb(skb);
1209		goto errout;
1210	}
1211	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1212errout:
1213	if (err < 0)
1214		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1215}
1216
1217#ifdef CONFIG_SYSCTL
1218
1219static void devinet_copy_dflt_conf(struct net *net, int i)
1220{
1221	struct net_device *dev;
1222
1223	read_lock(&dev_base_lock);
1224	for_each_netdev(net, dev) {
1225		struct in_device *in_dev;
1226		rcu_read_lock();
1227		in_dev = __in_dev_get_rcu(dev);
1228		if (in_dev && !test_bit(i, in_dev->cnf.state))
1229			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1230		rcu_read_unlock();
1231	}
1232	read_unlock(&dev_base_lock);
1233}
1234
1235static void inet_forward_change(struct net *net)
1236{
1237	struct net_device *dev;
1238	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1239
1240	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1241	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1242
1243	read_lock(&dev_base_lock);
1244	for_each_netdev(net, dev) {
1245		struct in_device *in_dev;
1246		if (on)
1247			dev_disable_lro(dev);
1248		rcu_read_lock();
1249		in_dev = __in_dev_get_rcu(dev);
1250		if (in_dev)
1251			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1252		rcu_read_unlock();
1253	}
1254	read_unlock(&dev_base_lock);
1255}
1256
1257static int devinet_conf_proc(ctl_table *ctl, int write,
1258			     struct file *filp, void __user *buffer,
1259			     size_t *lenp, loff_t *ppos)
1260{
1261	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1262
1263	if (write) {
1264		struct ipv4_devconf *cnf = ctl->extra1;
1265		struct net *net = ctl->extra2;
1266		int i = (int *)ctl->data - cnf->data;
1267
1268		set_bit(i, cnf->state);
1269
1270		if (cnf == net->ipv4.devconf_dflt)
1271			devinet_copy_dflt_conf(net, i);
1272	}
1273
1274	return ret;
1275}
1276
1277static int devinet_conf_sysctl(ctl_table *table,
1278			       void __user *oldval, size_t __user *oldlenp,
1279			       void __user *newval, size_t newlen)
1280{
1281	struct ipv4_devconf *cnf;
1282	struct net *net;
1283	int *valp = table->data;
1284	int new;
1285	int i;
1286
1287	if (!newval || !newlen)
1288		return 0;
1289
1290	if (newlen != sizeof(int))
1291		return -EINVAL;
1292
1293	if (get_user(new, (int __user *)newval))
1294		return -EFAULT;
1295
1296	if (new == *valp)
1297		return 0;
1298
1299	if (oldval && oldlenp) {
1300		size_t len;
1301
1302		if (get_user(len, oldlenp))
1303			return -EFAULT;
1304
1305		if (len) {
1306			if (len > table->maxlen)
1307				len = table->maxlen;
1308			if (copy_to_user(oldval, valp, len))
1309				return -EFAULT;
1310			if (put_user(len, oldlenp))
1311				return -EFAULT;
1312		}
1313	}
1314
1315	*valp = new;
1316
1317	cnf = table->extra1;
1318	net = table->extra2;
1319	i = (int *)table->data - cnf->data;
1320
1321	set_bit(i, cnf->state);
1322
1323	if (cnf == net->ipv4.devconf_dflt)
1324		devinet_copy_dflt_conf(net, i);
1325
1326	return 1;
1327}
1328
1329static int devinet_sysctl_forward(ctl_table *ctl, int write,
1330				  struct file *filp, void __user *buffer,
1331				  size_t *lenp, loff_t *ppos)
1332{
1333	int *valp = ctl->data;
1334	int val = *valp;
1335	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1336
1337	if (write && *valp != val) {
1338		struct net *net = ctl->extra2;
1339
1340		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1341			rtnl_lock();
1342			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1343				inet_forward_change(net);
1344			} else if (*valp) {
1345				struct ipv4_devconf *cnf = ctl->extra1;
1346				struct in_device *idev =
1347					container_of(cnf, struct in_device, cnf);
1348				dev_disable_lro(idev->dev);
1349			}
1350			rtnl_unlock();
1351			rt_cache_flush(net, 0);
1352		}
1353	}
1354
1355	return ret;
1356}
1357
1358int ipv4_doint_and_flush(ctl_table *ctl, int write,
1359			 struct file *filp, void __user *buffer,
1360			 size_t *lenp, loff_t *ppos)
1361{
1362	int *valp = ctl->data;
1363	int val = *valp;
1364	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1365	struct net *net = ctl->extra2;
1366
1367	if (write && *valp != val)
1368		rt_cache_flush(net, 0);
1369
1370	return ret;
1371}
1372
1373int ipv4_doint_and_flush_strategy(ctl_table *table,
1374				  void __user *oldval, size_t __user *oldlenp,
1375				  void __user *newval, size_t newlen)
1376{
1377	int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
1378	struct net *net = table->extra2;
1379
1380	if (ret == 1)
1381		rt_cache_flush(net, 0);
1382
1383	return ret;
1384}
1385
1386
/*
 * Build one ctl_table initializer for the setting NET_IPV4_CONF_<attr>.
 *
 * @name is the procfs file name, @mval the file mode, @proc the proc
 * handler and @sysctl the strategy routine.  .data points at the slot
 * for <attr> inside the static ipv4_devconf (slot index is the constant
 * minus one) and .extra1 at ipv4_devconf itself; both are rebased onto
 * the real configuration by __devinet_sysctl_register().
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry using the generic devinet handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devinet handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied proc and strategy handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Mode 0644 entry whose handlers flush the routing cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1414
/*
 * Template for a "net/ipv4/conf/<name>/" sysctl directory.
 *
 * This object is never registered itself: __devinet_sysctl_register()
 * duplicates it for each configuration and rebases the entries' .data,
 * .extra1 and .extra2 onto that configuration.
 *
 * sysctl_header: handle of the registered copy.
 * devinet_vars:  one ctl_table entry per setting; slots not initialized
 *                below remain zeroed.
 * dev_name:      private copy of the directory name.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		/* Forwarding has its own proc handler (LRO, route flush). */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward,
					     devinet_conf_sysctl),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* Plain read/write integer settings. */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),

		/* Settings whose change also flushes the routing cache. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1451
1452static int __devinet_sysctl_register(struct net *net, char *dev_name,
1453		int ctl_name, struct ipv4_devconf *p)
1454{
1455	int i;
1456	struct devinet_sysctl_table *t;
1457
1458#define DEVINET_CTL_PATH_DEV	3
1459
1460	struct ctl_path devinet_ctl_path[] = {
1461		{ .procname = "net", .ctl_name = CTL_NET, },
1462		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1463		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1464		{ /* to be set */ },
1465		{ },
1466	};
1467
1468	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1469	if (!t)
1470		goto out;
1471
1472	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1473		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1474		t->devinet_vars[i].extra1 = p;
1475		t->devinet_vars[i].extra2 = net;
1476	}
1477
1478	/*
1479	 * Make a copy of dev_name, because '.procname' is regarded as const
1480	 * by sysctl and we wouldn't want anyone to change it under our feet
1481	 * (see SIOCSIFNAME).
1482	 */
1483	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1484	if (!t->dev_name)
1485		goto free;
1486
1487	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1488	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1489
1490	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1491			t->devinet_vars);
1492	if (!t->sysctl_header)
1493		goto free_procname;
1494
1495	p->sysctl = t;
1496	return 0;
1497
1498free_procname:
1499	kfree(t->dev_name);
1500free:
1501	kfree(t);
1502out:
1503	return -ENOBUFS;
1504}
1505
1506static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1507{
1508	struct devinet_sysctl_table *t = cnf->sysctl;
1509
1510	if (t == NULL)
1511		return;
1512
1513	cnf->sysctl = NULL;
1514	unregister_sysctl_table(t->sysctl_header);
1515	kfree(t->dev_name);
1516	kfree(t);
1517}
1518
1519static void devinet_sysctl_register(struct in_device *idev)
1520{
1521	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1522			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1523	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1524			idev->dev->ifindex, &idev->cnf);
1525}
1526
1527static void devinet_sysctl_unregister(struct in_device *idev)
1528{
1529	__devinet_sysctl_unregister(&idev->cnf);
1530	neigh_sysctl_unregister(idev->arp_parms);
1531}
1532
/*
 * Table for the "ip_forward" sysctl, an alias for the FORWARDING slot of
 * the "all" configuration.  As written it targets init_net and the
 * static ipv4_devconf; devinet_init_net() duplicates it for other
 * namespaces and repoints .data, .extra1 and .extra2.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.ctl_name	= NET_IPV4_FORWARD,
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					NET_IPV4_CONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.strategy	= devinet_conf_sysctl,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },	/* terminating empty entry */
};
1548
/* sysctl path "net/ipv4", under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", .ctl_name = CTL_NET, },
	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
	{ },	/* terminating empty entry */
};
1554#endif
1555
/*
 * Per-namespace initialization of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table); every other
 * namespace gets its own kmemdup()ed copies.  With CONFIG_SYSCTL the
 * "all" and "default" configuration directories and the "ip_forward"
 * entry are then registered.  On success the configurations are
 * published in net->ipv4.devconf_all / devconf_dflt.
 *
 * Returns 0 on success or a negative errno; on failure everything set up
 * so far is unwound in reverse order.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	/* Every allocation failure below reports -ENOMEM. */
	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	/* Namespaces other than init_net work on private copies. */
	if (net != &init_net) {
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied "ip_forward" entry at this namespace. */
		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all",
			NET_PROTO_CONF_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
			NET_PROTO_CONF_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/*
	 * Error unwinding.  The static objects used for init_net must not
	 * be passed to kfree(), hence the pointer comparisons.
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1629
1630static __net_exit void devinet_exit_net(struct net *net)
1631{
1632#ifdef CONFIG_SYSCTL
1633	struct ctl_table *tbl;
1634
1635	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1636	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1637	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1638	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1639	kfree(tbl);
1640#endif
1641	kfree(net->ipv4.devconf_dflt);
1642	kfree(net->ipv4.devconf_all);
1643}
1644
/* Per-namespace init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1649
/*
 * One-time initialization of IPv4 device support: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier and the rtnetlink handlers for adding, deleting and dumping
 * addresses.  None of the registration return values are checked.
 */
void __init devinet_init(void)
{
	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* RTM_GETADDR is served by the dump callback only. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1661
/* Symbols from this file that are exported via EXPORT_SYMBOL(). */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1667