devinet.c revision f221e726bf4e082a05dcd573379ac859bfba7126
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#ifdef CONFIG_SYSCTL
54#include <linux/sysctl.h>
55#endif
56#include <linux/kmod.h>
57
58#include <net/arp.h>
59#include <net/ip.h>
60#include <net/route.h>
61#include <net/ip_fib.h>
62#include <net/rtnetlink.h>
63#include <net/net_namespace.h>
64
65static struct ipv4_devconf ipv4_devconf = {
66	.data = {
67		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71	},
72};
73
74static struct ipv4_devconf ipv4_devconf_dflt = {
75	.data = {
76		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81	},
82};
83
/*
 * Access configuration attribute @attr in the default devconf that hangs
 * off namespace @net (net->ipv4.devconf_dflt).
 *
 * @net is parenthesized before the member access so that any
 * pointer-valued expression, not just a plain identifier, expands
 * correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88	[IFA_LOCAL]     	= { .type = NLA_U32 },
89	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92};
93
94static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98			 int destroy);
99#ifdef CONFIG_SYSCTL
100static void devinet_sysctl_register(struct in_device *idev);
101static void devinet_sysctl_unregister(struct in_device *idev);
102#else
103static inline void devinet_sysctl_register(struct in_device *idev)
104{
105}
106static inline void devinet_sysctl_unregister(struct in_device *idev)
107{
108}
109#endif
110
111/* Locks all the inet devices. */
112
113static struct in_ifaddr *inet_alloc_ifa(void)
114{
115	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
116
117	if (ifa) {
118		INIT_RCU_HEAD(&ifa->rcu_head);
119	}
120
121	return ifa;
122}
123
124static void inet_rcu_free_ifa(struct rcu_head *head)
125{
126	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
127	if (ifa->ifa_dev)
128		in_dev_put(ifa->ifa_dev);
129	kfree(ifa);
130}
131
132static inline void inet_free_ifa(struct in_ifaddr *ifa)
133{
134	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
135}
136
137void in_dev_finish_destroy(struct in_device *idev)
138{
139	struct net_device *dev = idev->dev;
140
141	WARN_ON(idev->ifa_list);
142	WARN_ON(idev->mc_list);
143#ifdef NET_REFCNT_DEBUG
144	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145	       idev, dev ? dev->name : "NIL");
146#endif
147	dev_put(dev);
148	if (!idev->dead)
149		printk("Freeing alive in_device %p\n", idev);
150	else {
151		kfree(idev);
152	}
153}
154
155static struct in_device *inetdev_init(struct net_device *dev)
156{
157	struct in_device *in_dev;
158
159	ASSERT_RTNL();
160
161	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
162	if (!in_dev)
163		goto out;
164	INIT_RCU_HEAD(&in_dev->rcu_head);
165	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166			sizeof(in_dev->cnf));
167	in_dev->cnf.sysctl = NULL;
168	in_dev->dev = dev;
169	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170		goto out_kfree;
171	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
172		dev_disable_lro(dev);
173	/* Reference in_dev->dev */
174	dev_hold(dev);
175	/* Account for reference dev->ip_ptr (below) */
176	in_dev_hold(in_dev);
177
178	devinet_sysctl_register(in_dev);
179	ip_mc_init_dev(in_dev);
180	if (dev->flags & IFF_UP)
181		ip_mc_up(in_dev);
182
183	/* we can receive as soon as ip_ptr is set -- do this last */
184	rcu_assign_pointer(dev->ip_ptr, in_dev);
185out:
186	return in_dev;
187out_kfree:
188	kfree(in_dev);
189	in_dev = NULL;
190	goto out;
191}
192
193static void in_dev_rcu_put(struct rcu_head *head)
194{
195	struct in_device *idev = container_of(head, struct in_device, rcu_head);
196	in_dev_put(idev);
197}
198
199static void inetdev_destroy(struct in_device *in_dev)
200{
201	struct in_ifaddr *ifa;
202	struct net_device *dev;
203
204	ASSERT_RTNL();
205
206	dev = in_dev->dev;
207
208	in_dev->dead = 1;
209
210	ip_mc_destroy_dev(in_dev);
211
212	while ((ifa = in_dev->ifa_list) != NULL) {
213		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214		inet_free_ifa(ifa);
215	}
216
217	dev->ip_ptr = NULL;
218
219	devinet_sysctl_unregister(in_dev);
220	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221	arp_ifdown(dev);
222
223	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224}
225
226int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227{
228	rcu_read_lock();
229	for_primary_ifa(in_dev) {
230		if (inet_ifa_match(a, ifa)) {
231			if (!b || inet_ifa_match(b, ifa)) {
232				rcu_read_unlock();
233				return 1;
234			}
235		}
236	} endfor_ifa(in_dev);
237	rcu_read_unlock();
238	return 0;
239}
240
/*
 * Remove the address referenced by *@ifap from @in_dev's address list.
 *
 * @in_dev:  device whose list is modified
 * @ifap:    address of the link (list head or a predecessor's ifa_next)
 *           that currently points at the address to remove
 * @destroy: when non-zero the removed address is also released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh:     originating netlink header, forwarded to rtmsg_ifa() (may be NULL)
 * @pid:     netlink pid forwarded to rtmsg_ifa()
 *
 * Must be called with RTNL held.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set.
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary whose scope value is not
			 * below ifa1's; a promoted address is re-linked
			 * right after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip primaries and addresses outside ifa1's subnet. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes the
				 * promotion candidate.
				 */
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* If entries were skipped before the candidate, move it
		 * from its current position to just after last_prim.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Call fib_add_ifaddr() for the remaining addresses that
		 * share the deleted primary's subnet.
		 */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
326
327static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328			 int destroy)
329{
330	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
331}
332
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @ifa: address to insert; ownership passes to this function, which
 *       releases it with inet_free_ifa() on every path that does not
 *       link it into the list
 * @nlh: originating netlink header, forwarded to rtmsg_ifa() (may be NULL)
 * @pid: netlink pid forwarded to rtmsg_ifa()
 *
 * Returns 0 on success (also when @ifa has no local address and is simply
 * discarded), -EEXIST if the same local address already exists in the
 * subnet, or -EINVAL if an address of the same subnet has another scope.
 * Must be called with RTNL held.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 pid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Start out as primary; demoted below if the subnet is already used. */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	/* The walk ends with ifap at the list tail, which is where a
	 * secondary address gets appended.
	 */
	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		/* Track the link after the last primary whose scope value
		 * is not below the new address's scope.
		 */
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/* A primary address is inserted at the tracked position instead of
	 * the tail, and its local address is fed to net_srandom().
	 */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
384
385static int inet_insert_ifa(struct in_ifaddr *ifa)
386{
387	return __inet_insert_ifa(ifa, NULL, 0);
388}
389
390static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391{
392	struct in_device *in_dev = __in_dev_get_rtnl(dev);
393
394	ASSERT_RTNL();
395
396	if (!in_dev) {
397		inet_free_ifa(ifa);
398		return -ENOBUFS;
399	}
400	ipv4_devconf_setall(in_dev);
401	if (ifa->ifa_dev != in_dev) {
402		WARN_ON(ifa->ifa_dev);
403		in_dev_hold(in_dev);
404		ifa->ifa_dev = in_dev;
405	}
406	if (ipv4_is_loopback(ifa->ifa_local))
407		ifa->ifa_scope = RT_SCOPE_HOST;
408	return inet_insert_ifa(ifa);
409}
410
411struct in_device *inetdev_by_index(struct net *net, int ifindex)
412{
413	struct net_device *dev;
414	struct in_device *in_dev = NULL;
415	read_lock(&dev_base_lock);
416	dev = __dev_get_by_index(net, ifindex);
417	if (dev)
418		in_dev = in_dev_get(dev);
419	read_unlock(&dev_base_lock);
420	return in_dev;
421}
422
423/* Called only from RTNL semaphored context. No locks. */
424
425struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426				    __be32 mask)
427{
428	ASSERT_RTNL();
429
430	for_primary_ifa(in_dev) {
431		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432			return ifa;
433	} endfor_ifa(in_dev);
434	return NULL;
435}
436
437static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438{
439	struct net *net = sock_net(skb->sk);
440	struct nlattr *tb[IFA_MAX+1];
441	struct in_device *in_dev;
442	struct ifaddrmsg *ifm;
443	struct in_ifaddr *ifa, **ifap;
444	int err = -EINVAL;
445
446	ASSERT_RTNL();
447
448	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449	if (err < 0)
450		goto errout;
451
452	ifm = nlmsg_data(nlh);
453	in_dev = inetdev_by_index(net, ifm->ifa_index);
454	if (in_dev == NULL) {
455		err = -ENODEV;
456		goto errout;
457	}
458
459	__in_dev_put(in_dev);
460
461	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462	     ifap = &ifa->ifa_next) {
463		if (tb[IFA_LOCAL] &&
464		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465			continue;
466
467		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468			continue;
469
470		if (tb[IFA_ADDRESS] &&
471		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473			continue;
474
475		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476		return 0;
477	}
478
479	err = -EADDRNOTAVAIL;
480errout:
481	return err;
482}
483
484static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485{
486	struct nlattr *tb[IFA_MAX+1];
487	struct in_ifaddr *ifa;
488	struct ifaddrmsg *ifm;
489	struct net_device *dev;
490	struct in_device *in_dev;
491	int err;
492
493	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494	if (err < 0)
495		goto errout;
496
497	ifm = nlmsg_data(nlh);
498	err = -EINVAL;
499	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500		goto errout;
501
502	dev = __dev_get_by_index(net, ifm->ifa_index);
503	err = -ENODEV;
504	if (dev == NULL)
505		goto errout;
506
507	in_dev = __in_dev_get_rtnl(dev);
508	err = -ENOBUFS;
509	if (in_dev == NULL)
510		goto errout;
511
512	ifa = inet_alloc_ifa();
513	if (ifa == NULL)
514		/*
515		 * A potential indev allocation can be left alive, it stays
516		 * assigned to its device and is destroy with it.
517		 */
518		goto errout;
519
520	ipv4_devconf_setall(in_dev);
521	in_dev_hold(in_dev);
522
523	if (tb[IFA_ADDRESS] == NULL)
524		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525
526	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528	ifa->ifa_flags = ifm->ifa_flags;
529	ifa->ifa_scope = ifm->ifa_scope;
530	ifa->ifa_dev = in_dev;
531
532	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534
535	if (tb[IFA_BROADCAST])
536		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537
538	if (tb[IFA_LABEL])
539		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540	else
541		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542
543	return ifa;
544
545errout:
546	return ERR_PTR(err);
547}
548
549static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550{
551	struct net *net = sock_net(skb->sk);
552	struct in_ifaddr *ifa;
553
554	ASSERT_RTNL();
555
556	ifa = rtm_to_ifaddr(net, nlh);
557	if (IS_ERR(ifa))
558		return PTR_ERR(ifa);
559
560	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561}
562
563/*
564 *	Determine a default network mask, based on the IP address.
565 */
566
567static __inline__ int inet_abc_len(__be32 addr)
568{
569	int rc = -1;	/* Something else, probably a multicast. */
570
571	if (ipv4_is_zeronet(addr))
572		rc = 0;
573	else {
574		__u32 haddr = ntohl(addr);
575
576		if (IN_CLASSA(haddr))
577			rc = 8;
578		else if (IN_CLASSB(haddr))
579			rc = 16;
580		else if (IN_CLASSC(haddr))
581			rc = 24;
582	}
583
584	return rc;
585}
586
587
/*
 * Handle the IPv4 interface address ioctls.
 *
 * @net: namespace in which the interface name is resolved
 * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; anything else yields -EINVAL
 * @arg: user pointer to a struct ifreq; it is read on entry and, for the
 *       "get" commands, written back on success
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EACCES (set command without CAP_NET_ADMIN), -EINVAL (bad command,
 * address family, address or mask), -ENODEV (unknown interface),
 * -EADDRNOTAVAIL (no matching address), -ENOBUFS (allocation failure),
 * or whatever dev_change_flags() / inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Force termination of the user-supplied name. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut off a ":alias" suffix so that the device lookup
	 * below sees the bare device name; it is restored after the lookup.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

#ifdef CONFIG_KMOD
	dev_load(net, ifr.ifr_name);
#endif

	/* First pass: validate the command, permissions and address family
	 * before RTNL is taken.
	 */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* The reply is built in place, starting from a clean sockaddr. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
		goto done;

	/* Restore the full label for the comparisons against ifa_label. */
	if (colon)
		*colon = ':';

	/* Locate the address: ifa is the match (or NULL) and ifap the link
	 * pointing at it, as needed by inet_del_ifa().
	 */
	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_address) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without a match. */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Second pass: perform the operation. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* For an alias label only "down" has an effect:
			 * clearing IFF_UP deletes the aliased address.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No existing address under this label: create one. */
			ret = -ENOBUFS;
			if ((ifa = inet_alloc_ifa()) == NULL)
				break;
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Existing address: nothing to do if unchanged,
			 * otherwise unlink it (without freeing) for reuse.
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Classful default mask, plus a derived broadcast
			 * address on broadcast-capable devices.
			 */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Unlink, modify, re-insert. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: release RTNL, then hand the ifreq back to the user. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
826
827static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
828{
829	struct in_device *in_dev = __in_dev_get_rtnl(dev);
830	struct in_ifaddr *ifa;
831	struct ifreq ifr;
832	int done = 0;
833
834	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
835		goto out;
836
837	for (; ifa; ifa = ifa->ifa_next) {
838		if (!buf) {
839			done += sizeof(ifr);
840			continue;
841		}
842		if (len < (int) sizeof(ifr))
843			break;
844		memset(&ifr, 0, sizeof(struct ifreq));
845		if (ifa->ifa_label)
846			strcpy(ifr.ifr_name, ifa->ifa_label);
847		else
848			strcpy(ifr.ifr_name, dev->name);
849
850		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
851		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852								ifa->ifa_local;
853
854		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
855			done = -EFAULT;
856			break;
857		}
858		buf  += sizeof(struct ifreq);
859		len  -= sizeof(struct ifreq);
860		done += sizeof(struct ifreq);
861	}
862out:
863	return done;
864}
865
866__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867{
868	__be32 addr = 0;
869	struct in_device *in_dev;
870	struct net *net = dev_net(dev);
871
872	rcu_read_lock();
873	in_dev = __in_dev_get_rcu(dev);
874	if (!in_dev)
875		goto no_in_dev;
876
877	for_primary_ifa(in_dev) {
878		if (ifa->ifa_scope > scope)
879			continue;
880		if (!dst || inet_ifa_match(dst, ifa)) {
881			addr = ifa->ifa_local;
882			break;
883		}
884		if (!addr)
885			addr = ifa->ifa_local;
886	} endfor_ifa(in_dev);
887no_in_dev:
888	rcu_read_unlock();
889
890	if (addr)
891		goto out;
892
893	/* Not loopback addresses on loopback should be preferred
894	   in this case. It is importnat that lo is the first interface
895	   in dev_base list.
896	 */
897	read_lock(&dev_base_lock);
898	rcu_read_lock();
899	for_each_netdev(net, dev) {
900		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
901			continue;
902
903		for_primary_ifa(in_dev) {
904			if (ifa->ifa_scope != RT_SCOPE_LINK &&
905			    ifa->ifa_scope <= scope) {
906				addr = ifa->ifa_local;
907				goto out_unlock_both;
908			}
909		} endfor_ifa(in_dev);
910	}
911out_unlock_both:
912	read_unlock(&dev_base_lock);
913	rcu_read_unlock();
914out:
915	return addr;
916}
917
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks every address of @in_dev looking for (a) a candidate local address
 * "addr" - the first address equal to @local (any address when @local is
 * 0) whose scope value does not exceed @scope - and (b) an address whose
 * subnet contains both @local and @dst (a zero value matches anything),
 * recorded in "same".
 *
 * Returns the candidate address if such a subnet match was found, else 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* (a) pick the first acceptable local address */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* subnet already confirmed earlier: done */
			if (same)
				break;
		}
		/* (b) look for a subnet containing both local and dst */
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	/* A candidate address only counts if a subnet match confirmed it. */
	return same? addr : 0;
}
954
955/*
956 * Confirm that local IP address exists using wildcards:
957 * - in_dev: only on this interface, 0=any interface
958 * - dst: only in the same subnet as dst, 0=any dst
959 * - local: address, 0=autoselect the local address
960 * - scope: maximum allowed scope value for the local address
961 */
962__be32 inet_confirm_addr(struct in_device *in_dev,
963			 __be32 dst, __be32 local, int scope)
964{
965	__be32 addr = 0;
966	struct net_device *dev;
967	struct net *net;
968
969	if (scope != RT_SCOPE_LINK)
970		return confirm_addr_indev(in_dev, dst, local, scope);
971
972	net = dev_net(in_dev->dev);
973	read_lock(&dev_base_lock);
974	rcu_read_lock();
975	for_each_netdev(net, dev) {
976		if ((in_dev = __in_dev_get_rcu(dev))) {
977			addr = confirm_addr_indev(in_dev, dst, local, scope);
978			if (addr)
979				break;
980		}
981	}
982	rcu_read_unlock();
983	read_unlock(&dev_base_lock);
984
985	return addr;
986}
987
988/*
989 *	Device notifier
990 */
991
992int register_inetaddr_notifier(struct notifier_block *nb)
993{
994	return blocking_notifier_chain_register(&inetaddr_chain, nb);
995}
996
997int unregister_inetaddr_notifier(struct notifier_block *nb)
998{
999	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1000}
1001
1002/* Rename ifa_labels for a device name change. Make some effort to preserve existing
1003 * alias numbering and to create unique labels if possible.
1004*/
1005static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1006{
1007	struct in_ifaddr *ifa;
1008	int named = 0;
1009
1010	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1011		char old[IFNAMSIZ], *dot;
1012
1013		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1014		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1015		if (named++ == 0)
1016			goto skip;
1017		dot = strchr(old, ':');
1018		if (dot == NULL) {
1019			sprintf(old, ":%d", named);
1020			dot = old;
1021		}
1022		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1023			strcat(ifa->ifa_label, dot);
1024		} else {
1025			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1026		}
1027skip:
1028		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1029	}
1030}
1031
/*
 * Return true when @mtu is at least 68 bytes, the smallest MTU this file
 * accepts for IPv4 (presumably the RFC 791 minimum - confirm).
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	enum { INETDEV_MIN_MTU = 68 };

	return !(mtu < INETDEV_MIN_MTU);
}
1036
1037/* Called only under RTNL semaphore */
1038
1039static int inetdev_event(struct notifier_block *this, unsigned long event,
1040			 void *ptr)
1041{
1042	struct net_device *dev = ptr;
1043	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1044
1045	ASSERT_RTNL();
1046
1047	if (!in_dev) {
1048		if (event == NETDEV_REGISTER) {
1049			in_dev = inetdev_init(dev);
1050			if (!in_dev)
1051				return notifier_from_errno(-ENOMEM);
1052			if (dev->flags & IFF_LOOPBACK) {
1053				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1054				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1055			}
1056		} else if (event == NETDEV_CHANGEMTU) {
1057			/* Re-enabling IP */
1058			if (inetdev_valid_mtu(dev->mtu))
1059				in_dev = inetdev_init(dev);
1060		}
1061		goto out;
1062	}
1063
1064	switch (event) {
1065	case NETDEV_REGISTER:
1066		printk(KERN_DEBUG "inetdev_event: bug\n");
1067		dev->ip_ptr = NULL;
1068		break;
1069	case NETDEV_UP:
1070		if (!inetdev_valid_mtu(dev->mtu))
1071			break;
1072		if (dev->flags & IFF_LOOPBACK) {
1073			struct in_ifaddr *ifa;
1074			if ((ifa = inet_alloc_ifa()) != NULL) {
1075				ifa->ifa_local =
1076				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1077				ifa->ifa_prefixlen = 8;
1078				ifa->ifa_mask = inet_make_mask(8);
1079				in_dev_hold(in_dev);
1080				ifa->ifa_dev = in_dev;
1081				ifa->ifa_scope = RT_SCOPE_HOST;
1082				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1083				inet_insert_ifa(ifa);
1084			}
1085		}
1086		ip_mc_up(in_dev);
1087		break;
1088	case NETDEV_DOWN:
1089		ip_mc_down(in_dev);
1090		break;
1091	case NETDEV_CHANGEMTU:
1092		if (inetdev_valid_mtu(dev->mtu))
1093			break;
1094		/* disable IP when MTU is not enough */
1095	case NETDEV_UNREGISTER:
1096		inetdev_destroy(in_dev);
1097		break;
1098	case NETDEV_CHANGENAME:
1099		/* Do not notify about label change, this event is
1100		 * not interesting to applications using netlink.
1101		 */
1102		inetdev_changename(dev, in_dev);
1103
1104		devinet_sysctl_unregister(in_dev);
1105		devinet_sysctl_register(in_dev);
1106		break;
1107	}
1108out:
1109	return NOTIFY_DONE;
1110}
1111
1112static struct notifier_block ip_netdev_notifier = {
1113	.notifier_call =inetdev_event,
1114};
1115
1116static inline size_t inet_nlmsg_size(void)
1117{
1118	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1119	       + nla_total_size(4) /* IFA_ADDRESS */
1120	       + nla_total_size(4) /* IFA_LOCAL */
1121	       + nla_total_size(4) /* IFA_BROADCAST */
1122	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1123}
1124
/*
 * Append one netlink address message describing @ifa to @skb.
 *
 * @skb:   buffer being filled
 * @ifa:   address to describe
 * @pid:   netlink pid placed in the message header
 * @seq:   sequence number placed in the message header
 * @event: message type passed to nlmsg_put() (callers in this file use
 *         RTM_NEWADDR / RTM_DELADDR)
 * @flags: netlink message flags passed to nlmsg_put()
 *
 * Returns the result of nlmsg_end() on success, or -EMSGSIZE if the header
 * or an attribute does not fit; in the attribute case the partially built
 * message is cancelled.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 pid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	/* IFA_F_PERMANENT is always reported in addition to the stored flags. */
	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	/*
	 * Attributes are emitted only when set.  NOTE(review): the NLA_PUT_*
	 * macros presumably jump to nla_put_failure when the attribute does
	 * not fit - the label has no other visible user; confirm against the
	 * macro definitions.
	 */
	if (ifa->ifa_address)
		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);

	if (ifa->ifa_local)
		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);

	if (ifa->ifa_broadcast)
		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);

	if (ifa->ifa_label[0])
		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1160
1161static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1162{
1163	struct net *net = sock_net(skb->sk);
1164	int idx, ip_idx;
1165	struct net_device *dev;
1166	struct in_device *in_dev;
1167	struct in_ifaddr *ifa;
1168	int s_ip_idx, s_idx = cb->args[0];
1169
1170	s_ip_idx = ip_idx = cb->args[1];
1171	idx = 0;
1172	for_each_netdev(net, dev) {
1173		if (idx < s_idx)
1174			goto cont;
1175		if (idx > s_idx)
1176			s_ip_idx = 0;
1177		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1178			goto cont;
1179
1180		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1181		     ifa = ifa->ifa_next, ip_idx++) {
1182			if (ip_idx < s_ip_idx)
1183				continue;
1184			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1185					     cb->nlh->nlmsg_seq,
1186					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1187				goto done;
1188		}
1189cont:
1190		idx++;
1191	}
1192
1193done:
1194	cb->args[0] = idx;
1195	cb->args[1] = ip_idx;
1196
1197	return skb->len;
1198}
1199
1200static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1201		      u32 pid)
1202{
1203	struct sk_buff *skb;
1204	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1205	int err = -ENOBUFS;
1206	struct net *net;
1207
1208	net = dev_net(ifa->ifa_dev->dev);
1209	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1210	if (skb == NULL)
1211		goto errout;
1212
1213	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1214	if (err < 0) {
1215		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1216		WARN_ON(err == -EMSGSIZE);
1217		kfree_skb(skb);
1218		goto errout;
1219	}
1220	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1221errout:
1222	if (err < 0)
1223		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1224}
1225
1226#ifdef CONFIG_SYSCTL
1227
1228static void devinet_copy_dflt_conf(struct net *net, int i)
1229{
1230	struct net_device *dev;
1231
1232	read_lock(&dev_base_lock);
1233	for_each_netdev(net, dev) {
1234		struct in_device *in_dev;
1235		rcu_read_lock();
1236		in_dev = __in_dev_get_rcu(dev);
1237		if (in_dev && !test_bit(i, in_dev->cnf.state))
1238			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1239		rcu_read_unlock();
1240	}
1241	read_unlock(&dev_base_lock);
1242}
1243
1244static void inet_forward_change(struct net *net)
1245{
1246	struct net_device *dev;
1247	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1248
1249	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1250	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1251
1252	read_lock(&dev_base_lock);
1253	for_each_netdev(net, dev) {
1254		struct in_device *in_dev;
1255		if (on)
1256			dev_disable_lro(dev);
1257		rcu_read_lock();
1258		in_dev = __in_dev_get_rcu(dev);
1259		if (in_dev)
1260			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1261		rcu_read_unlock();
1262	}
1263	read_unlock(&dev_base_lock);
1264}
1265
1266static int devinet_conf_proc(ctl_table *ctl, int write,
1267			     struct file* filp, void __user *buffer,
1268			     size_t *lenp, loff_t *ppos)
1269{
1270	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1271
1272	if (write) {
1273		struct ipv4_devconf *cnf = ctl->extra1;
1274		struct net *net = ctl->extra2;
1275		int i = (int *)ctl->data - cnf->data;
1276
1277		set_bit(i, cnf->state);
1278
1279		if (cnf == net->ipv4.devconf_dflt)
1280			devinet_copy_dflt_conf(net, i);
1281	}
1282
1283	return ret;
1284}
1285
1286static int devinet_conf_sysctl(ctl_table *table,
1287			       void __user *oldval, size_t __user *oldlenp,
1288			       void __user *newval, size_t newlen)
1289{
1290	struct ipv4_devconf *cnf;
1291	struct net *net;
1292	int *valp = table->data;
1293	int new;
1294	int i;
1295
1296	if (!newval || !newlen)
1297		return 0;
1298
1299	if (newlen != sizeof(int))
1300		return -EINVAL;
1301
1302	if (get_user(new, (int __user *)newval))
1303		return -EFAULT;
1304
1305	if (new == *valp)
1306		return 0;
1307
1308	if (oldval && oldlenp) {
1309		size_t len;
1310
1311		if (get_user(len, oldlenp))
1312			return -EFAULT;
1313
1314		if (len) {
1315			if (len > table->maxlen)
1316				len = table->maxlen;
1317			if (copy_to_user(oldval, valp, len))
1318				return -EFAULT;
1319			if (put_user(len, oldlenp))
1320				return -EFAULT;
1321		}
1322	}
1323
1324	*valp = new;
1325
1326	cnf = table->extra1;
1327	net = table->extra2;
1328	i = (int *)table->data - cnf->data;
1329
1330	set_bit(i, cnf->state);
1331
1332	if (cnf == net->ipv4.devconf_dflt)
1333		devinet_copy_dflt_conf(net, i);
1334
1335	return 1;
1336}
1337
1338static int devinet_sysctl_forward(ctl_table *ctl, int write,
1339				  struct file* filp, void __user *buffer,
1340				  size_t *lenp, loff_t *ppos)
1341{
1342	int *valp = ctl->data;
1343	int val = *valp;
1344	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1345
1346	if (write && *valp != val) {
1347		struct net *net = ctl->extra2;
1348
1349		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1350			rtnl_lock();
1351			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1352				inet_forward_change(net);
1353			} else if (*valp) {
1354				struct ipv4_devconf *cnf = ctl->extra1;
1355				struct in_device *idev =
1356					container_of(cnf, struct in_device, cnf);
1357				dev_disable_lro(idev->dev);
1358			}
1359			rtnl_unlock();
1360			rt_cache_flush(net, 0);
1361		}
1362	}
1363
1364	return ret;
1365}
1366
1367int ipv4_doint_and_flush(ctl_table *ctl, int write,
1368			 struct file* filp, void __user *buffer,
1369			 size_t *lenp, loff_t *ppos)
1370{
1371	int *valp = ctl->data;
1372	int val = *valp;
1373	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1374	struct net *net = ctl->extra2;
1375
1376	if (write && *valp != val)
1377		rt_cache_flush(net, 0);
1378
1379	return ret;
1380}
1381
1382int ipv4_doint_and_flush_strategy(ctl_table *table,
1383				  void __user *oldval, size_t __user *oldlenp,
1384				  void __user *newval, size_t newlen)
1385{
1386	int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
1387	struct net *net = table->extra2;
1388
1389	if (ret == 1)
1390		rt_cache_flush(net, 0);
1391
1392	return ret;
1393}
1394
1395
/*
 * Initializer builders for the devinet sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY is the generic form: it names the entry after
 * NET_IPV4_CONF_<attr>, points .data at the matching slot of the global
 * ipv4_devconf and stores &ipv4_devconf in .extra1.  The remaining macros
 * are shorthands that fix the mode and/or the handler pair.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, handler, strat) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= &ipv4_devconf.data[NET_IPV4_CONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= handler, \
		.strategy	= strat, \
		.extra1		= &ipv4_devconf, \
	}

/* Integer entry with mode 0644 and the default handler pair. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Integer entry with mode 0444 and the default handler pair. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, handler, strat) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, handler, strat)

/* Mode 0644 entry whose handlers flush the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush, \
			     ipv4_doint_and_flush_strategy)
1423
1424static struct devinet_sysctl_table {
1425	struct ctl_table_header *sysctl_header;
1426	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1427	char *dev_name;
1428} devinet_sysctl = {
1429	.devinet_vars = {
1430		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1431					     devinet_sysctl_forward,
1432					     devinet_conf_sysctl),
1433		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1434
1435		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1436		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1437		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1438		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1439		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1440		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1441					"accept_source_route"),
1442		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1443		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1444		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1445		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1446		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1447		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1448		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1449		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1450		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1451
1452		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1453		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1454		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1455					      "force_igmp_version"),
1456		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1457					      "promote_secondaries"),
1458	},
1459};
1460
1461static int __devinet_sysctl_register(struct net *net, char *dev_name,
1462		int ctl_name, struct ipv4_devconf *p)
1463{
1464	int i;
1465	struct devinet_sysctl_table *t;
1466
1467#define DEVINET_CTL_PATH_DEV	3
1468
1469	struct ctl_path devinet_ctl_path[] = {
1470		{ .procname = "net", .ctl_name = CTL_NET, },
1471		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1472		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1473		{ /* to be set */ },
1474		{ },
1475	};
1476
1477	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1478	if (!t)
1479		goto out;
1480
1481	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1482		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1483		t->devinet_vars[i].extra1 = p;
1484		t->devinet_vars[i].extra2 = net;
1485	}
1486
1487	/*
1488	 * Make a copy of dev_name, because '.procname' is regarded as const
1489	 * by sysctl and we wouldn't want anyone to change it under our feet
1490	 * (see SIOCSIFNAME).
1491	 */
1492	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1493	if (!t->dev_name)
1494		goto free;
1495
1496	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1497	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1498
1499	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1500			t->devinet_vars);
1501	if (!t->sysctl_header)
1502		goto free_procname;
1503
1504	p->sysctl = t;
1505	return 0;
1506
1507free_procname:
1508	kfree(t->dev_name);
1509free:
1510	kfree(t);
1511out:
1512	return -ENOBUFS;
1513}
1514
1515static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1516{
1517	struct devinet_sysctl_table *t = cnf->sysctl;
1518
1519	if (t == NULL)
1520		return;
1521
1522	cnf->sysctl = NULL;
1523	unregister_sysctl_table(t->sysctl_header);
1524	kfree(t->dev_name);
1525	kfree(t);
1526}
1527
1528static void devinet_sysctl_register(struct in_device *idev)
1529{
1530	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1531			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1532	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1533			idev->dev->ifindex, &idev->cnf);
1534}
1535
1536static void devinet_sysctl_unregister(struct in_device *idev)
1537{
1538	__devinet_sysctl_unregister(&idev->cnf);
1539	neigh_sysctl_unregister(idev->arp_parms);
1540}
1541
1542static struct ctl_table ctl_forward_entry[] = {
1543	{
1544		.ctl_name	= NET_IPV4_FORWARD,
1545		.procname	= "ip_forward",
1546		.data		= &ipv4_devconf.data[
1547					NET_IPV4_CONF_FORWARDING - 1],
1548		.maxlen		= sizeof(int),
1549		.mode		= 0644,
1550		.proc_handler	= devinet_sysctl_forward,
1551		.strategy	= devinet_conf_sysctl,
1552		.extra1		= &ipv4_devconf,
1553		.extra2		= &init_net,
1554	},
1555	{ },
1556};
1557
1558static __net_initdata struct ctl_path net_ipv4_path[] = {
1559	{ .procname = "net", .ctl_name = CTL_NET, },
1560	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1561	{ },
1562};
1563#endif
1564
1565static __net_init int devinet_init_net(struct net *net)
1566{
1567	int err;
1568	struct ipv4_devconf *all, *dflt;
1569#ifdef CONFIG_SYSCTL
1570	struct ctl_table *tbl = ctl_forward_entry;
1571	struct ctl_table_header *forw_hdr;
1572#endif
1573
1574	err = -ENOMEM;
1575	all = &ipv4_devconf;
1576	dflt = &ipv4_devconf_dflt;
1577
1578	if (net != &init_net) {
1579		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1580		if (all == NULL)
1581			goto err_alloc_all;
1582
1583		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1584		if (dflt == NULL)
1585			goto err_alloc_dflt;
1586
1587#ifdef CONFIG_SYSCTL
1588		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1589		if (tbl == NULL)
1590			goto err_alloc_ctl;
1591
1592		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1593		tbl[0].extra1 = all;
1594		tbl[0].extra2 = net;
1595#endif
1596	}
1597
1598#ifdef CONFIG_SYSCTL
1599	err = __devinet_sysctl_register(net, "all",
1600			NET_PROTO_CONF_ALL, all);
1601	if (err < 0)
1602		goto err_reg_all;
1603
1604	err = __devinet_sysctl_register(net, "default",
1605			NET_PROTO_CONF_DEFAULT, dflt);
1606	if (err < 0)
1607		goto err_reg_dflt;
1608
1609	err = -ENOMEM;
1610	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1611	if (forw_hdr == NULL)
1612		goto err_reg_ctl;
1613	net->ipv4.forw_hdr = forw_hdr;
1614#endif
1615
1616	net->ipv4.devconf_all = all;
1617	net->ipv4.devconf_dflt = dflt;
1618	return 0;
1619
1620#ifdef CONFIG_SYSCTL
1621err_reg_ctl:
1622	__devinet_sysctl_unregister(dflt);
1623err_reg_dflt:
1624	__devinet_sysctl_unregister(all);
1625err_reg_all:
1626	if (tbl != ctl_forward_entry)
1627		kfree(tbl);
1628err_alloc_ctl:
1629#endif
1630	if (dflt != &ipv4_devconf_dflt)
1631		kfree(dflt);
1632err_alloc_dflt:
1633	if (all != &ipv4_devconf)
1634		kfree(all);
1635err_alloc_all:
1636	return err;
1637}
1638
1639static __net_exit void devinet_exit_net(struct net *net)
1640{
1641#ifdef CONFIG_SYSCTL
1642	struct ctl_table *tbl;
1643
1644	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1645	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1646	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1647	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1648	kfree(tbl);
1649#endif
1650	kfree(net->ipv4.devconf_dflt);
1651	kfree(net->ipv4.devconf_all);
1652}
1653
1654static __net_initdata struct pernet_operations devinet_ops = {
1655	.init = devinet_init_net,
1656	.exit = devinet_exit_net,
1657};
1658
1659void __init devinet_init(void)
1660{
1661	register_pernet_subsys(&devinet_ops);
1662
1663	register_gifconf(PF_INET, inet_gifconf);
1664	register_netdevice_notifier(&ip_netdev_notifier);
1665
1666	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1667	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1668	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1669}
1670
/* Symbols made available to other kernel code and modules. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);

/* Address-change notifier chain registration. */
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1676