devinet.c revision 9b8adb5ea005fe73acd5dd58f9bd47eafa74c9d1
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#ifdef CONFIG_SYSCTL
54#include <linux/sysctl.h>
55#endif
56#include <linux/kmod.h>
57
58#include <net/arp.h>
59#include <net/ip.h>
60#include <net/route.h>
61#include <net/ip_fib.h>
62#include <net/rtnetlink.h>
63#include <net/net_namespace.h>
64
65static struct ipv4_devconf ipv4_devconf = {
66	.data = {
67		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71	},
72};
73
74static struct ipv4_devconf ipv4_devconf_dflt = {
75	.data = {
76		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81	},
82};
83
/*
 * Access attribute 'attr' of the configuration block that
 * net->ipv4.devconf_dflt points at.  'net' is parenthesized so that any
 * pointer-valued expression can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88	[IFA_LOCAL]     	= { .type = NLA_U32 },
89	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92};
93
94static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98			 int destroy);
99#ifdef CONFIG_SYSCTL
100static void devinet_sysctl_register(struct in_device *idev);
101static void devinet_sysctl_unregister(struct in_device *idev);
102#else
103static inline void devinet_sysctl_register(struct in_device *idev)
104{
105}
106static inline void devinet_sysctl_unregister(struct in_device *idev)
107{
108}
109#endif
110
111/* Locks all the inet devices. */
112
113static struct in_ifaddr *inet_alloc_ifa(void)
114{
115	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
116}
117
118static void inet_rcu_free_ifa(struct rcu_head *head)
119{
120	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
121	if (ifa->ifa_dev)
122		in_dev_put(ifa->ifa_dev);
123	kfree(ifa);
124}
125
126static inline void inet_free_ifa(struct in_ifaddr *ifa)
127{
128	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
129}
130
131void in_dev_finish_destroy(struct in_device *idev)
132{
133	struct net_device *dev = idev->dev;
134
135	WARN_ON(idev->ifa_list);
136	WARN_ON(idev->mc_list);
137#ifdef NET_REFCNT_DEBUG
138	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139	       idev, dev ? dev->name : "NIL");
140#endif
141	dev_put(dev);
142	if (!idev->dead)
143		printk("Freeing alive in_device %p\n", idev);
144	else {
145		kfree(idev);
146	}
147}
148
149static struct in_device *inetdev_init(struct net_device *dev)
150{
151	struct in_device *in_dev;
152
153	ASSERT_RTNL();
154
155	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
156	if (!in_dev)
157		goto out;
158	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159			sizeof(in_dev->cnf));
160	in_dev->cnf.sysctl = NULL;
161	in_dev->dev = dev;
162	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
163		goto out_kfree;
164	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
165		dev_disable_lro(dev);
166	/* Reference in_dev->dev */
167	dev_hold(dev);
168	/* Account for reference dev->ip_ptr (below) */
169	in_dev_hold(in_dev);
170
171	devinet_sysctl_register(in_dev);
172	ip_mc_init_dev(in_dev);
173	if (dev->flags & IFF_UP)
174		ip_mc_up(in_dev);
175
176	/* we can receive as soon as ip_ptr is set -- do this last */
177	rcu_assign_pointer(dev->ip_ptr, in_dev);
178out:
179	return in_dev;
180out_kfree:
181	kfree(in_dev);
182	in_dev = NULL;
183	goto out;
184}
185
186static void in_dev_rcu_put(struct rcu_head *head)
187{
188	struct in_device *idev = container_of(head, struct in_device, rcu_head);
189	in_dev_put(idev);
190}
191
192static void inetdev_destroy(struct in_device *in_dev)
193{
194	struct in_ifaddr *ifa;
195	struct net_device *dev;
196
197	ASSERT_RTNL();
198
199	dev = in_dev->dev;
200
201	in_dev->dead = 1;
202
203	ip_mc_destroy_dev(in_dev);
204
205	while ((ifa = in_dev->ifa_list) != NULL) {
206		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
207		inet_free_ifa(ifa);
208	}
209
210	dev->ip_ptr = NULL;
211
212	devinet_sysctl_unregister(in_dev);
213	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
214	arp_ifdown(dev);
215
216	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
217}
218
219int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
220{
221	rcu_read_lock();
222	for_primary_ifa(in_dev) {
223		if (inet_ifa_match(a, ifa)) {
224			if (!b || inet_ifa_match(b, ifa)) {
225				rcu_read_unlock();
226				return 1;
227			}
228		}
229	} endfor_ifa(in_dev);
230	rcu_read_unlock();
231	return 0;
232}
233
234static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
235			 int destroy, struct nlmsghdr *nlh, u32 pid)
236{
237	struct in_ifaddr *promote = NULL;
238	struct in_ifaddr *ifa, *ifa1 = *ifap;
239	struct in_ifaddr *last_prim = in_dev->ifa_list;
240	struct in_ifaddr *prev_prom = NULL;
241	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
242
243	ASSERT_RTNL();
244
245	/* 1. Deleting primary ifaddr forces deletion all secondaries
246	 * unless alias promotion is set
247	 **/
248
249	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
250		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
251
252		while ((ifa = *ifap1) != NULL) {
253			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
254			    ifa1->ifa_scope <= ifa->ifa_scope)
255				last_prim = ifa;
256
257			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
258			    ifa1->ifa_mask != ifa->ifa_mask ||
259			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
260				ifap1 = &ifa->ifa_next;
261				prev_prom = ifa;
262				continue;
263			}
264
265			if (!do_promote) {
266				*ifap1 = ifa->ifa_next;
267
268				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
269				blocking_notifier_call_chain(&inetaddr_chain,
270						NETDEV_DOWN, ifa);
271				inet_free_ifa(ifa);
272			} else {
273				promote = ifa;
274				break;
275			}
276		}
277	}
278
279	/* 2. Unlink it */
280
281	*ifap = ifa1->ifa_next;
282
283	/* 3. Announce address deletion */
284
285	/* Send message first, then call notifier.
286	   At first sight, FIB update triggered by notifier
287	   will refer to already deleted ifaddr, that could confuse
288	   netlink listeners. It is not true: look, gated sees
289	   that route deleted and if it still thinks that ifaddr
290	   is valid, it will try to restore deleted routes... Grr.
291	   So that, this order is correct.
292	 */
293	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
294	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
295
296	if (promote) {
297
298		if (prev_prom) {
299			prev_prom->ifa_next = promote->ifa_next;
300			promote->ifa_next = last_prim->ifa_next;
301			last_prim->ifa_next = promote;
302		}
303
304		promote->ifa_flags &= ~IFA_F_SECONDARY;
305		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
306		blocking_notifier_call_chain(&inetaddr_chain,
307				NETDEV_UP, promote);
308		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
309			if (ifa1->ifa_mask != ifa->ifa_mask ||
310			    !inet_ifa_match(ifa1->ifa_address, ifa))
311					continue;
312			fib_add_ifaddr(ifa);
313		}
314
315	}
316	if (destroy)
317		inet_free_ifa(ifa1);
318}
319
320static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
321			 int destroy)
322{
323	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
324}
325
326static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
327			     u32 pid)
328{
329	struct in_device *in_dev = ifa->ifa_dev;
330	struct in_ifaddr *ifa1, **ifap, **last_primary;
331
332	ASSERT_RTNL();
333
334	if (!ifa->ifa_local) {
335		inet_free_ifa(ifa);
336		return 0;
337	}
338
339	ifa->ifa_flags &= ~IFA_F_SECONDARY;
340	last_primary = &in_dev->ifa_list;
341
342	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
343	     ifap = &ifa1->ifa_next) {
344		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
345		    ifa->ifa_scope <= ifa1->ifa_scope)
346			last_primary = &ifa1->ifa_next;
347		if (ifa1->ifa_mask == ifa->ifa_mask &&
348		    inet_ifa_match(ifa1->ifa_address, ifa)) {
349			if (ifa1->ifa_local == ifa->ifa_local) {
350				inet_free_ifa(ifa);
351				return -EEXIST;
352			}
353			if (ifa1->ifa_scope != ifa->ifa_scope) {
354				inet_free_ifa(ifa);
355				return -EINVAL;
356			}
357			ifa->ifa_flags |= IFA_F_SECONDARY;
358		}
359	}
360
361	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
362		net_srandom(ifa->ifa_local);
363		ifap = last_primary;
364	}
365
366	ifa->ifa_next = *ifap;
367	*ifap = ifa;
368
369	/* Send message first, then call notifier.
370	   Notifier will trigger FIB update, so that
371	   listeners of netlink will know about new ifaddr */
372	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
373	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
374
375	return 0;
376}
377
378static int inet_insert_ifa(struct in_ifaddr *ifa)
379{
380	return __inet_insert_ifa(ifa, NULL, 0);
381}
382
383static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
384{
385	struct in_device *in_dev = __in_dev_get_rtnl(dev);
386
387	ASSERT_RTNL();
388
389	if (!in_dev) {
390		inet_free_ifa(ifa);
391		return -ENOBUFS;
392	}
393	ipv4_devconf_setall(in_dev);
394	if (ifa->ifa_dev != in_dev) {
395		WARN_ON(ifa->ifa_dev);
396		in_dev_hold(in_dev);
397		ifa->ifa_dev = in_dev;
398	}
399	if (ipv4_is_loopback(ifa->ifa_local))
400		ifa->ifa_scope = RT_SCOPE_HOST;
401	return inet_insert_ifa(ifa);
402}
403
404struct in_device *inetdev_by_index(struct net *net, int ifindex)
405{
406	struct net_device *dev;
407	struct in_device *in_dev = NULL;
408	read_lock(&dev_base_lock);
409	dev = __dev_get_by_index(net, ifindex);
410	if (dev)
411		in_dev = in_dev_get(dev);
412	read_unlock(&dev_base_lock);
413	return in_dev;
414}
415
416/* Called only from RTNL semaphored context. No locks. */
417
418struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
419				    __be32 mask)
420{
421	ASSERT_RTNL();
422
423	for_primary_ifa(in_dev) {
424		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
425			return ifa;
426	} endfor_ifa(in_dev);
427	return NULL;
428}
429
430static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
431{
432	struct net *net = sock_net(skb->sk);
433	struct nlattr *tb[IFA_MAX+1];
434	struct in_device *in_dev;
435	struct ifaddrmsg *ifm;
436	struct in_ifaddr *ifa, **ifap;
437	int err = -EINVAL;
438
439	ASSERT_RTNL();
440
441	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
442	if (err < 0)
443		goto errout;
444
445	ifm = nlmsg_data(nlh);
446	in_dev = inetdev_by_index(net, ifm->ifa_index);
447	if (in_dev == NULL) {
448		err = -ENODEV;
449		goto errout;
450	}
451
452	__in_dev_put(in_dev);
453
454	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
455	     ifap = &ifa->ifa_next) {
456		if (tb[IFA_LOCAL] &&
457		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
458			continue;
459
460		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
461			continue;
462
463		if (tb[IFA_ADDRESS] &&
464		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
465		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
466			continue;
467
468		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
469		return 0;
470	}
471
472	err = -EADDRNOTAVAIL;
473errout:
474	return err;
475}
476
477static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
478{
479	struct nlattr *tb[IFA_MAX+1];
480	struct in_ifaddr *ifa;
481	struct ifaddrmsg *ifm;
482	struct net_device *dev;
483	struct in_device *in_dev;
484	int err;
485
486	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
487	if (err < 0)
488		goto errout;
489
490	ifm = nlmsg_data(nlh);
491	err = -EINVAL;
492	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
493		goto errout;
494
495	dev = __dev_get_by_index(net, ifm->ifa_index);
496	err = -ENODEV;
497	if (dev == NULL)
498		goto errout;
499
500	in_dev = __in_dev_get_rtnl(dev);
501	err = -ENOBUFS;
502	if (in_dev == NULL)
503		goto errout;
504
505	ifa = inet_alloc_ifa();
506	if (ifa == NULL)
507		/*
508		 * A potential indev allocation can be left alive, it stays
509		 * assigned to its device and is destroy with it.
510		 */
511		goto errout;
512
513	ipv4_devconf_setall(in_dev);
514	in_dev_hold(in_dev);
515
516	if (tb[IFA_ADDRESS] == NULL)
517		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
518
519	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
520	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
521	ifa->ifa_flags = ifm->ifa_flags;
522	ifa->ifa_scope = ifm->ifa_scope;
523	ifa->ifa_dev = in_dev;
524
525	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
526	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
527
528	if (tb[IFA_BROADCAST])
529		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
530
531	if (tb[IFA_LABEL])
532		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
533	else
534		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
535
536	return ifa;
537
538errout:
539	return ERR_PTR(err);
540}
541
542static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
543{
544	struct net *net = sock_net(skb->sk);
545	struct in_ifaddr *ifa;
546
547	ASSERT_RTNL();
548
549	ifa = rtm_to_ifaddr(net, nlh);
550	if (IS_ERR(ifa))
551		return PTR_ERR(ifa);
552
553	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
554}
555
556/*
557 *	Determine a default network mask, based on the IP address.
558 */
559
560static __inline__ int inet_abc_len(__be32 addr)
561{
562	int rc = -1;	/* Something else, probably a multicast. */
563
564	if (ipv4_is_zeronet(addr))
565		rc = 0;
566	else {
567		__u32 haddr = ntohl(addr);
568
569		if (IN_CLASSA(haddr))
570			rc = 8;
571		else if (IN_CLASSB(haddr))
572			rc = 16;
573		else if (IN_CLASSC(haddr))
574			rc = 24;
575	}
576
577	return rc;
578}
579
580
581int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
582{
583	struct ifreq ifr;
584	struct sockaddr_in sin_orig;
585	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
586	struct in_device *in_dev;
587	struct in_ifaddr **ifap = NULL;
588	struct in_ifaddr *ifa = NULL;
589	struct net_device *dev;
590	char *colon;
591	int ret = -EFAULT;
592	int tryaddrmatch = 0;
593
594	/*
595	 *	Fetch the caller's info block into kernel space
596	 */
597
598	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
599		goto out;
600	ifr.ifr_name[IFNAMSIZ - 1] = 0;
601
602	/* save original address for comparison */
603	memcpy(&sin_orig, sin, sizeof(*sin));
604
605	colon = strchr(ifr.ifr_name, ':');
606	if (colon)
607		*colon = 0;
608
609	dev_load(net, ifr.ifr_name);
610
611	switch (cmd) {
612	case SIOCGIFADDR:	/* Get interface address */
613	case SIOCGIFBRDADDR:	/* Get the broadcast address */
614	case SIOCGIFDSTADDR:	/* Get the destination address */
615	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
616		/* Note that these ioctls will not sleep,
617		   so that we do not impose a lock.
618		   One day we will be forced to put shlock here (I mean SMP)
619		 */
620		tryaddrmatch = (sin_orig.sin_family == AF_INET);
621		memset(sin, 0, sizeof(*sin));
622		sin->sin_family = AF_INET;
623		break;
624
625	case SIOCSIFFLAGS:
626		ret = -EACCES;
627		if (!capable(CAP_NET_ADMIN))
628			goto out;
629		break;
630	case SIOCSIFADDR:	/* Set interface address (and family) */
631	case SIOCSIFBRDADDR:	/* Set the broadcast address */
632	case SIOCSIFDSTADDR:	/* Set the destination address */
633	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
634		ret = -EACCES;
635		if (!capable(CAP_NET_ADMIN))
636			goto out;
637		ret = -EINVAL;
638		if (sin->sin_family != AF_INET)
639			goto out;
640		break;
641	default:
642		ret = -EINVAL;
643		goto out;
644	}
645
646	rtnl_lock();
647
648	ret = -ENODEV;
649	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
650		goto done;
651
652	if (colon)
653		*colon = ':';
654
655	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
656		if (tryaddrmatch) {
657			/* Matthias Andree */
658			/* compare label and address (4.4BSD style) */
659			/* note: we only do this for a limited set of ioctls
660			   and only if the original address family was AF_INET.
661			   This is checked above. */
662			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
663			     ifap = &ifa->ifa_next) {
664				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
665				    sin_orig.sin_addr.s_addr ==
666							ifa->ifa_address) {
667					break; /* found */
668				}
669			}
670		}
671		/* we didn't get a match, maybe the application is
672		   4.3BSD-style and passed in junk so we fall back to
673		   comparing just the label */
674		if (!ifa) {
675			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
676			     ifap = &ifa->ifa_next)
677				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
678					break;
679		}
680	}
681
682	ret = -EADDRNOTAVAIL;
683	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
684		goto done;
685
686	switch (cmd) {
687	case SIOCGIFADDR:	/* Get interface address */
688		sin->sin_addr.s_addr = ifa->ifa_local;
689		goto rarok;
690
691	case SIOCGIFBRDADDR:	/* Get the broadcast address */
692		sin->sin_addr.s_addr = ifa->ifa_broadcast;
693		goto rarok;
694
695	case SIOCGIFDSTADDR:	/* Get the destination address */
696		sin->sin_addr.s_addr = ifa->ifa_address;
697		goto rarok;
698
699	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
700		sin->sin_addr.s_addr = ifa->ifa_mask;
701		goto rarok;
702
703	case SIOCSIFFLAGS:
704		if (colon) {
705			ret = -EADDRNOTAVAIL;
706			if (!ifa)
707				break;
708			ret = 0;
709			if (!(ifr.ifr_flags & IFF_UP))
710				inet_del_ifa(in_dev, ifap, 1);
711			break;
712		}
713		ret = dev_change_flags(dev, ifr.ifr_flags);
714		break;
715
716	case SIOCSIFADDR:	/* Set interface address (and family) */
717		ret = -EINVAL;
718		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
719			break;
720
721		if (!ifa) {
722			ret = -ENOBUFS;
723			if ((ifa = inet_alloc_ifa()) == NULL)
724				break;
725			if (colon)
726				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
727			else
728				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
729		} else {
730			ret = 0;
731			if (ifa->ifa_local == sin->sin_addr.s_addr)
732				break;
733			inet_del_ifa(in_dev, ifap, 0);
734			ifa->ifa_broadcast = 0;
735			ifa->ifa_scope = 0;
736		}
737
738		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
739
740		if (!(dev->flags & IFF_POINTOPOINT)) {
741			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
742			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
743			if ((dev->flags & IFF_BROADCAST) &&
744			    ifa->ifa_prefixlen < 31)
745				ifa->ifa_broadcast = ifa->ifa_address |
746						     ~ifa->ifa_mask;
747		} else {
748			ifa->ifa_prefixlen = 32;
749			ifa->ifa_mask = inet_make_mask(32);
750		}
751		ret = inet_set_ifa(dev, ifa);
752		break;
753
754	case SIOCSIFBRDADDR:	/* Set the broadcast address */
755		ret = 0;
756		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
757			inet_del_ifa(in_dev, ifap, 0);
758			ifa->ifa_broadcast = sin->sin_addr.s_addr;
759			inet_insert_ifa(ifa);
760		}
761		break;
762
763	case SIOCSIFDSTADDR:	/* Set the destination address */
764		ret = 0;
765		if (ifa->ifa_address == sin->sin_addr.s_addr)
766			break;
767		ret = -EINVAL;
768		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
769			break;
770		ret = 0;
771		inet_del_ifa(in_dev, ifap, 0);
772		ifa->ifa_address = sin->sin_addr.s_addr;
773		inet_insert_ifa(ifa);
774		break;
775
776	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
777
778		/*
779		 *	The mask we set must be legal.
780		 */
781		ret = -EINVAL;
782		if (bad_mask(sin->sin_addr.s_addr, 0))
783			break;
784		ret = 0;
785		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
786			__be32 old_mask = ifa->ifa_mask;
787			inet_del_ifa(in_dev, ifap, 0);
788			ifa->ifa_mask = sin->sin_addr.s_addr;
789			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
790
791			/* See if current broadcast address matches
792			 * with current netmask, then recalculate
793			 * the broadcast address. Otherwise it's a
794			 * funny address, so don't touch it since
795			 * the user seems to know what (s)he's doing...
796			 */
797			if ((dev->flags & IFF_BROADCAST) &&
798			    (ifa->ifa_prefixlen < 31) &&
799			    (ifa->ifa_broadcast ==
800			     (ifa->ifa_local|~old_mask))) {
801				ifa->ifa_broadcast = (ifa->ifa_local |
802						      ~sin->sin_addr.s_addr);
803			}
804			inet_insert_ifa(ifa);
805		}
806		break;
807	}
808done:
809	rtnl_unlock();
810out:
811	return ret;
812rarok:
813	rtnl_unlock();
814	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
815	goto out;
816}
817
818static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
819{
820	struct in_device *in_dev = __in_dev_get_rtnl(dev);
821	struct in_ifaddr *ifa;
822	struct ifreq ifr;
823	int done = 0;
824
825	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
826		goto out;
827
828	for (; ifa; ifa = ifa->ifa_next) {
829		if (!buf) {
830			done += sizeof(ifr);
831			continue;
832		}
833		if (len < (int) sizeof(ifr))
834			break;
835		memset(&ifr, 0, sizeof(struct ifreq));
836		if (ifa->ifa_label)
837			strcpy(ifr.ifr_name, ifa->ifa_label);
838		else
839			strcpy(ifr.ifr_name, dev->name);
840
841		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
842		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
843								ifa->ifa_local;
844
845		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
846			done = -EFAULT;
847			break;
848		}
849		buf  += sizeof(struct ifreq);
850		len  -= sizeof(struct ifreq);
851		done += sizeof(struct ifreq);
852	}
853out:
854	return done;
855}
856
857__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
858{
859	__be32 addr = 0;
860	struct in_device *in_dev;
861	struct net *net = dev_net(dev);
862
863	rcu_read_lock();
864	in_dev = __in_dev_get_rcu(dev);
865	if (!in_dev)
866		goto no_in_dev;
867
868	for_primary_ifa(in_dev) {
869		if (ifa->ifa_scope > scope)
870			continue;
871		if (!dst || inet_ifa_match(dst, ifa)) {
872			addr = ifa->ifa_local;
873			break;
874		}
875		if (!addr)
876			addr = ifa->ifa_local;
877	} endfor_ifa(in_dev);
878no_in_dev:
879	rcu_read_unlock();
880
881	if (addr)
882		goto out;
883
884	/* Not loopback addresses on loopback should be preferred
885	   in this case. It is importnat that lo is the first interface
886	   in dev_base list.
887	 */
888	read_lock(&dev_base_lock);
889	rcu_read_lock();
890	for_each_netdev(net, dev) {
891		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
892			continue;
893
894		for_primary_ifa(in_dev) {
895			if (ifa->ifa_scope != RT_SCOPE_LINK &&
896			    ifa->ifa_scope <= scope) {
897				addr = ifa->ifa_local;
898				goto out_unlock_both;
899			}
900		} endfor_ifa(in_dev);
901	}
902out_unlock_both:
903	read_unlock(&dev_base_lock);
904	rcu_read_unlock();
905out:
906	return addr;
907}
908
909static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
910			      __be32 local, int scope)
911{
912	int same = 0;
913	__be32 addr = 0;
914
915	for_ifa(in_dev) {
916		if (!addr &&
917		    (local == ifa->ifa_local || !local) &&
918		    ifa->ifa_scope <= scope) {
919			addr = ifa->ifa_local;
920			if (same)
921				break;
922		}
923		if (!same) {
924			same = (!local || inet_ifa_match(local, ifa)) &&
925				(!dst || inet_ifa_match(dst, ifa));
926			if (same && addr) {
927				if (local || !dst)
928					break;
929				/* Is the selected addr into dst subnet? */
930				if (inet_ifa_match(addr, ifa))
931					break;
932				/* No, then can we use new local src? */
933				if (ifa->ifa_scope <= scope) {
934					addr = ifa->ifa_local;
935					break;
936				}
937				/* search for large dst subnet for addr */
938				same = 0;
939			}
940		}
941	} endfor_ifa(in_dev);
942
943	return same? addr : 0;
944}
945
946/*
947 * Confirm that local IP address exists using wildcards:
948 * - in_dev: only on this interface, 0=any interface
949 * - dst: only in the same subnet as dst, 0=any dst
950 * - local: address, 0=autoselect the local address
951 * - scope: maximum allowed scope value for the local address
952 */
953__be32 inet_confirm_addr(struct in_device *in_dev,
954			 __be32 dst, __be32 local, int scope)
955{
956	__be32 addr = 0;
957	struct net_device *dev;
958	struct net *net;
959
960	if (scope != RT_SCOPE_LINK)
961		return confirm_addr_indev(in_dev, dst, local, scope);
962
963	net = dev_net(in_dev->dev);
964	read_lock(&dev_base_lock);
965	rcu_read_lock();
966	for_each_netdev(net, dev) {
967		if ((in_dev = __in_dev_get_rcu(dev))) {
968			addr = confirm_addr_indev(in_dev, dst, local, scope);
969			if (addr)
970				break;
971		}
972	}
973	rcu_read_unlock();
974	read_unlock(&dev_base_lock);
975
976	return addr;
977}
978
979/*
980 *	Device notifier
981 */
982
983int register_inetaddr_notifier(struct notifier_block *nb)
984{
985	return blocking_notifier_chain_register(&inetaddr_chain, nb);
986}
987
988int unregister_inetaddr_notifier(struct notifier_block *nb)
989{
990	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
991}
992
993/* Rename ifa_labels for a device name change. Make some effort to preserve existing
994 * alias numbering and to create unique labels if possible.
995*/
996static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
997{
998	struct in_ifaddr *ifa;
999	int named = 0;
1000
1001	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1002		char old[IFNAMSIZ], *dot;
1003
1004		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1005		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1006		if (named++ == 0)
1007			goto skip;
1008		dot = strchr(old, ':');
1009		if (dot == NULL) {
1010			sprintf(old, ":%d", named);
1011			dot = old;
1012		}
1013		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1014			strcat(ifa->ifa_label, dot);
1015		} else {
1016			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1017		}
1018skip:
1019		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1020	}
1021}
1022
/*
 * Tell whether @mtu is large enough for IPv4 to be enabled on a device.
 * The threshold is 68 (presumably the RFC 791 minimum -- confirm).
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1027
1028/* Called only under RTNL semaphore */
1029
1030static int inetdev_event(struct notifier_block *this, unsigned long event,
1031			 void *ptr)
1032{
1033	struct net_device *dev = ptr;
1034	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1035
1036	ASSERT_RTNL();
1037
1038	if (!in_dev) {
1039		if (event == NETDEV_REGISTER) {
1040			in_dev = inetdev_init(dev);
1041			if (!in_dev)
1042				return notifier_from_errno(-ENOMEM);
1043			if (dev->flags & IFF_LOOPBACK) {
1044				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1045				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1046			}
1047		} else if (event == NETDEV_CHANGEMTU) {
1048			/* Re-enabling IP */
1049			if (inetdev_valid_mtu(dev->mtu))
1050				in_dev = inetdev_init(dev);
1051		}
1052		goto out;
1053	}
1054
1055	switch (event) {
1056	case NETDEV_REGISTER:
1057		printk(KERN_DEBUG "inetdev_event: bug\n");
1058		dev->ip_ptr = NULL;
1059		break;
1060	case NETDEV_UP:
1061		if (!inetdev_valid_mtu(dev->mtu))
1062			break;
1063		if (dev->flags & IFF_LOOPBACK) {
1064			struct in_ifaddr *ifa;
1065			if ((ifa = inet_alloc_ifa()) != NULL) {
1066				ifa->ifa_local =
1067				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1068				ifa->ifa_prefixlen = 8;
1069				ifa->ifa_mask = inet_make_mask(8);
1070				in_dev_hold(in_dev);
1071				ifa->ifa_dev = in_dev;
1072				ifa->ifa_scope = RT_SCOPE_HOST;
1073				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1074				inet_insert_ifa(ifa);
1075			}
1076		}
1077		ip_mc_up(in_dev);
1078		/* fall through */
1079	case NETDEV_CHANGEADDR:
1080		if (IN_DEV_ARP_NOTIFY(in_dev))
1081			arp_send(ARPOP_REQUEST, ETH_P_ARP,
1082				 in_dev->ifa_list->ifa_address,
1083				 dev,
1084				 in_dev->ifa_list->ifa_address,
1085				 NULL, dev->dev_addr, NULL);
1086		break;
1087	case NETDEV_DOWN:
1088		ip_mc_down(in_dev);
1089		break;
1090	case NETDEV_CHANGEMTU:
1091		if (inetdev_valid_mtu(dev->mtu))
1092			break;
1093		/* disable IP when MTU is not enough */
1094	case NETDEV_UNREGISTER:
1095		inetdev_destroy(in_dev);
1096		break;
1097	case NETDEV_CHANGENAME:
1098		/* Do not notify about label change, this event is
1099		 * not interesting to applications using netlink.
1100		 */
1101		inetdev_changename(dev, in_dev);
1102
1103		devinet_sysctl_unregister(in_dev);
1104		devinet_sysctl_register(in_dev);
1105		break;
1106	}
1107out:
1108	return NOTIFY_DONE;
1109}
1110
1111static struct notifier_block ip_netdev_notifier = {
1112	.notifier_call = inetdev_event,
1113};
1114
1115static inline size_t inet_nlmsg_size(void)
1116{
1117	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1118	       + nla_total_size(4) /* IFA_ADDRESS */
1119	       + nla_total_size(4) /* IFA_LOCAL */
1120	       + nla_total_size(4) /* IFA_BROADCAST */
1121	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1122}
1123
1124static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1125			    u32 pid, u32 seq, int event, unsigned int flags)
1126{
1127	struct ifaddrmsg *ifm;
1128	struct nlmsghdr  *nlh;
1129
1130	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1131	if (nlh == NULL)
1132		return -EMSGSIZE;
1133
1134	ifm = nlmsg_data(nlh);
1135	ifm->ifa_family = AF_INET;
1136	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1137	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1138	ifm->ifa_scope = ifa->ifa_scope;
1139	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1140
1141	if (ifa->ifa_address)
1142		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1143
1144	if (ifa->ifa_local)
1145		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1146
1147	if (ifa->ifa_broadcast)
1148		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1149
1150	if (ifa->ifa_label[0])
1151		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1152
1153	return nlmsg_end(skb, nlh);
1154
1155nla_put_failure:
1156	nlmsg_cancel(skb, nlh);
1157	return -EMSGSIZE;
1158}
1159
/*
 * Dump callback registered for RTM_GETADDR (see devinet_init()).
 *
 * Walks every device of the socket's namespace and, for each device
 * that has an in_device, emits one RTM_NEWADDR/NLM_F_MULTI message per
 * address via inet_fill_ifaddr().
 *
 * The dump is resumable: cb->args[0] holds the device index and
 * cb->args[1] the address index at which to continue.  Both are
 * written back before returning.  Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	int s_ip_idx, s_idx = cb->args[0];

	s_ip_idx = ip_idx = cb->args[1];
	idx = 0;
	for_each_netdev(net, dev) {
		/* Skip devices that come before the saved device index. */
		if (idx < s_idx)
			goto cont;
		/*
		 * Past the device we stopped in: the saved address offset
		 * no longer applies, start from the first address.
		 */
		if (idx > s_idx)
			s_ip_idx = 0;
		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
			goto cont;

		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
		     ifa = ifa->ifa_next, ip_idx++) {
			/* Skip addresses before the saved address index. */
			if (ip_idx < s_ip_idx)
				continue;
			/*
			 * A result <= 0 ends this pass; idx/ip_idx then
			 * record where to resume.
			 */
			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
				goto done;
		}
cont:
		idx++;
	}

done:
	/* Save the position for the next invocation. */
	cb->args[0] = idx;
	cb->args[1] = ip_idx;

	return skb->len;
}
1198
1199static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1200		      u32 pid)
1201{
1202	struct sk_buff *skb;
1203	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1204	int err = -ENOBUFS;
1205	struct net *net;
1206
1207	net = dev_net(ifa->ifa_dev->dev);
1208	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1209	if (skb == NULL)
1210		goto errout;
1211
1212	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1213	if (err < 0) {
1214		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1215		WARN_ON(err == -EMSGSIZE);
1216		kfree_skb(skb);
1217		goto errout;
1218	}
1219	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1220	return;
1221errout:
1222	if (err < 0)
1223		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1224}
1225
1226#ifdef CONFIG_SYSCTL
1227
1228static void devinet_copy_dflt_conf(struct net *net, int i)
1229{
1230	struct net_device *dev;
1231
1232	read_lock(&dev_base_lock);
1233	for_each_netdev(net, dev) {
1234		struct in_device *in_dev;
1235		rcu_read_lock();
1236		in_dev = __in_dev_get_rcu(dev);
1237		if (in_dev && !test_bit(i, in_dev->cnf.state))
1238			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1239		rcu_read_unlock();
1240	}
1241	read_unlock(&dev_base_lock);
1242}
1243
1244static void inet_forward_change(struct net *net)
1245{
1246	struct net_device *dev;
1247	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1248
1249	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1250	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1251
1252	read_lock(&dev_base_lock);
1253	for_each_netdev(net, dev) {
1254		struct in_device *in_dev;
1255		if (on)
1256			dev_disable_lro(dev);
1257		rcu_read_lock();
1258		in_dev = __in_dev_get_rcu(dev);
1259		if (in_dev)
1260			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1261		rcu_read_unlock();
1262	}
1263	read_unlock(&dev_base_lock);
1264}
1265
1266static int devinet_conf_proc(ctl_table *ctl, int write,
1267			     struct file *filp, void __user *buffer,
1268			     size_t *lenp, loff_t *ppos)
1269{
1270	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1271
1272	if (write) {
1273		struct ipv4_devconf *cnf = ctl->extra1;
1274		struct net *net = ctl->extra2;
1275		int i = (int *)ctl->data - cnf->data;
1276
1277		set_bit(i, cnf->state);
1278
1279		if (cnf == net->ipv4.devconf_dflt)
1280			devinet_copy_dflt_conf(net, i);
1281	}
1282
1283	return ret;
1284}
1285
1286static int devinet_conf_sysctl(ctl_table *table,
1287			       void __user *oldval, size_t __user *oldlenp,
1288			       void __user *newval, size_t newlen)
1289{
1290	struct ipv4_devconf *cnf;
1291	struct net *net;
1292	int *valp = table->data;
1293	int new;
1294	int i;
1295
1296	if (!newval || !newlen)
1297		return 0;
1298
1299	if (newlen != sizeof(int))
1300		return -EINVAL;
1301
1302	if (get_user(new, (int __user *)newval))
1303		return -EFAULT;
1304
1305	if (new == *valp)
1306		return 0;
1307
1308	if (oldval && oldlenp) {
1309		size_t len;
1310
1311		if (get_user(len, oldlenp))
1312			return -EFAULT;
1313
1314		if (len) {
1315			if (len > table->maxlen)
1316				len = table->maxlen;
1317			if (copy_to_user(oldval, valp, len))
1318				return -EFAULT;
1319			if (put_user(len, oldlenp))
1320				return -EFAULT;
1321		}
1322	}
1323
1324	*valp = new;
1325
1326	cnf = table->extra1;
1327	net = table->extra2;
1328	i = (int *)table->data - cnf->data;
1329
1330	set_bit(i, cnf->state);
1331
1332	if (cnf == net->ipv4.devconf_dflt)
1333		devinet_copy_dflt_conf(net, i);
1334
1335	return 1;
1336}
1337
1338static int devinet_sysctl_forward(ctl_table *ctl, int write,
1339				  struct file *filp, void __user *buffer,
1340				  size_t *lenp, loff_t *ppos)
1341{
1342	int *valp = ctl->data;
1343	int val = *valp;
1344	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1345
1346	if (write && *valp != val) {
1347		struct net *net = ctl->extra2;
1348
1349		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1350			if (!rtnl_trylock())
1351				return restart_syscall();
1352			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1353				inet_forward_change(net);
1354			} else if (*valp) {
1355				struct ipv4_devconf *cnf = ctl->extra1;
1356				struct in_device *idev =
1357					container_of(cnf, struct in_device, cnf);
1358				dev_disable_lro(idev->dev);
1359			}
1360			rtnl_unlock();
1361			rt_cache_flush(net, 0);
1362		}
1363	}
1364
1365	return ret;
1366}
1367
1368int ipv4_doint_and_flush(ctl_table *ctl, int write,
1369			 struct file *filp, void __user *buffer,
1370			 size_t *lenp, loff_t *ppos)
1371{
1372	int *valp = ctl->data;
1373	int val = *valp;
1374	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1375	struct net *net = ctl->extra2;
1376
1377	if (write && *valp != val)
1378		rt_cache_flush(net, 0);
1379
1380	return ret;
1381}
1382
1383int ipv4_doint_and_flush_strategy(ctl_table *table,
1384				  void __user *oldval, size_t __user *oldlenp,
1385				  void __user *newval, size_t newlen)
1386{
1387	int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
1388	struct net *net = table->extra2;
1389
1390	if (ret == 1)
1391		rt_cache_flush(net, 0);
1392
1393	return ret;
1394}
1395
1396
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * @name is the procname, @mval the file mode, @proc the proc handler
 * and @sysctl the strategy routine.  .data and .extra1 point into the
 * global ipv4_devconf; .data is slot NET_IPV4_CONF_<attr> - 1 of its
 * data[] array.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied proc and strategy handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Mode 0644 entry whose handlers flush the routing cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1424
/*
 * Template for the sysctl table of one devconf instance.
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() and
 * rebases the .data/.extra1 pointers of the copy from the global
 * ipv4_devconf onto the devconf being registered.
 */
static struct devinet_sysctl_table {
	/* Handle returned by register_net_sysctl_table(). */
	struct ctl_table_header *sysctl_header;
	/*
	 * One slot per attribute; the register loop leaves the last slot
	 * untouched (presumably the zeroed terminator - TODO confirm).
	 */
	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
	/* Private copy of the directory name, owned by the table. */
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		/* FORWARDING has its own proc handler. */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward,
					     devinet_conf_sysctl),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),

		/* Changing any of these also flushes the routing cache. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1462
1463static int __devinet_sysctl_register(struct net *net, char *dev_name,
1464		int ctl_name, struct ipv4_devconf *p)
1465{
1466	int i;
1467	struct devinet_sysctl_table *t;
1468
1469#define DEVINET_CTL_PATH_DEV	3
1470
1471	struct ctl_path devinet_ctl_path[] = {
1472		{ .procname = "net", .ctl_name = CTL_NET, },
1473		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1474		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1475		{ /* to be set */ },
1476		{ },
1477	};
1478
1479	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1480	if (!t)
1481		goto out;
1482
1483	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1484		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1485		t->devinet_vars[i].extra1 = p;
1486		t->devinet_vars[i].extra2 = net;
1487	}
1488
1489	/*
1490	 * Make a copy of dev_name, because '.procname' is regarded as const
1491	 * by sysctl and we wouldn't want anyone to change it under our feet
1492	 * (see SIOCSIFNAME).
1493	 */
1494	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1495	if (!t->dev_name)
1496		goto free;
1497
1498	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1499	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1500
1501	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1502			t->devinet_vars);
1503	if (!t->sysctl_header)
1504		goto free_procname;
1505
1506	p->sysctl = t;
1507	return 0;
1508
1509free_procname:
1510	kfree(t->dev_name);
1511free:
1512	kfree(t);
1513out:
1514	return -ENOBUFS;
1515}
1516
1517static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1518{
1519	struct devinet_sysctl_table *t = cnf->sysctl;
1520
1521	if (t == NULL)
1522		return;
1523
1524	cnf->sysctl = NULL;
1525	unregister_sysctl_table(t->sysctl_header);
1526	kfree(t->dev_name);
1527	kfree(t);
1528}
1529
1530static void devinet_sysctl_register(struct in_device *idev)
1531{
1532	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1533			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1534	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1535			idev->dev->ifindex, &idev->cnf);
1536}
1537
1538static void devinet_sysctl_unregister(struct in_device *idev)
1539{
1540	__devinet_sysctl_unregister(&idev->cnf);
1541	neigh_sysctl_unregister(idev->arp_parms);
1542}
1543
/*
 * Table for the "ip_forward" entry, registered under net_ipv4_path by
 * devinet_init_net().  As defined here it refers to the FORWARDING slot
 * of the global ipv4_devconf and to init_net; for other namespaces
 * devinet_init_net() duplicates the table and repoints .data, .extra1
 * and .extra2.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.ctl_name	= NET_IPV4_FORWARD,
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					NET_IPV4_CONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.strategy	= devinet_conf_sysctl,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1559
/* sysctl path "net/ipv4" under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", .ctl_name = CTL_NET, },
	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
	{ },
};
1565#endif
1566
/*
 * Per-namespace init (devinet_ops.init).
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects and
 * the static ctl_forward_entry table; every other namespace gets
 * kmemdup()ed copies.  With CONFIG_SYSCTL the "all" and "default"
 * devconf directories and the "ip_forward" entry are registered.
 *
 * Returns 0 on success.  On failure everything set up so far is undone
 * and a negative errno is returned (-ENOMEM for allocation failures and
 * for a failed ip_forward registration, otherwise the error from
 * __devinet_sysctl_register()).
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	/* Namespaces other than init_net work on private copies. */
	if (net != &init_net) {
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied ip_forward entry at this namespace. */
		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all",
			NET_PROTO_CONF_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
			NET_PROTO_CONF_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/*
	 * Error unwinding, in reverse order of setup.  The static objects
	 * used by init_net are never passed to kfree().
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1640
1641static __net_exit void devinet_exit_net(struct net *net)
1642{
1643#ifdef CONFIG_SYSCTL
1644	struct ctl_table *tbl;
1645
1646	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1647	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1648	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1649	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1650	kfree(tbl);
1651#endif
1652	kfree(net->ipv4.devconf_dflt);
1653	kfree(net->ipv4.devconf_all);
1654}
1655
/*
 * Per-namespace hooks, registered by devinet_init() with
 * register_pernet_subsys().
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1660
/*
 * Boot-time initialisation of the IPv4 device layer: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier and the rtnetlink handlers for address messages.
 */
void __init devinet_init(void)
{
	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* NEWADDR/DELADDR get doit handlers only; GETADDR is dump-only. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1672
1673EXPORT_SYMBOL(in_dev_finish_destroy);
1674EXPORT_SYMBOL(inet_select_addr);
1675EXPORT_SYMBOL(inetdev_by_index);
1676EXPORT_SYMBOL(register_inetaddr_notifier);
1677EXPORT_SYMBOL(unregister_inetaddr_notifier);
1678