devinet.c revision 8723e1b4ad9be4444423b4d41509ce859a629649
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#include <linux/slab.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58
59#include <net/arp.h>
60#include <net/ip.h>
61#include <net/route.h>
62#include <net/ip_fib.h>
63#include <net/rtnetlink.h>
64#include <net/net_namespace.h>
65
66static struct ipv4_devconf ipv4_devconf = {
67	.data = {
68		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72	},
73};
74
75static struct ipv4_devconf ipv4_devconf_dflt = {
76	.data = {
77		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82	},
83};
84
/*
 * Expands to IPV4_DEVCONF() applied to the devconf_dflt table of network
 * namespace @net for attribute @attr.  @net is parenthesised so that any
 * pointer-valued expression can be passed, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87
88static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89	[IFA_LOCAL]     	= { .type = NLA_U32 },
90	[IFA_ADDRESS]   	= { .type = NLA_U32 },
91	[IFA_BROADCAST] 	= { .type = NLA_U32 },
92	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93};
94
95static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
97static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99			 int destroy);
100#ifdef CONFIG_SYSCTL
101static void devinet_sysctl_register(struct in_device *idev);
102static void devinet_sysctl_unregister(struct in_device *idev);
103#else
104static inline void devinet_sysctl_register(struct in_device *idev)
105{
106}
107static inline void devinet_sysctl_unregister(struct in_device *idev)
108{
109}
110#endif
111
112/* Locks all the inet devices. */
113
114static struct in_ifaddr *inet_alloc_ifa(void)
115{
116	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117}
118
119static void inet_rcu_free_ifa(struct rcu_head *head)
120{
121	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122	if (ifa->ifa_dev)
123		in_dev_put(ifa->ifa_dev);
124	kfree(ifa);
125}
126
127static inline void inet_free_ifa(struct in_ifaddr *ifa)
128{
129	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130}
131
132void in_dev_finish_destroy(struct in_device *idev)
133{
134	struct net_device *dev = idev->dev;
135
136	WARN_ON(idev->ifa_list);
137	WARN_ON(idev->mc_list);
138#ifdef NET_REFCNT_DEBUG
139	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140	       idev, dev ? dev->name : "NIL");
141#endif
142	dev_put(dev);
143	if (!idev->dead)
144		pr_err("Freeing alive in_device %p\n", idev);
145	else
146		kfree(idev);
147}
148EXPORT_SYMBOL(in_dev_finish_destroy);
149
150static struct in_device *inetdev_init(struct net_device *dev)
151{
152	struct in_device *in_dev;
153
154	ASSERT_RTNL();
155
156	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157	if (!in_dev)
158		goto out;
159	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160			sizeof(in_dev->cnf));
161	in_dev->cnf.sysctl = NULL;
162	in_dev->dev = dev;
163	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164	if (!in_dev->arp_parms)
165		goto out_kfree;
166	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167		dev_disable_lro(dev);
168	/* Reference in_dev->dev */
169	dev_hold(dev);
170	/* Account for reference dev->ip_ptr (below) */
171	in_dev_hold(in_dev);
172
173	devinet_sysctl_register(in_dev);
174	ip_mc_init_dev(in_dev);
175	if (dev->flags & IFF_UP)
176		ip_mc_up(in_dev);
177
178	/* we can receive as soon as ip_ptr is set -- do this last */
179	rcu_assign_pointer(dev->ip_ptr, in_dev);
180out:
181	return in_dev;
182out_kfree:
183	kfree(in_dev);
184	in_dev = NULL;
185	goto out;
186}
187
188static void in_dev_rcu_put(struct rcu_head *head)
189{
190	struct in_device *idev = container_of(head, struct in_device, rcu_head);
191	in_dev_put(idev);
192}
193
194static void inetdev_destroy(struct in_device *in_dev)
195{
196	struct in_ifaddr *ifa;
197	struct net_device *dev;
198
199	ASSERT_RTNL();
200
201	dev = in_dev->dev;
202
203	in_dev->dead = 1;
204
205	ip_mc_destroy_dev(in_dev);
206
207	while ((ifa = in_dev->ifa_list) != NULL) {
208		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209		inet_free_ifa(ifa);
210	}
211
212	rcu_assign_pointer(dev->ip_ptr, NULL);
213
214	devinet_sysctl_unregister(in_dev);
215	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216	arp_ifdown(dev);
217
218	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219}
220
221int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222{
223	rcu_read_lock();
224	for_primary_ifa(in_dev) {
225		if (inet_ifa_match(a, ifa)) {
226			if (!b || inet_ifa_match(b, ifa)) {
227				rcu_read_unlock();
228				return 1;
229			}
230		}
231	} endfor_ifa(in_dev);
232	rcu_read_unlock();
233	return 0;
234}
235
/*
 * Unlink the address *ifap from @in_dev's list and announce the removal,
 * first via rtmsg_ifa(RTM_DELADDR) and then via the inetaddr notifier
 * chain (NETDEV_DOWN).  Requires RTNL.
 *
 * If the removed address is a primary, the entries after it are scanned
 * for secondaries with the same mask that inet_ifa_match() accepts for
 * its address.  Without IN_DEV_PROMOTE_SECONDARIES each such secondary is
 * unlinked, announced and freed; with it, the first one found is promoted
 * to primary after the removal has been announced.
 *
 * @destroy: non-zero to also release the removed address with
 *           inet_free_ifa(); zero leaves it to the caller.
 * @nlh, @pid: passed through unchanged to rtmsg_ifa().
 */
236static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237			 int destroy, struct nlmsghdr *nlh, u32 pid)
238{
239	struct in_ifaddr *promote = NULL;
240	struct in_ifaddr *ifa, *ifa1 = *ifap;
241	struct in_ifaddr *last_prim = in_dev->ifa_list;
242	struct in_ifaddr *prev_prom = NULL;
243	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
244
245	ASSERT_RTNL();
246
247	/* 1. Deleting primary ifaddr forces deletion all secondaries
248	 * unless alias promotion is set
249	 **/
250
251	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253
254		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary whose scope value is not
			 * below ifa1's; a promoted entry is re-linked
			 * right after it. */
255			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256			    ifa1->ifa_scope <= ifa->ifa_scope)
257				last_prim = ifa;
258
			/* Skip primaries and entries outside ifa1's subnet;
			 * prev_prom remembers the entry just before the
			 * eventual promotion candidate. */
259			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260			    ifa1->ifa_mask != ifa->ifa_mask ||
261			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
262				ifap1 = &ifa->ifa_next;
263				prev_prom = ifa;
264				continue;
265			}
266
267			if (!do_promote) {
				/* ifap1 is not advanced: the successor has
				 * just moved into the slot it points to. */
268				*ifap1 = ifa->ifa_next;
269
270				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271				blocking_notifier_call_chain(&inetaddr_chain,
272						NETDEV_DOWN, ifa);
273				inet_free_ifa(ifa);
274			} else {
275				promote = ifa;
276				break;
277			}
278		}
279	}
280
281	/* 2. Unlink it */
282
283	*ifap = ifa1->ifa_next;
284
285	/* 3. Announce address deletion */
286
287	/* Send message first, then call notifier.
288	   At first sight, FIB update triggered by notifier
289	   will refer to already deleted ifaddr, that could confuse
290	   netlink listeners. It is not true: look, gated sees
291	   that route deleted and if it still thinks that ifaddr
292	   is valid, it will try to restore deleted routes... Grr.
293	   So that, this order is correct.
294	 */
295	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297
298	if (promote) {
299
		/* prev_prom is NULL only when the candidate directly
		 * followed ifa1, in which case it already sits where ifa1
		 * used to be.  Otherwise move it to just after last_prim. */
300		if (prev_prom) {
301			prev_prom->ifa_next = promote->ifa_next;
302			promote->ifa_next = last_prim->ifa_next;
303			last_prim->ifa_next = promote;
304		}
305
306		promote->ifa_flags &= ~IFA_F_SECONDARY;
307		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308		blocking_notifier_call_chain(&inetaddr_chain,
309				NETDEV_UP, promote);
		/* Call fib_add_ifaddr() for every later entry that shares
		 * ifa1's mask and matches its address. */
310		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311			if (ifa1->ifa_mask != ifa->ifa_mask ||
312			    !inet_ifa_match(ifa1->ifa_address, ifa))
313					continue;
314			fib_add_ifaddr(ifa);
315		}
316
317	}
318	if (destroy)
319		inet_free_ifa(ifa1);
320}
321
322static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323			 int destroy)
324{
325	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326}
327
328static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329			     u32 pid)
330{
331	struct in_device *in_dev = ifa->ifa_dev;
332	struct in_ifaddr *ifa1, **ifap, **last_primary;
333
334	ASSERT_RTNL();
335
336	if (!ifa->ifa_local) {
337		inet_free_ifa(ifa);
338		return 0;
339	}
340
341	ifa->ifa_flags &= ~IFA_F_SECONDARY;
342	last_primary = &in_dev->ifa_list;
343
344	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345	     ifap = &ifa1->ifa_next) {
346		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347		    ifa->ifa_scope <= ifa1->ifa_scope)
348			last_primary = &ifa1->ifa_next;
349		if (ifa1->ifa_mask == ifa->ifa_mask &&
350		    inet_ifa_match(ifa1->ifa_address, ifa)) {
351			if (ifa1->ifa_local == ifa->ifa_local) {
352				inet_free_ifa(ifa);
353				return -EEXIST;
354			}
355			if (ifa1->ifa_scope != ifa->ifa_scope) {
356				inet_free_ifa(ifa);
357				return -EINVAL;
358			}
359			ifa->ifa_flags |= IFA_F_SECONDARY;
360		}
361	}
362
363	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364		net_srandom(ifa->ifa_local);
365		ifap = last_primary;
366	}
367
368	ifa->ifa_next = *ifap;
369	*ifap = ifa;
370
371	/* Send message first, then call notifier.
372	   Notifier will trigger FIB update, so that
373	   listeners of netlink will know about new ifaddr */
374	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376
377	return 0;
378}
379
380static int inet_insert_ifa(struct in_ifaddr *ifa)
381{
382	return __inet_insert_ifa(ifa, NULL, 0);
383}
384
385static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386{
387	struct in_device *in_dev = __in_dev_get_rtnl(dev);
388
389	ASSERT_RTNL();
390
391	if (!in_dev) {
392		inet_free_ifa(ifa);
393		return -ENOBUFS;
394	}
395	ipv4_devconf_setall(in_dev);
396	if (ifa->ifa_dev != in_dev) {
397		WARN_ON(ifa->ifa_dev);
398		in_dev_hold(in_dev);
399		ifa->ifa_dev = in_dev;
400	}
401	if (ipv4_is_loopback(ifa->ifa_local))
402		ifa->ifa_scope = RT_SCOPE_HOST;
403	return inet_insert_ifa(ifa);
404}
405
406/* Caller must hold RCU or RTNL :
407 * We dont take a reference on found in_device
408 */
409struct in_device *inetdev_by_index(struct net *net, int ifindex)
410{
411	struct net_device *dev;
412	struct in_device *in_dev = NULL;
413
414	rcu_read_lock();
415	dev = dev_get_by_index_rcu(net, ifindex);
416	if (dev)
417		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
418	rcu_read_unlock();
419	return in_dev;
420}
421EXPORT_SYMBOL(inetdev_by_index);
422
423/* Called only from RTNL semaphored context. No locks. */
424
425struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426				    __be32 mask)
427{
428	ASSERT_RTNL();
429
430	for_primary_ifa(in_dev) {
431		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432			return ifa;
433	} endfor_ifa(in_dev);
434	return NULL;
435}
436
437static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438{
439	struct net *net = sock_net(skb->sk);
440	struct nlattr *tb[IFA_MAX+1];
441	struct in_device *in_dev;
442	struct ifaddrmsg *ifm;
443	struct in_ifaddr *ifa, **ifap;
444	int err = -EINVAL;
445
446	ASSERT_RTNL();
447
448	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449	if (err < 0)
450		goto errout;
451
452	ifm = nlmsg_data(nlh);
453	in_dev = inetdev_by_index(net, ifm->ifa_index);
454	if (in_dev == NULL) {
455		err = -ENODEV;
456		goto errout;
457	}
458
459	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460	     ifap = &ifa->ifa_next) {
461		if (tb[IFA_LOCAL] &&
462		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463			continue;
464
465		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466			continue;
467
468		if (tb[IFA_ADDRESS] &&
469		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471			continue;
472
473		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
474		return 0;
475	}
476
477	err = -EADDRNOTAVAIL;
478errout:
479	return err;
480}
481
482static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483{
484	struct nlattr *tb[IFA_MAX+1];
485	struct in_ifaddr *ifa;
486	struct ifaddrmsg *ifm;
487	struct net_device *dev;
488	struct in_device *in_dev;
489	int err;
490
491	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
492	if (err < 0)
493		goto errout;
494
495	ifm = nlmsg_data(nlh);
496	err = -EINVAL;
497	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498		goto errout;
499
500	dev = __dev_get_by_index(net, ifm->ifa_index);
501	err = -ENODEV;
502	if (dev == NULL)
503		goto errout;
504
505	in_dev = __in_dev_get_rtnl(dev);
506	err = -ENOBUFS;
507	if (in_dev == NULL)
508		goto errout;
509
510	ifa = inet_alloc_ifa();
511	if (ifa == NULL)
512		/*
513		 * A potential indev allocation can be left alive, it stays
514		 * assigned to its device and is destroy with it.
515		 */
516		goto errout;
517
518	ipv4_devconf_setall(in_dev);
519	in_dev_hold(in_dev);
520
521	if (tb[IFA_ADDRESS] == NULL)
522		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523
524	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526	ifa->ifa_flags = ifm->ifa_flags;
527	ifa->ifa_scope = ifm->ifa_scope;
528	ifa->ifa_dev = in_dev;
529
530	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532
533	if (tb[IFA_BROADCAST])
534		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535
536	if (tb[IFA_LABEL])
537		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538	else
539		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
540
541	return ifa;
542
543errout:
544	return ERR_PTR(err);
545}
546
547static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548{
549	struct net *net = sock_net(skb->sk);
550	struct in_ifaddr *ifa;
551
552	ASSERT_RTNL();
553
554	ifa = rtm_to_ifaddr(net, nlh);
555	if (IS_ERR(ifa))
556		return PTR_ERR(ifa);
557
558	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
559}
560
561/*
562 *	Determine a default network mask, based on the IP address.
563 */
564
565static inline int inet_abc_len(__be32 addr)
566{
567	int rc = -1;	/* Something else, probably a multicast. */
568
569	if (ipv4_is_zeronet(addr))
570		rc = 0;
571	else {
572		__u32 haddr = ntohl(addr);
573
574		if (IN_CLASSA(haddr))
575			rc = 8;
576		else if (IN_CLASSB(haddr))
577			rc = 16;
578		else if (IN_CLASSC(haddr))
579			rc = 24;
580	}
581
582	return rc;
583}
584
585
586int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
587{
588	struct ifreq ifr;
589	struct sockaddr_in sin_orig;
590	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591	struct in_device *in_dev;
592	struct in_ifaddr **ifap = NULL;
593	struct in_ifaddr *ifa = NULL;
594	struct net_device *dev;
595	char *colon;
596	int ret = -EFAULT;
597	int tryaddrmatch = 0;
598
599	/*
600	 *	Fetch the caller's info block into kernel space
601	 */
602
603	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
604		goto out;
605	ifr.ifr_name[IFNAMSIZ - 1] = 0;
606
607	/* save original address for comparison */
608	memcpy(&sin_orig, sin, sizeof(*sin));
609
610	colon = strchr(ifr.ifr_name, ':');
611	if (colon)
612		*colon = 0;
613
614	dev_load(net, ifr.ifr_name);
615
616	switch (cmd) {
617	case SIOCGIFADDR:	/* Get interface address */
618	case SIOCGIFBRDADDR:	/* Get the broadcast address */
619	case SIOCGIFDSTADDR:	/* Get the destination address */
620	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
621		/* Note that these ioctls will not sleep,
622		   so that we do not impose a lock.
623		   One day we will be forced to put shlock here (I mean SMP)
624		 */
625		tryaddrmatch = (sin_orig.sin_family == AF_INET);
626		memset(sin, 0, sizeof(*sin));
627		sin->sin_family = AF_INET;
628		break;
629
630	case SIOCSIFFLAGS:
631		ret = -EACCES;
632		if (!capable(CAP_NET_ADMIN))
633			goto out;
634		break;
635	case SIOCSIFADDR:	/* Set interface address (and family) */
636	case SIOCSIFBRDADDR:	/* Set the broadcast address */
637	case SIOCSIFDSTADDR:	/* Set the destination address */
638	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
639		ret = -EACCES;
640		if (!capable(CAP_NET_ADMIN))
641			goto out;
642		ret = -EINVAL;
643		if (sin->sin_family != AF_INET)
644			goto out;
645		break;
646	default:
647		ret = -EINVAL;
648		goto out;
649	}
650
651	rtnl_lock();
652
653	ret = -ENODEV;
654	dev = __dev_get_by_name(net, ifr.ifr_name);
655	if (!dev)
656		goto done;
657
658	if (colon)
659		*colon = ':';
660
661	in_dev = __in_dev_get_rtnl(dev);
662	if (in_dev) {
663		if (tryaddrmatch) {
664			/* Matthias Andree */
665			/* compare label and address (4.4BSD style) */
666			/* note: we only do this for a limited set of ioctls
667			   and only if the original address family was AF_INET.
668			   This is checked above. */
669			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670			     ifap = &ifa->ifa_next) {
671				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672				    sin_orig.sin_addr.s_addr ==
673							ifa->ifa_address) {
674					break; /* found */
675				}
676			}
677		}
678		/* we didn't get a match, maybe the application is
679		   4.3BSD-style and passed in junk so we fall back to
680		   comparing just the label */
681		if (!ifa) {
682			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683			     ifap = &ifa->ifa_next)
684				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
685					break;
686		}
687	}
688
689	ret = -EADDRNOTAVAIL;
690	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
691		goto done;
692
693	switch (cmd) {
694	case SIOCGIFADDR:	/* Get interface address */
695		sin->sin_addr.s_addr = ifa->ifa_local;
696		goto rarok;
697
698	case SIOCGIFBRDADDR:	/* Get the broadcast address */
699		sin->sin_addr.s_addr = ifa->ifa_broadcast;
700		goto rarok;
701
702	case SIOCGIFDSTADDR:	/* Get the destination address */
703		sin->sin_addr.s_addr = ifa->ifa_address;
704		goto rarok;
705
706	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
707		sin->sin_addr.s_addr = ifa->ifa_mask;
708		goto rarok;
709
710	case SIOCSIFFLAGS:
711		if (colon) {
712			ret = -EADDRNOTAVAIL;
713			if (!ifa)
714				break;
715			ret = 0;
716			if (!(ifr.ifr_flags & IFF_UP))
717				inet_del_ifa(in_dev, ifap, 1);
718			break;
719		}
720		ret = dev_change_flags(dev, ifr.ifr_flags);
721		break;
722
723	case SIOCSIFADDR:	/* Set interface address (and family) */
724		ret = -EINVAL;
725		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
726			break;
727
728		if (!ifa) {
729			ret = -ENOBUFS;
730			ifa = inet_alloc_ifa();
731			if (!ifa)
732				break;
733			if (colon)
734				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
735			else
736				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
737		} else {
738			ret = 0;
739			if (ifa->ifa_local == sin->sin_addr.s_addr)
740				break;
741			inet_del_ifa(in_dev, ifap, 0);
742			ifa->ifa_broadcast = 0;
743			ifa->ifa_scope = 0;
744		}
745
746		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
747
748		if (!(dev->flags & IFF_POINTOPOINT)) {
749			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
750			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
751			if ((dev->flags & IFF_BROADCAST) &&
752			    ifa->ifa_prefixlen < 31)
753				ifa->ifa_broadcast = ifa->ifa_address |
754						     ~ifa->ifa_mask;
755		} else {
756			ifa->ifa_prefixlen = 32;
757			ifa->ifa_mask = inet_make_mask(32);
758		}
759		ret = inet_set_ifa(dev, ifa);
760		break;
761
762	case SIOCSIFBRDADDR:	/* Set the broadcast address */
763		ret = 0;
764		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
765			inet_del_ifa(in_dev, ifap, 0);
766			ifa->ifa_broadcast = sin->sin_addr.s_addr;
767			inet_insert_ifa(ifa);
768		}
769		break;
770
771	case SIOCSIFDSTADDR:	/* Set the destination address */
772		ret = 0;
773		if (ifa->ifa_address == sin->sin_addr.s_addr)
774			break;
775		ret = -EINVAL;
776		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
777			break;
778		ret = 0;
779		inet_del_ifa(in_dev, ifap, 0);
780		ifa->ifa_address = sin->sin_addr.s_addr;
781		inet_insert_ifa(ifa);
782		break;
783
784	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
785
786		/*
787		 *	The mask we set must be legal.
788		 */
789		ret = -EINVAL;
790		if (bad_mask(sin->sin_addr.s_addr, 0))
791			break;
792		ret = 0;
793		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
794			__be32 old_mask = ifa->ifa_mask;
795			inet_del_ifa(in_dev, ifap, 0);
796			ifa->ifa_mask = sin->sin_addr.s_addr;
797			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
798
799			/* See if current broadcast address matches
800			 * with current netmask, then recalculate
801			 * the broadcast address. Otherwise it's a
802			 * funny address, so don't touch it since
803			 * the user seems to know what (s)he's doing...
804			 */
805			if ((dev->flags & IFF_BROADCAST) &&
806			    (ifa->ifa_prefixlen < 31) &&
807			    (ifa->ifa_broadcast ==
808			     (ifa->ifa_local|~old_mask))) {
809				ifa->ifa_broadcast = (ifa->ifa_local |
810						      ~sin->sin_addr.s_addr);
811			}
812			inet_insert_ifa(ifa);
813		}
814		break;
815	}
816done:
817	rtnl_unlock();
818out:
819	return ret;
820rarok:
821	rtnl_unlock();
822	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
823	goto out;
824}
825
826static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
827{
828	struct in_device *in_dev = __in_dev_get_rtnl(dev);
829	struct in_ifaddr *ifa;
830	struct ifreq ifr;
831	int done = 0;
832
833	if (!in_dev)
834		goto out;
835
836	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
837		if (!buf) {
838			done += sizeof(ifr);
839			continue;
840		}
841		if (len < (int) sizeof(ifr))
842			break;
843		memset(&ifr, 0, sizeof(struct ifreq));
844		if (ifa->ifa_label)
845			strcpy(ifr.ifr_name, ifa->ifa_label);
846		else
847			strcpy(ifr.ifr_name, dev->name);
848
849		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
850		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
851								ifa->ifa_local;
852
853		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
854			done = -EFAULT;
855			break;
856		}
857		buf  += sizeof(struct ifreq);
858		len  -= sizeof(struct ifreq);
859		done += sizeof(struct ifreq);
860	}
861out:
862	return done;
863}
864
865__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
866{
867	__be32 addr = 0;
868	struct in_device *in_dev;
869	struct net *net = dev_net(dev);
870
871	rcu_read_lock();
872	in_dev = __in_dev_get_rcu(dev);
873	if (!in_dev)
874		goto no_in_dev;
875
876	for_primary_ifa(in_dev) {
877		if (ifa->ifa_scope > scope)
878			continue;
879		if (!dst || inet_ifa_match(dst, ifa)) {
880			addr = ifa->ifa_local;
881			break;
882		}
883		if (!addr)
884			addr = ifa->ifa_local;
885	} endfor_ifa(in_dev);
886
887	if (addr)
888		goto out_unlock;
889no_in_dev:
890
891	/* Not loopback addresses on loopback should be preferred
892	   in this case. It is importnat that lo is the first interface
893	   in dev_base list.
894	 */
895	for_each_netdev_rcu(net, dev) {
896		in_dev = __in_dev_get_rcu(dev);
897		if (!in_dev)
898			continue;
899
900		for_primary_ifa(in_dev) {
901			if (ifa->ifa_scope != RT_SCOPE_LINK &&
902			    ifa->ifa_scope <= scope) {
903				addr = ifa->ifa_local;
904				goto out_unlock;
905			}
906		} endfor_ifa(in_dev);
907	}
908out_unlock:
909	rcu_read_unlock();
910	return addr;
911}
912EXPORT_SYMBOL(inet_select_addr);
913
/*
 * Search the addresses of @in_dev for a local address that satisfies the
 * wildcard constraints @dst, @local and @scope (zero @dst or @local means
 * "any").
 *
 * Two things are tracked while walking the list:
 *   addr - the first ifa_local that equals @local (or any, when @local is
 *          zero) and whose scope value does not exceed @scope;
 *   same - whether an entry has been seen whose subnet, per
 *          inet_ifa_match(), contains @local and @dst (each only checked
 *          when non-zero).
 *
 * Returns addr when a "same" entry was found, 0 otherwise.
 */
914static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
915			      __be32 local, int scope)
916{
917	int same = 0;
918	__be32 addr = 0;
919
920	for_ifa(in_dev) {
		/* Pick the candidate once; if the subnet condition was
		 * already met by an earlier entry the search is over. */
921		if (!addr &&
922		    (local == ifa->ifa_local || !local) &&
923		    ifa->ifa_scope <= scope) {
924			addr = ifa->ifa_local;
925			if (same)
926				break;
927		}
928		if (!same) {
929			same = (!local || inet_ifa_match(local, ifa)) &&
930				(!dst || inet_ifa_match(dst, ifa));
931			if (same && addr) {
				/* With an explicit @local, or no @dst
				 * restriction, the candidate stands. */
932				if (local || !dst)
933					break;
934				/* Is the selected addr into dst subnet? */
935				if (inet_ifa_match(addr, ifa))
936					break;
937				/* No, then can we use new local src? */
938				if (ifa->ifa_scope <= scope) {
939					addr = ifa->ifa_local;
940					break;
941				}
942				/* search for large dst subnet for addr */
943				same = 0;
944			}
945		}
946	} endfor_ifa(in_dev);
947
948	return same ? addr : 0;
949}
950
951/*
952 * Confirm that local IP address exists using wildcards:
953 * - in_dev: only on this interface, 0=any interface
954 * - dst: only in the same subnet as dst, 0=any dst
955 * - local: address, 0=autoselect the local address
956 * - scope: maximum allowed scope value for the local address
957 */
958__be32 inet_confirm_addr(struct in_device *in_dev,
959			 __be32 dst, __be32 local, int scope)
960{
961	__be32 addr = 0;
962	struct net_device *dev;
963	struct net *net;
964
965	if (scope != RT_SCOPE_LINK)
966		return confirm_addr_indev(in_dev, dst, local, scope);
967
968	net = dev_net(in_dev->dev);
969	rcu_read_lock();
970	for_each_netdev_rcu(net, dev) {
971		in_dev = __in_dev_get_rcu(dev);
972		if (in_dev) {
973			addr = confirm_addr_indev(in_dev, dst, local, scope);
974			if (addr)
975				break;
976		}
977	}
978	rcu_read_unlock();
979
980	return addr;
981}
982
983/*
984 *	Device notifier
985 */
986
987int register_inetaddr_notifier(struct notifier_block *nb)
988{
989	return blocking_notifier_chain_register(&inetaddr_chain, nb);
990}
991EXPORT_SYMBOL(register_inetaddr_notifier);
992
993int unregister_inetaddr_notifier(struct notifier_block *nb)
994{
995	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
996}
997EXPORT_SYMBOL(unregister_inetaddr_notifier);
998
999/* Rename ifa_labels for a device name change. Make some effort to preserve
1000 * existing alias numbering and to create unique labels if possible.
1001*/
1002static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1003{
1004	struct in_ifaddr *ifa;
1005	int named = 0;
1006
1007	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1008		char old[IFNAMSIZ], *dot;
1009
1010		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1011		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1012		if (named++ == 0)
1013			goto skip;
1014		dot = strchr(old, ':');
1015		if (dot == NULL) {
1016			sprintf(old, ":%d", named);
1017			dot = old;
1018		}
1019		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1020			strcat(ifa->ifa_label, dot);
1021		else
1022			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1023skip:
1024		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1025	}
1026}
1027
/* An MTU below 68 is too small for IPv4 to be enabled on the device. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1032
1033/* Called only under RTNL semaphore */
1034
/*
 * Netdevice notifier callback that keeps a device's IPv4 state in step
 * with device events.  @ptr is the struct net_device the event concerns.
 *
 * Returns NOTIFY_DONE, except that a failed in_device allocation on
 * NETDEV_REGISTER is reported as notifier_from_errno(-ENOMEM).
 */
1035static int inetdev_event(struct notifier_block *this, unsigned long event,
1036			 void *ptr)
1037{
1038	struct net_device *dev = ptr;
1039	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1040
1041	ASSERT_RTNL();
1042
	/* No IPv4 state yet: only REGISTER, or CHANGEMTU to a valid MTU,
	 * creates it; every other event is ignored. */
1043	if (!in_dev) {
1044		if (event == NETDEV_REGISTER) {
1045			in_dev = inetdev_init(dev);
1046			if (!in_dev)
1047				return notifier_from_errno(-ENOMEM);
1048			if (dev->flags & IFF_LOOPBACK) {
1049				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1050				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1051			}
1052		} else if (event == NETDEV_CHANGEMTU) {
1053			/* Re-enabling IP */
1054			if (inetdev_valid_mtu(dev->mtu))
1055				in_dev = inetdev_init(dev);
1056		}
1057		goto out;
1058	}
1059
1060	switch (event) {
1061	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device is
		 * unexpected: log it and clear the pointer. */
1062		printk(KERN_DEBUG "inetdev_event: bug\n");
1063		rcu_assign_pointer(dev->ip_ptr, NULL);
1064		break;
1065	case NETDEV_UP:
1066		if (!inetdev_valid_mtu(dev->mtu))
1067			break;
		/* A loopback device gets INADDR_LOOPBACK/8 with host scope
		 * when it comes up; allocation failure is silently ignored. */
1068		if (dev->flags & IFF_LOOPBACK) {
1069			struct in_ifaddr *ifa = inet_alloc_ifa();
1070
1071			if (ifa) {
1072				ifa->ifa_local =
1073				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1074				ifa->ifa_prefixlen = 8;
1075				ifa->ifa_mask = inet_make_mask(8);
1076				in_dev_hold(in_dev);
1077				ifa->ifa_dev = in_dev;
1078				ifa->ifa_scope = RT_SCOPE_HOST;
1079				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1080				inet_insert_ifa(ifa);
1081			}
1082		}
1083		ip_mc_up(in_dev);
1084		/* fall through */
1085	case NETDEV_NOTIFY_PEERS:
1086	case NETDEV_CHANGEADDR:
1087		/* Send gratuitous ARP to notify of link change */
1088		if (IN_DEV_ARP_NOTIFY(in_dev)) {
			/* Only the first address in the list is announced. */
1089			struct in_ifaddr *ifa = in_dev->ifa_list;
1090
1091			if (ifa)
1092				arp_send(ARPOP_REQUEST, ETH_P_ARP,
1093					 ifa->ifa_address, dev,
1094					 ifa->ifa_address, NULL,
1095					 dev->dev_addr, NULL);
1096		}
1097		break;
1098	case NETDEV_DOWN:
1099		ip_mc_down(in_dev);
1100		break;
1101	case NETDEV_PRE_TYPE_CHANGE:
1102		ip_mc_unmap(in_dev);
1103		break;
1104	case NETDEV_POST_TYPE_CHANGE:
1105		ip_mc_remap(in_dev);
1106		break;
1107	case NETDEV_CHANGEMTU:
1108		if (inetdev_valid_mtu(dev->mtu))
1109			break;
1110		/* disable IP when MTU is not enough */
		/* deliberate fall through into the teardown below */
1111	case NETDEV_UNREGISTER:
1112		inetdev_destroy(in_dev);
1113		break;
1114	case NETDEV_CHANGENAME:
1115		/* Do not notify about label change, this event is
1116		 * not interesting to applications using netlink.
1117		 */
1118		inetdev_changename(dev, in_dev);
1119
		/* Unregister and register the sysctl entries again
		 * after the rename. */
1120		devinet_sysctl_unregister(in_dev);
1121		devinet_sysctl_register(in_dev);
1122		break;
1123	}
1124out:
1125	return NOTIFY_DONE;
1126}
1127
/* Notifier block whose callback is inetdev_event(). */
1128static struct notifier_block ip_netdev_notifier = {
1129	.notifier_call = inetdev_event,
1130};
1131
1132static inline size_t inet_nlmsg_size(void)
1133{
1134	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1135	       + nla_total_size(4) /* IFA_ADDRESS */
1136	       + nla_total_size(4) /* IFA_LOCAL */
1137	       + nla_total_size(4) /* IFA_BROADCAST */
1138	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1139}
1140
1141static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1142			    u32 pid, u32 seq, int event, unsigned int flags)
1143{
1144	struct ifaddrmsg *ifm;
1145	struct nlmsghdr  *nlh;
1146
1147	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1148	if (nlh == NULL)
1149		return -EMSGSIZE;
1150
1151	ifm = nlmsg_data(nlh);
1152	ifm->ifa_family = AF_INET;
1153	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1154	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1155	ifm->ifa_scope = ifa->ifa_scope;
1156	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1157
1158	if (ifa->ifa_address)
1159		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1160
1161	if (ifa->ifa_local)
1162		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1163
1164	if (ifa->ifa_broadcast)
1165		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1166
1167	if (ifa->ifa_label[0])
1168		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1169
1170	return nlmsg_end(skb, nlh);
1171
1172nla_put_failure:
1173	nlmsg_cancel(skb, nlh);
1174	return -EMSGSIZE;
1175}
1176
/*
 * Netlink dump callback: write one RTM_NEWADDR message (NLM_F_MULTI) per
 * configured address of every device in the namespace into @skb.
 *
 * The position to resume from is kept in cb->args[]:
 *   args[0] - index of the device-index hash bucket
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 * The cursor is saved both when inet_fill_ifaddr() returns <= 0 and when
 * the walk completes.
 *
 * Returns skb->len.
 */
1177static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1178{
1179	struct net *net = sock_net(skb->sk);
1180	int h, s_h;
1181	int idx, s_idx;
1182	int ip_idx, s_ip_idx;
1183	struct net_device *dev;
1184	struct in_device *in_dev;
1185	struct in_ifaddr *ifa;
1186	struct hlist_head *head;
1187	struct hlist_node *node;
1188
1189	s_h = cb->args[0];
1190	s_idx = idx = cb->args[1];
1191	s_ip_idx = ip_idx = cb->args[2];
1192
	/* s_idx only applies to the first bucket visited; it is reset to 0
	 * for every following bucket. */
1193	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1194		idx = 0;
1195		head = &net->dev_index_head[h];
1196		rcu_read_lock();
1197		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1198			if (idx < s_idx)
1199				goto cont;
			/* Past the device the previous pass stopped in, so
			 * addresses are dumped from the first one again. */
1200			if (h > s_h || idx > s_idx)
1201				s_ip_idx = 0;
1202			in_dev = __in_dev_get_rcu(dev);
1203			if (!in_dev)
1204				goto cont;
1205
1206			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1207			     ifa = ifa->ifa_next, ip_idx++) {
1208				if (ip_idx < s_ip_idx)
1209					continue;
				/* Stop here; h/idx/ip_idx are saved below
				 * as the position of this address. */
1210				if (inet_fill_ifaddr(skb, ifa,
1211					     NETLINK_CB(cb->skb).pid,
1212					     cb->nlh->nlmsg_seq,
1213					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1214					rcu_read_unlock();
1215					goto done;
1216				}
1217			}
1218cont:
1219			idx++;
1220		}
1221		rcu_read_unlock();
1222	}
1223
1224done:
1225	cb->args[0] = h;
1226	cb->args[1] = idx;
1227	cb->args[2] = ip_idx;
1228
1229	return skb->len;
1230}
1231
1232static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1233		      u32 pid)
1234{
1235	struct sk_buff *skb;
1236	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1237	int err = -ENOBUFS;
1238	struct net *net;
1239
1240	net = dev_net(ifa->ifa_dev->dev);
1241	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1242	if (skb == NULL)
1243		goto errout;
1244
1245	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1246	if (err < 0) {
1247		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1248		WARN_ON(err == -EMSGSIZE);
1249		kfree_skb(skb);
1250		goto errout;
1251	}
1252	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1253	return;
1254errout:
1255	if (err < 0)
1256		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1257}
1258
1259#ifdef CONFIG_SYSCTL
1260
1261static void devinet_copy_dflt_conf(struct net *net, int i)
1262{
1263	struct net_device *dev;
1264
1265	rcu_read_lock();
1266	for_each_netdev_rcu(net, dev) {
1267		struct in_device *in_dev;
1268
1269		in_dev = __in_dev_get_rcu(dev);
1270		if (in_dev && !test_bit(i, in_dev->cnf.state))
1271			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1272	}
1273	rcu_read_unlock();
1274}
1275
1276/* called with RTNL locked */
1277static void inet_forward_change(struct net *net)
1278{
1279	struct net_device *dev;
1280	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1281
1282	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1283	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1284
1285	for_each_netdev(net, dev) {
1286		struct in_device *in_dev;
1287		if (on)
1288			dev_disable_lro(dev);
1289		rcu_read_lock();
1290		in_dev = __in_dev_get_rcu(dev);
1291		if (in_dev)
1292			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1293		rcu_read_unlock();
1294	}
1295}
1296
1297static int devinet_conf_proc(ctl_table *ctl, int write,
1298			     void __user *buffer,
1299			     size_t *lenp, loff_t *ppos)
1300{
1301	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1302
1303	if (write) {
1304		struct ipv4_devconf *cnf = ctl->extra1;
1305		struct net *net = ctl->extra2;
1306		int i = (int *)ctl->data - cnf->data;
1307
1308		set_bit(i, cnf->state);
1309
1310		if (cnf == net->ipv4.devconf_dflt)
1311			devinet_copy_dflt_conf(net, i);
1312	}
1313
1314	return ret;
1315}
1316
1317static int devinet_sysctl_forward(ctl_table *ctl, int write,
1318				  void __user *buffer,
1319				  size_t *lenp, loff_t *ppos)
1320{
1321	int *valp = ctl->data;
1322	int val = *valp;
1323	loff_t pos = *ppos;
1324	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1325
1326	if (write && *valp != val) {
1327		struct net *net = ctl->extra2;
1328
1329		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1330			if (!rtnl_trylock()) {
1331				/* Restore the original values before restarting */
1332				*valp = val;
1333				*ppos = pos;
1334				return restart_syscall();
1335			}
1336			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1337				inet_forward_change(net);
1338			} else if (*valp) {
1339				struct ipv4_devconf *cnf = ctl->extra1;
1340				struct in_device *idev =
1341					container_of(cnf, struct in_device, cnf);
1342				dev_disable_lro(idev->dev);
1343			}
1344			rtnl_unlock();
1345			rt_cache_flush(net, 0);
1346		}
1347	}
1348
1349	return ret;
1350}
1351
1352int ipv4_doint_and_flush(ctl_table *ctl, int write,
1353			 void __user *buffer,
1354			 size_t *lenp, loff_t *ppos)
1355{
1356	int *valp = ctl->data;
1357	int val = *valp;
1358	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1359	struct net *net = ctl->extra2;
1360
1361	if (write && *valp != val)
1362		rt_cache_flush(net, 0);
1363
1364	return ret;
1365}
1366
/*
 * Build one ctl_table entry for devconf attribute IPV4_DEVCONF_<attr>.
 * The entry points at the matching slot of the global ipv4_devconf
 * (attribute numbers are 1-based, data[] is 0-based) and records that
 * structure in .extra1.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1389
1390static struct devinet_sysctl_table {
1391	struct ctl_table_header *sysctl_header;
1392	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1393	char *dev_name;
1394} devinet_sysctl = {
1395	.devinet_vars = {
1396		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1397					     devinet_sysctl_forward),
1398		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1399
1400		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1401		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1402		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1403		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1404		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1405		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1406					"accept_source_route"),
1407		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1408		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1409		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1410		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1411		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1412		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1413		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1414		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1415		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1416		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1417		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1418		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1419		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1420
1421		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1422		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1423		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1424					      "force_igmp_version"),
1425		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1426					      "promote_secondaries"),
1427	},
1428};
1429
1430static int __devinet_sysctl_register(struct net *net, char *dev_name,
1431					struct ipv4_devconf *p)
1432{
1433	int i;
1434	struct devinet_sysctl_table *t;
1435
1436#define DEVINET_CTL_PATH_DEV	3
1437
1438	struct ctl_path devinet_ctl_path[] = {
1439		{ .procname = "net",  },
1440		{ .procname = "ipv4", },
1441		{ .procname = "conf", },
1442		{ /* to be set */ },
1443		{ },
1444	};
1445
1446	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1447	if (!t)
1448		goto out;
1449
1450	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1451		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1452		t->devinet_vars[i].extra1 = p;
1453		t->devinet_vars[i].extra2 = net;
1454	}
1455
1456	/*
1457	 * Make a copy of dev_name, because '.procname' is regarded as const
1458	 * by sysctl and we wouldn't want anyone to change it under our feet
1459	 * (see SIOCSIFNAME).
1460	 */
1461	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1462	if (!t->dev_name)
1463		goto free;
1464
1465	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1466
1467	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1468			t->devinet_vars);
1469	if (!t->sysctl_header)
1470		goto free_procname;
1471
1472	p->sysctl = t;
1473	return 0;
1474
1475free_procname:
1476	kfree(t->dev_name);
1477free:
1478	kfree(t);
1479out:
1480	return -ENOBUFS;
1481}
1482
1483static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1484{
1485	struct devinet_sysctl_table *t = cnf->sysctl;
1486
1487	if (t == NULL)
1488		return;
1489
1490	cnf->sysctl = NULL;
1491	unregister_sysctl_table(t->sysctl_header);
1492	kfree(t->dev_name);
1493	kfree(t);
1494}
1495
1496static void devinet_sysctl_register(struct in_device *idev)
1497{
1498	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1499	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1500					&idev->cnf);
1501}
1502
1503static void devinet_sysctl_unregister(struct in_device *idev)
1504{
1505	__devinet_sysctl_unregister(&idev->cnf);
1506	neigh_sysctl_unregister(idev->arp_parms);
1507}
1508
1509static struct ctl_table ctl_forward_entry[] = {
1510	{
1511		.procname	= "ip_forward",
1512		.data		= &ipv4_devconf.data[
1513					IPV4_DEVCONF_FORWARDING - 1],
1514		.maxlen		= sizeof(int),
1515		.mode		= 0644,
1516		.proc_handler	= devinet_sysctl_forward,
1517		.extra1		= &ipv4_devconf,
1518		.extra2		= &init_net,
1519	},
1520	{ },
1521};
1522
1523static __net_initdata struct ctl_path net_ipv4_path[] = {
1524	{ .procname = "net", },
1525	{ .procname = "ipv4", },
1526	{ },
1527};
1528#endif
1529
1530static __net_init int devinet_init_net(struct net *net)
1531{
1532	int err;
1533	struct ipv4_devconf *all, *dflt;
1534#ifdef CONFIG_SYSCTL
1535	struct ctl_table *tbl = ctl_forward_entry;
1536	struct ctl_table_header *forw_hdr;
1537#endif
1538
1539	err = -ENOMEM;
1540	all = &ipv4_devconf;
1541	dflt = &ipv4_devconf_dflt;
1542
1543	if (!net_eq(net, &init_net)) {
1544		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1545		if (all == NULL)
1546			goto err_alloc_all;
1547
1548		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1549		if (dflt == NULL)
1550			goto err_alloc_dflt;
1551
1552#ifdef CONFIG_SYSCTL
1553		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1554		if (tbl == NULL)
1555			goto err_alloc_ctl;
1556
1557		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1558		tbl[0].extra1 = all;
1559		tbl[0].extra2 = net;
1560#endif
1561	}
1562
1563#ifdef CONFIG_SYSCTL
1564	err = __devinet_sysctl_register(net, "all", all);
1565	if (err < 0)
1566		goto err_reg_all;
1567
1568	err = __devinet_sysctl_register(net, "default", dflt);
1569	if (err < 0)
1570		goto err_reg_dflt;
1571
1572	err = -ENOMEM;
1573	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1574	if (forw_hdr == NULL)
1575		goto err_reg_ctl;
1576	net->ipv4.forw_hdr = forw_hdr;
1577#endif
1578
1579	net->ipv4.devconf_all = all;
1580	net->ipv4.devconf_dflt = dflt;
1581	return 0;
1582
1583#ifdef CONFIG_SYSCTL
1584err_reg_ctl:
1585	__devinet_sysctl_unregister(dflt);
1586err_reg_dflt:
1587	__devinet_sysctl_unregister(all);
1588err_reg_all:
1589	if (tbl != ctl_forward_entry)
1590		kfree(tbl);
1591err_alloc_ctl:
1592#endif
1593	if (dflt != &ipv4_devconf_dflt)
1594		kfree(dflt);
1595err_alloc_dflt:
1596	if (all != &ipv4_devconf)
1597		kfree(all);
1598err_alloc_all:
1599	return err;
1600}
1601
1602static __net_exit void devinet_exit_net(struct net *net)
1603{
1604#ifdef CONFIG_SYSCTL
1605	struct ctl_table *tbl;
1606
1607	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1608	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1609	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1610	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1611	kfree(tbl);
1612#endif
1613	kfree(net->ipv4.devconf_dflt);
1614	kfree(net->ipv4.devconf_all);
1615}
1616
1617static __net_initdata struct pernet_operations devinet_ops = {
1618	.init = devinet_init_net,
1619	.exit = devinet_exit_net,
1620};
1621
1622void __init devinet_init(void)
1623{
1624	register_pernet_subsys(&devinet_ops);
1625
1626	register_gifconf(PF_INET, inet_gifconf);
1627	register_netdevice_notifier(&ip_netdev_notifier);
1628
1629	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1630	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1631	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1632}
1633
1634