devinet.c revision 88af182e389097997c5e2a0b42285b3522796759
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#ifdef CONFIG_SYSCTL
54#include <linux/sysctl.h>
55#endif
56#include <linux/kmod.h>
57
58#include <net/arp.h>
59#include <net/ip.h>
60#include <net/route.h>
61#include <net/ip_fib.h>
62#include <net/rtnetlink.h>
63#include <net/net_namespace.h>
64
65static struct ipv4_devconf ipv4_devconf = {
66	.data = {
67		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71	},
72};
73
74static struct ipv4_devconf ipv4_devconf_dflt = {
75	.data = {
76		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81	},
82};
83
/*
 * Read configuration entry @attr from the default devconf of namespace
 * @net.  @net is parenthesised so that any pointer-valued expression
 * (e.g. "nets + 1") can be passed, not only a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88	[IFA_LOCAL]     	= { .type = NLA_U32 },
89	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92};
93
94static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98			 int destroy);
99#ifdef CONFIG_SYSCTL
100static void devinet_sysctl_register(struct in_device *idev);
101static void devinet_sysctl_unregister(struct in_device *idev);
102#else
103static inline void devinet_sysctl_register(struct in_device *idev)
104{
105}
106static inline void devinet_sysctl_unregister(struct in_device *idev)
107{
108}
109#endif
110
111/* Locks all the inet devices. */
112
113static struct in_ifaddr *inet_alloc_ifa(void)
114{
115	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
116}
117
118static void inet_rcu_free_ifa(struct rcu_head *head)
119{
120	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
121	if (ifa->ifa_dev)
122		in_dev_put(ifa->ifa_dev);
123	kfree(ifa);
124}
125
126static inline void inet_free_ifa(struct in_ifaddr *ifa)
127{
128	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
129}
130
131void in_dev_finish_destroy(struct in_device *idev)
132{
133	struct net_device *dev = idev->dev;
134
135	WARN_ON(idev->ifa_list);
136	WARN_ON(idev->mc_list);
137#ifdef NET_REFCNT_DEBUG
138	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139	       idev, dev ? dev->name : "NIL");
140#endif
141	dev_put(dev);
142	if (!idev->dead)
143		pr_err("Freeing alive in_device %p\n", idev);
144	else
145		kfree(idev);
146}
147EXPORT_SYMBOL(in_dev_finish_destroy);
148
149static struct in_device *inetdev_init(struct net_device *dev)
150{
151	struct in_device *in_dev;
152
153	ASSERT_RTNL();
154
155	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
156	if (!in_dev)
157		goto out;
158	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159			sizeof(in_dev->cnf));
160	in_dev->cnf.sysctl = NULL;
161	in_dev->dev = dev;
162	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
163	if (!in_dev->arp_parms)
164		goto out_kfree;
165	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
166		dev_disable_lro(dev);
167	/* Reference in_dev->dev */
168	dev_hold(dev);
169	/* Account for reference dev->ip_ptr (below) */
170	in_dev_hold(in_dev);
171
172	devinet_sysctl_register(in_dev);
173	ip_mc_init_dev(in_dev);
174	if (dev->flags & IFF_UP)
175		ip_mc_up(in_dev);
176
177	/* we can receive as soon as ip_ptr is set -- do this last */
178	rcu_assign_pointer(dev->ip_ptr, in_dev);
179out:
180	return in_dev;
181out_kfree:
182	kfree(in_dev);
183	in_dev = NULL;
184	goto out;
185}
186
187static void in_dev_rcu_put(struct rcu_head *head)
188{
189	struct in_device *idev = container_of(head, struct in_device, rcu_head);
190	in_dev_put(idev);
191}
192
193static void inetdev_destroy(struct in_device *in_dev)
194{
195	struct in_ifaddr *ifa;
196	struct net_device *dev;
197
198	ASSERT_RTNL();
199
200	dev = in_dev->dev;
201
202	in_dev->dead = 1;
203
204	ip_mc_destroy_dev(in_dev);
205
206	while ((ifa = in_dev->ifa_list) != NULL) {
207		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
208		inet_free_ifa(ifa);
209	}
210
211	dev->ip_ptr = NULL;
212
213	devinet_sysctl_unregister(in_dev);
214	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
215	arp_ifdown(dev);
216
217	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
218}
219
220int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
221{
222	rcu_read_lock();
223	for_primary_ifa(in_dev) {
224		if (inet_ifa_match(a, ifa)) {
225			if (!b || inet_ifa_match(b, ifa)) {
226				rcu_read_unlock();
227				return 1;
228			}
229		}
230	} endfor_ifa(in_dev);
231	rcu_read_unlock();
232	return 0;
233}
234
235static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
236			 int destroy, struct nlmsghdr *nlh, u32 pid)
237{
238	struct in_ifaddr *promote = NULL;
239	struct in_ifaddr *ifa, *ifa1 = *ifap;
240	struct in_ifaddr *last_prim = in_dev->ifa_list;
241	struct in_ifaddr *prev_prom = NULL;
242	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
243
244	ASSERT_RTNL();
245
246	/* 1. Deleting primary ifaddr forces deletion all secondaries
247	 * unless alias promotion is set
248	 **/
249
250	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
251		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
252
253		while ((ifa = *ifap1) != NULL) {
254			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
255			    ifa1->ifa_scope <= ifa->ifa_scope)
256				last_prim = ifa;
257
258			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
259			    ifa1->ifa_mask != ifa->ifa_mask ||
260			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
261				ifap1 = &ifa->ifa_next;
262				prev_prom = ifa;
263				continue;
264			}
265
266			if (!do_promote) {
267				*ifap1 = ifa->ifa_next;
268
269				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
270				blocking_notifier_call_chain(&inetaddr_chain,
271						NETDEV_DOWN, ifa);
272				inet_free_ifa(ifa);
273			} else {
274				promote = ifa;
275				break;
276			}
277		}
278	}
279
280	/* 2. Unlink it */
281
282	*ifap = ifa1->ifa_next;
283
284	/* 3. Announce address deletion */
285
286	/* Send message first, then call notifier.
287	   At first sight, FIB update triggered by notifier
288	   will refer to already deleted ifaddr, that could confuse
289	   netlink listeners. It is not true: look, gated sees
290	   that route deleted and if it still thinks that ifaddr
291	   is valid, it will try to restore deleted routes... Grr.
292	   So that, this order is correct.
293	 */
294	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
295	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
296
297	if (promote) {
298
299		if (prev_prom) {
300			prev_prom->ifa_next = promote->ifa_next;
301			promote->ifa_next = last_prim->ifa_next;
302			last_prim->ifa_next = promote;
303		}
304
305		promote->ifa_flags &= ~IFA_F_SECONDARY;
306		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
307		blocking_notifier_call_chain(&inetaddr_chain,
308				NETDEV_UP, promote);
309		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
310			if (ifa1->ifa_mask != ifa->ifa_mask ||
311			    !inet_ifa_match(ifa1->ifa_address, ifa))
312					continue;
313			fib_add_ifaddr(ifa);
314		}
315
316	}
317	if (destroy)
318		inet_free_ifa(ifa1);
319}
320
321static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322			 int destroy)
323{
324	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
325}
326
327static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
328			     u32 pid)
329{
330	struct in_device *in_dev = ifa->ifa_dev;
331	struct in_ifaddr *ifa1, **ifap, **last_primary;
332
333	ASSERT_RTNL();
334
335	if (!ifa->ifa_local) {
336		inet_free_ifa(ifa);
337		return 0;
338	}
339
340	ifa->ifa_flags &= ~IFA_F_SECONDARY;
341	last_primary = &in_dev->ifa_list;
342
343	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
344	     ifap = &ifa1->ifa_next) {
345		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
346		    ifa->ifa_scope <= ifa1->ifa_scope)
347			last_primary = &ifa1->ifa_next;
348		if (ifa1->ifa_mask == ifa->ifa_mask &&
349		    inet_ifa_match(ifa1->ifa_address, ifa)) {
350			if (ifa1->ifa_local == ifa->ifa_local) {
351				inet_free_ifa(ifa);
352				return -EEXIST;
353			}
354			if (ifa1->ifa_scope != ifa->ifa_scope) {
355				inet_free_ifa(ifa);
356				return -EINVAL;
357			}
358			ifa->ifa_flags |= IFA_F_SECONDARY;
359		}
360	}
361
362	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
363		net_srandom(ifa->ifa_local);
364		ifap = last_primary;
365	}
366
367	ifa->ifa_next = *ifap;
368	*ifap = ifa;
369
370	/* Send message first, then call notifier.
371	   Notifier will trigger FIB update, so that
372	   listeners of netlink will know about new ifaddr */
373	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
374	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
375
376	return 0;
377}
378
379static int inet_insert_ifa(struct in_ifaddr *ifa)
380{
381	return __inet_insert_ifa(ifa, NULL, 0);
382}
383
384static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
385{
386	struct in_device *in_dev = __in_dev_get_rtnl(dev);
387
388	ASSERT_RTNL();
389
390	if (!in_dev) {
391		inet_free_ifa(ifa);
392		return -ENOBUFS;
393	}
394	ipv4_devconf_setall(in_dev);
395	if (ifa->ifa_dev != in_dev) {
396		WARN_ON(ifa->ifa_dev);
397		in_dev_hold(in_dev);
398		ifa->ifa_dev = in_dev;
399	}
400	if (ipv4_is_loopback(ifa->ifa_local))
401		ifa->ifa_scope = RT_SCOPE_HOST;
402	return inet_insert_ifa(ifa);
403}
404
405struct in_device *inetdev_by_index(struct net *net, int ifindex)
406{
407	struct net_device *dev;
408	struct in_device *in_dev = NULL;
409
410	rcu_read_lock();
411	dev = dev_get_by_index_rcu(net, ifindex);
412	if (dev)
413		in_dev = in_dev_get(dev);
414	rcu_read_unlock();
415	return in_dev;
416}
417EXPORT_SYMBOL(inetdev_by_index);
418
419/* Called only from RTNL semaphored context. No locks. */
420
421struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
422				    __be32 mask)
423{
424	ASSERT_RTNL();
425
426	for_primary_ifa(in_dev) {
427		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
428			return ifa;
429	} endfor_ifa(in_dev);
430	return NULL;
431}
432
433static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
434{
435	struct net *net = sock_net(skb->sk);
436	struct nlattr *tb[IFA_MAX+1];
437	struct in_device *in_dev;
438	struct ifaddrmsg *ifm;
439	struct in_ifaddr *ifa, **ifap;
440	int err = -EINVAL;
441
442	ASSERT_RTNL();
443
444	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
445	if (err < 0)
446		goto errout;
447
448	ifm = nlmsg_data(nlh);
449	in_dev = inetdev_by_index(net, ifm->ifa_index);
450	if (in_dev == NULL) {
451		err = -ENODEV;
452		goto errout;
453	}
454
455	__in_dev_put(in_dev);
456
457	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
458	     ifap = &ifa->ifa_next) {
459		if (tb[IFA_LOCAL] &&
460		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
461			continue;
462
463		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
464			continue;
465
466		if (tb[IFA_ADDRESS] &&
467		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
468		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
469			continue;
470
471		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
472		return 0;
473	}
474
475	err = -EADDRNOTAVAIL;
476errout:
477	return err;
478}
479
480static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
481{
482	struct nlattr *tb[IFA_MAX+1];
483	struct in_ifaddr *ifa;
484	struct ifaddrmsg *ifm;
485	struct net_device *dev;
486	struct in_device *in_dev;
487	int err;
488
489	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
490	if (err < 0)
491		goto errout;
492
493	ifm = nlmsg_data(nlh);
494	err = -EINVAL;
495	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
496		goto errout;
497
498	dev = __dev_get_by_index(net, ifm->ifa_index);
499	err = -ENODEV;
500	if (dev == NULL)
501		goto errout;
502
503	in_dev = __in_dev_get_rtnl(dev);
504	err = -ENOBUFS;
505	if (in_dev == NULL)
506		goto errout;
507
508	ifa = inet_alloc_ifa();
509	if (ifa == NULL)
510		/*
511		 * A potential indev allocation can be left alive, it stays
512		 * assigned to its device and is destroy with it.
513		 */
514		goto errout;
515
516	ipv4_devconf_setall(in_dev);
517	in_dev_hold(in_dev);
518
519	if (tb[IFA_ADDRESS] == NULL)
520		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
521
522	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
523	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
524	ifa->ifa_flags = ifm->ifa_flags;
525	ifa->ifa_scope = ifm->ifa_scope;
526	ifa->ifa_dev = in_dev;
527
528	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
529	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
530
531	if (tb[IFA_BROADCAST])
532		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
533
534	if (tb[IFA_LABEL])
535		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
536	else
537		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
538
539	return ifa;
540
541errout:
542	return ERR_PTR(err);
543}
544
545static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
546{
547	struct net *net = sock_net(skb->sk);
548	struct in_ifaddr *ifa;
549
550	ASSERT_RTNL();
551
552	ifa = rtm_to_ifaddr(net, nlh);
553	if (IS_ERR(ifa))
554		return PTR_ERR(ifa);
555
556	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
557}
558
559/*
560 *	Determine a default network mask, based on the IP address.
561 */
562
563static inline int inet_abc_len(__be32 addr)
564{
565	int rc = -1;	/* Something else, probably a multicast. */
566
567	if (ipv4_is_zeronet(addr))
568		rc = 0;
569	else {
570		__u32 haddr = ntohl(addr);
571
572		if (IN_CLASSA(haddr))
573			rc = 8;
574		else if (IN_CLASSB(haddr))
575			rc = 16;
576		else if (IN_CLASSC(haddr))
577			rc = 24;
578	}
579
580	return rc;
581}
582
583
584int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
585{
586	struct ifreq ifr;
587	struct sockaddr_in sin_orig;
588	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
589	struct in_device *in_dev;
590	struct in_ifaddr **ifap = NULL;
591	struct in_ifaddr *ifa = NULL;
592	struct net_device *dev;
593	char *colon;
594	int ret = -EFAULT;
595	int tryaddrmatch = 0;
596
597	/*
598	 *	Fetch the caller's info block into kernel space
599	 */
600
601	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
602		goto out;
603	ifr.ifr_name[IFNAMSIZ - 1] = 0;
604
605	/* save original address for comparison */
606	memcpy(&sin_orig, sin, sizeof(*sin));
607
608	colon = strchr(ifr.ifr_name, ':');
609	if (colon)
610		*colon = 0;
611
612	dev_load(net, ifr.ifr_name);
613
614	switch (cmd) {
615	case SIOCGIFADDR:	/* Get interface address */
616	case SIOCGIFBRDADDR:	/* Get the broadcast address */
617	case SIOCGIFDSTADDR:	/* Get the destination address */
618	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
619		/* Note that these ioctls will not sleep,
620		   so that we do not impose a lock.
621		   One day we will be forced to put shlock here (I mean SMP)
622		 */
623		tryaddrmatch = (sin_orig.sin_family == AF_INET);
624		memset(sin, 0, sizeof(*sin));
625		sin->sin_family = AF_INET;
626		break;
627
628	case SIOCSIFFLAGS:
629		ret = -EACCES;
630		if (!capable(CAP_NET_ADMIN))
631			goto out;
632		break;
633	case SIOCSIFADDR:	/* Set interface address (and family) */
634	case SIOCSIFBRDADDR:	/* Set the broadcast address */
635	case SIOCSIFDSTADDR:	/* Set the destination address */
636	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
637		ret = -EACCES;
638		if (!capable(CAP_NET_ADMIN))
639			goto out;
640		ret = -EINVAL;
641		if (sin->sin_family != AF_INET)
642			goto out;
643		break;
644	default:
645		ret = -EINVAL;
646		goto out;
647	}
648
649	rtnl_lock();
650
651	ret = -ENODEV;
652	dev = __dev_get_by_name(net, ifr.ifr_name);
653	if (!dev)
654		goto done;
655
656	if (colon)
657		*colon = ':';
658
659	in_dev = __in_dev_get_rtnl(dev);
660	if (in_dev) {
661		if (tryaddrmatch) {
662			/* Matthias Andree */
663			/* compare label and address (4.4BSD style) */
664			/* note: we only do this for a limited set of ioctls
665			   and only if the original address family was AF_INET.
666			   This is checked above. */
667			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
668			     ifap = &ifa->ifa_next) {
669				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
670				    sin_orig.sin_addr.s_addr ==
671							ifa->ifa_address) {
672					break; /* found */
673				}
674			}
675		}
676		/* we didn't get a match, maybe the application is
677		   4.3BSD-style and passed in junk so we fall back to
678		   comparing just the label */
679		if (!ifa) {
680			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
681			     ifap = &ifa->ifa_next)
682				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
683					break;
684		}
685	}
686
687	ret = -EADDRNOTAVAIL;
688	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
689		goto done;
690
691	switch (cmd) {
692	case SIOCGIFADDR:	/* Get interface address */
693		sin->sin_addr.s_addr = ifa->ifa_local;
694		goto rarok;
695
696	case SIOCGIFBRDADDR:	/* Get the broadcast address */
697		sin->sin_addr.s_addr = ifa->ifa_broadcast;
698		goto rarok;
699
700	case SIOCGIFDSTADDR:	/* Get the destination address */
701		sin->sin_addr.s_addr = ifa->ifa_address;
702		goto rarok;
703
704	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
705		sin->sin_addr.s_addr = ifa->ifa_mask;
706		goto rarok;
707
708	case SIOCSIFFLAGS:
709		if (colon) {
710			ret = -EADDRNOTAVAIL;
711			if (!ifa)
712				break;
713			ret = 0;
714			if (!(ifr.ifr_flags & IFF_UP))
715				inet_del_ifa(in_dev, ifap, 1);
716			break;
717		}
718		ret = dev_change_flags(dev, ifr.ifr_flags);
719		break;
720
721	case SIOCSIFADDR:	/* Set interface address (and family) */
722		ret = -EINVAL;
723		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
724			break;
725
726		if (!ifa) {
727			ret = -ENOBUFS;
728			ifa = inet_alloc_ifa();
729			if (!ifa)
730				break;
731			if (colon)
732				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
733			else
734				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
735		} else {
736			ret = 0;
737			if (ifa->ifa_local == sin->sin_addr.s_addr)
738				break;
739			inet_del_ifa(in_dev, ifap, 0);
740			ifa->ifa_broadcast = 0;
741			ifa->ifa_scope = 0;
742		}
743
744		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
745
746		if (!(dev->flags & IFF_POINTOPOINT)) {
747			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
748			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
749			if ((dev->flags & IFF_BROADCAST) &&
750			    ifa->ifa_prefixlen < 31)
751				ifa->ifa_broadcast = ifa->ifa_address |
752						     ~ifa->ifa_mask;
753		} else {
754			ifa->ifa_prefixlen = 32;
755			ifa->ifa_mask = inet_make_mask(32);
756		}
757		ret = inet_set_ifa(dev, ifa);
758		break;
759
760	case SIOCSIFBRDADDR:	/* Set the broadcast address */
761		ret = 0;
762		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
763			inet_del_ifa(in_dev, ifap, 0);
764			ifa->ifa_broadcast = sin->sin_addr.s_addr;
765			inet_insert_ifa(ifa);
766		}
767		break;
768
769	case SIOCSIFDSTADDR:	/* Set the destination address */
770		ret = 0;
771		if (ifa->ifa_address == sin->sin_addr.s_addr)
772			break;
773		ret = -EINVAL;
774		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
775			break;
776		ret = 0;
777		inet_del_ifa(in_dev, ifap, 0);
778		ifa->ifa_address = sin->sin_addr.s_addr;
779		inet_insert_ifa(ifa);
780		break;
781
782	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
783
784		/*
785		 *	The mask we set must be legal.
786		 */
787		ret = -EINVAL;
788		if (bad_mask(sin->sin_addr.s_addr, 0))
789			break;
790		ret = 0;
791		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
792			__be32 old_mask = ifa->ifa_mask;
793			inet_del_ifa(in_dev, ifap, 0);
794			ifa->ifa_mask = sin->sin_addr.s_addr;
795			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
796
797			/* See if current broadcast address matches
798			 * with current netmask, then recalculate
799			 * the broadcast address. Otherwise it's a
800			 * funny address, so don't touch it since
801			 * the user seems to know what (s)he's doing...
802			 */
803			if ((dev->flags & IFF_BROADCAST) &&
804			    (ifa->ifa_prefixlen < 31) &&
805			    (ifa->ifa_broadcast ==
806			     (ifa->ifa_local|~old_mask))) {
807				ifa->ifa_broadcast = (ifa->ifa_local |
808						      ~sin->sin_addr.s_addr);
809			}
810			inet_insert_ifa(ifa);
811		}
812		break;
813	}
814done:
815	rtnl_unlock();
816out:
817	return ret;
818rarok:
819	rtnl_unlock();
820	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
821	goto out;
822}
823
824static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
825{
826	struct in_device *in_dev = __in_dev_get_rtnl(dev);
827	struct in_ifaddr *ifa;
828	struct ifreq ifr;
829	int done = 0;
830
831	if (!in_dev)
832		goto out;
833
834	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
835		if (!buf) {
836			done += sizeof(ifr);
837			continue;
838		}
839		if (len < (int) sizeof(ifr))
840			break;
841		memset(&ifr, 0, sizeof(struct ifreq));
842		if (ifa->ifa_label)
843			strcpy(ifr.ifr_name, ifa->ifa_label);
844		else
845			strcpy(ifr.ifr_name, dev->name);
846
847		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
848		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
849								ifa->ifa_local;
850
851		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
852			done = -EFAULT;
853			break;
854		}
855		buf  += sizeof(struct ifreq);
856		len  -= sizeof(struct ifreq);
857		done += sizeof(struct ifreq);
858	}
859out:
860	return done;
861}
862
863__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
864{
865	__be32 addr = 0;
866	struct in_device *in_dev;
867	struct net *net = dev_net(dev);
868
869	rcu_read_lock();
870	in_dev = __in_dev_get_rcu(dev);
871	if (!in_dev)
872		goto no_in_dev;
873
874	for_primary_ifa(in_dev) {
875		if (ifa->ifa_scope > scope)
876			continue;
877		if (!dst || inet_ifa_match(dst, ifa)) {
878			addr = ifa->ifa_local;
879			break;
880		}
881		if (!addr)
882			addr = ifa->ifa_local;
883	} endfor_ifa(in_dev);
884
885	if (addr)
886		goto out_unlock;
887no_in_dev:
888
889	/* Not loopback addresses on loopback should be preferred
890	   in this case. It is importnat that lo is the first interface
891	   in dev_base list.
892	 */
893	for_each_netdev_rcu(net, dev) {
894		in_dev = __in_dev_get_rcu(dev);
895		if (!in_dev)
896			continue;
897
898		for_primary_ifa(in_dev) {
899			if (ifa->ifa_scope != RT_SCOPE_LINK &&
900			    ifa->ifa_scope <= scope) {
901				addr = ifa->ifa_local;
902				goto out_unlock;
903			}
904		} endfor_ifa(in_dev);
905	}
906out_unlock:
907	rcu_read_unlock();
908	return addr;
909}
910EXPORT_SYMBOL(inet_select_addr);
911
912static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
913			      __be32 local, int scope)
914{
915	int same = 0;
916	__be32 addr = 0;
917
918	for_ifa(in_dev) {
919		if (!addr &&
920		    (local == ifa->ifa_local || !local) &&
921		    ifa->ifa_scope <= scope) {
922			addr = ifa->ifa_local;
923			if (same)
924				break;
925		}
926		if (!same) {
927			same = (!local || inet_ifa_match(local, ifa)) &&
928				(!dst || inet_ifa_match(dst, ifa));
929			if (same && addr) {
930				if (local || !dst)
931					break;
932				/* Is the selected addr into dst subnet? */
933				if (inet_ifa_match(addr, ifa))
934					break;
935				/* No, then can we use new local src? */
936				if (ifa->ifa_scope <= scope) {
937					addr = ifa->ifa_local;
938					break;
939				}
940				/* search for large dst subnet for addr */
941				same = 0;
942			}
943		}
944	} endfor_ifa(in_dev);
945
946	return same ? addr : 0;
947}
948
949/*
950 * Confirm that local IP address exists using wildcards:
951 * - in_dev: only on this interface, 0=any interface
952 * - dst: only in the same subnet as dst, 0=any dst
953 * - local: address, 0=autoselect the local address
954 * - scope: maximum allowed scope value for the local address
955 */
956__be32 inet_confirm_addr(struct in_device *in_dev,
957			 __be32 dst, __be32 local, int scope)
958{
959	__be32 addr = 0;
960	struct net_device *dev;
961	struct net *net;
962
963	if (scope != RT_SCOPE_LINK)
964		return confirm_addr_indev(in_dev, dst, local, scope);
965
966	net = dev_net(in_dev->dev);
967	rcu_read_lock();
968	for_each_netdev_rcu(net, dev) {
969		in_dev = __in_dev_get_rcu(dev);
970		if (in_dev) {
971			addr = confirm_addr_indev(in_dev, dst, local, scope);
972			if (addr)
973				break;
974		}
975	}
976	rcu_read_unlock();
977
978	return addr;
979}
980
981/*
982 *	Device notifier
983 */
984
985int register_inetaddr_notifier(struct notifier_block *nb)
986{
987	return blocking_notifier_chain_register(&inetaddr_chain, nb);
988}
989EXPORT_SYMBOL(register_inetaddr_notifier);
990
991int unregister_inetaddr_notifier(struct notifier_block *nb)
992{
993	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
994}
995EXPORT_SYMBOL(unregister_inetaddr_notifier);
996
997/* Rename ifa_labels for a device name change. Make some effort to preserve
998 * existing alias numbering and to create unique labels if possible.
999*/
1000static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1001{
1002	struct in_ifaddr *ifa;
1003	int named = 0;
1004
1005	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1006		char old[IFNAMSIZ], *dot;
1007
1008		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1009		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1010		if (named++ == 0)
1011			goto skip;
1012		dot = strchr(old, ':');
1013		if (dot == NULL) {
1014			sprintf(old, ":%d", named);
1015			dot = old;
1016		}
1017		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1018			strcat(ifa->ifa_label, dot);
1019		else
1020			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1021skip:
1022		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1023	}
1024}
1025
/* True when @mtu is at least 68, the smallest MTU IPv4 is kept enabled for. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	return !(mtu < min_mtu);
}
1030
1031/* Called only under RTNL semaphore */
1032
1033static int inetdev_event(struct notifier_block *this, unsigned long event,
1034			 void *ptr)
1035{
1036	struct net_device *dev = ptr;
1037	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1038
1039	ASSERT_RTNL();
1040
1041	if (!in_dev) {
1042		if (event == NETDEV_REGISTER) {
1043			in_dev = inetdev_init(dev);
1044			if (!in_dev)
1045				return notifier_from_errno(-ENOMEM);
1046			if (dev->flags & IFF_LOOPBACK) {
1047				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1048				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1049			}
1050		} else if (event == NETDEV_CHANGEMTU) {
1051			/* Re-enabling IP */
1052			if (inetdev_valid_mtu(dev->mtu))
1053				in_dev = inetdev_init(dev);
1054		}
1055		goto out;
1056	}
1057
1058	switch (event) {
1059	case NETDEV_REGISTER:
1060		printk(KERN_DEBUG "inetdev_event: bug\n");
1061		dev->ip_ptr = NULL;
1062		break;
1063	case NETDEV_UP:
1064		if (!inetdev_valid_mtu(dev->mtu))
1065			break;
1066		if (dev->flags & IFF_LOOPBACK) {
1067			struct in_ifaddr *ifa = inet_alloc_ifa();
1068
1069			if (ifa) {
1070				ifa->ifa_local =
1071				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1072				ifa->ifa_prefixlen = 8;
1073				ifa->ifa_mask = inet_make_mask(8);
1074				in_dev_hold(in_dev);
1075				ifa->ifa_dev = in_dev;
1076				ifa->ifa_scope = RT_SCOPE_HOST;
1077				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1078				inet_insert_ifa(ifa);
1079			}
1080		}
1081		ip_mc_up(in_dev);
1082		/* fall through */
1083	case NETDEV_CHANGEADDR:
1084		/* Send gratuitous ARP to notify of link change */
1085		if (IN_DEV_ARP_NOTIFY(in_dev)) {
1086			struct in_ifaddr *ifa = in_dev->ifa_list;
1087
1088			if (ifa)
1089				arp_send(ARPOP_REQUEST, ETH_P_ARP,
1090					 ifa->ifa_address, dev,
1091					 ifa->ifa_address, NULL,
1092					 dev->dev_addr, NULL);
1093		}
1094		break;
1095	case NETDEV_DOWN:
1096		ip_mc_down(in_dev);
1097		break;
1098	case NETDEV_BONDING_OLDTYPE:
1099		ip_mc_unmap(in_dev);
1100		break;
1101	case NETDEV_BONDING_NEWTYPE:
1102		ip_mc_remap(in_dev);
1103		break;
1104	case NETDEV_CHANGEMTU:
1105		if (inetdev_valid_mtu(dev->mtu))
1106			break;
1107		/* disable IP when MTU is not enough */
1108	case NETDEV_UNREGISTER:
1109		inetdev_destroy(in_dev);
1110		break;
1111	case NETDEV_CHANGENAME:
1112		/* Do not notify about label change, this event is
1113		 * not interesting to applications using netlink.
1114		 */
1115		inetdev_changename(dev, in_dev);
1116
1117		devinet_sysctl_unregister(in_dev);
1118		devinet_sysctl_register(in_dev);
1119		break;
1120	}
1121out:
1122	return NOTIFY_DONE;
1123}
1124
1125static struct notifier_block ip_netdev_notifier = {
1126	.notifier_call = inetdev_event,
1127};
1128
1129static inline size_t inet_nlmsg_size(void)
1130{
1131	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1132	       + nla_total_size(4) /* IFA_ADDRESS */
1133	       + nla_total_size(4) /* IFA_LOCAL */
1134	       + nla_total_size(4) /* IFA_BROADCAST */
1135	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1136}
1137
1138static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1139			    u32 pid, u32 seq, int event, unsigned int flags)
1140{
1141	struct ifaddrmsg *ifm;
1142	struct nlmsghdr  *nlh;
1143
1144	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1145	if (nlh == NULL)
1146		return -EMSGSIZE;
1147
1148	ifm = nlmsg_data(nlh);
1149	ifm->ifa_family = AF_INET;
1150	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1151	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1152	ifm->ifa_scope = ifa->ifa_scope;
1153	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1154
1155	if (ifa->ifa_address)
1156		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1157
1158	if (ifa->ifa_local)
1159		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1160
1161	if (ifa->ifa_broadcast)
1162		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1163
1164	if (ifa->ifa_label[0])
1165		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1166
1167	return nlmsg_end(skb, nlh);
1168
1169nla_put_failure:
1170	nlmsg_cancel(skb, nlh);
1171	return -EMSGSIZE;
1172}
1173
1174static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1175{
1176	struct net *net = sock_net(skb->sk);
1177	int h, s_h;
1178	int idx, s_idx;
1179	int ip_idx, s_ip_idx;
1180	struct net_device *dev;
1181	struct in_device *in_dev;
1182	struct in_ifaddr *ifa;
1183	struct hlist_head *head;
1184	struct hlist_node *node;
1185
1186	s_h = cb->args[0];
1187	s_idx = idx = cb->args[1];
1188	s_ip_idx = ip_idx = cb->args[2];
1189
1190	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1191		idx = 0;
1192		head = &net->dev_index_head[h];
1193		rcu_read_lock();
1194		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1195			if (idx < s_idx)
1196				goto cont;
1197			if (idx > s_idx)
1198				s_ip_idx = 0;
1199			in_dev = __in_dev_get_rcu(dev);
1200			if (!in_dev)
1201				goto cont;
1202
1203			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1204			     ifa = ifa->ifa_next, ip_idx++) {
1205				if (ip_idx < s_ip_idx)
1206					continue;
1207				if (inet_fill_ifaddr(skb, ifa,
1208					     NETLINK_CB(cb->skb).pid,
1209					     cb->nlh->nlmsg_seq,
1210					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1211					rcu_read_unlock();
1212					goto done;
1213				}
1214			}
1215cont:
1216			idx++;
1217		}
1218		rcu_read_unlock();
1219	}
1220
1221done:
1222	cb->args[0] = h;
1223	cb->args[1] = idx;
1224	cb->args[2] = ip_idx;
1225
1226	return skb->len;
1227}
1228
1229static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1230		      u32 pid)
1231{
1232	struct sk_buff *skb;
1233	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1234	int err = -ENOBUFS;
1235	struct net *net;
1236
1237	net = dev_net(ifa->ifa_dev->dev);
1238	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1239	if (skb == NULL)
1240		goto errout;
1241
1242	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1243	if (err < 0) {
1244		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1245		WARN_ON(err == -EMSGSIZE);
1246		kfree_skb(skb);
1247		goto errout;
1248	}
1249	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1250	return;
1251errout:
1252	if (err < 0)
1253		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1254}
1255
1256#ifdef CONFIG_SYSCTL
1257
1258static void devinet_copy_dflt_conf(struct net *net, int i)
1259{
1260	struct net_device *dev;
1261
1262	rcu_read_lock();
1263	for_each_netdev_rcu(net, dev) {
1264		struct in_device *in_dev;
1265
1266		in_dev = __in_dev_get_rcu(dev);
1267		if (in_dev && !test_bit(i, in_dev->cnf.state))
1268			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1269	}
1270	rcu_read_unlock();
1271}
1272
1273/* called with RTNL locked */
1274static void inet_forward_change(struct net *net)
1275{
1276	struct net_device *dev;
1277	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1278
1279	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1280	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1281
1282	for_each_netdev(net, dev) {
1283		struct in_device *in_dev;
1284		if (on)
1285			dev_disable_lro(dev);
1286		rcu_read_lock();
1287		in_dev = __in_dev_get_rcu(dev);
1288		if (in_dev)
1289			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1290		rcu_read_unlock();
1291	}
1292}
1293
1294static int devinet_conf_proc(ctl_table *ctl, int write,
1295			     void __user *buffer,
1296			     size_t *lenp, loff_t *ppos)
1297{
1298	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1299
1300	if (write) {
1301		struct ipv4_devconf *cnf = ctl->extra1;
1302		struct net *net = ctl->extra2;
1303		int i = (int *)ctl->data - cnf->data;
1304
1305		set_bit(i, cnf->state);
1306
1307		if (cnf == net->ipv4.devconf_dflt)
1308			devinet_copy_dflt_conf(net, i);
1309	}
1310
1311	return ret;
1312}
1313
1314static int devinet_sysctl_forward(ctl_table *ctl, int write,
1315				  void __user *buffer,
1316				  size_t *lenp, loff_t *ppos)
1317{
1318	int *valp = ctl->data;
1319	int val = *valp;
1320	loff_t pos = *ppos;
1321	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1322
1323	if (write && *valp != val) {
1324		struct net *net = ctl->extra2;
1325
1326		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1327			if (!rtnl_trylock()) {
1328				/* Restore the original values before restarting */
1329				*valp = val;
1330				*ppos = pos;
1331				return restart_syscall();
1332			}
1333			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1334				inet_forward_change(net);
1335			} else if (*valp) {
1336				struct ipv4_devconf *cnf = ctl->extra1;
1337				struct in_device *idev =
1338					container_of(cnf, struct in_device, cnf);
1339				dev_disable_lro(idev->dev);
1340			}
1341			rtnl_unlock();
1342			rt_cache_flush(net, 0);
1343		}
1344	}
1345
1346	return ret;
1347}
1348
1349int ipv4_doint_and_flush(ctl_table *ctl, int write,
1350			 void __user *buffer,
1351			 size_t *lenp, loff_t *ppos)
1352{
1353	int *valp = ctl->data;
1354	int val = *valp;
1355	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1356	struct net *net = ctl->extra2;
1357
1358	if (write && *valp != val)
1359		rt_cache_flush(net, 0);
1360
1361	return ret;
1362}
1363
/*
 * Initializer for one ctl_table slot of the devinet template.
 *
 * @attr is the NET_IPV4_CONF_* suffix; .data points at the matching
 * slot of the global ipv4_devconf (index NET_IPV4_CONF_<attr> - 1).
 * @name is the proc file name, @mval its mode and @proc its handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[ \
					NET_IPV4_CONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush() */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1386
1387static struct devinet_sysctl_table {
1388	struct ctl_table_header *sysctl_header;
1389	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1390	char *dev_name;
1391} devinet_sysctl = {
1392	.devinet_vars = {
1393		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1394					     devinet_sysctl_forward),
1395		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1396
1397		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1398		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1399		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1400		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1401		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1402		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1403					"accept_source_route"),
1404		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1405		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1406		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1407		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1408		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1409		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1410		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1411		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1412		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1413		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1414		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1415		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1416
1417		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1418		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1419		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1420					      "force_igmp_version"),
1421		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1422					      "promote_secondaries"),
1423	},
1424};
1425
1426static int __devinet_sysctl_register(struct net *net, char *dev_name,
1427					struct ipv4_devconf *p)
1428{
1429	int i;
1430	struct devinet_sysctl_table *t;
1431
1432#define DEVINET_CTL_PATH_DEV	3
1433
1434	struct ctl_path devinet_ctl_path[] = {
1435		{ .procname = "net",  },
1436		{ .procname = "ipv4", },
1437		{ .procname = "conf", },
1438		{ /* to be set */ },
1439		{ },
1440	};
1441
1442	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1443	if (!t)
1444		goto out;
1445
1446	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1447		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1448		t->devinet_vars[i].extra1 = p;
1449		t->devinet_vars[i].extra2 = net;
1450	}
1451
1452	/*
1453	 * Make a copy of dev_name, because '.procname' is regarded as const
1454	 * by sysctl and we wouldn't want anyone to change it under our feet
1455	 * (see SIOCSIFNAME).
1456	 */
1457	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1458	if (!t->dev_name)
1459		goto free;
1460
1461	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1462
1463	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1464			t->devinet_vars);
1465	if (!t->sysctl_header)
1466		goto free_procname;
1467
1468	p->sysctl = t;
1469	return 0;
1470
1471free_procname:
1472	kfree(t->dev_name);
1473free:
1474	kfree(t);
1475out:
1476	return -ENOBUFS;
1477}
1478
1479static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1480{
1481	struct devinet_sysctl_table *t = cnf->sysctl;
1482
1483	if (t == NULL)
1484		return;
1485
1486	cnf->sysctl = NULL;
1487	unregister_sysctl_table(t->sysctl_header);
1488	kfree(t->dev_name);
1489	kfree(t);
1490}
1491
1492static void devinet_sysctl_register(struct in_device *idev)
1493{
1494	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1495			NET_IPV4_NEIGH, "ipv4", NULL);
1496	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1497					&idev->cnf);
1498}
1499
1500static void devinet_sysctl_unregister(struct in_device *idev)
1501{
1502	__devinet_sysctl_unregister(&idev->cnf);
1503	neigh_sysctl_unregister(idev->arp_parms);
1504}
1505
1506static struct ctl_table ctl_forward_entry[] = {
1507	{
1508		.procname	= "ip_forward",
1509		.data		= &ipv4_devconf.data[
1510					NET_IPV4_CONF_FORWARDING - 1],
1511		.maxlen		= sizeof(int),
1512		.mode		= 0644,
1513		.proc_handler	= devinet_sysctl_forward,
1514		.extra1		= &ipv4_devconf,
1515		.extra2		= &init_net,
1516	},
1517	{ },
1518};
1519
1520static __net_initdata struct ctl_path net_ipv4_path[] = {
1521	{ .procname = "net", },
1522	{ .procname = "ipv4", },
1523	{ },
1524};
1525#endif
1526
1527static __net_init int devinet_init_net(struct net *net)
1528{
1529	int err;
1530	struct ipv4_devconf *all, *dflt;
1531#ifdef CONFIG_SYSCTL
1532	struct ctl_table *tbl = ctl_forward_entry;
1533	struct ctl_table_header *forw_hdr;
1534#endif
1535
1536	err = -ENOMEM;
1537	all = &ipv4_devconf;
1538	dflt = &ipv4_devconf_dflt;
1539
1540	if (!net_eq(net, &init_net)) {
1541		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1542		if (all == NULL)
1543			goto err_alloc_all;
1544
1545		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1546		if (dflt == NULL)
1547			goto err_alloc_dflt;
1548
1549#ifdef CONFIG_SYSCTL
1550		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1551		if (tbl == NULL)
1552			goto err_alloc_ctl;
1553
1554		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1555		tbl[0].extra1 = all;
1556		tbl[0].extra2 = net;
1557#endif
1558	}
1559
1560#ifdef CONFIG_SYSCTL
1561	err = __devinet_sysctl_register(net, "all", all);
1562	if (err < 0)
1563		goto err_reg_all;
1564
1565	err = __devinet_sysctl_register(net, "default", dflt);
1566	if (err < 0)
1567		goto err_reg_dflt;
1568
1569	err = -ENOMEM;
1570	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1571	if (forw_hdr == NULL)
1572		goto err_reg_ctl;
1573	net->ipv4.forw_hdr = forw_hdr;
1574#endif
1575
1576	net->ipv4.devconf_all = all;
1577	net->ipv4.devconf_dflt = dflt;
1578	return 0;
1579
1580#ifdef CONFIG_SYSCTL
1581err_reg_ctl:
1582	__devinet_sysctl_unregister(dflt);
1583err_reg_dflt:
1584	__devinet_sysctl_unregister(all);
1585err_reg_all:
1586	if (tbl != ctl_forward_entry)
1587		kfree(tbl);
1588err_alloc_ctl:
1589#endif
1590	if (dflt != &ipv4_devconf_dflt)
1591		kfree(dflt);
1592err_alloc_dflt:
1593	if (all != &ipv4_devconf)
1594		kfree(all);
1595err_alloc_all:
1596	return err;
1597}
1598
1599static __net_exit void devinet_exit_net(struct net *net)
1600{
1601#ifdef CONFIG_SYSCTL
1602	struct ctl_table *tbl;
1603
1604	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1605	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1606	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1607	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1608	kfree(tbl);
1609#endif
1610	kfree(net->ipv4.devconf_dflt);
1611	kfree(net->ipv4.devconf_all);
1612}
1613
1614static __net_initdata struct pernet_operations devinet_ops = {
1615	.init = devinet_init_net,
1616	.exit = devinet_exit_net,
1617};
1618
1619void __init devinet_init(void)
1620{
1621	register_pernet_subsys(&devinet_ops);
1622
1623	register_gifconf(PF_INET, inet_gifconf);
1624	register_netdevice_notifier(&ip_netdev_notifier);
1625
1626	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1627	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1628	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1629}
1630
1631