devinet.c revision f97c1e0c6ebdb606c97b6cb5e837c6110ac5a961
1/*
2 *	NET3	IP device support routines.
3 *
4 *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 *	Derived from the IP parts of dev.c 1.0.19
12 * 		Authors:	Ross Biro
13 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15 *
16 *	Additional Authors:
17 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 *
20 *	Changes:
21 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22 *					lists.
23 *		Cyrus Durgin:		updated for kmod
24 *		Matthias Andree:	in devinet_ioctl, compare label and
25 *					address (4.4BSD alias style support),
26 *					fall back to comparing just the label
27 *					if no match found.
28 */
29
30
31#include <asm/uaccess.h>
32#include <asm/system.h>
33#include <linux/bitops.h>
34#include <linux/capability.h>
35#include <linux/module.h>
36#include <linux/types.h>
37#include <linux/kernel.h>
38#include <linux/string.h>
39#include <linux/mm.h>
40#include <linux/socket.h>
41#include <linux/sockios.h>
42#include <linux/in.h>
43#include <linux/errno.h>
44#include <linux/interrupt.h>
45#include <linux/if_addr.h>
46#include <linux/if_ether.h>
47#include <linux/inet.h>
48#include <linux/netdevice.h>
49#include <linux/etherdevice.h>
50#include <linux/skbuff.h>
51#include <linux/init.h>
52#include <linux/notifier.h>
53#include <linux/inetdevice.h>
54#include <linux/igmp.h>
55#ifdef CONFIG_SYSCTL
56#include <linux/sysctl.h>
57#endif
58#include <linux/kmod.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66
/*
 * Exported IPv4 device configuration object.
 *
 * The .data array is indexed by sysctl identifier minus one.  Redirect
 * acceptance, redirect sending, secure redirects and shared media are
 * switched on; every entry not listed here starts out as zero.
 */
struct ipv4_devconf ipv4_devconf = {
	.data = {
		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
	},
};
75
/*
 * File-local default IPv4 device configuration.
 *
 * Same settings as ipv4_devconf, plus source-route acceptance enabled.
 * NOTE(review): presumably this is what net->ipv4.devconf_dflt points at
 * for the initial namespace -- the assignment is not visible here.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
85
/*
 * Access configuration attribute @attr in the default device configuration
 * of network namespace @net.
 *
 * @net is parenthesized so that argument expressions other than a plain
 * identifier (for example "&init_net" or a conditional expression) bind
 * correctly to the "->" operator.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
/*
 * Netlink attribute policy handed to nlmsg_parse() for IPv4 address
 * messages: the four address attributes are 32-bit values and the label
 * is a string of at most IFNAMSIZ - 1 characters.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_ANYCAST]   	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
96
/* Sends an rtnetlink address notification; defined further down in this file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain invoked with NETDEV_UP / NETDEV_DOWN when an address is
 * inserted into or removed from a device's address list.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks are empty stubs. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
113
114/* Locks all the inet devices. */
115
116static struct in_ifaddr *inet_alloc_ifa(void)
117{
118	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119
120	if (ifa) {
121		INIT_RCU_HEAD(&ifa->rcu_head);
122	}
123
124	return ifa;
125}
126
127static void inet_rcu_free_ifa(struct rcu_head *head)
128{
129	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130	if (ifa->ifa_dev)
131		in_dev_put(ifa->ifa_dev);
132	kfree(ifa);
133}
134
135static inline void inet_free_ifa(struct in_ifaddr *ifa)
136{
137	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
138}
139
140void in_dev_finish_destroy(struct in_device *idev)
141{
142	struct net_device *dev = idev->dev;
143
144	BUG_TRAP(!idev->ifa_list);
145	BUG_TRAP(!idev->mc_list);
146#ifdef NET_REFCNT_DEBUG
147	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148	       idev, dev ? dev->name : "NIL");
149#endif
150	dev_put(dev);
151	if (!idev->dead)
152		printk("Freeing alive in_device %p\n", idev);
153	else {
154		kfree(idev);
155	}
156}
157
158static struct in_device *inetdev_init(struct net_device *dev)
159{
160	struct in_device *in_dev;
161
162	ASSERT_RTNL();
163
164	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165	if (!in_dev)
166		goto out;
167	INIT_RCU_HEAD(&in_dev->rcu_head);
168	memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169			sizeof(in_dev->cnf));
170	in_dev->cnf.sysctl = NULL;
171	in_dev->dev = dev;
172	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173		goto out_kfree;
174	/* Reference in_dev->dev */
175	dev_hold(dev);
176	/* Account for reference dev->ip_ptr (below) */
177	in_dev_hold(in_dev);
178
179	devinet_sysctl_register(in_dev);
180	ip_mc_init_dev(in_dev);
181	if (dev->flags & IFF_UP)
182		ip_mc_up(in_dev);
183
184	/* we can receive as soon as ip_ptr is set -- do this last */
185	rcu_assign_pointer(dev->ip_ptr, in_dev);
186out:
187	return in_dev;
188out_kfree:
189	kfree(in_dev);
190	in_dev = NULL;
191	goto out;
192}
193
194static void in_dev_rcu_put(struct rcu_head *head)
195{
196	struct in_device *idev = container_of(head, struct in_device, rcu_head);
197	in_dev_put(idev);
198}
199
200static void inetdev_destroy(struct in_device *in_dev)
201{
202	struct in_ifaddr *ifa;
203	struct net_device *dev;
204
205	ASSERT_RTNL();
206
207	dev = in_dev->dev;
208
209	in_dev->dead = 1;
210
211	ip_mc_destroy_dev(in_dev);
212
213	while ((ifa = in_dev->ifa_list) != NULL) {
214		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215		inet_free_ifa(ifa);
216	}
217
218	dev->ip_ptr = NULL;
219
220	devinet_sysctl_unregister(in_dev);
221	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222	arp_ifdown(dev);
223
224	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225}
226
227int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228{
229	rcu_read_lock();
230	for_primary_ifa(in_dev) {
231		if (inet_ifa_match(a, ifa)) {
232			if (!b || inet_ifa_match(b, ifa)) {
233				rcu_read_unlock();
234				return 1;
235			}
236		}
237	} endfor_ifa(in_dev);
238	rcu_read_unlock();
239	return 0;
240}
241
/*
 * Remove the address *ifap from @in_dev's address list.  Requires RTNL.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (pointer to the "next" slot) that references the address
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh:     netlink request passed through to rtmsg_ifa(), may be NULL
 * @pid:     netlink pid passed through to rtmsg_ifa()
 *
 * When a primary address is removed, the secondaries that follow it with
 * the same mask and a matching prefix are either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is promoted to
 * primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set.
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last later primary whose scope is not
			 * smaller; a promoted address is re-linked after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;

	/* 3. Announce address deletion */

	/* Send the message first, then call the notifier.
	   At first sight, the FIB update triggered by the notifier
	   will refer to an already deleted ifaddr, which could confuse
	   netlink listeners. That is not so: gated sees that the route
	   was deleted and, if it still thinks that the ifaddr
	   is valid, it will try to restore the deleted routes... Grr.
	   So this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* Move the promoted address behind last_prim when an
		 * unrelated entry preceded it in the list.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add FIB entries for the remaining addresses that share
		 * the deleted primary's mask and prefix.
		 */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
327
328static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329			 int destroy)
330{
331	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
332}
333
334static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335			     u32 pid)
336{
337	struct in_device *in_dev = ifa->ifa_dev;
338	struct in_ifaddr *ifa1, **ifap, **last_primary;
339
340	ASSERT_RTNL();
341
342	if (!ifa->ifa_local) {
343		inet_free_ifa(ifa);
344		return 0;
345	}
346
347	ifa->ifa_flags &= ~IFA_F_SECONDARY;
348	last_primary = &in_dev->ifa_list;
349
350	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351	     ifap = &ifa1->ifa_next) {
352		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353		    ifa->ifa_scope <= ifa1->ifa_scope)
354			last_primary = &ifa1->ifa_next;
355		if (ifa1->ifa_mask == ifa->ifa_mask &&
356		    inet_ifa_match(ifa1->ifa_address, ifa)) {
357			if (ifa1->ifa_local == ifa->ifa_local) {
358				inet_free_ifa(ifa);
359				return -EEXIST;
360			}
361			if (ifa1->ifa_scope != ifa->ifa_scope) {
362				inet_free_ifa(ifa);
363				return -EINVAL;
364			}
365			ifa->ifa_flags |= IFA_F_SECONDARY;
366		}
367	}
368
369	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370		net_srandom(ifa->ifa_local);
371		ifap = last_primary;
372	}
373
374	ifa->ifa_next = *ifap;
375	*ifap = ifa;
376
377	/* Send message first, then call notifier.
378	   Notifier will trigger FIB update, so that
379	   listeners of netlink will know about new ifaddr */
380	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
382
383	return 0;
384}
385
386static int inet_insert_ifa(struct in_ifaddr *ifa)
387{
388	return __inet_insert_ifa(ifa, NULL, 0);
389}
390
391static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392{
393	struct in_device *in_dev = __in_dev_get_rtnl(dev);
394
395	ASSERT_RTNL();
396
397	if (!in_dev) {
398		inet_free_ifa(ifa);
399		return -ENOBUFS;
400	}
401	ipv4_devconf_setall(in_dev);
402	if (ifa->ifa_dev != in_dev) {
403		BUG_TRAP(!ifa->ifa_dev);
404		in_dev_hold(in_dev);
405		ifa->ifa_dev = in_dev;
406	}
407	if (ipv4_is_loopback(ifa->ifa_local))
408		ifa->ifa_scope = RT_SCOPE_HOST;
409	return inet_insert_ifa(ifa);
410}
411
412struct in_device *inetdev_by_index(int ifindex)
413{
414	struct net_device *dev;
415	struct in_device *in_dev = NULL;
416	read_lock(&dev_base_lock);
417	dev = __dev_get_by_index(&init_net, ifindex);
418	if (dev)
419		in_dev = in_dev_get(dev);
420	read_unlock(&dev_base_lock);
421	return in_dev;
422}
423
424/* Called only from RTNL semaphored context. No locks. */
425
426struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427				    __be32 mask)
428{
429	ASSERT_RTNL();
430
431	for_primary_ifa(in_dev) {
432		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433			return ifa;
434	} endfor_ifa(in_dev);
435	return NULL;
436}
437
438static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439{
440	struct net *net = skb->sk->sk_net;
441	struct nlattr *tb[IFA_MAX+1];
442	struct in_device *in_dev;
443	struct ifaddrmsg *ifm;
444	struct in_ifaddr *ifa, **ifap;
445	int err = -EINVAL;
446
447	ASSERT_RTNL();
448
449	if (net != &init_net)
450		return -EINVAL;
451
452	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453	if (err < 0)
454		goto errout;
455
456	ifm = nlmsg_data(nlh);
457	in_dev = inetdev_by_index(ifm->ifa_index);
458	if (in_dev == NULL) {
459		err = -ENODEV;
460		goto errout;
461	}
462
463	__in_dev_put(in_dev);
464
465	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
466	     ifap = &ifa->ifa_next) {
467		if (tb[IFA_LOCAL] &&
468		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
469			continue;
470
471		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
472			continue;
473
474		if (tb[IFA_ADDRESS] &&
475		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
476		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
477			continue;
478
479		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480		return 0;
481	}
482
483	err = -EADDRNOTAVAIL;
484errout:
485	return err;
486}
487
488static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
489{
490	struct nlattr *tb[IFA_MAX+1];
491	struct in_ifaddr *ifa;
492	struct ifaddrmsg *ifm;
493	struct net_device *dev;
494	struct in_device *in_dev;
495	int err = -EINVAL;
496
497	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498	if (err < 0)
499		goto errout;
500
501	ifm = nlmsg_data(nlh);
502	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
503		err = -EINVAL;
504		goto errout;
505	}
506
507	dev = __dev_get_by_index(&init_net, ifm->ifa_index);
508	if (dev == NULL) {
509		err = -ENODEV;
510		goto errout;
511	}
512
513	in_dev = __in_dev_get_rtnl(dev);
514	if (in_dev == NULL) {
515		err = -ENOBUFS;
516		goto errout;
517	}
518
519	ifa = inet_alloc_ifa();
520	if (ifa == NULL) {
521		/*
522		 * A potential indev allocation can be left alive, it stays
523		 * assigned to its device and is destroy with it.
524		 */
525		err = -ENOBUFS;
526		goto errout;
527	}
528
529	ipv4_devconf_setall(in_dev);
530	in_dev_hold(in_dev);
531
532	if (tb[IFA_ADDRESS] == NULL)
533		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
534
535	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
536	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
537	ifa->ifa_flags = ifm->ifa_flags;
538	ifa->ifa_scope = ifm->ifa_scope;
539	ifa->ifa_dev = in_dev;
540
541	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
542	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
543
544	if (tb[IFA_BROADCAST])
545		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
546
547	if (tb[IFA_ANYCAST])
548		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
549
550	if (tb[IFA_LABEL])
551		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
552	else
553		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
554
555	return ifa;
556
557errout:
558	return ERR_PTR(err);
559}
560
561static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
562{
563	struct net *net = skb->sk->sk_net;
564	struct in_ifaddr *ifa;
565
566	ASSERT_RTNL();
567
568	if (net != &init_net)
569		return -EINVAL;
570
571	ifa = rtm_to_ifaddr(nlh);
572	if (IS_ERR(ifa))
573		return PTR_ERR(ifa);
574
575	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
576}
577
578/*
579 *	Determine a default network mask, based on the IP address.
580 */
581
582static __inline__ int inet_abc_len(__be32 addr)
583{
584	int rc = -1;	/* Something else, probably a multicast. */
585
586	if (ipv4_is_zeronet(addr))
587		rc = 0;
588	else {
589		__u32 haddr = ntohl(addr);
590
591		if (IN_CLASSA(haddr))
592			rc = 8;
593		else if (IN_CLASSB(haddr))
594			rc = 16;
595		else if (IN_CLASSC(haddr))
596			rc = 24;
597	}
598
599	return rc;
600}
601
602
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
 *       SIOCGIFNETMASK, SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR,
 *       SIOCSIFDSTADDR, SIOCSIFNETMASK; anything else gives -EINVAL
 * @arg: user pointer to a struct ifreq; read on entry and, for the
 *       "get" commands, written back on success
 *
 * The interface name may carry an alias suffix ("eth0:1"); the part
 * before the colon selects the device and the full name is compared with
 * the address labels.  The "set" commands need CAP_NET_ADMIN.  The device
 * and address lookup and all modifications run under rtnl_lock().
 *
 * Returns 0 on success or a negative errno value.
 */
int devinet_ioctl(unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Force termination of the user-supplied name. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut off an alias suffix so the bare device name
	 * can be looked up; the colon is restored after the lookup.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

#ifdef CONFIG_KMOD
	dev_load(&init_net, ifr.ifr_name);
#endif

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
		goto done;

	if (colon)
		*colon = ':';

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_address) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* On an alias name, clearing IFF_UP deletes that address;
		 * otherwise the flags are applied to the device itself.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			if ((ifa = inet_alloc_ifa()) == NULL)
				break;
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; the same object is
			 * re-inserted below with its new values.
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_anycast = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Classful default mask and matching broadcast. */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* If the current broadcast address is the one
			 * derived from the old netmask, recalculate it
			 * for the new netmask. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: release RTNL, then copy the reply to user space. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
841
842static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
843{
844	struct in_device *in_dev = __in_dev_get_rtnl(dev);
845	struct in_ifaddr *ifa;
846	struct ifreq ifr;
847	int done = 0;
848
849	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
850		goto out;
851
852	for (; ifa; ifa = ifa->ifa_next) {
853		if (!buf) {
854			done += sizeof(ifr);
855			continue;
856		}
857		if (len < (int) sizeof(ifr))
858			break;
859		memset(&ifr, 0, sizeof(struct ifreq));
860		if (ifa->ifa_label)
861			strcpy(ifr.ifr_name, ifa->ifa_label);
862		else
863			strcpy(ifr.ifr_name, dev->name);
864
865		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
866		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
867								ifa->ifa_local;
868
869		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
870			done = -EFAULT;
871			break;
872		}
873		buf  += sizeof(struct ifreq);
874		len  -= sizeof(struct ifreq);
875		done += sizeof(struct ifreq);
876	}
877out:
878	return done;
879}
880
881__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
882{
883	__be32 addr = 0;
884	struct in_device *in_dev;
885
886	rcu_read_lock();
887	in_dev = __in_dev_get_rcu(dev);
888	if (!in_dev)
889		goto no_in_dev;
890
891	for_primary_ifa(in_dev) {
892		if (ifa->ifa_scope > scope)
893			continue;
894		if (!dst || inet_ifa_match(dst, ifa)) {
895			addr = ifa->ifa_local;
896			break;
897		}
898		if (!addr)
899			addr = ifa->ifa_local;
900	} endfor_ifa(in_dev);
901no_in_dev:
902	rcu_read_unlock();
903
904	if (addr)
905		goto out;
906
907	/* Not loopback addresses on loopback should be preferred
908	   in this case. It is importnat that lo is the first interface
909	   in dev_base list.
910	 */
911	read_lock(&dev_base_lock);
912	rcu_read_lock();
913	for_each_netdev(&init_net, dev) {
914		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
915			continue;
916
917		for_primary_ifa(in_dev) {
918			if (ifa->ifa_scope != RT_SCOPE_LINK &&
919			    ifa->ifa_scope <= scope) {
920				addr = ifa->ifa_local;
921				goto out_unlock_both;
922			}
923		} endfor_ifa(in_dev);
924	}
925out_unlock_both:
926	read_unlock(&dev_base_lock);
927	rcu_read_unlock();
928out:
929	return addr;
930}
931
/*
 * Search the addresses of one in_device for a local address that
 * satisfies the wildcard request (@dst, @local, @scope); a zero @dst or
 * @local acts as a wildcard.
 *
 * Two things are tracked while walking the list: "addr", the first
 * address equal to @local (or any address when @local is zero) whose
 * scope does not exceed @scope, and "same", whether some entry's subnet
 * contains both @local and @dst.
 *
 * Returns the chosen address when a subnet match was found, else 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Remember the first acceptable local address. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr in the dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same? addr : 0;
}
968
969/*
970 * Confirm that local IP address exists using wildcards:
971 * - dev: only on this interface, 0=any interface
972 * - dst: only in the same subnet as dst, 0=any dst
973 * - local: address, 0=autoselect the local address
974 * - scope: maximum allowed scope value for the local address
975 */
976__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
977{
978	__be32 addr = 0;
979	struct in_device *in_dev;
980
981	if (dev) {
982		rcu_read_lock();
983		if ((in_dev = __in_dev_get_rcu(dev)))
984			addr = confirm_addr_indev(in_dev, dst, local, scope);
985		rcu_read_unlock();
986
987		return addr;
988	}
989
990	read_lock(&dev_base_lock);
991	rcu_read_lock();
992	for_each_netdev(&init_net, dev) {
993		if ((in_dev = __in_dev_get_rcu(dev))) {
994			addr = confirm_addr_indev(in_dev, dst, local, scope);
995			if (addr)
996				break;
997		}
998	}
999	rcu_read_unlock();
1000	read_unlock(&dev_base_lock);
1001
1002	return addr;
1003}
1004
1005/*
1006 *	Device notifier
1007 */
1008
1009int register_inetaddr_notifier(struct notifier_block *nb)
1010{
1011	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1012}
1013
1014int unregister_inetaddr_notifier(struct notifier_block *nb)
1015{
1016	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1017}
1018
1019/* Rename ifa_labels for a device name change. Make some effort to preserve existing
1020 * alias numbering and to create unique labels if possible.
1021*/
1022static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1023{
1024	struct in_ifaddr *ifa;
1025	int named = 0;
1026
1027	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1028		char old[IFNAMSIZ], *dot;
1029
1030		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1031		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1032		if (named++ == 0)
1033			continue;
1034		dot = strchr(old, ':');
1035		if (dot == NULL) {
1036			sprintf(old, ":%d", named);
1037			dot = old;
1038		}
1039		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1040			strcat(ifa->ifa_label, dot);
1041		} else {
1042			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1043		}
1044	}
1045}
1046
/*
 * Netdevice notifier callback that keeps the IPv4 per-device state in
 * step with device life-cycle events.  Called only under the RTNL
 * semaphore.
 *
 * Devices outside init_net are ignored.  A device without IPv4 state
 * gets one on NETDEV_REGISTER; every other event is ignored for such a
 * device.  Returns NOTIFY_DONE, or an error notifier value when creating
 * the in_device fails.
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			/* Loopback devices get NOXFRM and NOPOLICY set. */
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Registering a device that already has IPv4 state is
		 * unexpected; log it and clear the pointer.
		 */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		dev->ip_ptr = NULL;
		break;
	case NETDEV_UP:
		if (dev->mtu < 68)
			break;
		/* A loopback device coming up is given INADDR_LOOPBACK/8
		 * with host scope.
		 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa;
			if ((ifa = inet_alloc_ifa()) != NULL) {
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (dev->mtu >= 68)
			break;
		/* MTU fell below 68: disable IP on this device by falling
		 * through to the unregister handling.
		 */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries after the rename. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1120
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call =inetdev_event,
};
1124
1125static inline size_t inet_nlmsg_size(void)
1126{
1127	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1128	       + nla_total_size(4) /* IFA_ADDRESS */
1129	       + nla_total_size(4) /* IFA_LOCAL */
1130	       + nla_total_size(4) /* IFA_BROADCAST */
1131	       + nla_total_size(4) /* IFA_ANYCAST */
1132	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1133}
1134
/*
 * Append one address message describing @ifa to @skb.
 *
 * @pid, @seq, @event and @flags go into the netlink header.  The reported
 * flags always include IFA_F_PERMANENT.  Address attributes are emitted
 * only when non-zero, the label only when non-empty.
 *
 * Returns the result of nlmsg_end() on success, or -EMSGSIZE when the
 * header or an attribute does not fit (a partial message is cancelled).
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 pid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	/* The NLA_PUT_* macros jump to nla_put_failure on lack of space. */
	if (ifa->ifa_address)
		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);

	if (ifa->ifa_local)
		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);

	if (ifa->ifa_broadcast)
		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);

	if (ifa->ifa_anycast)
		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);

	if (ifa->ifa_label[0])
		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1173
/*
 * Netlink dump callback that lists the IPv4 addresses of all devices in
 * init_net; other namespaces get an empty dump.
 *
 * The resume position lives in cb->args: args[0] is the device index and
 * args[1] the address index within that device.  Dumping stops as soon
 * as inet_fill_ifaddr() returns a value <= 0, and the position reached
 * is stored for the next invocation.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = skb->sk->sk_net;
	int idx, ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	int s_ip_idx, s_idx = cb->args[0];

	if (net != &init_net)
		return 0;

	s_ip_idx = ip_idx = cb->args[1];
	idx = 0;
	for_each_netdev(&init_net, dev) {
		/* Skip devices already dumped completely. */
		if (idx < s_idx)
			goto cont;
		/* Past the resume device: start at its first address. */
		if (idx > s_idx)
			s_ip_idx = 0;
		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
			goto cont;

		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
		     ifa = ifa->ifa_next, ip_idx++) {
			if (ip_idx < s_ip_idx)
				continue;
			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
				goto done;
		}
cont:
		idx++;
	}

done:
	/* Remember where to continue on the next call. */
	cb->args[0] = idx;
	cb->args[1] = ip_idx;

	return skb->len;
}
1215
1216static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1217		      u32 pid)
1218{
1219	struct sk_buff *skb;
1220	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1221	int err = -ENOBUFS;
1222
1223	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1224	if (skb == NULL)
1225		goto errout;
1226
1227	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1228	if (err < 0) {
1229		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1230		WARN_ON(err == -EMSGSIZE);
1231		kfree_skb(skb);
1232		goto errout;
1233	}
1234	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1235errout:
1236	if (err < 0)
1237		rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
1238}
1239
1240#ifdef CONFIG_SYSCTL
1241
1242static void devinet_copy_dflt_conf(struct net *net, int i)
1243{
1244	struct net_device *dev;
1245
1246	read_lock(&dev_base_lock);
1247	for_each_netdev(net, dev) {
1248		struct in_device *in_dev;
1249		rcu_read_lock();
1250		in_dev = __in_dev_get_rcu(dev);
1251		if (in_dev && !test_bit(i, in_dev->cnf.state))
1252			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1253		rcu_read_unlock();
1254	}
1255	read_unlock(&dev_base_lock);
1256}
1257
1258static void inet_forward_change(struct net *net)
1259{
1260	struct net_device *dev;
1261	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1262
1263	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1264	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1265
1266	read_lock(&dev_base_lock);
1267	for_each_netdev(net, dev) {
1268		struct in_device *in_dev;
1269		rcu_read_lock();
1270		in_dev = __in_dev_get_rcu(dev);
1271		if (in_dev)
1272			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1273		rcu_read_unlock();
1274	}
1275	read_unlock(&dev_base_lock);
1276
1277	rt_cache_flush(0);
1278}
1279
1280static int devinet_conf_proc(ctl_table *ctl, int write,
1281			     struct file* filp, void __user *buffer,
1282			     size_t *lenp, loff_t *ppos)
1283{
1284	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1285
1286	if (write) {
1287		struct ipv4_devconf *cnf = ctl->extra1;
1288		struct net *net = ctl->extra2;
1289		int i = (int *)ctl->data - cnf->data;
1290
1291		set_bit(i, cnf->state);
1292
1293		if (cnf == net->ipv4.devconf_dflt)
1294			devinet_copy_dflt_conf(net, i);
1295	}
1296
1297	return ret;
1298}
1299
1300static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1301			       void __user *oldval, size_t __user *oldlenp,
1302			       void __user *newval, size_t newlen)
1303{
1304	struct ipv4_devconf *cnf;
1305	struct net *net;
1306	int *valp = table->data;
1307	int new;
1308	int i;
1309
1310	if (!newval || !newlen)
1311		return 0;
1312
1313	if (newlen != sizeof(int))
1314		return -EINVAL;
1315
1316	if (get_user(new, (int __user *)newval))
1317		return -EFAULT;
1318
1319	if (new == *valp)
1320		return 0;
1321
1322	if (oldval && oldlenp) {
1323		size_t len;
1324
1325		if (get_user(len, oldlenp))
1326			return -EFAULT;
1327
1328		if (len) {
1329			if (len > table->maxlen)
1330				len = table->maxlen;
1331			if (copy_to_user(oldval, valp, len))
1332				return -EFAULT;
1333			if (put_user(len, oldlenp))
1334				return -EFAULT;
1335		}
1336	}
1337
1338	*valp = new;
1339
1340	cnf = table->extra1;
1341	net = table->extra2;
1342	i = (int *)table->data - cnf->data;
1343
1344	set_bit(i, cnf->state);
1345
1346	if (cnf == net->ipv4.devconf_dflt)
1347		devinet_copy_dflt_conf(net, i);
1348
1349	return 1;
1350}
1351
1352static int devinet_sysctl_forward(ctl_table *ctl, int write,
1353				  struct file* filp, void __user *buffer,
1354				  size_t *lenp, loff_t *ppos)
1355{
1356	int *valp = ctl->data;
1357	int val = *valp;
1358	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1359
1360	if (write && *valp != val) {
1361		struct net *net = ctl->extra2;
1362
1363		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1364			inet_forward_change(net);
1365		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1366			rt_cache_flush(0);
1367	}
1368
1369	return ret;
1370}
1371
1372int ipv4_doint_and_flush(ctl_table *ctl, int write,
1373			 struct file* filp, void __user *buffer,
1374			 size_t *lenp, loff_t *ppos)
1375{
1376	int *valp = ctl->data;
1377	int val = *valp;
1378	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1379
1380	if (write && *valp != val)
1381		rt_cache_flush(0);
1382
1383	return ret;
1384}
1385
1386int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1387				  void __user *oldval, size_t __user *oldlenp,
1388				  void __user *newval, size_t newlen)
1389{
1390	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1391				      newval, newlen);
1392
1393	if (ret == 1)
1394		rt_cache_flush(0);
1395
1396	return ret;
1397}
1398
1399
/*
 * Initializer for one ctl_table entry describing NET_IPV4_CONF_<attr>.
 * .data and .extra1 refer to the global ipv4_devconf; @mval is the file
 * mode, @proc the proc handler and @sysctl the strategy routine.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
		.extra1		= &ipv4_devconf, \
	}

/* Entry with mode 0644 using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Entry with mode 0444 using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Entry with mode 0644 and caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Entry with mode 0644 whose handlers flush the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1427
1428static struct devinet_sysctl_table {
1429	struct ctl_table_header *sysctl_header;
1430	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1431	char *dev_name;
1432} devinet_sysctl = {
1433	.devinet_vars = {
1434		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1435					     devinet_sysctl_forward,
1436					     devinet_conf_sysctl),
1437		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1438
1439		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1440		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1441		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1442		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1443		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1444		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1445					"accept_source_route"),
1446		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1447		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1448		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1449		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1450		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1451		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1452		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1453		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1454		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1455
1456		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1457		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1458		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1459					      "force_igmp_version"),
1460		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1461					      "promote_secondaries"),
1462	},
1463};
1464
1465static int __devinet_sysctl_register(struct net *net, char *dev_name,
1466		int ctl_name, struct ipv4_devconf *p)
1467{
1468	int i;
1469	struct devinet_sysctl_table *t;
1470
1471#define DEVINET_CTL_PATH_DEV	3
1472
1473	struct ctl_path devinet_ctl_path[] = {
1474		{ .procname = "net", .ctl_name = CTL_NET, },
1475		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1476		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1477		{ /* to be set */ },
1478		{ },
1479	};
1480
1481	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1482	if (!t)
1483		goto out;
1484
1485	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1486		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1487		t->devinet_vars[i].extra1 = p;
1488		t->devinet_vars[i].extra2 = net;
1489	}
1490
1491	/*
1492	 * Make a copy of dev_name, because '.procname' is regarded as const
1493	 * by sysctl and we wouldn't want anyone to change it under our feet
1494	 * (see SIOCSIFNAME).
1495	 */
1496	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1497	if (!t->dev_name)
1498		goto free;
1499
1500	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1501	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1502
1503	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1504			t->devinet_vars);
1505	if (!t->sysctl_header)
1506		goto free_procname;
1507
1508	p->sysctl = t;
1509	return 0;
1510
1511free_procname:
1512	kfree(t->dev_name);
1513free:
1514	kfree(t);
1515out:
1516	return -ENOBUFS;
1517}
1518
1519static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1520{
1521	struct devinet_sysctl_table *t = cnf->sysctl;
1522
1523	if (t == NULL)
1524		return;
1525
1526	cnf->sysctl = NULL;
1527	unregister_sysctl_table(t->sysctl_header);
1528	kfree(t->dev_name);
1529	kfree(t);
1530}
1531
1532static void devinet_sysctl_register(struct in_device *idev)
1533{
1534	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1535			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1536	__devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1537			idev->dev->ifindex, &idev->cnf);
1538}
1539
1540static void devinet_sysctl_unregister(struct in_device *idev)
1541{
1542	__devinet_sysctl_unregister(&idev->cnf);
1543	neigh_sysctl_unregister(idev->arp_parms);
1544}
1545#endif
1546
1547static struct ctl_table ctl_forward_entry[] = {
1548	{
1549		.ctl_name	= NET_IPV4_FORWARD,
1550		.procname	= "ip_forward",
1551		.data		= &ipv4_devconf.data[
1552					NET_IPV4_CONF_FORWARDING - 1],
1553		.maxlen		= sizeof(int),
1554		.mode		= 0644,
1555		.proc_handler	= devinet_sysctl_forward,
1556		.strategy	= devinet_conf_sysctl,
1557		.extra1		= &ipv4_devconf,
1558		.extra2		= &init_net,
1559	},
1560	{ },
1561};
1562
1563static __net_initdata struct ctl_path net_ipv4_path[] = {
1564	{ .procname = "net", .ctl_name = CTL_NET, },
1565	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1566	{ },
1567};
1568
1569static __net_init int devinet_init_net(struct net *net)
1570{
1571	int err;
1572	struct ctl_table *tbl;
1573	struct ipv4_devconf *all, *dflt;
1574	struct ctl_table_header *forw_hdr;
1575
1576	err = -ENOMEM;
1577	all = &ipv4_devconf;
1578	dflt = &ipv4_devconf_dflt;
1579	tbl = ctl_forward_entry;
1580
1581	if (net != &init_net) {
1582		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1583		if (all == NULL)
1584			goto err_alloc_all;
1585
1586		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1587		if (dflt == NULL)
1588			goto err_alloc_dflt;
1589
1590		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1591		if (tbl == NULL)
1592			goto err_alloc_ctl;
1593
1594		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1595		tbl[0].extra1 = all;
1596		tbl[0].extra2 = net;
1597	}
1598
1599#ifdef CONFIG_SYSCTL
1600	err = __devinet_sysctl_register(net, "all",
1601			NET_PROTO_CONF_ALL, all);
1602	if (err < 0)
1603		goto err_reg_all;
1604
1605	err = __devinet_sysctl_register(net, "default",
1606			NET_PROTO_CONF_DEFAULT, dflt);
1607	if (err < 0)
1608		goto err_reg_dflt;
1609
1610	err = -ENOMEM;
1611	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1612	if (forw_hdr == NULL)
1613		goto err_reg_ctl;
1614#endif
1615
1616	net->ipv4.forw_hdr = forw_hdr;
1617	net->ipv4.devconf_all = all;
1618	net->ipv4.devconf_dflt = dflt;
1619	return 0;
1620
1621#ifdef CONFIG_SYSCTL
1622err_reg_ctl:
1623	__devinet_sysctl_unregister(dflt);
1624err_reg_dflt:
1625	__devinet_sysctl_unregister(all);
1626err_reg_all:
1627	if (tbl != ctl_forward_entry)
1628		kfree(tbl);
1629#endif
1630err_alloc_ctl:
1631	if (dflt != &ipv4_devconf_dflt)
1632		kfree(dflt);
1633err_alloc_dflt:
1634	if (all != &ipv4_devconf)
1635		kfree(all);
1636err_alloc_all:
1637	return err;
1638}
1639
1640static __net_exit void devinet_exit_net(struct net *net)
1641{
1642	struct ctl_table *tbl;
1643
1644	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1645#ifdef CONFIG_SYSCTL
1646	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1647	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1648	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1649#endif
1650	kfree(tbl);
1651	kfree(net->ipv4.devconf_dflt);
1652	kfree(net->ipv4.devconf_all);
1653}
1654
1655static __net_initdata struct pernet_operations devinet_ops = {
1656	.init = devinet_init_net,
1657	.exit = devinet_exit_net,
1658};
1659
1660void __init devinet_init(void)
1661{
1662	register_pernet_subsys(&devinet_ops);
1663
1664	register_gifconf(PF_INET, inet_gifconf);
1665	register_netdevice_notifier(&ip_netdev_notifier);
1666
1667	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1668	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1669	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1670}
1671
/* Symbols exported with EXPORT_SYMBOL(). */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1677