devinet.c revision 752d14dc6aa9d0fc8f3b25e5052596fb549e5157
1/*
2 *	NET3	IP device support routines.
3 *
4 *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 *	Derived from the IP parts of dev.c 1.0.19
12 * 		Authors:	Ross Biro
13 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15 *
16 *	Additional Authors:
17 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 *
20 *	Changes:
21 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22 *					lists.
23 *		Cyrus Durgin:		updated for kmod
24 *		Matthias Andree:	in devinet_ioctl, compare label and
25 *					address (4.4BSD alias style support),
26 *					fall back to comparing just the label
27 *					if no match found.
28 */
29
30
31#include <asm/uaccess.h>
32#include <asm/system.h>
33#include <linux/bitops.h>
34#include <linux/capability.h>
35#include <linux/module.h>
36#include <linux/types.h>
37#include <linux/kernel.h>
38#include <linux/string.h>
39#include <linux/mm.h>
40#include <linux/socket.h>
41#include <linux/sockios.h>
42#include <linux/in.h>
43#include <linux/errno.h>
44#include <linux/interrupt.h>
45#include <linux/if_addr.h>
46#include <linux/if_ether.h>
47#include <linux/inet.h>
48#include <linux/netdevice.h>
49#include <linux/etherdevice.h>
50#include <linux/skbuff.h>
51#include <linux/init.h>
52#include <linux/notifier.h>
53#include <linux/inetdevice.h>
54#include <linux/igmp.h>
55#ifdef CONFIG_SYSCTL
56#include <linux/sysctl.h>
57#endif
58#include <linux/kmod.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66
67struct ipv4_devconf ipv4_devconf = {
68	.data = {
69		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73	},
74};
75
76static struct ipv4_devconf ipv4_devconf_dflt = {
77	.data = {
78		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83	},
84};
85
86#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
87
88static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89	[IFA_LOCAL]     	= { .type = NLA_U32 },
90	[IFA_ADDRESS]   	= { .type = NLA_U32 },
91	[IFA_BROADCAST] 	= { .type = NLA_U32 },
92	[IFA_ANYCAST]   	= { .type = NLA_U32 },
93	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94};
95
96static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
97
98static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
99static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
100			 int destroy);
101#ifdef CONFIG_SYSCTL
102static void devinet_sysctl_register(struct in_device *idev);
103static void devinet_sysctl_unregister(struct in_device *idev);
104#else
105static inline void devinet_sysctl_register(struct in_device *idev)
106{
107}
108static inline void devinet_sysctl_unregister(struct in_device *idev)
109{
110}
111#endif
112
113/* Locks all the inet devices. */
114
115static struct in_ifaddr *inet_alloc_ifa(void)
116{
117	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
118
119	if (ifa) {
120		INIT_RCU_HEAD(&ifa->rcu_head);
121	}
122
123	return ifa;
124}
125
126static void inet_rcu_free_ifa(struct rcu_head *head)
127{
128	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
129	if (ifa->ifa_dev)
130		in_dev_put(ifa->ifa_dev);
131	kfree(ifa);
132}
133
134static inline void inet_free_ifa(struct in_ifaddr *ifa)
135{
136	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
137}
138
139void in_dev_finish_destroy(struct in_device *idev)
140{
141	struct net_device *dev = idev->dev;
142
143	BUG_TRAP(!idev->ifa_list);
144	BUG_TRAP(!idev->mc_list);
145#ifdef NET_REFCNT_DEBUG
146	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
147	       idev, dev ? dev->name : "NIL");
148#endif
149	dev_put(dev);
150	if (!idev->dead)
151		printk("Freeing alive in_device %p\n", idev);
152	else {
153		kfree(idev);
154	}
155}
156
157static struct in_device *inetdev_init(struct net_device *dev)
158{
159	struct in_device *in_dev;
160
161	ASSERT_RTNL();
162
163	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
164	if (!in_dev)
165		goto out;
166	INIT_RCU_HEAD(&in_dev->rcu_head);
167	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
168	in_dev->cnf.sysctl = NULL;
169	in_dev->dev = dev;
170	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
171		goto out_kfree;
172	/* Reference in_dev->dev */
173	dev_hold(dev);
174	/* Account for reference dev->ip_ptr (below) */
175	in_dev_hold(in_dev);
176
177	devinet_sysctl_register(in_dev);
178	ip_mc_init_dev(in_dev);
179	if (dev->flags & IFF_UP)
180		ip_mc_up(in_dev);
181
182	/* we can receive as soon as ip_ptr is set -- do this last */
183	rcu_assign_pointer(dev->ip_ptr, in_dev);
184out:
185	return in_dev;
186out_kfree:
187	kfree(in_dev);
188	in_dev = NULL;
189	goto out;
190}
191
192static void in_dev_rcu_put(struct rcu_head *head)
193{
194	struct in_device *idev = container_of(head, struct in_device, rcu_head);
195	in_dev_put(idev);
196}
197
198static void inetdev_destroy(struct in_device *in_dev)
199{
200	struct in_ifaddr *ifa;
201	struct net_device *dev;
202
203	ASSERT_RTNL();
204
205	dev = in_dev->dev;
206
207	in_dev->dead = 1;
208
209	ip_mc_destroy_dev(in_dev);
210
211	while ((ifa = in_dev->ifa_list) != NULL) {
212		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
213		inet_free_ifa(ifa);
214	}
215
216	dev->ip_ptr = NULL;
217
218	devinet_sysctl_unregister(in_dev);
219	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
220	arp_ifdown(dev);
221
222	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
223}
224
225int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
226{
227	rcu_read_lock();
228	for_primary_ifa(in_dev) {
229		if (inet_ifa_match(a, ifa)) {
230			if (!b || inet_ifa_match(b, ifa)) {
231				rcu_read_unlock();
232				return 1;
233			}
234		}
235	} endfor_ifa(in_dev);
236	rcu_read_unlock();
237	return 0;
238}
239
240static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
241			 int destroy, struct nlmsghdr *nlh, u32 pid)
242{
243	struct in_ifaddr *promote = NULL;
244	struct in_ifaddr *ifa, *ifa1 = *ifap;
245	struct in_ifaddr *last_prim = in_dev->ifa_list;
246	struct in_ifaddr *prev_prom = NULL;
247	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
248
249	ASSERT_RTNL();
250
251	/* 1. Deleting primary ifaddr forces deletion all secondaries
252	 * unless alias promotion is set
253	 **/
254
255	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
256		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
257
258		while ((ifa = *ifap1) != NULL) {
259			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
260			    ifa1->ifa_scope <= ifa->ifa_scope)
261				last_prim = ifa;
262
263			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
264			    ifa1->ifa_mask != ifa->ifa_mask ||
265			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
266				ifap1 = &ifa->ifa_next;
267				prev_prom = ifa;
268				continue;
269			}
270
271			if (!do_promote) {
272				*ifap1 = ifa->ifa_next;
273
274				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
275				blocking_notifier_call_chain(&inetaddr_chain,
276						NETDEV_DOWN, ifa);
277				inet_free_ifa(ifa);
278			} else {
279				promote = ifa;
280				break;
281			}
282		}
283	}
284
285	/* 2. Unlink it */
286
287	*ifap = ifa1->ifa_next;
288
289	/* 3. Announce address deletion */
290
291	/* Send message first, then call notifier.
292	   At first sight, FIB update triggered by notifier
293	   will refer to already deleted ifaddr, that could confuse
294	   netlink listeners. It is not true: look, gated sees
295	   that route deleted and if it still thinks that ifaddr
296	   is valid, it will try to restore deleted routes... Grr.
297	   So that, this order is correct.
298	 */
299	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
300	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
301
302	if (promote) {
303
304		if (prev_prom) {
305			prev_prom->ifa_next = promote->ifa_next;
306			promote->ifa_next = last_prim->ifa_next;
307			last_prim->ifa_next = promote;
308		}
309
310		promote->ifa_flags &= ~IFA_F_SECONDARY;
311		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
312		blocking_notifier_call_chain(&inetaddr_chain,
313				NETDEV_UP, promote);
314		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
315			if (ifa1->ifa_mask != ifa->ifa_mask ||
316			    !inet_ifa_match(ifa1->ifa_address, ifa))
317					continue;
318			fib_add_ifaddr(ifa);
319		}
320
321	}
322	if (destroy)
323		inet_free_ifa(ifa1);
324}
325
326static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327			 int destroy)
328{
329	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
330}
331
332static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
333			     u32 pid)
334{
335	struct in_device *in_dev = ifa->ifa_dev;
336	struct in_ifaddr *ifa1, **ifap, **last_primary;
337
338	ASSERT_RTNL();
339
340	if (!ifa->ifa_local) {
341		inet_free_ifa(ifa);
342		return 0;
343	}
344
345	ifa->ifa_flags &= ~IFA_F_SECONDARY;
346	last_primary = &in_dev->ifa_list;
347
348	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
349	     ifap = &ifa1->ifa_next) {
350		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
351		    ifa->ifa_scope <= ifa1->ifa_scope)
352			last_primary = &ifa1->ifa_next;
353		if (ifa1->ifa_mask == ifa->ifa_mask &&
354		    inet_ifa_match(ifa1->ifa_address, ifa)) {
355			if (ifa1->ifa_local == ifa->ifa_local) {
356				inet_free_ifa(ifa);
357				return -EEXIST;
358			}
359			if (ifa1->ifa_scope != ifa->ifa_scope) {
360				inet_free_ifa(ifa);
361				return -EINVAL;
362			}
363			ifa->ifa_flags |= IFA_F_SECONDARY;
364		}
365	}
366
367	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
368		net_srandom(ifa->ifa_local);
369		ifap = last_primary;
370	}
371
372	ifa->ifa_next = *ifap;
373	*ifap = ifa;
374
375	/* Send message first, then call notifier.
376	   Notifier will trigger FIB update, so that
377	   listeners of netlink will know about new ifaddr */
378	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
379	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
380
381	return 0;
382}
383
384static int inet_insert_ifa(struct in_ifaddr *ifa)
385{
386	return __inet_insert_ifa(ifa, NULL, 0);
387}
388
389static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
390{
391	struct in_device *in_dev = __in_dev_get_rtnl(dev);
392
393	ASSERT_RTNL();
394
395	if (!in_dev) {
396		inet_free_ifa(ifa);
397		return -ENOBUFS;
398	}
399	ipv4_devconf_setall(in_dev);
400	if (ifa->ifa_dev != in_dev) {
401		BUG_TRAP(!ifa->ifa_dev);
402		in_dev_hold(in_dev);
403		ifa->ifa_dev = in_dev;
404	}
405	if (LOOPBACK(ifa->ifa_local))
406		ifa->ifa_scope = RT_SCOPE_HOST;
407	return inet_insert_ifa(ifa);
408}
409
410struct in_device *inetdev_by_index(int ifindex)
411{
412	struct net_device *dev;
413	struct in_device *in_dev = NULL;
414	read_lock(&dev_base_lock);
415	dev = __dev_get_by_index(&init_net, ifindex);
416	if (dev)
417		in_dev = in_dev_get(dev);
418	read_unlock(&dev_base_lock);
419	return in_dev;
420}
421
422/* Called only from RTNL semaphored context. No locks. */
423
424struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
425				    __be32 mask)
426{
427	ASSERT_RTNL();
428
429	for_primary_ifa(in_dev) {
430		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
431			return ifa;
432	} endfor_ifa(in_dev);
433	return NULL;
434}
435
436static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
437{
438	struct net *net = skb->sk->sk_net;
439	struct nlattr *tb[IFA_MAX+1];
440	struct in_device *in_dev;
441	struct ifaddrmsg *ifm;
442	struct in_ifaddr *ifa, **ifap;
443	int err = -EINVAL;
444
445	ASSERT_RTNL();
446
447	if (net != &init_net)
448		return -EINVAL;
449
450	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
451	if (err < 0)
452		goto errout;
453
454	ifm = nlmsg_data(nlh);
455	in_dev = inetdev_by_index(ifm->ifa_index);
456	if (in_dev == NULL) {
457		err = -ENODEV;
458		goto errout;
459	}
460
461	__in_dev_put(in_dev);
462
463	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
464	     ifap = &ifa->ifa_next) {
465		if (tb[IFA_LOCAL] &&
466		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
467			continue;
468
469		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
470			continue;
471
472		if (tb[IFA_ADDRESS] &&
473		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
474		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
475			continue;
476
477		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
478		return 0;
479	}
480
481	err = -EADDRNOTAVAIL;
482errout:
483	return err;
484}
485
486static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
487{
488	struct nlattr *tb[IFA_MAX+1];
489	struct in_ifaddr *ifa;
490	struct ifaddrmsg *ifm;
491	struct net_device *dev;
492	struct in_device *in_dev;
493	int err = -EINVAL;
494
495	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
496	if (err < 0)
497		goto errout;
498
499	ifm = nlmsg_data(nlh);
500	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
501		err = -EINVAL;
502		goto errout;
503	}
504
505	dev = __dev_get_by_index(&init_net, ifm->ifa_index);
506	if (dev == NULL) {
507		err = -ENODEV;
508		goto errout;
509	}
510
511	in_dev = __in_dev_get_rtnl(dev);
512	if (in_dev == NULL) {
513		err = -ENOBUFS;
514		goto errout;
515	}
516
517	ifa = inet_alloc_ifa();
518	if (ifa == NULL) {
519		/*
520		 * A potential indev allocation can be left alive, it stays
521		 * assigned to its device and is destroy with it.
522		 */
523		err = -ENOBUFS;
524		goto errout;
525	}
526
527	ipv4_devconf_setall(in_dev);
528	in_dev_hold(in_dev);
529
530	if (tb[IFA_ADDRESS] == NULL)
531		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
532
533	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
534	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
535	ifa->ifa_flags = ifm->ifa_flags;
536	ifa->ifa_scope = ifm->ifa_scope;
537	ifa->ifa_dev = in_dev;
538
539	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
540	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
541
542	if (tb[IFA_BROADCAST])
543		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
544
545	if (tb[IFA_ANYCAST])
546		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
547
548	if (tb[IFA_LABEL])
549		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
550	else
551		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
552
553	return ifa;
554
555errout:
556	return ERR_PTR(err);
557}
558
559static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
560{
561	struct net *net = skb->sk->sk_net;
562	struct in_ifaddr *ifa;
563
564	ASSERT_RTNL();
565
566	if (net != &init_net)
567		return -EINVAL;
568
569	ifa = rtm_to_ifaddr(nlh);
570	if (IS_ERR(ifa))
571		return PTR_ERR(ifa);
572
573	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
574}
575
576/*
577 *	Determine a default network mask, based on the IP address.
578 */
579
580static __inline__ int inet_abc_len(__be32 addr)
581{
582	int rc = -1;	/* Something else, probably a multicast. */
583
584	if (ZERONET(addr))
585		rc = 0;
586	else {
587		__u32 haddr = ntohl(addr);
588
589		if (IN_CLASSA(haddr))
590			rc = 8;
591		else if (IN_CLASSB(haddr))
592			rc = 16;
593		else if (IN_CLASSC(haddr))
594			rc = 24;
595	}
596
597	return rc;
598}
599
600
601int devinet_ioctl(unsigned int cmd, void __user *arg)
602{
603	struct ifreq ifr;
604	struct sockaddr_in sin_orig;
605	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
606	struct in_device *in_dev;
607	struct in_ifaddr **ifap = NULL;
608	struct in_ifaddr *ifa = NULL;
609	struct net_device *dev;
610	char *colon;
611	int ret = -EFAULT;
612	int tryaddrmatch = 0;
613
614	/*
615	 *	Fetch the caller's info block into kernel space
616	 */
617
618	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
619		goto out;
620	ifr.ifr_name[IFNAMSIZ - 1] = 0;
621
622	/* save original address for comparison */
623	memcpy(&sin_orig, sin, sizeof(*sin));
624
625	colon = strchr(ifr.ifr_name, ':');
626	if (colon)
627		*colon = 0;
628
629#ifdef CONFIG_KMOD
630	dev_load(&init_net, ifr.ifr_name);
631#endif
632
633	switch (cmd) {
634	case SIOCGIFADDR:	/* Get interface address */
635	case SIOCGIFBRDADDR:	/* Get the broadcast address */
636	case SIOCGIFDSTADDR:	/* Get the destination address */
637	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
638		/* Note that these ioctls will not sleep,
639		   so that we do not impose a lock.
640		   One day we will be forced to put shlock here (I mean SMP)
641		 */
642		tryaddrmatch = (sin_orig.sin_family == AF_INET);
643		memset(sin, 0, sizeof(*sin));
644		sin->sin_family = AF_INET;
645		break;
646
647	case SIOCSIFFLAGS:
648		ret = -EACCES;
649		if (!capable(CAP_NET_ADMIN))
650			goto out;
651		break;
652	case SIOCSIFADDR:	/* Set interface address (and family) */
653	case SIOCSIFBRDADDR:	/* Set the broadcast address */
654	case SIOCSIFDSTADDR:	/* Set the destination address */
655	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
656		ret = -EACCES;
657		if (!capable(CAP_NET_ADMIN))
658			goto out;
659		ret = -EINVAL;
660		if (sin->sin_family != AF_INET)
661			goto out;
662		break;
663	default:
664		ret = -EINVAL;
665		goto out;
666	}
667
668	rtnl_lock();
669
670	ret = -ENODEV;
671	if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
672		goto done;
673
674	if (colon)
675		*colon = ':';
676
677	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
678		if (tryaddrmatch) {
679			/* Matthias Andree */
680			/* compare label and address (4.4BSD style) */
681			/* note: we only do this for a limited set of ioctls
682			   and only if the original address family was AF_INET.
683			   This is checked above. */
684			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
685			     ifap = &ifa->ifa_next) {
686				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
687				    sin_orig.sin_addr.s_addr ==
688							ifa->ifa_address) {
689					break; /* found */
690				}
691			}
692		}
693		/* we didn't get a match, maybe the application is
694		   4.3BSD-style and passed in junk so we fall back to
695		   comparing just the label */
696		if (!ifa) {
697			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
698			     ifap = &ifa->ifa_next)
699				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
700					break;
701		}
702	}
703
704	ret = -EADDRNOTAVAIL;
705	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
706		goto done;
707
708	switch (cmd) {
709	case SIOCGIFADDR:	/* Get interface address */
710		sin->sin_addr.s_addr = ifa->ifa_local;
711		goto rarok;
712
713	case SIOCGIFBRDADDR:	/* Get the broadcast address */
714		sin->sin_addr.s_addr = ifa->ifa_broadcast;
715		goto rarok;
716
717	case SIOCGIFDSTADDR:	/* Get the destination address */
718		sin->sin_addr.s_addr = ifa->ifa_address;
719		goto rarok;
720
721	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
722		sin->sin_addr.s_addr = ifa->ifa_mask;
723		goto rarok;
724
725	case SIOCSIFFLAGS:
726		if (colon) {
727			ret = -EADDRNOTAVAIL;
728			if (!ifa)
729				break;
730			ret = 0;
731			if (!(ifr.ifr_flags & IFF_UP))
732				inet_del_ifa(in_dev, ifap, 1);
733			break;
734		}
735		ret = dev_change_flags(dev, ifr.ifr_flags);
736		break;
737
738	case SIOCSIFADDR:	/* Set interface address (and family) */
739		ret = -EINVAL;
740		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
741			break;
742
743		if (!ifa) {
744			ret = -ENOBUFS;
745			if ((ifa = inet_alloc_ifa()) == NULL)
746				break;
747			if (colon)
748				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
749			else
750				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
751		} else {
752			ret = 0;
753			if (ifa->ifa_local == sin->sin_addr.s_addr)
754				break;
755			inet_del_ifa(in_dev, ifap, 0);
756			ifa->ifa_broadcast = 0;
757			ifa->ifa_anycast = 0;
758		}
759
760		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
761
762		if (!(dev->flags & IFF_POINTOPOINT)) {
763			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
764			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
765			if ((dev->flags & IFF_BROADCAST) &&
766			    ifa->ifa_prefixlen < 31)
767				ifa->ifa_broadcast = ifa->ifa_address |
768						     ~ifa->ifa_mask;
769		} else {
770			ifa->ifa_prefixlen = 32;
771			ifa->ifa_mask = inet_make_mask(32);
772		}
773		ret = inet_set_ifa(dev, ifa);
774		break;
775
776	case SIOCSIFBRDADDR:	/* Set the broadcast address */
777		ret = 0;
778		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
779			inet_del_ifa(in_dev, ifap, 0);
780			ifa->ifa_broadcast = sin->sin_addr.s_addr;
781			inet_insert_ifa(ifa);
782		}
783		break;
784
785	case SIOCSIFDSTADDR:	/* Set the destination address */
786		ret = 0;
787		if (ifa->ifa_address == sin->sin_addr.s_addr)
788			break;
789		ret = -EINVAL;
790		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
791			break;
792		ret = 0;
793		inet_del_ifa(in_dev, ifap, 0);
794		ifa->ifa_address = sin->sin_addr.s_addr;
795		inet_insert_ifa(ifa);
796		break;
797
798	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
799
800		/*
801		 *	The mask we set must be legal.
802		 */
803		ret = -EINVAL;
804		if (bad_mask(sin->sin_addr.s_addr, 0))
805			break;
806		ret = 0;
807		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
808			__be32 old_mask = ifa->ifa_mask;
809			inet_del_ifa(in_dev, ifap, 0);
810			ifa->ifa_mask = sin->sin_addr.s_addr;
811			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
812
813			/* See if current broadcast address matches
814			 * with current netmask, then recalculate
815			 * the broadcast address. Otherwise it's a
816			 * funny address, so don't touch it since
817			 * the user seems to know what (s)he's doing...
818			 */
819			if ((dev->flags & IFF_BROADCAST) &&
820			    (ifa->ifa_prefixlen < 31) &&
821			    (ifa->ifa_broadcast ==
822			     (ifa->ifa_local|~old_mask))) {
823				ifa->ifa_broadcast = (ifa->ifa_local |
824						      ~sin->sin_addr.s_addr);
825			}
826			inet_insert_ifa(ifa);
827		}
828		break;
829	}
830done:
831	rtnl_unlock();
832out:
833	return ret;
834rarok:
835	rtnl_unlock();
836	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
837	goto out;
838}
839
840static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
841{
842	struct in_device *in_dev = __in_dev_get_rtnl(dev);
843	struct in_ifaddr *ifa;
844	struct ifreq ifr;
845	int done = 0;
846
847	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
848		goto out;
849
850	for (; ifa; ifa = ifa->ifa_next) {
851		if (!buf) {
852			done += sizeof(ifr);
853			continue;
854		}
855		if (len < (int) sizeof(ifr))
856			break;
857		memset(&ifr, 0, sizeof(struct ifreq));
858		if (ifa->ifa_label)
859			strcpy(ifr.ifr_name, ifa->ifa_label);
860		else
861			strcpy(ifr.ifr_name, dev->name);
862
863		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
864		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
865								ifa->ifa_local;
866
867		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
868			done = -EFAULT;
869			break;
870		}
871		buf  += sizeof(struct ifreq);
872		len  -= sizeof(struct ifreq);
873		done += sizeof(struct ifreq);
874	}
875out:
876	return done;
877}
878
879__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
880{
881	__be32 addr = 0;
882	struct in_device *in_dev;
883
884	rcu_read_lock();
885	in_dev = __in_dev_get_rcu(dev);
886	if (!in_dev)
887		goto no_in_dev;
888
889	for_primary_ifa(in_dev) {
890		if (ifa->ifa_scope > scope)
891			continue;
892		if (!dst || inet_ifa_match(dst, ifa)) {
893			addr = ifa->ifa_local;
894			break;
895		}
896		if (!addr)
897			addr = ifa->ifa_local;
898	} endfor_ifa(in_dev);
899no_in_dev:
900	rcu_read_unlock();
901
902	if (addr)
903		goto out;
904
905	/* Not loopback addresses on loopback should be preferred
906	   in this case. It is importnat that lo is the first interface
907	   in dev_base list.
908	 */
909	read_lock(&dev_base_lock);
910	rcu_read_lock();
911	for_each_netdev(&init_net, dev) {
912		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
913			continue;
914
915		for_primary_ifa(in_dev) {
916			if (ifa->ifa_scope != RT_SCOPE_LINK &&
917			    ifa->ifa_scope <= scope) {
918				addr = ifa->ifa_local;
919				goto out_unlock_both;
920			}
921		} endfor_ifa(in_dev);
922	}
923out_unlock_both:
924	read_unlock(&dev_base_lock);
925	rcu_read_unlock();
926out:
927	return addr;
928}
929
930static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
931			      __be32 local, int scope)
932{
933	int same = 0;
934	__be32 addr = 0;
935
936	for_ifa(in_dev) {
937		if (!addr &&
938		    (local == ifa->ifa_local || !local) &&
939		    ifa->ifa_scope <= scope) {
940			addr = ifa->ifa_local;
941			if (same)
942				break;
943		}
944		if (!same) {
945			same = (!local || inet_ifa_match(local, ifa)) &&
946				(!dst || inet_ifa_match(dst, ifa));
947			if (same && addr) {
948				if (local || !dst)
949					break;
950				/* Is the selected addr into dst subnet? */
951				if (inet_ifa_match(addr, ifa))
952					break;
953				/* No, then can we use new local src? */
954				if (ifa->ifa_scope <= scope) {
955					addr = ifa->ifa_local;
956					break;
957				}
958				/* search for large dst subnet for addr */
959				same = 0;
960			}
961		}
962	} endfor_ifa(in_dev);
963
964	return same? addr : 0;
965}
966
967/*
968 * Confirm that local IP address exists using wildcards:
969 * - dev: only on this interface, 0=any interface
970 * - dst: only in the same subnet as dst, 0=any dst
971 * - local: address, 0=autoselect the local address
972 * - scope: maximum allowed scope value for the local address
973 */
974__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
975{
976	__be32 addr = 0;
977	struct in_device *in_dev;
978
979	if (dev) {
980		rcu_read_lock();
981		if ((in_dev = __in_dev_get_rcu(dev)))
982			addr = confirm_addr_indev(in_dev, dst, local, scope);
983		rcu_read_unlock();
984
985		return addr;
986	}
987
988	read_lock(&dev_base_lock);
989	rcu_read_lock();
990	for_each_netdev(&init_net, dev) {
991		if ((in_dev = __in_dev_get_rcu(dev))) {
992			addr = confirm_addr_indev(in_dev, dst, local, scope);
993			if (addr)
994				break;
995		}
996	}
997	rcu_read_unlock();
998	read_unlock(&dev_base_lock);
999
1000	return addr;
1001}
1002
1003/*
1004 *	Device notifier
1005 */
1006
1007int register_inetaddr_notifier(struct notifier_block *nb)
1008{
1009	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1010}
1011
1012int unregister_inetaddr_notifier(struct notifier_block *nb)
1013{
1014	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1015}
1016
1017/* Rename ifa_labels for a device name change. Make some effort to preserve existing
1018 * alias numbering and to create unique labels if possible.
1019*/
1020static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1021{
1022	struct in_ifaddr *ifa;
1023	int named = 0;
1024
1025	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1026		char old[IFNAMSIZ], *dot;
1027
1028		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1029		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1030		if (named++ == 0)
1031			continue;
1032		dot = strchr(old, ':');
1033		if (dot == NULL) {
1034			sprintf(old, ":%d", named);
1035			dot = old;
1036		}
1037		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1038			strcat(ifa->ifa_label, dot);
1039		} else {
1040			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1041		}
1042	}
1043}
1044
1045/* Called only under RTNL semaphore */
1046
1047static int inetdev_event(struct notifier_block *this, unsigned long event,
1048			 void *ptr)
1049{
1050	struct net_device *dev = ptr;
1051	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1052
1053	if (dev->nd_net != &init_net)
1054		return NOTIFY_DONE;
1055
1056	ASSERT_RTNL();
1057
1058	if (!in_dev) {
1059		if (event == NETDEV_REGISTER) {
1060			in_dev = inetdev_init(dev);
1061			if (!in_dev)
1062				return notifier_from_errno(-ENOMEM);
1063			if (dev->flags & IFF_LOOPBACK) {
1064				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1065				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1066			}
1067		}
1068		goto out;
1069	}
1070
1071	switch (event) {
1072	case NETDEV_REGISTER:
1073		printk(KERN_DEBUG "inetdev_event: bug\n");
1074		dev->ip_ptr = NULL;
1075		break;
1076	case NETDEV_UP:
1077		if (dev->mtu < 68)
1078			break;
1079		if (dev->flags & IFF_LOOPBACK) {
1080			struct in_ifaddr *ifa;
1081			if ((ifa = inet_alloc_ifa()) != NULL) {
1082				ifa->ifa_local =
1083				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1084				ifa->ifa_prefixlen = 8;
1085				ifa->ifa_mask = inet_make_mask(8);
1086				in_dev_hold(in_dev);
1087				ifa->ifa_dev = in_dev;
1088				ifa->ifa_scope = RT_SCOPE_HOST;
1089				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1090				inet_insert_ifa(ifa);
1091			}
1092		}
1093		ip_mc_up(in_dev);
1094		break;
1095	case NETDEV_DOWN:
1096		ip_mc_down(in_dev);
1097		break;
1098	case NETDEV_CHANGEMTU:
1099		if (dev->mtu >= 68)
1100			break;
1101		/* MTU falled under 68, disable IP */
1102	case NETDEV_UNREGISTER:
1103		inetdev_destroy(in_dev);
1104		break;
1105	case NETDEV_CHANGENAME:
1106		/* Do not notify about label change, this event is
1107		 * not interesting to applications using netlink.
1108		 */
1109		inetdev_changename(dev, in_dev);
1110
1111		devinet_sysctl_unregister(in_dev);
1112		devinet_sysctl_register(in_dev);
1113		break;
1114	}
1115out:
1116	return NOTIFY_DONE;
1117}
1118
1119static struct notifier_block ip_netdev_notifier = {
1120	.notifier_call =inetdev_event,
1121};
1122
1123static inline size_t inet_nlmsg_size(void)
1124{
1125	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1126	       + nla_total_size(4) /* IFA_ADDRESS */
1127	       + nla_total_size(4) /* IFA_LOCAL */
1128	       + nla_total_size(4) /* IFA_BROADCAST */
1129	       + nla_total_size(4) /* IFA_ANYCAST */
1130	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1131}
1132
1133static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1134			    u32 pid, u32 seq, int event, unsigned int flags)
1135{
1136	struct ifaddrmsg *ifm;
1137	struct nlmsghdr  *nlh;
1138
1139	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1140	if (nlh == NULL)
1141		return -EMSGSIZE;
1142
1143	ifm = nlmsg_data(nlh);
1144	ifm->ifa_family = AF_INET;
1145	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1146	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1147	ifm->ifa_scope = ifa->ifa_scope;
1148	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1149
1150	if (ifa->ifa_address)
1151		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1152
1153	if (ifa->ifa_local)
1154		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1155
1156	if (ifa->ifa_broadcast)
1157		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1158
1159	if (ifa->ifa_anycast)
1160		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1161
1162	if (ifa->ifa_label[0])
1163		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1164
1165	return nlmsg_end(skb, nlh);
1166
1167nla_put_failure:
1168	nlmsg_cancel(skb, nlh);
1169	return -EMSGSIZE;
1170}
1171
/*
 * Dump callback registered for PF_INET/RTM_GETADDR: adds one RTM_NEWADDR
 * message (flagged NLM_F_MULTI) to @skb for each IPv4 address found on the
 * devices of init_net.
 *
 * The dump is resumable.  On entry cb->args[0] is the index of the device
 * to resume at and cb->args[1] the index of the address within that device;
 * both are written back before returning so that a later call continues
 * from where this one stopped.
 *
 * Returns skb->len, or 0 without adding anything when the requesting
 * socket does not belong to init_net.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = skb->sk->sk_net;
	int idx, ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	int s_ip_idx, s_idx = cb->args[0];

	/* Only the initial network namespace is handled here. */
	if (net != &init_net)
		return 0;

	s_ip_idx = ip_idx = cb->args[1];
	idx = 0;
	for_each_netdev(&init_net, dev) {
		/* Devices before the resume point were already dumped. */
		if (idx < s_idx)
			goto cont;
		/*
		 * Past the resume device: the saved address index no longer
		 * applies, so start from the first address.
		 */
		if (idx > s_idx)
			s_ip_idx = 0;
		/* Device without IPv4 state: nothing to report. */
		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
			goto cont;

		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
		     ifa = ifa->ifa_next, ip_idx++) {
			/* Addresses before the resume point were already dumped. */
			if (ip_idx < s_ip_idx)
				continue;
			/*
			 * Stop at the first message that could not be added
			 * (presumably because skb is full); idx and ip_idx
			 * then identify the entry to retry on the next call.
			 */
			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
				goto done;
		}
cont:
		idx++;
	}

done:
	/* Save the resume position for the next invocation. */
	cb->args[0] = idx;
	cb->args[1] = ip_idx;

	return skb->len;
}
1213
1214static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1215		      u32 pid)
1216{
1217	struct sk_buff *skb;
1218	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1219	int err = -ENOBUFS;
1220
1221	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1222	if (skb == NULL)
1223		goto errout;
1224
1225	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1226	if (err < 0) {
1227		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1228		WARN_ON(err == -EMSGSIZE);
1229		kfree_skb(skb);
1230		goto errout;
1231	}
1232	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1233errout:
1234	if (err < 0)
1235		rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
1236}
1237
1238#ifdef CONFIG_SYSCTL
1239
1240static void devinet_copy_dflt_conf(struct net *net, int i)
1241{
1242	struct net_device *dev;
1243
1244	read_lock(&dev_base_lock);
1245	for_each_netdev(net, dev) {
1246		struct in_device *in_dev;
1247		rcu_read_lock();
1248		in_dev = __in_dev_get_rcu(dev);
1249		if (in_dev && !test_bit(i, in_dev->cnf.state))
1250			in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1251		rcu_read_unlock();
1252	}
1253	read_unlock(&dev_base_lock);
1254}
1255
1256static void inet_forward_change(struct net *net)
1257{
1258	struct net_device *dev;
1259	int on = IPV4_DEVCONF_ALL(FORWARDING);
1260
1261	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1262	IPV4_DEVCONF_DFLT(FORWARDING) = on;
1263
1264	read_lock(&dev_base_lock);
1265	for_each_netdev(net, dev) {
1266		struct in_device *in_dev;
1267		rcu_read_lock();
1268		in_dev = __in_dev_get_rcu(dev);
1269		if (in_dev)
1270			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1271		rcu_read_unlock();
1272	}
1273	read_unlock(&dev_base_lock);
1274
1275	rt_cache_flush(0);
1276}
1277
1278static int devinet_conf_proc(ctl_table *ctl, int write,
1279			     struct file* filp, void __user *buffer,
1280			     size_t *lenp, loff_t *ppos)
1281{
1282	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1283
1284	if (write) {
1285		struct ipv4_devconf *cnf = ctl->extra1;
1286		struct net *net = ctl->extra2;
1287		int i = (int *)ctl->data - cnf->data;
1288
1289		set_bit(i, cnf->state);
1290
1291		if (cnf == &ipv4_devconf_dflt)
1292			devinet_copy_dflt_conf(net, i);
1293	}
1294
1295	return ret;
1296}
1297
1298static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1299			       void __user *oldval, size_t __user *oldlenp,
1300			       void __user *newval, size_t newlen)
1301{
1302	struct ipv4_devconf *cnf;
1303	struct net *net;
1304	int *valp = table->data;
1305	int new;
1306	int i;
1307
1308	if (!newval || !newlen)
1309		return 0;
1310
1311	if (newlen != sizeof(int))
1312		return -EINVAL;
1313
1314	if (get_user(new, (int __user *)newval))
1315		return -EFAULT;
1316
1317	if (new == *valp)
1318		return 0;
1319
1320	if (oldval && oldlenp) {
1321		size_t len;
1322
1323		if (get_user(len, oldlenp))
1324			return -EFAULT;
1325
1326		if (len) {
1327			if (len > table->maxlen)
1328				len = table->maxlen;
1329			if (copy_to_user(oldval, valp, len))
1330				return -EFAULT;
1331			if (put_user(len, oldlenp))
1332				return -EFAULT;
1333		}
1334	}
1335
1336	*valp = new;
1337
1338	cnf = table->extra1;
1339	net = table->extra2;
1340	i = (int *)table->data - cnf->data;
1341
1342	set_bit(i, cnf->state);
1343
1344	if (cnf == &ipv4_devconf_dflt)
1345		devinet_copy_dflt_conf(net, i);
1346
1347	return 1;
1348}
1349
1350static int devinet_sysctl_forward(ctl_table *ctl, int write,
1351				  struct file* filp, void __user *buffer,
1352				  size_t *lenp, loff_t *ppos)
1353{
1354	int *valp = ctl->data;
1355	int val = *valp;
1356	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1357
1358	if (write && *valp != val) {
1359		struct net *net = ctl->extra2;
1360
1361		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1362			inet_forward_change(net);
1363		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
1364			rt_cache_flush(0);
1365	}
1366
1367	return ret;
1368}
1369
1370int ipv4_doint_and_flush(ctl_table *ctl, int write,
1371			 struct file* filp, void __user *buffer,
1372			 size_t *lenp, loff_t *ppos)
1373{
1374	int *valp = ctl->data;
1375	int val = *valp;
1376	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1377
1378	if (write && *valp != val)
1379		rt_cache_flush(0);
1380
1381	return ret;
1382}
1383
1384int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1385				  void __user *oldval, size_t __user *oldlenp,
1386				  void __user *newval, size_t newlen)
1387{
1388	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1389				      newval, newlen);
1390
1391	if (ret == 1)
1392		rt_cache_flush(0);
1393
1394	return ret;
1395}
1396
1397
/*
 * Build one ctl_table initializer for the devconf value
 * NET_IPV4_CONF_<attr>:
 *   name   - file name used for .procname
 *   mval   - access mode
 *   proc   - .proc_handler
 *   sysctl - .strategy
 * .data points at slot (NET_IPV4_CONF_<attr> - 1) of the global
 * ipv4_devconf and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
		.extra1		= &ipv4_devconf, \
	}

/* Entry with mode 0644 using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Entry with mode 0444 using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Entry with mode 0644 and caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Entry with mode 0644 whose handlers call rt_cache_flush() on a change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1425
/*
 * Template for a per-configuration sysctl table.  It is not registered
 * itself: __devinet_sysctl_register() duplicates it and rebases the .data
 * and .extra1 pointers of the copy onto the target ipv4_devconf.
 *
 * In the template every entry refers to the global ipv4_devconf.  Array
 * slots without an initializer below are left zeroed.
 */
static struct devinet_sysctl_table {
	/* Handle returned by register_net_sysctl_table() for the copy. */
	struct ctl_table_header *sysctl_header;
	/* One entry per devconf value. */
	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
	/* Private copy of the directory name, owned by the table. */
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward,
					     devinet_conf_sysctl),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),

		/* Changing any of these also calls rt_cache_flush(). */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1462
1463static int __devinet_sysctl_register(struct net *net, char *dev_name,
1464		int ctl_name, struct ipv4_devconf *p)
1465{
1466	int i;
1467	struct devinet_sysctl_table *t;
1468
1469#define DEVINET_CTL_PATH_DEV	3
1470
1471	struct ctl_path devinet_ctl_path[] = {
1472		{ .procname = "net", .ctl_name = CTL_NET, },
1473		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1474		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1475		{ /* to be set */ },
1476		{ },
1477	};
1478
1479	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1480	if (!t)
1481		goto out;
1482
1483	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1484		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1485		t->devinet_vars[i].extra1 = p;
1486		t->devinet_vars[i].extra2 = net;
1487	}
1488
1489	/*
1490	 * Make a copy of dev_name, because '.procname' is regarded as const
1491	 * by sysctl and we wouldn't want anyone to change it under our feet
1492	 * (see SIOCSIFNAME).
1493	 */
1494	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1495	if (!t->dev_name)
1496		goto free;
1497
1498	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1499	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1500
1501	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1502			t->devinet_vars);
1503	if (!t->sysctl_header)
1504		goto free_procname;
1505
1506	p->sysctl = t;
1507	return 0;
1508
1509free_procname:
1510	kfree(t->dev_name);
1511free:
1512	kfree(t);
1513out:
1514	return -ENOBUFS;
1515}
1516
1517static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1518{
1519	struct devinet_sysctl_table *t = cnf->sysctl;
1520
1521	if (t == NULL)
1522		return;
1523
1524	cnf->sysctl = NULL;
1525	unregister_sysctl_table(t->sysctl_header);
1526	kfree(t->dev_name);
1527	kfree(t);
1528}
1529
1530static void devinet_sysctl_register(struct in_device *idev)
1531{
1532	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1533			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1534	__devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1535			idev->dev->ifindex, &idev->cnf);
1536}
1537
1538static void devinet_sysctl_unregister(struct in_device *idev)
1539{
1540	__devinet_sysctl_unregister(&idev->cnf);
1541	neigh_sysctl_unregister(idev->arp_parms);
1542}
1543#endif
1544
1545static struct ctl_table ctl_forward_entry[] = {
1546	{
1547		.ctl_name	= NET_IPV4_FORWARD,
1548		.procname	= "ip_forward",
1549		.data		= &ipv4_devconf.data[
1550					NET_IPV4_CONF_FORWARDING - 1],
1551		.maxlen		= sizeof(int),
1552		.mode		= 0644,
1553		.proc_handler	= devinet_sysctl_forward,
1554		.strategy	= devinet_conf_sysctl,
1555		.extra1		= &ipv4_devconf,
1556		.extra2		= &init_net,
1557	},
1558	{ },
1559};
1560
1561static __net_initdata struct ctl_path net_ipv4_path[] = {
1562	{ .procname = "net", .ctl_name = CTL_NET, },
1563	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1564	{ },
1565};
1566
1567static __net_init int devinet_init_net(struct net *net)
1568{
1569	int err;
1570	struct ctl_table *tbl;
1571	struct ipv4_devconf *all, *dflt;
1572	struct ctl_table_header *forw_hdr;
1573
1574	err = -ENOMEM;
1575	all = &ipv4_devconf;
1576	dflt = &ipv4_devconf_dflt;
1577	tbl = ctl_forward_entry;
1578
1579	if (net != &init_net) {
1580		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1581		if (all == NULL)
1582			goto err_alloc_all;
1583
1584		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1585		if (dflt == NULL)
1586			goto err_alloc_dflt;
1587
1588		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1589		if (tbl == NULL)
1590			goto err_alloc_ctl;
1591
1592		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1593		tbl[0].extra1 = all;
1594		tbl[0].extra2 = net;
1595	}
1596
1597#ifdef CONFIG_SYSCTL
1598	err = __devinet_sysctl_register(net, "all",
1599			NET_PROTO_CONF_ALL, all);
1600	if (err < 0)
1601		goto err_reg_all;
1602
1603	err = __devinet_sysctl_register(net, "default",
1604			NET_PROTO_CONF_DEFAULT, dflt);
1605	if (err < 0)
1606		goto err_reg_dflt;
1607
1608	err = -ENOMEM;
1609	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1610	if (forw_hdr == NULL)
1611		goto err_reg_ctl;
1612#endif
1613
1614	net->ipv4.forw_hdr = forw_hdr;
1615	net->ipv4.devconf_all = all;
1616	net->ipv4.devconf_dflt = dflt;
1617	return 0;
1618
1619#ifdef CONFIG_SYSCTL
1620err_reg_ctl:
1621	__devinet_sysctl_unregister(dflt);
1622err_reg_dflt:
1623	__devinet_sysctl_unregister(all);
1624err_reg_all:
1625	if (tbl != ctl_forward_entry)
1626		kfree(tbl);
1627#endif
1628err_alloc_ctl:
1629	if (dflt != &ipv4_devconf_dflt)
1630		kfree(dflt);
1631err_alloc_dflt:
1632	if (all != &ipv4_devconf)
1633		kfree(all);
1634err_alloc_all:
1635	return err;
1636}
1637
1638static __net_exit void devinet_exit_net(struct net *net)
1639{
1640	struct ctl_table *tbl;
1641
1642	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643#ifdef CONFIG_SYSCTL
1644	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1645	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1646	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1647#endif
1648	kfree(tbl);
1649	kfree(net->ipv4.devconf_dflt);
1650	kfree(net->ipv4.devconf_all);
1651}
1652
/* Per-namespace init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1657
/*
 * Boot-time setup of the IPv4 device layer: registers the per-namespace
 * operations, the PF_INET gifconf handler, the netdevice notifier and the
 * rtnetlink handlers for address add/delete/dump.
 */
void __init devinet_init(void)
{
	/* NOTE(review): the return value is not checked here. */
	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* NEWADDR/DELADDR get a doit handler only, GETADDR a dump handler only. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1669
/* Symbols from this file exported for use outside of it. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1675