devinet.c revision 4b8aa9abee2e108b132dea7a7c4e81a167895354
1/*
2 *	NET3	IP device support routines.
3 *
4 *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5 *
6 *		This program is free software; you can redistribute it and/or
7 *		modify it under the terms of the GNU General Public License
8 *		as published by the Free Software Foundation; either version
9 *		2 of the License, or (at your option) any later version.
10 *
11 *	Derived from the IP parts of dev.c 1.0.19
12 * 		Authors:	Ross Biro
13 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15 *
16 *	Additional Authors:
17 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 *
20 *	Changes:
21 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22 *					lists.
23 *		Cyrus Durgin:		updated for kmod
24 *		Matthias Andree:	in devinet_ioctl, compare label and
25 *					address (4.4BSD alias style support),
26 *					fall back to comparing just the label
27 *					if no match found.
28 */
29
30
31#include <asm/uaccess.h>
32#include <asm/system.h>
33#include <linux/bitops.h>
34#include <linux/capability.h>
35#include <linux/module.h>
36#include <linux/types.h>
37#include <linux/kernel.h>
38#include <linux/string.h>
39#include <linux/mm.h>
40#include <linux/socket.h>
41#include <linux/sockios.h>
42#include <linux/in.h>
43#include <linux/errno.h>
44#include <linux/interrupt.h>
45#include <linux/if_addr.h>
46#include <linux/if_ether.h>
47#include <linux/inet.h>
48#include <linux/netdevice.h>
49#include <linux/etherdevice.h>
50#include <linux/skbuff.h>
51#include <linux/init.h>
52#include <linux/notifier.h>
53#include <linux/inetdevice.h>
54#include <linux/igmp.h>
55#ifdef CONFIG_SYSCTL
56#include <linux/sysctl.h>
57#endif
58#include <linux/kmod.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66
67static struct ipv4_devconf ipv4_devconf = {
68	.data = {
69		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73	},
74};
75
76static struct ipv4_devconf ipv4_devconf_dflt = {
77	.data = {
78		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83	},
84};
85
/*
 * Access devconf attribute @attr in the default devconf that @net points
 * to through net->ipv4.devconf_dflt.  @net is parenthesised so that any
 * pointer-valued expression (not just a plain identifier) can be passed.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90	[IFA_LOCAL]     	= { .type = NLA_U32 },
91	[IFA_ADDRESS]   	= { .type = NLA_U32 },
92	[IFA_BROADCAST] 	= { .type = NLA_U32 },
93	[IFA_ANYCAST]   	= { .type = NLA_U32 },
94	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95};
96
97static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98
99static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101			 int destroy);
102#ifdef CONFIG_SYSCTL
103static void devinet_sysctl_register(struct in_device *idev);
104static void devinet_sysctl_unregister(struct in_device *idev);
105#else
106static inline void devinet_sysctl_register(struct in_device *idev)
107{
108}
109static inline void devinet_sysctl_unregister(struct in_device *idev)
110{
111}
112#endif
113
114/* Locks all the inet devices. */
115
116static struct in_ifaddr *inet_alloc_ifa(void)
117{
118	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119
120	if (ifa) {
121		INIT_RCU_HEAD(&ifa->rcu_head);
122	}
123
124	return ifa;
125}
126
127static void inet_rcu_free_ifa(struct rcu_head *head)
128{
129	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130	if (ifa->ifa_dev)
131		in_dev_put(ifa->ifa_dev);
132	kfree(ifa);
133}
134
135static inline void inet_free_ifa(struct in_ifaddr *ifa)
136{
137	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
138}
139
140void in_dev_finish_destroy(struct in_device *idev)
141{
142	struct net_device *dev = idev->dev;
143
144	BUG_TRAP(!idev->ifa_list);
145	BUG_TRAP(!idev->mc_list);
146#ifdef NET_REFCNT_DEBUG
147	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148	       idev, dev ? dev->name : "NIL");
149#endif
150	dev_put(dev);
151	if (!idev->dead)
152		printk("Freeing alive in_device %p\n", idev);
153	else {
154		kfree(idev);
155	}
156}
157
158static struct in_device *inetdev_init(struct net_device *dev)
159{
160	struct in_device *in_dev;
161
162	ASSERT_RTNL();
163
164	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165	if (!in_dev)
166		goto out;
167	INIT_RCU_HEAD(&in_dev->rcu_head);
168	memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169			sizeof(in_dev->cnf));
170	in_dev->cnf.sysctl = NULL;
171	in_dev->dev = dev;
172	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173		goto out_kfree;
174	/* Reference in_dev->dev */
175	dev_hold(dev);
176	/* Account for reference dev->ip_ptr (below) */
177	in_dev_hold(in_dev);
178
179	devinet_sysctl_register(in_dev);
180	ip_mc_init_dev(in_dev);
181	if (dev->flags & IFF_UP)
182		ip_mc_up(in_dev);
183
184	/* we can receive as soon as ip_ptr is set -- do this last */
185	rcu_assign_pointer(dev->ip_ptr, in_dev);
186out:
187	return in_dev;
188out_kfree:
189	kfree(in_dev);
190	in_dev = NULL;
191	goto out;
192}
193
194static void in_dev_rcu_put(struct rcu_head *head)
195{
196	struct in_device *idev = container_of(head, struct in_device, rcu_head);
197	in_dev_put(idev);
198}
199
200static void inetdev_destroy(struct in_device *in_dev)
201{
202	struct in_ifaddr *ifa;
203	struct net_device *dev;
204
205	ASSERT_RTNL();
206
207	dev = in_dev->dev;
208
209	in_dev->dead = 1;
210
211	ip_mc_destroy_dev(in_dev);
212
213	while ((ifa = in_dev->ifa_list) != NULL) {
214		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215		inet_free_ifa(ifa);
216	}
217
218	dev->ip_ptr = NULL;
219
220	devinet_sysctl_unregister(in_dev);
221	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222	arp_ifdown(dev);
223
224	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225}
226
227int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228{
229	rcu_read_lock();
230	for_primary_ifa(in_dev) {
231		if (inet_ifa_match(a, ifa)) {
232			if (!b || inet_ifa_match(b, ifa)) {
233				rcu_read_unlock();
234				return 1;
235			}
236		}
237	} endfor_ifa(in_dev);
238	rcu_read_unlock();
239	return 0;
240}
241
242static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243			 int destroy, struct nlmsghdr *nlh, u32 pid)
244{
245	struct in_ifaddr *promote = NULL;
246	struct in_ifaddr *ifa, *ifa1 = *ifap;
247	struct in_ifaddr *last_prim = in_dev->ifa_list;
248	struct in_ifaddr *prev_prom = NULL;
249	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250
251	ASSERT_RTNL();
252
253	/* 1. Deleting primary ifaddr forces deletion all secondaries
254	 * unless alias promotion is set
255	 **/
256
257	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
259
260		while ((ifa = *ifap1) != NULL) {
261			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262			    ifa1->ifa_scope <= ifa->ifa_scope)
263				last_prim = ifa;
264
265			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266			    ifa1->ifa_mask != ifa->ifa_mask ||
267			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
268				ifap1 = &ifa->ifa_next;
269				prev_prom = ifa;
270				continue;
271			}
272
273			if (!do_promote) {
274				*ifap1 = ifa->ifa_next;
275
276				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277				blocking_notifier_call_chain(&inetaddr_chain,
278						NETDEV_DOWN, ifa);
279				inet_free_ifa(ifa);
280			} else {
281				promote = ifa;
282				break;
283			}
284		}
285	}
286
287	/* 2. Unlink it */
288
289	*ifap = ifa1->ifa_next;
290
291	/* 3. Announce address deletion */
292
293	/* Send message first, then call notifier.
294	   At first sight, FIB update triggered by notifier
295	   will refer to already deleted ifaddr, that could confuse
296	   netlink listeners. It is not true: look, gated sees
297	   that route deleted and if it still thinks that ifaddr
298	   is valid, it will try to restore deleted routes... Grr.
299	   So that, this order is correct.
300	 */
301	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
303
304	if (promote) {
305
306		if (prev_prom) {
307			prev_prom->ifa_next = promote->ifa_next;
308			promote->ifa_next = last_prim->ifa_next;
309			last_prim->ifa_next = promote;
310		}
311
312		promote->ifa_flags &= ~IFA_F_SECONDARY;
313		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314		blocking_notifier_call_chain(&inetaddr_chain,
315				NETDEV_UP, promote);
316		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317			if (ifa1->ifa_mask != ifa->ifa_mask ||
318			    !inet_ifa_match(ifa1->ifa_address, ifa))
319					continue;
320			fib_add_ifaddr(ifa);
321		}
322
323	}
324	if (destroy)
325		inet_free_ifa(ifa1);
326}
327
328static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329			 int destroy)
330{
331	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
332}
333
334static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335			     u32 pid)
336{
337	struct in_device *in_dev = ifa->ifa_dev;
338	struct in_ifaddr *ifa1, **ifap, **last_primary;
339
340	ASSERT_RTNL();
341
342	if (!ifa->ifa_local) {
343		inet_free_ifa(ifa);
344		return 0;
345	}
346
347	ifa->ifa_flags &= ~IFA_F_SECONDARY;
348	last_primary = &in_dev->ifa_list;
349
350	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351	     ifap = &ifa1->ifa_next) {
352		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353		    ifa->ifa_scope <= ifa1->ifa_scope)
354			last_primary = &ifa1->ifa_next;
355		if (ifa1->ifa_mask == ifa->ifa_mask &&
356		    inet_ifa_match(ifa1->ifa_address, ifa)) {
357			if (ifa1->ifa_local == ifa->ifa_local) {
358				inet_free_ifa(ifa);
359				return -EEXIST;
360			}
361			if (ifa1->ifa_scope != ifa->ifa_scope) {
362				inet_free_ifa(ifa);
363				return -EINVAL;
364			}
365			ifa->ifa_flags |= IFA_F_SECONDARY;
366		}
367	}
368
369	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370		net_srandom(ifa->ifa_local);
371		ifap = last_primary;
372	}
373
374	ifa->ifa_next = *ifap;
375	*ifap = ifa;
376
377	/* Send message first, then call notifier.
378	   Notifier will trigger FIB update, so that
379	   listeners of netlink will know about new ifaddr */
380	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
382
383	return 0;
384}
385
386static int inet_insert_ifa(struct in_ifaddr *ifa)
387{
388	return __inet_insert_ifa(ifa, NULL, 0);
389}
390
391static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392{
393	struct in_device *in_dev = __in_dev_get_rtnl(dev);
394
395	ASSERT_RTNL();
396
397	if (!in_dev) {
398		inet_free_ifa(ifa);
399		return -ENOBUFS;
400	}
401	ipv4_devconf_setall(in_dev);
402	if (ifa->ifa_dev != in_dev) {
403		BUG_TRAP(!ifa->ifa_dev);
404		in_dev_hold(in_dev);
405		ifa->ifa_dev = in_dev;
406	}
407	if (ipv4_is_loopback(ifa->ifa_local))
408		ifa->ifa_scope = RT_SCOPE_HOST;
409	return inet_insert_ifa(ifa);
410}
411
412struct in_device *inetdev_by_index(struct net *net, int ifindex)
413{
414	struct net_device *dev;
415	struct in_device *in_dev = NULL;
416	read_lock(&dev_base_lock);
417	dev = __dev_get_by_index(net, ifindex);
418	if (dev)
419		in_dev = in_dev_get(dev);
420	read_unlock(&dev_base_lock);
421	return in_dev;
422}
423
424/* Called only from RTNL semaphored context. No locks. */
425
426struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427				    __be32 mask)
428{
429	ASSERT_RTNL();
430
431	for_primary_ifa(in_dev) {
432		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433			return ifa;
434	} endfor_ifa(in_dev);
435	return NULL;
436}
437
438static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439{
440	struct net *net = skb->sk->sk_net;
441	struct nlattr *tb[IFA_MAX+1];
442	struct in_device *in_dev;
443	struct ifaddrmsg *ifm;
444	struct in_ifaddr *ifa, **ifap;
445	int err = -EINVAL;
446
447	ASSERT_RTNL();
448
449	if (net != &init_net)
450		return -EINVAL;
451
452	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453	if (err < 0)
454		goto errout;
455
456	ifm = nlmsg_data(nlh);
457	in_dev = inetdev_by_index(net, ifm->ifa_index);
458	if (in_dev == NULL) {
459		err = -ENODEV;
460		goto errout;
461	}
462
463	__in_dev_put(in_dev);
464
465	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
466	     ifap = &ifa->ifa_next) {
467		if (tb[IFA_LOCAL] &&
468		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
469			continue;
470
471		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
472			continue;
473
474		if (tb[IFA_ADDRESS] &&
475		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
476		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
477			continue;
478
479		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480		return 0;
481	}
482
483	err = -EADDRNOTAVAIL;
484errout:
485	return err;
486}
487
488static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
489{
490	struct nlattr *tb[IFA_MAX+1];
491	struct in_ifaddr *ifa;
492	struct ifaddrmsg *ifm;
493	struct net_device *dev;
494	struct in_device *in_dev;
495	int err;
496
497	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498	if (err < 0)
499		goto errout;
500
501	ifm = nlmsg_data(nlh);
502	err = -EINVAL;
503	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
504		goto errout;
505
506	dev = __dev_get_by_index(net, ifm->ifa_index);
507	err = -ENODEV;
508	if (dev == NULL)
509		goto errout;
510
511	in_dev = __in_dev_get_rtnl(dev);
512	err = -ENOBUFS;
513	if (in_dev == NULL)
514		goto errout;
515
516	ifa = inet_alloc_ifa();
517	if (ifa == NULL)
518		/*
519		 * A potential indev allocation can be left alive, it stays
520		 * assigned to its device and is destroy with it.
521		 */
522		goto errout;
523
524	ipv4_devconf_setall(in_dev);
525	in_dev_hold(in_dev);
526
527	if (tb[IFA_ADDRESS] == NULL)
528		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
529
530	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
531	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
532	ifa->ifa_flags = ifm->ifa_flags;
533	ifa->ifa_scope = ifm->ifa_scope;
534	ifa->ifa_dev = in_dev;
535
536	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
537	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
538
539	if (tb[IFA_BROADCAST])
540		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
541
542	if (tb[IFA_ANYCAST])
543		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
544
545	if (tb[IFA_LABEL])
546		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
547	else
548		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
549
550	return ifa;
551
552errout:
553	return ERR_PTR(err);
554}
555
556static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
557{
558	struct net *net = skb->sk->sk_net;
559	struct in_ifaddr *ifa;
560
561	ASSERT_RTNL();
562
563	if (net != &init_net)
564		return -EINVAL;
565
566	ifa = rtm_to_ifaddr(net, nlh);
567	if (IS_ERR(ifa))
568		return PTR_ERR(ifa);
569
570	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
571}
572
573/*
574 *	Determine a default network mask, based on the IP address.
575 */
576
577static __inline__ int inet_abc_len(__be32 addr)
578{
579	int rc = -1;	/* Something else, probably a multicast. */
580
581	if (ipv4_is_zeronet(addr))
582		rc = 0;
583	else {
584		__u32 haddr = ntohl(addr);
585
586		if (IN_CLASSA(haddr))
587			rc = 8;
588		else if (IN_CLASSB(haddr))
589			rc = 16;
590		else if (IN_CLASSC(haddr))
591			rc = 24;
592	}
593
594	return rc;
595}
596
597
598int devinet_ioctl(unsigned int cmd, void __user *arg)
599{
600	struct ifreq ifr;
601	struct sockaddr_in sin_orig;
602	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
603	struct in_device *in_dev;
604	struct in_ifaddr **ifap = NULL;
605	struct in_ifaddr *ifa = NULL;
606	struct net_device *dev;
607	char *colon;
608	int ret = -EFAULT;
609	int tryaddrmatch = 0;
610
611	/*
612	 *	Fetch the caller's info block into kernel space
613	 */
614
615	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
616		goto out;
617	ifr.ifr_name[IFNAMSIZ - 1] = 0;
618
619	/* save original address for comparison */
620	memcpy(&sin_orig, sin, sizeof(*sin));
621
622	colon = strchr(ifr.ifr_name, ':');
623	if (colon)
624		*colon = 0;
625
626#ifdef CONFIG_KMOD
627	dev_load(&init_net, ifr.ifr_name);
628#endif
629
630	switch (cmd) {
631	case SIOCGIFADDR:	/* Get interface address */
632	case SIOCGIFBRDADDR:	/* Get the broadcast address */
633	case SIOCGIFDSTADDR:	/* Get the destination address */
634	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
635		/* Note that these ioctls will not sleep,
636		   so that we do not impose a lock.
637		   One day we will be forced to put shlock here (I mean SMP)
638		 */
639		tryaddrmatch = (sin_orig.sin_family == AF_INET);
640		memset(sin, 0, sizeof(*sin));
641		sin->sin_family = AF_INET;
642		break;
643
644	case SIOCSIFFLAGS:
645		ret = -EACCES;
646		if (!capable(CAP_NET_ADMIN))
647			goto out;
648		break;
649	case SIOCSIFADDR:	/* Set interface address (and family) */
650	case SIOCSIFBRDADDR:	/* Set the broadcast address */
651	case SIOCSIFDSTADDR:	/* Set the destination address */
652	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
653		ret = -EACCES;
654		if (!capable(CAP_NET_ADMIN))
655			goto out;
656		ret = -EINVAL;
657		if (sin->sin_family != AF_INET)
658			goto out;
659		break;
660	default:
661		ret = -EINVAL;
662		goto out;
663	}
664
665	rtnl_lock();
666
667	ret = -ENODEV;
668	if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
669		goto done;
670
671	if (colon)
672		*colon = ':';
673
674	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
675		if (tryaddrmatch) {
676			/* Matthias Andree */
677			/* compare label and address (4.4BSD style) */
678			/* note: we only do this for a limited set of ioctls
679			   and only if the original address family was AF_INET.
680			   This is checked above. */
681			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
682			     ifap = &ifa->ifa_next) {
683				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
684				    sin_orig.sin_addr.s_addr ==
685							ifa->ifa_address) {
686					break; /* found */
687				}
688			}
689		}
690		/* we didn't get a match, maybe the application is
691		   4.3BSD-style and passed in junk so we fall back to
692		   comparing just the label */
693		if (!ifa) {
694			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
695			     ifap = &ifa->ifa_next)
696				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
697					break;
698		}
699	}
700
701	ret = -EADDRNOTAVAIL;
702	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
703		goto done;
704
705	switch (cmd) {
706	case SIOCGIFADDR:	/* Get interface address */
707		sin->sin_addr.s_addr = ifa->ifa_local;
708		goto rarok;
709
710	case SIOCGIFBRDADDR:	/* Get the broadcast address */
711		sin->sin_addr.s_addr = ifa->ifa_broadcast;
712		goto rarok;
713
714	case SIOCGIFDSTADDR:	/* Get the destination address */
715		sin->sin_addr.s_addr = ifa->ifa_address;
716		goto rarok;
717
718	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
719		sin->sin_addr.s_addr = ifa->ifa_mask;
720		goto rarok;
721
722	case SIOCSIFFLAGS:
723		if (colon) {
724			ret = -EADDRNOTAVAIL;
725			if (!ifa)
726				break;
727			ret = 0;
728			if (!(ifr.ifr_flags & IFF_UP))
729				inet_del_ifa(in_dev, ifap, 1);
730			break;
731		}
732		ret = dev_change_flags(dev, ifr.ifr_flags);
733		break;
734
735	case SIOCSIFADDR:	/* Set interface address (and family) */
736		ret = -EINVAL;
737		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
738			break;
739
740		if (!ifa) {
741			ret = -ENOBUFS;
742			if ((ifa = inet_alloc_ifa()) == NULL)
743				break;
744			if (colon)
745				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
746			else
747				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
748		} else {
749			ret = 0;
750			if (ifa->ifa_local == sin->sin_addr.s_addr)
751				break;
752			inet_del_ifa(in_dev, ifap, 0);
753			ifa->ifa_broadcast = 0;
754			ifa->ifa_anycast = 0;
755		}
756
757		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
758
759		if (!(dev->flags & IFF_POINTOPOINT)) {
760			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
761			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
762			if ((dev->flags & IFF_BROADCAST) &&
763			    ifa->ifa_prefixlen < 31)
764				ifa->ifa_broadcast = ifa->ifa_address |
765						     ~ifa->ifa_mask;
766		} else {
767			ifa->ifa_prefixlen = 32;
768			ifa->ifa_mask = inet_make_mask(32);
769		}
770		ret = inet_set_ifa(dev, ifa);
771		break;
772
773	case SIOCSIFBRDADDR:	/* Set the broadcast address */
774		ret = 0;
775		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
776			inet_del_ifa(in_dev, ifap, 0);
777			ifa->ifa_broadcast = sin->sin_addr.s_addr;
778			inet_insert_ifa(ifa);
779		}
780		break;
781
782	case SIOCSIFDSTADDR:	/* Set the destination address */
783		ret = 0;
784		if (ifa->ifa_address == sin->sin_addr.s_addr)
785			break;
786		ret = -EINVAL;
787		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
788			break;
789		ret = 0;
790		inet_del_ifa(in_dev, ifap, 0);
791		ifa->ifa_address = sin->sin_addr.s_addr;
792		inet_insert_ifa(ifa);
793		break;
794
795	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
796
797		/*
798		 *	The mask we set must be legal.
799		 */
800		ret = -EINVAL;
801		if (bad_mask(sin->sin_addr.s_addr, 0))
802			break;
803		ret = 0;
804		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
805			__be32 old_mask = ifa->ifa_mask;
806			inet_del_ifa(in_dev, ifap, 0);
807			ifa->ifa_mask = sin->sin_addr.s_addr;
808			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
809
810			/* See if current broadcast address matches
811			 * with current netmask, then recalculate
812			 * the broadcast address. Otherwise it's a
813			 * funny address, so don't touch it since
814			 * the user seems to know what (s)he's doing...
815			 */
816			if ((dev->flags & IFF_BROADCAST) &&
817			    (ifa->ifa_prefixlen < 31) &&
818			    (ifa->ifa_broadcast ==
819			     (ifa->ifa_local|~old_mask))) {
820				ifa->ifa_broadcast = (ifa->ifa_local |
821						      ~sin->sin_addr.s_addr);
822			}
823			inet_insert_ifa(ifa);
824		}
825		break;
826	}
827done:
828	rtnl_unlock();
829out:
830	return ret;
831rarok:
832	rtnl_unlock();
833	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
834	goto out;
835}
836
837static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
838{
839	struct in_device *in_dev = __in_dev_get_rtnl(dev);
840	struct in_ifaddr *ifa;
841	struct ifreq ifr;
842	int done = 0;
843
844	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
845		goto out;
846
847	for (; ifa; ifa = ifa->ifa_next) {
848		if (!buf) {
849			done += sizeof(ifr);
850			continue;
851		}
852		if (len < (int) sizeof(ifr))
853			break;
854		memset(&ifr, 0, sizeof(struct ifreq));
855		if (ifa->ifa_label)
856			strcpy(ifr.ifr_name, ifa->ifa_label);
857		else
858			strcpy(ifr.ifr_name, dev->name);
859
860		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
861		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
862								ifa->ifa_local;
863
864		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
865			done = -EFAULT;
866			break;
867		}
868		buf  += sizeof(struct ifreq);
869		len  -= sizeof(struct ifreq);
870		done += sizeof(struct ifreq);
871	}
872out:
873	return done;
874}
875
876__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
877{
878	__be32 addr = 0;
879	struct in_device *in_dev;
880
881	rcu_read_lock();
882	in_dev = __in_dev_get_rcu(dev);
883	if (!in_dev)
884		goto no_in_dev;
885
886	for_primary_ifa(in_dev) {
887		if (ifa->ifa_scope > scope)
888			continue;
889		if (!dst || inet_ifa_match(dst, ifa)) {
890			addr = ifa->ifa_local;
891			break;
892		}
893		if (!addr)
894			addr = ifa->ifa_local;
895	} endfor_ifa(in_dev);
896no_in_dev:
897	rcu_read_unlock();
898
899	if (addr)
900		goto out;
901
902	/* Not loopback addresses on loopback should be preferred
903	   in this case. It is importnat that lo is the first interface
904	   in dev_base list.
905	 */
906	read_lock(&dev_base_lock);
907	rcu_read_lock();
908	for_each_netdev(&init_net, dev) {
909		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
910			continue;
911
912		for_primary_ifa(in_dev) {
913			if (ifa->ifa_scope != RT_SCOPE_LINK &&
914			    ifa->ifa_scope <= scope) {
915				addr = ifa->ifa_local;
916				goto out_unlock_both;
917			}
918		} endfor_ifa(in_dev);
919	}
920out_unlock_both:
921	read_unlock(&dev_base_lock);
922	rcu_read_unlock();
923out:
924	return addr;
925}
926
927static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
928			      __be32 local, int scope)
929{
930	int same = 0;
931	__be32 addr = 0;
932
933	for_ifa(in_dev) {
934		if (!addr &&
935		    (local == ifa->ifa_local || !local) &&
936		    ifa->ifa_scope <= scope) {
937			addr = ifa->ifa_local;
938			if (same)
939				break;
940		}
941		if (!same) {
942			same = (!local || inet_ifa_match(local, ifa)) &&
943				(!dst || inet_ifa_match(dst, ifa));
944			if (same && addr) {
945				if (local || !dst)
946					break;
947				/* Is the selected addr into dst subnet? */
948				if (inet_ifa_match(addr, ifa))
949					break;
950				/* No, then can we use new local src? */
951				if (ifa->ifa_scope <= scope) {
952					addr = ifa->ifa_local;
953					break;
954				}
955				/* search for large dst subnet for addr */
956				same = 0;
957			}
958		}
959	} endfor_ifa(in_dev);
960
961	return same? addr : 0;
962}
963
964/*
965 * Confirm that local IP address exists using wildcards:
966 * - in_dev: only on this interface, 0=any interface
967 * - dst: only in the same subnet as dst, 0=any dst
968 * - local: address, 0=autoselect the local address
969 * - scope: maximum allowed scope value for the local address
970 */
971__be32 inet_confirm_addr(struct in_device *in_dev,
972			 __be32 dst, __be32 local, int scope)
973{
974	__be32 addr = 0;
975	struct net_device *dev;
976	struct net *net;
977
978	if (scope != RT_SCOPE_LINK)
979		return confirm_addr_indev(in_dev, dst, local, scope);
980
981	net = in_dev->dev->nd_net;
982	read_lock(&dev_base_lock);
983	rcu_read_lock();
984	for_each_netdev(net, dev) {
985		if ((in_dev = __in_dev_get_rcu(dev))) {
986			addr = confirm_addr_indev(in_dev, dst, local, scope);
987			if (addr)
988				break;
989		}
990	}
991	rcu_read_unlock();
992	read_unlock(&dev_base_lock);
993
994	return addr;
995}
996
997/*
998 *	Device notifier
999 */
1000
1001int register_inetaddr_notifier(struct notifier_block *nb)
1002{
1003	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1004}
1005
1006int unregister_inetaddr_notifier(struct notifier_block *nb)
1007{
1008	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1009}
1010
1011/* Rename ifa_labels for a device name change. Make some effort to preserve existing
1012 * alias numbering and to create unique labels if possible.
1013*/
1014static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1015{
1016	struct in_ifaddr *ifa;
1017	int named = 0;
1018
1019	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1020		char old[IFNAMSIZ], *dot;
1021
1022		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1023		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1024		if (named++ == 0)
1025			continue;
1026		dot = strchr(old, ':');
1027		if (dot == NULL) {
1028			sprintf(old, ":%d", named);
1029			dot = old;
1030		}
1031		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1032			strcat(ifa->ifa_label, dot);
1033		} else {
1034			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1035		}
1036	}
1037}
1038
1039/* Called only under RTNL semaphore */
1040
1041static int inetdev_event(struct notifier_block *this, unsigned long event,
1042			 void *ptr)
1043{
1044	struct net_device *dev = ptr;
1045	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1046
1047	if (dev->nd_net != &init_net)
1048		return NOTIFY_DONE;
1049
1050	ASSERT_RTNL();
1051
1052	if (!in_dev) {
1053		if (event == NETDEV_REGISTER) {
1054			in_dev = inetdev_init(dev);
1055			if (!in_dev)
1056				return notifier_from_errno(-ENOMEM);
1057			if (dev->flags & IFF_LOOPBACK) {
1058				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1059				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1060			}
1061		}
1062		goto out;
1063	}
1064
1065	switch (event) {
1066	case NETDEV_REGISTER:
1067		printk(KERN_DEBUG "inetdev_event: bug\n");
1068		dev->ip_ptr = NULL;
1069		break;
1070	case NETDEV_UP:
1071		if (dev->mtu < 68)
1072			break;
1073		if (dev->flags & IFF_LOOPBACK) {
1074			struct in_ifaddr *ifa;
1075			if ((ifa = inet_alloc_ifa()) != NULL) {
1076				ifa->ifa_local =
1077				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1078				ifa->ifa_prefixlen = 8;
1079				ifa->ifa_mask = inet_make_mask(8);
1080				in_dev_hold(in_dev);
1081				ifa->ifa_dev = in_dev;
1082				ifa->ifa_scope = RT_SCOPE_HOST;
1083				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1084				inet_insert_ifa(ifa);
1085			}
1086		}
1087		ip_mc_up(in_dev);
1088		break;
1089	case NETDEV_DOWN:
1090		ip_mc_down(in_dev);
1091		break;
1092	case NETDEV_CHANGEMTU:
1093		if (dev->mtu >= 68)
1094			break;
1095		/* MTU falled under 68, disable IP */
1096	case NETDEV_UNREGISTER:
1097		inetdev_destroy(in_dev);
1098		break;
1099	case NETDEV_CHANGENAME:
1100		/* Do not notify about label change, this event is
1101		 * not interesting to applications using netlink.
1102		 */
1103		inetdev_changename(dev, in_dev);
1104
1105		devinet_sysctl_unregister(in_dev);
1106		devinet_sysctl_register(in_dev);
1107		break;
1108	}
1109out:
1110	return NOTIFY_DONE;
1111}
1112
1113static struct notifier_block ip_netdev_notifier = {
1114	.notifier_call =inetdev_event,
1115};
1116
1117static inline size_t inet_nlmsg_size(void)
1118{
1119	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1120	       + nla_total_size(4) /* IFA_ADDRESS */
1121	       + nla_total_size(4) /* IFA_LOCAL */
1122	       + nla_total_size(4) /* IFA_BROADCAST */
1123	       + nla_total_size(4) /* IFA_ANYCAST */
1124	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1125}
1126
1127static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1128			    u32 pid, u32 seq, int event, unsigned int flags)
1129{
1130	struct ifaddrmsg *ifm;
1131	struct nlmsghdr  *nlh;
1132
1133	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1134	if (nlh == NULL)
1135		return -EMSGSIZE;
1136
1137	ifm = nlmsg_data(nlh);
1138	ifm->ifa_family = AF_INET;
1139	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1140	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1141	ifm->ifa_scope = ifa->ifa_scope;
1142	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1143
1144	if (ifa->ifa_address)
1145		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1146
1147	if (ifa->ifa_local)
1148		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1149
1150	if (ifa->ifa_broadcast)
1151		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1152
1153	if (ifa->ifa_anycast)
1154		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1155
1156	if (ifa->ifa_label[0])
1157		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1158
1159	return nlmsg_end(skb, nlh);
1160
1161nla_put_failure:
1162	nlmsg_cancel(skb, nlh);
1163	return -EMSGSIZE;
1164}
1165
1166static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1167{
1168	struct net *net = skb->sk->sk_net;
1169	int idx, ip_idx;
1170	struct net_device *dev;
1171	struct in_device *in_dev;
1172	struct in_ifaddr *ifa;
1173	int s_ip_idx, s_idx = cb->args[0];
1174
1175	if (net != &init_net)
1176		return 0;
1177
1178	s_ip_idx = ip_idx = cb->args[1];
1179	idx = 0;
1180	for_each_netdev(net, dev) {
1181		if (idx < s_idx)
1182			goto cont;
1183		if (idx > s_idx)
1184			s_ip_idx = 0;
1185		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1186			goto cont;
1187
1188		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1189		     ifa = ifa->ifa_next, ip_idx++) {
1190			if (ip_idx < s_ip_idx)
1191				continue;
1192			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1193					     cb->nlh->nlmsg_seq,
1194					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1195				goto done;
1196		}
1197cont:
1198		idx++;
1199	}
1200
1201done:
1202	cb->args[0] = idx;
1203	cb->args[1] = ip_idx;
1204
1205	return skb->len;
1206}
1207
1208static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1209		      u32 pid)
1210{
1211	struct sk_buff *skb;
1212	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1213	int err = -ENOBUFS;
1214	struct net *net;
1215
1216	net = ifa->ifa_dev->dev->nd_net;
1217	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1218	if (skb == NULL)
1219		goto errout;
1220
1221	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1222	if (err < 0) {
1223		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1224		WARN_ON(err == -EMSGSIZE);
1225		kfree_skb(skb);
1226		goto errout;
1227	}
1228	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1229errout:
1230	if (err < 0)
1231		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1232}
1233
1234#ifdef CONFIG_SYSCTL
1235
1236static void devinet_copy_dflt_conf(struct net *net, int i)
1237{
1238	struct net_device *dev;
1239
1240	read_lock(&dev_base_lock);
1241	for_each_netdev(net, dev) {
1242		struct in_device *in_dev;
1243		rcu_read_lock();
1244		in_dev = __in_dev_get_rcu(dev);
1245		if (in_dev && !test_bit(i, in_dev->cnf.state))
1246			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1247		rcu_read_unlock();
1248	}
1249	read_unlock(&dev_base_lock);
1250}
1251
1252static void inet_forward_change(struct net *net)
1253{
1254	struct net_device *dev;
1255	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1256
1257	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1258	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1259
1260	read_lock(&dev_base_lock);
1261	for_each_netdev(net, dev) {
1262		struct in_device *in_dev;
1263		rcu_read_lock();
1264		in_dev = __in_dev_get_rcu(dev);
1265		if (in_dev)
1266			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1267		rcu_read_unlock();
1268	}
1269	read_unlock(&dev_base_lock);
1270
1271	rt_cache_flush(0);
1272}
1273
1274static int devinet_conf_proc(ctl_table *ctl, int write,
1275			     struct file* filp, void __user *buffer,
1276			     size_t *lenp, loff_t *ppos)
1277{
1278	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1279
1280	if (write) {
1281		struct ipv4_devconf *cnf = ctl->extra1;
1282		struct net *net = ctl->extra2;
1283		int i = (int *)ctl->data - cnf->data;
1284
1285		set_bit(i, cnf->state);
1286
1287		if (cnf == net->ipv4.devconf_dflt)
1288			devinet_copy_dflt_conf(net, i);
1289	}
1290
1291	return ret;
1292}
1293
1294static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1295			       void __user *oldval, size_t __user *oldlenp,
1296			       void __user *newval, size_t newlen)
1297{
1298	struct ipv4_devconf *cnf;
1299	struct net *net;
1300	int *valp = table->data;
1301	int new;
1302	int i;
1303
1304	if (!newval || !newlen)
1305		return 0;
1306
1307	if (newlen != sizeof(int))
1308		return -EINVAL;
1309
1310	if (get_user(new, (int __user *)newval))
1311		return -EFAULT;
1312
1313	if (new == *valp)
1314		return 0;
1315
1316	if (oldval && oldlenp) {
1317		size_t len;
1318
1319		if (get_user(len, oldlenp))
1320			return -EFAULT;
1321
1322		if (len) {
1323			if (len > table->maxlen)
1324				len = table->maxlen;
1325			if (copy_to_user(oldval, valp, len))
1326				return -EFAULT;
1327			if (put_user(len, oldlenp))
1328				return -EFAULT;
1329		}
1330	}
1331
1332	*valp = new;
1333
1334	cnf = table->extra1;
1335	net = table->extra2;
1336	i = (int *)table->data - cnf->data;
1337
1338	set_bit(i, cnf->state);
1339
1340	if (cnf == net->ipv4.devconf_dflt)
1341		devinet_copy_dflt_conf(net, i);
1342
1343	return 1;
1344}
1345
1346static int devinet_sysctl_forward(ctl_table *ctl, int write,
1347				  struct file* filp, void __user *buffer,
1348				  size_t *lenp, loff_t *ppos)
1349{
1350	int *valp = ctl->data;
1351	int val = *valp;
1352	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1353
1354	if (write && *valp != val) {
1355		struct net *net = ctl->extra2;
1356
1357		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1358			inet_forward_change(net);
1359		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1360			rt_cache_flush(0);
1361	}
1362
1363	return ret;
1364}
1365
1366int ipv4_doint_and_flush(ctl_table *ctl, int write,
1367			 struct file* filp, void __user *buffer,
1368			 size_t *lenp, loff_t *ppos)
1369{
1370	int *valp = ctl->data;
1371	int val = *valp;
1372	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1373
1374	if (write && *valp != val)
1375		rt_cache_flush(0);
1376
1377	return ret;
1378}
1379
1380int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1381				  void __user *oldval, size_t __user *oldlenp,
1382				  void __user *newval, size_t newlen)
1383{
1384	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1385				      newval, newlen);
1386
1387	if (ret == 1)
1388		rt_cache_flush(0);
1389
1390	return ret;
1391}
1392
1393
/*
 * Builders for the entries of the devconf sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY produces one ctl_table initializer for attribute
 * NET_IPV4_CONF_<attr>: its data pointer refers to slot
 * (NET_IPV4_CONF_<attr> - 1) of the global ipv4_devconf and extra1 to
 * ipv4_devconf itself.
 *
 * The remaining macros are shorthands:
 *   COMPLEX   mode 0644 with caller-supplied handlers;
 *   RW        mode 0644 with devinet_conf_proc / devinet_conf_sysctl;
 *   RO        mode 0444 with devinet_conf_proc / devinet_conf_sysctl;
 *   FLUSHING  mode 0644 with ipv4_doint_and_flush /
 *             ipv4_doint_and_flush_strategy.
 */
#define DEVINET_SYSCTL_ENTRY(attr, label, perm, handler, strat) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= label, \
		.mode		= perm, \
		.maxlen		= sizeof(int), \
		.data		= &ipv4_devconf.data[NET_IPV4_CONF_ ## attr - 1], \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= handler, \
		.strategy	= strat, \
	}

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, handler, strat) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, handler, strat)

#define DEVINET_SYSCTL_RW_ENTRY(attr, label) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, devinet_conf_proc, \
				     devinet_conf_sysctl)

#define DEVINET_SYSCTL_RO_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, ipv4_doint_and_flush, \
			     ipv4_doint_and_flush_strategy)
1421
1422static struct devinet_sysctl_table {
1423	struct ctl_table_header *sysctl_header;
1424	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1425	char *dev_name;
1426} devinet_sysctl = {
1427	.devinet_vars = {
1428		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1429					     devinet_sysctl_forward,
1430					     devinet_conf_sysctl),
1431		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1432
1433		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1434		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1435		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1436		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1437		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1438		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1439					"accept_source_route"),
1440		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1441		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1442		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1443		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1444		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1445		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1446		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1447		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1448		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1449
1450		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1451		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1452		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1453					      "force_igmp_version"),
1454		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1455					      "promote_secondaries"),
1456	},
1457};
1458
1459static int __devinet_sysctl_register(struct net *net, char *dev_name,
1460		int ctl_name, struct ipv4_devconf *p)
1461{
1462	int i;
1463	struct devinet_sysctl_table *t;
1464
1465#define DEVINET_CTL_PATH_DEV	3
1466
1467	struct ctl_path devinet_ctl_path[] = {
1468		{ .procname = "net", .ctl_name = CTL_NET, },
1469		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1470		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1471		{ /* to be set */ },
1472		{ },
1473	};
1474
1475	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1476	if (!t)
1477		goto out;
1478
1479	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1480		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1481		t->devinet_vars[i].extra1 = p;
1482		t->devinet_vars[i].extra2 = net;
1483	}
1484
1485	/*
1486	 * Make a copy of dev_name, because '.procname' is regarded as const
1487	 * by sysctl and we wouldn't want anyone to change it under our feet
1488	 * (see SIOCSIFNAME).
1489	 */
1490	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1491	if (!t->dev_name)
1492		goto free;
1493
1494	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1495	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1496
1497	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1498			t->devinet_vars);
1499	if (!t->sysctl_header)
1500		goto free_procname;
1501
1502	p->sysctl = t;
1503	return 0;
1504
1505free_procname:
1506	kfree(t->dev_name);
1507free:
1508	kfree(t);
1509out:
1510	return -ENOBUFS;
1511}
1512
1513static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1514{
1515	struct devinet_sysctl_table *t = cnf->sysctl;
1516
1517	if (t == NULL)
1518		return;
1519
1520	cnf->sysctl = NULL;
1521	unregister_sysctl_table(t->sysctl_header);
1522	kfree(t->dev_name);
1523	kfree(t);
1524}
1525
1526static void devinet_sysctl_register(struct in_device *idev)
1527{
1528	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1529			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1530	__devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1531			idev->dev->ifindex, &idev->cnf);
1532}
1533
1534static void devinet_sysctl_unregister(struct in_device *idev)
1535{
1536	__devinet_sysctl_unregister(&idev->cnf);
1537	neigh_sysctl_unregister(idev->arp_parms);
1538}
1539
1540static struct ctl_table ctl_forward_entry[] = {
1541	{
1542		.ctl_name	= NET_IPV4_FORWARD,
1543		.procname	= "ip_forward",
1544		.data		= &ipv4_devconf.data[
1545					NET_IPV4_CONF_FORWARDING - 1],
1546		.maxlen		= sizeof(int),
1547		.mode		= 0644,
1548		.proc_handler	= devinet_sysctl_forward,
1549		.strategy	= devinet_conf_sysctl,
1550		.extra1		= &ipv4_devconf,
1551		.extra2		= &init_net,
1552	},
1553	{ },
1554};
1555
1556static __net_initdata struct ctl_path net_ipv4_path[] = {
1557	{ .procname = "net", .ctl_name = CTL_NET, },
1558	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1559	{ },
1560};
1561#endif
1562
1563static __net_init int devinet_init_net(struct net *net)
1564{
1565	int err;
1566	struct ipv4_devconf *all, *dflt;
1567#ifdef CONFIG_SYSCTL
1568	struct ctl_table *tbl = ctl_forward_entry;
1569	struct ctl_table_header *forw_hdr;
1570#endif
1571
1572	err = -ENOMEM;
1573	all = &ipv4_devconf;
1574	dflt = &ipv4_devconf_dflt;
1575
1576	if (net != &init_net) {
1577		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1578		if (all == NULL)
1579			goto err_alloc_all;
1580
1581		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1582		if (dflt == NULL)
1583			goto err_alloc_dflt;
1584
1585#ifdef CONFIG_SYSCTL
1586		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1587		if (tbl == NULL)
1588			goto err_alloc_ctl;
1589
1590		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1591		tbl[0].extra1 = all;
1592		tbl[0].extra2 = net;
1593#endif
1594	}
1595
1596#ifdef CONFIG_SYSCTL
1597	err = __devinet_sysctl_register(net, "all",
1598			NET_PROTO_CONF_ALL, all);
1599	if (err < 0)
1600		goto err_reg_all;
1601
1602	err = __devinet_sysctl_register(net, "default",
1603			NET_PROTO_CONF_DEFAULT, dflt);
1604	if (err < 0)
1605		goto err_reg_dflt;
1606
1607	err = -ENOMEM;
1608	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1609	if (forw_hdr == NULL)
1610		goto err_reg_ctl;
1611	net->ipv4.forw_hdr = forw_hdr;
1612#endif
1613
1614	net->ipv4.devconf_all = all;
1615	net->ipv4.devconf_dflt = dflt;
1616	return 0;
1617
1618#ifdef CONFIG_SYSCTL
1619err_reg_ctl:
1620	__devinet_sysctl_unregister(dflt);
1621err_reg_dflt:
1622	__devinet_sysctl_unregister(all);
1623err_reg_all:
1624	if (tbl != ctl_forward_entry)
1625		kfree(tbl);
1626err_alloc_ctl:
1627#endif
1628	if (dflt != &ipv4_devconf_dflt)
1629		kfree(dflt);
1630err_alloc_dflt:
1631	if (all != &ipv4_devconf)
1632		kfree(all);
1633err_alloc_all:
1634	return err;
1635}
1636
1637static __net_exit void devinet_exit_net(struct net *net)
1638{
1639#ifdef CONFIG_SYSCTL
1640	struct ctl_table *tbl;
1641
1642	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1644	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1645	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1646	kfree(tbl);
1647#endif
1648	kfree(net->ipv4.devconf_dflt);
1649	kfree(net->ipv4.devconf_all);
1650}
1651
1652static __net_initdata struct pernet_operations devinet_ops = {
1653	.init = devinet_init_net,
1654	.exit = devinet_exit_net,
1655};
1656
1657void __init devinet_init(void)
1658{
1659	register_pernet_subsys(&devinet_ops);
1660
1661	register_gifconf(PF_INET, inet_gifconf);
1662	register_netdevice_notifier(&ip_netdev_notifier);
1663
1664	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1665	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1666	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1667}
1668
1669EXPORT_SYMBOL(in_dev_finish_destroy);
1670EXPORT_SYMBOL(inet_select_addr);
1671EXPORT_SYMBOL(inetdev_by_index);
1672EXPORT_SYMBOL(register_inetaddr_notifier);
1673EXPORT_SYMBOL(unregister_inetaddr_notifier);
1674