devinet.c revision 9435eb1cf0b76b323019cebf8d16762a50a12a19
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#include <linux/slab.h>
54#include <linux/hash.h>
55#ifdef CONFIG_SYSCTL
56#include <linux/sysctl.h>
57#endif
58#include <linux/kmod.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66
67static struct ipv4_devconf ipv4_devconf = {
68	.data = {
69		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
70		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
71		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
72		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
73	},
74};
75
76static struct ipv4_devconf ipv4_devconf_dflt = {
77	.data = {
78		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
79		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
80		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
81		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
82		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83	},
84};
85
/*
 * Apply IPV4_DEVCONF() for attribute @attr to the devconf_dflt block of
 * namespace @net.  @net is parenthesised so that any pointer expression
 * (not only a plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90	[IFA_LOCAL]     	= { .type = NLA_U32 },
91	[IFA_ADDRESS]   	= { .type = NLA_U32 },
92	[IFA_BROADCAST] 	= { .type = NLA_U32 },
93	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94};
95
96/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
97 * value.  So if you change this define, make appropriate changes to
98 * inet_addr_hash as well.
99 */
100#define IN4_ADDR_HSIZE	256
101static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
102static DEFINE_SPINLOCK(inet_addr_hash_lock);
103
104static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
105{
106	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
107
108	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
109		(IN4_ADDR_HSIZE - 1));
110}
111
112static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
113{
114	unsigned int hash = inet_addr_hash(net, ifa->ifa_address);
115
116	spin_lock(&inet_addr_hash_lock);
117	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
118	spin_unlock(&inet_addr_hash_lock);
119}
120
121static void inet_hash_remove(struct in_ifaddr *ifa)
122{
123	spin_lock(&inet_addr_hash_lock);
124	hlist_del_init_rcu(&ifa->hash);
125	spin_unlock(&inet_addr_hash_lock);
126}
127
128/**
129 * __ip_dev_find - find the first device with a given source address.
130 * @net: the net namespace
131 * @addr: the source address
132 * @devref: if true, take a reference on the found device
133 *
134 * If a caller uses devref=false, it should be protected by RCU, or RTNL
135 */
136struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
137{
138	unsigned int hash = inet_addr_hash(net, addr);
139	struct net_device *result = NULL;
140	struct in_ifaddr *ifa;
141	struct hlist_node *node;
142
143	rcu_read_lock();
144	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
145		struct net_device *dev = ifa->ifa_dev->dev;
146
147		if (!net_eq(dev_net(dev), net))
148			continue;
149		if (ifa->ifa_address == addr) {
150			result = dev;
151			break;
152		}
153	}
154	if (result && devref)
155		dev_hold(result);
156	rcu_read_unlock();
157	return result;
158}
159EXPORT_SYMBOL(__ip_dev_find);
160
161static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
162
163static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
164static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
165			 int destroy);
166#ifdef CONFIG_SYSCTL
167static void devinet_sysctl_register(struct in_device *idev);
168static void devinet_sysctl_unregister(struct in_device *idev);
169#else
170static inline void devinet_sysctl_register(struct in_device *idev)
171{
172}
173static inline void devinet_sysctl_unregister(struct in_device *idev)
174{
175}
176#endif
177
178/* Locks all the inet devices. */
179
180static struct in_ifaddr *inet_alloc_ifa(void)
181{
182	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
183}
184
185static void inet_rcu_free_ifa(struct rcu_head *head)
186{
187	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
188	if (ifa->ifa_dev)
189		in_dev_put(ifa->ifa_dev);
190	kfree(ifa);
191}
192
193static inline void inet_free_ifa(struct in_ifaddr *ifa)
194{
195	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
196}
197
198void in_dev_finish_destroy(struct in_device *idev)
199{
200	struct net_device *dev = idev->dev;
201
202	WARN_ON(idev->ifa_list);
203	WARN_ON(idev->mc_list);
204#ifdef NET_REFCNT_DEBUG
205	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
206	       idev, dev ? dev->name : "NIL");
207#endif
208	dev_put(dev);
209	if (!idev->dead)
210		pr_err("Freeing alive in_device %p\n", idev);
211	else
212		kfree(idev);
213}
214EXPORT_SYMBOL(in_dev_finish_destroy);
215
216static struct in_device *inetdev_init(struct net_device *dev)
217{
218	struct in_device *in_dev;
219
220	ASSERT_RTNL();
221
222	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
223	if (!in_dev)
224		goto out;
225	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
226			sizeof(in_dev->cnf));
227	in_dev->cnf.sysctl = NULL;
228	in_dev->dev = dev;
229	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
230	if (!in_dev->arp_parms)
231		goto out_kfree;
232	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
233		dev_disable_lro(dev);
234	/* Reference in_dev->dev */
235	dev_hold(dev);
236	/* Account for reference dev->ip_ptr (below) */
237	in_dev_hold(in_dev);
238
239	devinet_sysctl_register(in_dev);
240	ip_mc_init_dev(in_dev);
241	if (dev->flags & IFF_UP)
242		ip_mc_up(in_dev);
243
244	/* we can receive as soon as ip_ptr is set -- do this last */
245	rcu_assign_pointer(dev->ip_ptr, in_dev);
246out:
247	return in_dev;
248out_kfree:
249	kfree(in_dev);
250	in_dev = NULL;
251	goto out;
252}
253
254static void in_dev_rcu_put(struct rcu_head *head)
255{
256	struct in_device *idev = container_of(head, struct in_device, rcu_head);
257	in_dev_put(idev);
258}
259
260static void inetdev_destroy(struct in_device *in_dev)
261{
262	struct in_ifaddr *ifa;
263	struct net_device *dev;
264
265	ASSERT_RTNL();
266
267	dev = in_dev->dev;
268
269	in_dev->dead = 1;
270
271	ip_mc_destroy_dev(in_dev);
272
273	while ((ifa = in_dev->ifa_list) != NULL) {
274		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
275		inet_free_ifa(ifa);
276	}
277
278	rcu_assign_pointer(dev->ip_ptr, NULL);
279
280	devinet_sysctl_unregister(in_dev);
281	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
282	arp_ifdown(dev);
283
284	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
285}
286
287int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
288{
289	rcu_read_lock();
290	for_primary_ifa(in_dev) {
291		if (inet_ifa_match(a, ifa)) {
292			if (!b || inet_ifa_match(b, ifa)) {
293				rcu_read_unlock();
294				return 1;
295			}
296		}
297	} endfor_ifa(in_dev);
298	rcu_read_unlock();
299	return 0;
300}
301
302static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
303			 int destroy, struct nlmsghdr *nlh, u32 pid)
304{
305	struct in_ifaddr *promote = NULL;
306	struct in_ifaddr *ifa, *ifa1 = *ifap;
307	struct in_ifaddr *last_prim = in_dev->ifa_list;
308	struct in_ifaddr *prev_prom = NULL;
309	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
310
311	ASSERT_RTNL();
312
313	/* 1. Deleting primary ifaddr forces deletion all secondaries
314	 * unless alias promotion is set
315	 **/
316
317	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
318		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
319
320		while ((ifa = *ifap1) != NULL) {
321			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
322			    ifa1->ifa_scope <= ifa->ifa_scope)
323				last_prim = ifa;
324
325			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
326			    ifa1->ifa_mask != ifa->ifa_mask ||
327			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
328				ifap1 = &ifa->ifa_next;
329				prev_prom = ifa;
330				continue;
331			}
332
333			if (!do_promote) {
334				inet_hash_remove(ifa);
335				*ifap1 = ifa->ifa_next;
336
337				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
338				blocking_notifier_call_chain(&inetaddr_chain,
339						NETDEV_DOWN, ifa);
340				inet_free_ifa(ifa);
341			} else {
342				promote = ifa;
343				break;
344			}
345		}
346	}
347
348	/* 2. Unlink it */
349
350	*ifap = ifa1->ifa_next;
351	inet_hash_remove(ifa1);
352
353	/* 3. Announce address deletion */
354
355	/* Send message first, then call notifier.
356	   At first sight, FIB update triggered by notifier
357	   will refer to already deleted ifaddr, that could confuse
358	   netlink listeners. It is not true: look, gated sees
359	   that route deleted and if it still thinks that ifaddr
360	   is valid, it will try to restore deleted routes... Grr.
361	   So that, this order is correct.
362	 */
363	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
364	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
365
366	if (promote) {
367
368		if (prev_prom) {
369			prev_prom->ifa_next = promote->ifa_next;
370			promote->ifa_next = last_prim->ifa_next;
371			last_prim->ifa_next = promote;
372		}
373
374		promote->ifa_flags &= ~IFA_F_SECONDARY;
375		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
376		blocking_notifier_call_chain(&inetaddr_chain,
377				NETDEV_UP, promote);
378		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
379			if (ifa1->ifa_mask != ifa->ifa_mask ||
380			    !inet_ifa_match(ifa1->ifa_address, ifa))
381					continue;
382			fib_add_ifaddr(ifa);
383		}
384
385	}
386	if (destroy)
387		inet_free_ifa(ifa1);
388}
389
390static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
391			 int destroy)
392{
393	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
394}
395
396static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
397			     u32 pid)
398{
399	struct in_device *in_dev = ifa->ifa_dev;
400	struct in_ifaddr *ifa1, **ifap, **last_primary;
401
402	ASSERT_RTNL();
403
404	if (!ifa->ifa_local) {
405		inet_free_ifa(ifa);
406		return 0;
407	}
408
409	ifa->ifa_flags &= ~IFA_F_SECONDARY;
410	last_primary = &in_dev->ifa_list;
411
412	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
413	     ifap = &ifa1->ifa_next) {
414		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
415		    ifa->ifa_scope <= ifa1->ifa_scope)
416			last_primary = &ifa1->ifa_next;
417		if (ifa1->ifa_mask == ifa->ifa_mask &&
418		    inet_ifa_match(ifa1->ifa_address, ifa)) {
419			if (ifa1->ifa_local == ifa->ifa_local) {
420				inet_free_ifa(ifa);
421				return -EEXIST;
422			}
423			if (ifa1->ifa_scope != ifa->ifa_scope) {
424				inet_free_ifa(ifa);
425				return -EINVAL;
426			}
427			ifa->ifa_flags |= IFA_F_SECONDARY;
428		}
429	}
430
431	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
432		net_srandom(ifa->ifa_local);
433		ifap = last_primary;
434	}
435
436	ifa->ifa_next = *ifap;
437	*ifap = ifa;
438
439	inet_hash_insert(dev_net(in_dev->dev), ifa);
440
441	/* Send message first, then call notifier.
442	   Notifier will trigger FIB update, so that
443	   listeners of netlink will know about new ifaddr */
444	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
445	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
446
447	return 0;
448}
449
450static int inet_insert_ifa(struct in_ifaddr *ifa)
451{
452	return __inet_insert_ifa(ifa, NULL, 0);
453}
454
455static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
456{
457	struct in_device *in_dev = __in_dev_get_rtnl(dev);
458
459	ASSERT_RTNL();
460
461	if (!in_dev) {
462		inet_free_ifa(ifa);
463		return -ENOBUFS;
464	}
465	ipv4_devconf_setall(in_dev);
466	if (ifa->ifa_dev != in_dev) {
467		WARN_ON(ifa->ifa_dev);
468		in_dev_hold(in_dev);
469		ifa->ifa_dev = in_dev;
470	}
471	if (ipv4_is_loopback(ifa->ifa_local))
472		ifa->ifa_scope = RT_SCOPE_HOST;
473	return inet_insert_ifa(ifa);
474}
475
476/* Caller must hold RCU or RTNL :
477 * We dont take a reference on found in_device
478 */
479struct in_device *inetdev_by_index(struct net *net, int ifindex)
480{
481	struct net_device *dev;
482	struct in_device *in_dev = NULL;
483
484	rcu_read_lock();
485	dev = dev_get_by_index_rcu(net, ifindex);
486	if (dev)
487		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
488	rcu_read_unlock();
489	return in_dev;
490}
491EXPORT_SYMBOL(inetdev_by_index);
492
493/* Called only from RTNL semaphored context. No locks. */
494
495struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
496				    __be32 mask)
497{
498	ASSERT_RTNL();
499
500	for_primary_ifa(in_dev) {
501		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
502			return ifa;
503	} endfor_ifa(in_dev);
504	return NULL;
505}
506
507static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
508{
509	struct net *net = sock_net(skb->sk);
510	struct nlattr *tb[IFA_MAX+1];
511	struct in_device *in_dev;
512	struct ifaddrmsg *ifm;
513	struct in_ifaddr *ifa, **ifap;
514	int err = -EINVAL;
515
516	ASSERT_RTNL();
517
518	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
519	if (err < 0)
520		goto errout;
521
522	ifm = nlmsg_data(nlh);
523	in_dev = inetdev_by_index(net, ifm->ifa_index);
524	if (in_dev == NULL) {
525		err = -ENODEV;
526		goto errout;
527	}
528
529	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
530	     ifap = &ifa->ifa_next) {
531		if (tb[IFA_LOCAL] &&
532		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
533			continue;
534
535		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
536			continue;
537
538		if (tb[IFA_ADDRESS] &&
539		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
540		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
541			continue;
542
543		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
544		return 0;
545	}
546
547	err = -EADDRNOTAVAIL;
548errout:
549	return err;
550}
551
552static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
553{
554	struct nlattr *tb[IFA_MAX+1];
555	struct in_ifaddr *ifa;
556	struct ifaddrmsg *ifm;
557	struct net_device *dev;
558	struct in_device *in_dev;
559	int err;
560
561	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
562	if (err < 0)
563		goto errout;
564
565	ifm = nlmsg_data(nlh);
566	err = -EINVAL;
567	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
568		goto errout;
569
570	dev = __dev_get_by_index(net, ifm->ifa_index);
571	err = -ENODEV;
572	if (dev == NULL)
573		goto errout;
574
575	in_dev = __in_dev_get_rtnl(dev);
576	err = -ENOBUFS;
577	if (in_dev == NULL)
578		goto errout;
579
580	ifa = inet_alloc_ifa();
581	if (ifa == NULL)
582		/*
583		 * A potential indev allocation can be left alive, it stays
584		 * assigned to its device and is destroy with it.
585		 */
586		goto errout;
587
588	ipv4_devconf_setall(in_dev);
589	in_dev_hold(in_dev);
590
591	if (tb[IFA_ADDRESS] == NULL)
592		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
593
594	INIT_HLIST_NODE(&ifa->hash);
595	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
596	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
597	ifa->ifa_flags = ifm->ifa_flags;
598	ifa->ifa_scope = ifm->ifa_scope;
599	ifa->ifa_dev = in_dev;
600
601	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
602	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
603
604	if (tb[IFA_BROADCAST])
605		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
606
607	if (tb[IFA_LABEL])
608		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
609	else
610		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
611
612	return ifa;
613
614errout:
615	return ERR_PTR(err);
616}
617
618static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
619{
620	struct net *net = sock_net(skb->sk);
621	struct in_ifaddr *ifa;
622
623	ASSERT_RTNL();
624
625	ifa = rtm_to_ifaddr(net, nlh);
626	if (IS_ERR(ifa))
627		return PTR_ERR(ifa);
628
629	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
630}
631
632/*
633 *	Determine a default network mask, based on the IP address.
634 */
635
636static inline int inet_abc_len(__be32 addr)
637{
638	int rc = -1;	/* Something else, probably a multicast. */
639
640	if (ipv4_is_zeronet(addr))
641		rc = 0;
642	else {
643		__u32 haddr = ntohl(addr);
644
645		if (IN_CLASSA(haddr))
646			rc = 8;
647		else if (IN_CLASSB(haddr))
648			rc = 16;
649		else if (IN_CLASSC(haddr))
650			rc = 24;
651	}
652
653	return rc;
654}
655
656
657int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
658{
659	struct ifreq ifr;
660	struct sockaddr_in sin_orig;
661	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
662	struct in_device *in_dev;
663	struct in_ifaddr **ifap = NULL;
664	struct in_ifaddr *ifa = NULL;
665	struct net_device *dev;
666	char *colon;
667	int ret = -EFAULT;
668	int tryaddrmatch = 0;
669
670	/*
671	 *	Fetch the caller's info block into kernel space
672	 */
673
674	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
675		goto out;
676	ifr.ifr_name[IFNAMSIZ - 1] = 0;
677
678	/* save original address for comparison */
679	memcpy(&sin_orig, sin, sizeof(*sin));
680
681	colon = strchr(ifr.ifr_name, ':');
682	if (colon)
683		*colon = 0;
684
685	dev_load(net, ifr.ifr_name);
686
687	switch (cmd) {
688	case SIOCGIFADDR:	/* Get interface address */
689	case SIOCGIFBRDADDR:	/* Get the broadcast address */
690	case SIOCGIFDSTADDR:	/* Get the destination address */
691	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
692		/* Note that these ioctls will not sleep,
693		   so that we do not impose a lock.
694		   One day we will be forced to put shlock here (I mean SMP)
695		 */
696		tryaddrmatch = (sin_orig.sin_family == AF_INET);
697		memset(sin, 0, sizeof(*sin));
698		sin->sin_family = AF_INET;
699		break;
700
701	case SIOCSIFFLAGS:
702		ret = -EACCES;
703		if (!capable(CAP_NET_ADMIN))
704			goto out;
705		break;
706	case SIOCSIFADDR:	/* Set interface address (and family) */
707	case SIOCSIFBRDADDR:	/* Set the broadcast address */
708	case SIOCSIFDSTADDR:	/* Set the destination address */
709	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
710		ret = -EACCES;
711		if (!capable(CAP_NET_ADMIN))
712			goto out;
713		ret = -EINVAL;
714		if (sin->sin_family != AF_INET)
715			goto out;
716		break;
717	default:
718		ret = -EINVAL;
719		goto out;
720	}
721
722	rtnl_lock();
723
724	ret = -ENODEV;
725	dev = __dev_get_by_name(net, ifr.ifr_name);
726	if (!dev)
727		goto done;
728
729	if (colon)
730		*colon = ':';
731
732	in_dev = __in_dev_get_rtnl(dev);
733	if (in_dev) {
734		if (tryaddrmatch) {
735			/* Matthias Andree */
736			/* compare label and address (4.4BSD style) */
737			/* note: we only do this for a limited set of ioctls
738			   and only if the original address family was AF_INET.
739			   This is checked above. */
740			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
741			     ifap = &ifa->ifa_next) {
742				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
743				    sin_orig.sin_addr.s_addr ==
744							ifa->ifa_address) {
745					break; /* found */
746				}
747			}
748		}
749		/* we didn't get a match, maybe the application is
750		   4.3BSD-style and passed in junk so we fall back to
751		   comparing just the label */
752		if (!ifa) {
753			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
754			     ifap = &ifa->ifa_next)
755				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
756					break;
757		}
758	}
759
760	ret = -EADDRNOTAVAIL;
761	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
762		goto done;
763
764	switch (cmd) {
765	case SIOCGIFADDR:	/* Get interface address */
766		sin->sin_addr.s_addr = ifa->ifa_local;
767		goto rarok;
768
769	case SIOCGIFBRDADDR:	/* Get the broadcast address */
770		sin->sin_addr.s_addr = ifa->ifa_broadcast;
771		goto rarok;
772
773	case SIOCGIFDSTADDR:	/* Get the destination address */
774		sin->sin_addr.s_addr = ifa->ifa_address;
775		goto rarok;
776
777	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
778		sin->sin_addr.s_addr = ifa->ifa_mask;
779		goto rarok;
780
781	case SIOCSIFFLAGS:
782		if (colon) {
783			ret = -EADDRNOTAVAIL;
784			if (!ifa)
785				break;
786			ret = 0;
787			if (!(ifr.ifr_flags & IFF_UP))
788				inet_del_ifa(in_dev, ifap, 1);
789			break;
790		}
791		ret = dev_change_flags(dev, ifr.ifr_flags);
792		break;
793
794	case SIOCSIFADDR:	/* Set interface address (and family) */
795		ret = -EINVAL;
796		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
797			break;
798
799		if (!ifa) {
800			ret = -ENOBUFS;
801			ifa = inet_alloc_ifa();
802			INIT_HLIST_NODE(&ifa->hash);
803			if (!ifa)
804				break;
805			if (colon)
806				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
807			else
808				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
809		} else {
810			ret = 0;
811			if (ifa->ifa_local == sin->sin_addr.s_addr)
812				break;
813			inet_del_ifa(in_dev, ifap, 0);
814			ifa->ifa_broadcast = 0;
815			ifa->ifa_scope = 0;
816		}
817
818		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
819
820		if (!(dev->flags & IFF_POINTOPOINT)) {
821			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
822			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
823			if ((dev->flags & IFF_BROADCAST) &&
824			    ifa->ifa_prefixlen < 31)
825				ifa->ifa_broadcast = ifa->ifa_address |
826						     ~ifa->ifa_mask;
827		} else {
828			ifa->ifa_prefixlen = 32;
829			ifa->ifa_mask = inet_make_mask(32);
830		}
831		ret = inet_set_ifa(dev, ifa);
832		break;
833
834	case SIOCSIFBRDADDR:	/* Set the broadcast address */
835		ret = 0;
836		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
837			inet_del_ifa(in_dev, ifap, 0);
838			ifa->ifa_broadcast = sin->sin_addr.s_addr;
839			inet_insert_ifa(ifa);
840		}
841		break;
842
843	case SIOCSIFDSTADDR:	/* Set the destination address */
844		ret = 0;
845		if (ifa->ifa_address == sin->sin_addr.s_addr)
846			break;
847		ret = -EINVAL;
848		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
849			break;
850		ret = 0;
851		inet_del_ifa(in_dev, ifap, 0);
852		ifa->ifa_address = sin->sin_addr.s_addr;
853		inet_insert_ifa(ifa);
854		break;
855
856	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
857
858		/*
859		 *	The mask we set must be legal.
860		 */
861		ret = -EINVAL;
862		if (bad_mask(sin->sin_addr.s_addr, 0))
863			break;
864		ret = 0;
865		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
866			__be32 old_mask = ifa->ifa_mask;
867			inet_del_ifa(in_dev, ifap, 0);
868			ifa->ifa_mask = sin->sin_addr.s_addr;
869			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
870
871			/* See if current broadcast address matches
872			 * with current netmask, then recalculate
873			 * the broadcast address. Otherwise it's a
874			 * funny address, so don't touch it since
875			 * the user seems to know what (s)he's doing...
876			 */
877			if ((dev->flags & IFF_BROADCAST) &&
878			    (ifa->ifa_prefixlen < 31) &&
879			    (ifa->ifa_broadcast ==
880			     (ifa->ifa_local|~old_mask))) {
881				ifa->ifa_broadcast = (ifa->ifa_local |
882						      ~sin->sin_addr.s_addr);
883			}
884			inet_insert_ifa(ifa);
885		}
886		break;
887	}
888done:
889	rtnl_unlock();
890out:
891	return ret;
892rarok:
893	rtnl_unlock();
894	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
895	goto out;
896}
897
898static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
899{
900	struct in_device *in_dev = __in_dev_get_rtnl(dev);
901	struct in_ifaddr *ifa;
902	struct ifreq ifr;
903	int done = 0;
904
905	if (!in_dev)
906		goto out;
907
908	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
909		if (!buf) {
910			done += sizeof(ifr);
911			continue;
912		}
913		if (len < (int) sizeof(ifr))
914			break;
915		memset(&ifr, 0, sizeof(struct ifreq));
916		if (ifa->ifa_label)
917			strcpy(ifr.ifr_name, ifa->ifa_label);
918		else
919			strcpy(ifr.ifr_name, dev->name);
920
921		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
922		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
923								ifa->ifa_local;
924
925		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
926			done = -EFAULT;
927			break;
928		}
929		buf  += sizeof(struct ifreq);
930		len  -= sizeof(struct ifreq);
931		done += sizeof(struct ifreq);
932	}
933out:
934	return done;
935}
936
937__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
938{
939	__be32 addr = 0;
940	struct in_device *in_dev;
941	struct net *net = dev_net(dev);
942
943	rcu_read_lock();
944	in_dev = __in_dev_get_rcu(dev);
945	if (!in_dev)
946		goto no_in_dev;
947
948	for_primary_ifa(in_dev) {
949		if (ifa->ifa_scope > scope)
950			continue;
951		if (!dst || inet_ifa_match(dst, ifa)) {
952			addr = ifa->ifa_local;
953			break;
954		}
955		if (!addr)
956			addr = ifa->ifa_local;
957	} endfor_ifa(in_dev);
958
959	if (addr)
960		goto out_unlock;
961no_in_dev:
962
963	/* Not loopback addresses on loopback should be preferred
964	   in this case. It is importnat that lo is the first interface
965	   in dev_base list.
966	 */
967	for_each_netdev_rcu(net, dev) {
968		in_dev = __in_dev_get_rcu(dev);
969		if (!in_dev)
970			continue;
971
972		for_primary_ifa(in_dev) {
973			if (ifa->ifa_scope != RT_SCOPE_LINK &&
974			    ifa->ifa_scope <= scope) {
975				addr = ifa->ifa_local;
976				goto out_unlock;
977			}
978		} endfor_ifa(in_dev);
979	}
980out_unlock:
981	rcu_read_unlock();
982	return addr;
983}
984EXPORT_SYMBOL(inet_select_addr);
985
986static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
987			      __be32 local, int scope)
988{
989	int same = 0;
990	__be32 addr = 0;
991
992	for_ifa(in_dev) {
993		if (!addr &&
994		    (local == ifa->ifa_local || !local) &&
995		    ifa->ifa_scope <= scope) {
996			addr = ifa->ifa_local;
997			if (same)
998				break;
999		}
1000		if (!same) {
1001			same = (!local || inet_ifa_match(local, ifa)) &&
1002				(!dst || inet_ifa_match(dst, ifa));
1003			if (same && addr) {
1004				if (local || !dst)
1005					break;
1006				/* Is the selected addr into dst subnet? */
1007				if (inet_ifa_match(addr, ifa))
1008					break;
1009				/* No, then can we use new local src? */
1010				if (ifa->ifa_scope <= scope) {
1011					addr = ifa->ifa_local;
1012					break;
1013				}
1014				/* search for large dst subnet for addr */
1015				same = 0;
1016			}
1017		}
1018	} endfor_ifa(in_dev);
1019
1020	return same ? addr : 0;
1021}
1022
1023/*
1024 * Confirm that local IP address exists using wildcards:
1025 * - in_dev: only on this interface, 0=any interface
1026 * - dst: only in the same subnet as dst, 0=any dst
1027 * - local: address, 0=autoselect the local address
1028 * - scope: maximum allowed scope value for the local address
1029 */
1030__be32 inet_confirm_addr(struct in_device *in_dev,
1031			 __be32 dst, __be32 local, int scope)
1032{
1033	__be32 addr = 0;
1034	struct net_device *dev;
1035	struct net *net;
1036
1037	if (scope != RT_SCOPE_LINK)
1038		return confirm_addr_indev(in_dev, dst, local, scope);
1039
1040	net = dev_net(in_dev->dev);
1041	rcu_read_lock();
1042	for_each_netdev_rcu(net, dev) {
1043		in_dev = __in_dev_get_rcu(dev);
1044		if (in_dev) {
1045			addr = confirm_addr_indev(in_dev, dst, local, scope);
1046			if (addr)
1047				break;
1048		}
1049	}
1050	rcu_read_unlock();
1051
1052	return addr;
1053}
1054
1055/*
1056 *	Device notifier
1057 */
1058
1059int register_inetaddr_notifier(struct notifier_block *nb)
1060{
1061	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1062}
1063EXPORT_SYMBOL(register_inetaddr_notifier);
1064
1065int unregister_inetaddr_notifier(struct notifier_block *nb)
1066{
1067	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1068}
1069EXPORT_SYMBOL(unregister_inetaddr_notifier);
1070
1071/* Rename ifa_labels for a device name change. Make some effort to preserve
1072 * existing alias numbering and to create unique labels if possible.
1073*/
1074static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1075{
1076	struct in_ifaddr *ifa;
1077	int named = 0;
1078
1079	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1080		char old[IFNAMSIZ], *dot;
1081
1082		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1083		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1084		if (named++ == 0)
1085			goto skip;
1086		dot = strchr(old, ':');
1087		if (dot == NULL) {
1088			sprintf(old, ":%d", named);
1089			dot = old;
1090		}
1091		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1092			strcat(ifa->ifa_label, dot);
1093		else
1094			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1095skip:
1096		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1097	}
1098}
1099
/*
 * True when @mtu is large enough for IPv4 to stay enabled on a device.
 * The threshold is 68 bytes (presumably the RFC 791 minimum - confirm).
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return !(mtu < 68);
}
1104
1105/* Called only under RTNL semaphore */
1106
1107static int inetdev_event(struct notifier_block *this, unsigned long event,
1108			 void *ptr)
1109{
1110	struct net_device *dev = ptr;
1111	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1112
1113	ASSERT_RTNL();
1114
1115	if (!in_dev) {
1116		if (event == NETDEV_REGISTER) {
1117			in_dev = inetdev_init(dev);
1118			if (!in_dev)
1119				return notifier_from_errno(-ENOMEM);
1120			if (dev->flags & IFF_LOOPBACK) {
1121				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1122				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1123			}
1124		} else if (event == NETDEV_CHANGEMTU) {
1125			/* Re-enabling IP */
1126			if (inetdev_valid_mtu(dev->mtu))
1127				in_dev = inetdev_init(dev);
1128		}
1129		goto out;
1130	}
1131
1132	switch (event) {
1133	case NETDEV_REGISTER:
1134		printk(KERN_DEBUG "inetdev_event: bug\n");
1135		rcu_assign_pointer(dev->ip_ptr, NULL);
1136		break;
1137	case NETDEV_UP:
1138		if (!inetdev_valid_mtu(dev->mtu))
1139			break;
1140		if (dev->flags & IFF_LOOPBACK) {
1141			struct in_ifaddr *ifa = inet_alloc_ifa();
1142
1143			if (ifa) {
1144				INIT_HLIST_NODE(&ifa->hash);
1145				ifa->ifa_local =
1146				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1147				ifa->ifa_prefixlen = 8;
1148				ifa->ifa_mask = inet_make_mask(8);
1149				in_dev_hold(in_dev);
1150				ifa->ifa_dev = in_dev;
1151				ifa->ifa_scope = RT_SCOPE_HOST;
1152				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1153				inet_insert_ifa(ifa);
1154			}
1155		}
1156		ip_mc_up(in_dev);
1157		/* fall through */
1158	case NETDEV_NOTIFY_PEERS:
1159	case NETDEV_CHANGEADDR:
1160		/* Send gratuitous ARP to notify of link change */
1161		if (IN_DEV_ARP_NOTIFY(in_dev)) {
1162			struct in_ifaddr *ifa = in_dev->ifa_list;
1163
1164			if (ifa)
1165				arp_send(ARPOP_REQUEST, ETH_P_ARP,
1166					 ifa->ifa_address, dev,
1167					 ifa->ifa_address, NULL,
1168					 dev->dev_addr, NULL);
1169		}
1170		break;
1171	case NETDEV_DOWN:
1172		ip_mc_down(in_dev);
1173		break;
1174	case NETDEV_PRE_TYPE_CHANGE:
1175		ip_mc_unmap(in_dev);
1176		break;
1177	case NETDEV_POST_TYPE_CHANGE:
1178		ip_mc_remap(in_dev);
1179		break;
1180	case NETDEV_CHANGEMTU:
1181		if (inetdev_valid_mtu(dev->mtu))
1182			break;
1183		/* disable IP when MTU is not enough */
1184	case NETDEV_UNREGISTER:
1185		inetdev_destroy(in_dev);
1186		break;
1187	case NETDEV_CHANGENAME:
1188		/* Do not notify about label change, this event is
1189		 * not interesting to applications using netlink.
1190		 */
1191		inetdev_changename(dev, in_dev);
1192
1193		devinet_sysctl_unregister(in_dev);
1194		devinet_sysctl_register(in_dev);
1195		break;
1196	}
1197out:
1198	return NOTIFY_DONE;
1199}
1200
1201static struct notifier_block ip_netdev_notifier = {
1202	.notifier_call = inetdev_event,
1203};
1204
1205static inline size_t inet_nlmsg_size(void)
1206{
1207	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1208	       + nla_total_size(4) /* IFA_ADDRESS */
1209	       + nla_total_size(4) /* IFA_LOCAL */
1210	       + nla_total_size(4) /* IFA_BROADCAST */
1211	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1212}
1213
1214static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1215			    u32 pid, u32 seq, int event, unsigned int flags)
1216{
1217	struct ifaddrmsg *ifm;
1218	struct nlmsghdr  *nlh;
1219
1220	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1221	if (nlh == NULL)
1222		return -EMSGSIZE;
1223
1224	ifm = nlmsg_data(nlh);
1225	ifm->ifa_family = AF_INET;
1226	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1227	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1228	ifm->ifa_scope = ifa->ifa_scope;
1229	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1230
1231	if (ifa->ifa_address)
1232		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1233
1234	if (ifa->ifa_local)
1235		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1236
1237	if (ifa->ifa_broadcast)
1238		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1239
1240	if (ifa->ifa_label[0])
1241		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1242
1243	return nlmsg_end(skb, nlh);
1244
1245nla_put_failure:
1246	nlmsg_cancel(skb, nlh);
1247	return -EMSGSIZE;
1248}
1249
1250static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1251{
1252	struct net *net = sock_net(skb->sk);
1253	int h, s_h;
1254	int idx, s_idx;
1255	int ip_idx, s_ip_idx;
1256	struct net_device *dev;
1257	struct in_device *in_dev;
1258	struct in_ifaddr *ifa;
1259	struct hlist_head *head;
1260	struct hlist_node *node;
1261
1262	s_h = cb->args[0];
1263	s_idx = idx = cb->args[1];
1264	s_ip_idx = ip_idx = cb->args[2];
1265
1266	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1267		idx = 0;
1268		head = &net->dev_index_head[h];
1269		rcu_read_lock();
1270		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1271			if (idx < s_idx)
1272				goto cont;
1273			if (h > s_h || idx > s_idx)
1274				s_ip_idx = 0;
1275			in_dev = __in_dev_get_rcu(dev);
1276			if (!in_dev)
1277				goto cont;
1278
1279			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1280			     ifa = ifa->ifa_next, ip_idx++) {
1281				if (ip_idx < s_ip_idx)
1282					continue;
1283				if (inet_fill_ifaddr(skb, ifa,
1284					     NETLINK_CB(cb->skb).pid,
1285					     cb->nlh->nlmsg_seq,
1286					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1287					rcu_read_unlock();
1288					goto done;
1289				}
1290			}
1291cont:
1292			idx++;
1293		}
1294		rcu_read_unlock();
1295	}
1296
1297done:
1298	cb->args[0] = h;
1299	cb->args[1] = idx;
1300	cb->args[2] = ip_idx;
1301
1302	return skb->len;
1303}
1304
1305static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1306		      u32 pid)
1307{
1308	struct sk_buff *skb;
1309	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1310	int err = -ENOBUFS;
1311	struct net *net;
1312
1313	net = dev_net(ifa->ifa_dev->dev);
1314	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1315	if (skb == NULL)
1316		goto errout;
1317
1318	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1319	if (err < 0) {
1320		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1321		WARN_ON(err == -EMSGSIZE);
1322		kfree_skb(skb);
1323		goto errout;
1324	}
1325	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1326	return;
1327errout:
1328	if (err < 0)
1329		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1330}
1331
1332static size_t inet_get_link_af_size(const struct net_device *dev)
1333{
1334	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1335
1336	if (!in_dev)
1337		return 0;
1338
1339	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1340}
1341
1342static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1343{
1344	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1345	struct nlattr *nla;
1346	int i;
1347
1348	if (!in_dev)
1349		return -ENODATA;
1350
1351	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1352	if (nla == NULL)
1353		return -EMSGSIZE;
1354
1355	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1356		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1357
1358	return 0;
1359}
1360
1361static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1362	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1363};
1364
1365static int inet_validate_link_af(const struct net_device *dev,
1366				 const struct nlattr *nla)
1367{
1368	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1369	int err, rem;
1370
1371	if (dev && !__in_dev_get_rtnl(dev))
1372		return -EAFNOSUPPORT;
1373
1374	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1375	if (err < 0)
1376		return err;
1377
1378	if (tb[IFLA_INET_CONF]) {
1379		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1380			int cfgid = nla_type(a);
1381
1382			if (nla_len(a) < 4)
1383				return -EINVAL;
1384
1385			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1386				return -EINVAL;
1387		}
1388	}
1389
1390	return 0;
1391}
1392
1393static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1394{
1395	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1396	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1397	int rem;
1398
1399	if (!in_dev)
1400		return -EAFNOSUPPORT;
1401
1402	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1403		BUG();
1404
1405	if (tb[IFLA_INET_CONF]) {
1406		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1407			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1408	}
1409
1410	return 0;
1411}
1412
1413#ifdef CONFIG_SYSCTL
1414
1415static void devinet_copy_dflt_conf(struct net *net, int i)
1416{
1417	struct net_device *dev;
1418
1419	rcu_read_lock();
1420	for_each_netdev_rcu(net, dev) {
1421		struct in_device *in_dev;
1422
1423		in_dev = __in_dev_get_rcu(dev);
1424		if (in_dev && !test_bit(i, in_dev->cnf.state))
1425			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1426	}
1427	rcu_read_unlock();
1428}
1429
1430/* called with RTNL locked */
1431static void inet_forward_change(struct net *net)
1432{
1433	struct net_device *dev;
1434	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1435
1436	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1437	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1438
1439	for_each_netdev(net, dev) {
1440		struct in_device *in_dev;
1441		if (on)
1442			dev_disable_lro(dev);
1443		rcu_read_lock();
1444		in_dev = __in_dev_get_rcu(dev);
1445		if (in_dev)
1446			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1447		rcu_read_unlock();
1448	}
1449}
1450
1451static int devinet_conf_proc(ctl_table *ctl, int write,
1452			     void __user *buffer,
1453			     size_t *lenp, loff_t *ppos)
1454{
1455	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1456
1457	if (write) {
1458		struct ipv4_devconf *cnf = ctl->extra1;
1459		struct net *net = ctl->extra2;
1460		int i = (int *)ctl->data - cnf->data;
1461
1462		set_bit(i, cnf->state);
1463
1464		if (cnf == net->ipv4.devconf_dflt)
1465			devinet_copy_dflt_conf(net, i);
1466	}
1467
1468	return ret;
1469}
1470
1471static int devinet_sysctl_forward(ctl_table *ctl, int write,
1472				  void __user *buffer,
1473				  size_t *lenp, loff_t *ppos)
1474{
1475	int *valp = ctl->data;
1476	int val = *valp;
1477	loff_t pos = *ppos;
1478	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1479
1480	if (write && *valp != val) {
1481		struct net *net = ctl->extra2;
1482
1483		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1484			if (!rtnl_trylock()) {
1485				/* Restore the original values before restarting */
1486				*valp = val;
1487				*ppos = pos;
1488				return restart_syscall();
1489			}
1490			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1491				inet_forward_change(net);
1492			} else if (*valp) {
1493				struct ipv4_devconf *cnf = ctl->extra1;
1494				struct in_device *idev =
1495					container_of(cnf, struct in_device, cnf);
1496				dev_disable_lro(idev->dev);
1497			}
1498			rtnl_unlock();
1499			rt_cache_flush(net, 0);
1500		}
1501	}
1502
1503	return ret;
1504}
1505
1506static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1507				void __user *buffer,
1508				size_t *lenp, loff_t *ppos)
1509{
1510	int *valp = ctl->data;
1511	int val = *valp;
1512	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1513	struct net *net = ctl->extra2;
1514
1515	if (write && *valp != val)
1516		rt_cache_flush(net, 0);
1517
1518	return ret;
1519}
1520
/*
 * Initializer for one ctl_table entry backed by slot
 * IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf.
 *   attr - suffix of the IPV4_DEVCONF_* constant
 *   name - procname of the entry
 *   mval - file mode
 *   proc - proc_handler
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.data		= ipv4_devconf.data +			\
				  IPV4_DEVCONF_ ## attr - 1,		\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-chosen handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1543
1544static struct devinet_sysctl_table {
1545	struct ctl_table_header *sysctl_header;
1546	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1547	char *dev_name;
1548} devinet_sysctl = {
1549	.devinet_vars = {
1550		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1551					     devinet_sysctl_forward),
1552		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1553
1554		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1555		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1556		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1557		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1558		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1559		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1560					"accept_source_route"),
1561		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1562		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1563		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1564		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1565		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1566		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1567		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1568		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1569		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1570		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1571		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1572		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1573		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1574
1575		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1576		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1577		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1578					      "force_igmp_version"),
1579		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1580					      "promote_secondaries"),
1581	},
1582};
1583
1584static int __devinet_sysctl_register(struct net *net, char *dev_name,
1585					struct ipv4_devconf *p)
1586{
1587	int i;
1588	struct devinet_sysctl_table *t;
1589
1590#define DEVINET_CTL_PATH_DEV	3
1591
1592	struct ctl_path devinet_ctl_path[] = {
1593		{ .procname = "net",  },
1594		{ .procname = "ipv4", },
1595		{ .procname = "conf", },
1596		{ /* to be set */ },
1597		{ },
1598	};
1599
1600	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1601	if (!t)
1602		goto out;
1603
1604	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1605		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1606		t->devinet_vars[i].extra1 = p;
1607		t->devinet_vars[i].extra2 = net;
1608	}
1609
1610	/*
1611	 * Make a copy of dev_name, because '.procname' is regarded as const
1612	 * by sysctl and we wouldn't want anyone to change it under our feet
1613	 * (see SIOCSIFNAME).
1614	 */
1615	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1616	if (!t->dev_name)
1617		goto free;
1618
1619	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1620
1621	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1622			t->devinet_vars);
1623	if (!t->sysctl_header)
1624		goto free_procname;
1625
1626	p->sysctl = t;
1627	return 0;
1628
1629free_procname:
1630	kfree(t->dev_name);
1631free:
1632	kfree(t);
1633out:
1634	return -ENOBUFS;
1635}
1636
1637static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1638{
1639	struct devinet_sysctl_table *t = cnf->sysctl;
1640
1641	if (t == NULL)
1642		return;
1643
1644	cnf->sysctl = NULL;
1645	unregister_sysctl_table(t->sysctl_header);
1646	kfree(t->dev_name);
1647	kfree(t);
1648}
1649
1650static void devinet_sysctl_register(struct in_device *idev)
1651{
1652	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1653	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1654					&idev->cnf);
1655}
1656
1657static void devinet_sysctl_unregister(struct in_device *idev)
1658{
1659	__devinet_sysctl_unregister(&idev->cnf);
1660	neigh_sysctl_unregister(idev->arp_parms);
1661}
1662
1663static struct ctl_table ctl_forward_entry[] = {
1664	{
1665		.procname	= "ip_forward",
1666		.data		= &ipv4_devconf.data[
1667					IPV4_DEVCONF_FORWARDING - 1],
1668		.maxlen		= sizeof(int),
1669		.mode		= 0644,
1670		.proc_handler	= devinet_sysctl_forward,
1671		.extra1		= &ipv4_devconf,
1672		.extra2		= &init_net,
1673	},
1674	{ },
1675};
1676
1677static __net_initdata struct ctl_path net_ipv4_path[] = {
1678	{ .procname = "net", },
1679	{ .procname = "ipv4", },
1680	{ },
1681};
1682#endif
1683
1684static __net_init int devinet_init_net(struct net *net)
1685{
1686	int err;
1687	struct ipv4_devconf *all, *dflt;
1688#ifdef CONFIG_SYSCTL
1689	struct ctl_table *tbl = ctl_forward_entry;
1690	struct ctl_table_header *forw_hdr;
1691#endif
1692
1693	err = -ENOMEM;
1694	all = &ipv4_devconf;
1695	dflt = &ipv4_devconf_dflt;
1696
1697	if (!net_eq(net, &init_net)) {
1698		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1699		if (all == NULL)
1700			goto err_alloc_all;
1701
1702		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1703		if (dflt == NULL)
1704			goto err_alloc_dflt;
1705
1706#ifdef CONFIG_SYSCTL
1707		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1708		if (tbl == NULL)
1709			goto err_alloc_ctl;
1710
1711		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1712		tbl[0].extra1 = all;
1713		tbl[0].extra2 = net;
1714#endif
1715	}
1716
1717#ifdef CONFIG_SYSCTL
1718	err = __devinet_sysctl_register(net, "all", all);
1719	if (err < 0)
1720		goto err_reg_all;
1721
1722	err = __devinet_sysctl_register(net, "default", dflt);
1723	if (err < 0)
1724		goto err_reg_dflt;
1725
1726	err = -ENOMEM;
1727	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1728	if (forw_hdr == NULL)
1729		goto err_reg_ctl;
1730	net->ipv4.forw_hdr = forw_hdr;
1731#endif
1732
1733	net->ipv4.devconf_all = all;
1734	net->ipv4.devconf_dflt = dflt;
1735	return 0;
1736
1737#ifdef CONFIG_SYSCTL
1738err_reg_ctl:
1739	__devinet_sysctl_unregister(dflt);
1740err_reg_dflt:
1741	__devinet_sysctl_unregister(all);
1742err_reg_all:
1743	if (tbl != ctl_forward_entry)
1744		kfree(tbl);
1745err_alloc_ctl:
1746#endif
1747	if (dflt != &ipv4_devconf_dflt)
1748		kfree(dflt);
1749err_alloc_dflt:
1750	if (all != &ipv4_devconf)
1751		kfree(all);
1752err_alloc_all:
1753	return err;
1754}
1755
1756static __net_exit void devinet_exit_net(struct net *net)
1757{
1758#ifdef CONFIG_SYSCTL
1759	struct ctl_table *tbl;
1760
1761	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1762	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1763	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1764	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1765	kfree(tbl);
1766#endif
1767	kfree(net->ipv4.devconf_dflt);
1768	kfree(net->ipv4.devconf_all);
1769}
1770
1771static __net_initdata struct pernet_operations devinet_ops = {
1772	.init = devinet_init_net,
1773	.exit = devinet_exit_net,
1774};
1775
1776static struct rtnl_af_ops inet_af_ops = {
1777	.family		  = AF_INET,
1778	.fill_link_af	  = inet_fill_link_af,
1779	.get_link_af_size = inet_get_link_af_size,
1780	.validate_link_af = inet_validate_link_af,
1781	.set_link_af	  = inet_set_link_af,
1782};
1783
1784void __init devinet_init(void)
1785{
1786	int i;
1787
1788	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1789		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1790
1791	register_pernet_subsys(&devinet_ops);
1792
1793	register_gifconf(PF_INET, inet_gifconf);
1794	register_netdevice_notifier(&ip_netdev_notifier);
1795
1796	rtnl_af_register(&inet_af_ops);
1797
1798	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1799	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1800	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1801}
1802
1803