devinet.c revision d01ff0a049f749e0bf10a35bb23edd012718c8c2
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#include <linux/slab.h>
54#include <linux/hash.h>
55#ifdef CONFIG_SYSCTL
56#include <linux/sysctl.h>
57#endif
58#include <linux/kmod.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66
67#include "fib_lookup.h"
68
/*
 * Built-in devconf template.  Redirect handling (accept, send, secure)
 * and shared-media are enabled; every other slot is left at zero.
 * The data[] array is indexed by attribute id minus one.
 * NOTE(review): presumably the initial "all" configuration - its users
 * are outside this part of the file, confirm there.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
77
/*
 * Built-in "default" devconf template.  Same settings as ipv4_devconf
 * plus ACCEPT_SOURCE_ROUTE enabled.  The data[] array is indexed by
 * attribute id minus one.
 * NOTE(review): presumably the seed for net->ipv4.devconf_dflt, which
 * inetdev_init() copies into each new in_device - confirm where it is
 * installed.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
87
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesised so that any pointer-valued expression (not just
 * a plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90
/*
 * Netlink attribute policy handed to nlmsg_parse() for address messages:
 * the three address attributes are 32-bit values, the label is a string
 * of at most IFNAMSIZ - 1 characters.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
97
/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
 * value (it folds a 32-bit value into one byte and masks with
 * IN4_ADDR_HSIZE - 1).  So if you change this define, make appropriate
 * changes to inet_addr_hash as well.
 */
#define IN4_ADDR_HSIZE	256
/* Hash table of in_ifaddr entries, keyed by (namespace, local address). */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
/* Taken by inet_hash_insert()/inet_hash_remove(); lookups walk under RCU. */
static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
107{
108	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
109
110	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
111		(IN4_ADDR_HSIZE - 1));
112}
113
114static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115{
116	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117
118	spin_lock(&inet_addr_hash_lock);
119	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120	spin_unlock(&inet_addr_hash_lock);
121}
122
/*
 * Unlink @ifa from the address hash table under the hash spinlock.
 * Uses the _init_rcu variant of hlist deletion on the node.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
129
130/**
131 * __ip_dev_find - find the first device with a given source address.
132 * @net: the net namespace
133 * @addr: the source address
134 * @devref: if true, take a reference on the found device
135 *
136 * If a caller uses devref=false, it should be protected by RCU, or RTNL
137 */
138struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139{
140	unsigned int hash = inet_addr_hash(net, addr);
141	struct net_device *result = NULL;
142	struct in_ifaddr *ifa;
143	struct hlist_node *node;
144
145	rcu_read_lock();
146	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
147		struct net_device *dev = ifa->ifa_dev->dev;
148
149		if (!net_eq(dev_net(dev), net))
150			continue;
151		if (ifa->ifa_local == addr) {
152			result = dev;
153			break;
154		}
155	}
156	if (!result) {
157		struct flowi4 fl4 = { .daddr = addr };
158		struct fib_result res = { 0 };
159		struct fib_table *local;
160
161		/* Fallback to FIB local table so that communication
162		 * over loopback subnets work.
163		 */
164		local = fib_get_table(net, RT_TABLE_LOCAL);
165		if (local &&
166		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167		    res.type == RTN_LOCAL)
168			result = FIB_RES_DEV(res);
169	}
170	if (result && devref)
171		dev_hold(result);
172	rcu_read_unlock();
173	return result;
174}
175EXPORT_SYMBOL(__ip_dev_find);
176
/* Netlink address notification; only declared here, used by the
 * add/delete paths below.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when an address is inserted or deleted.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks are empty stubs. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
193
194/* Locks all the inet devices. */
195
196static struct in_ifaddr *inet_alloc_ifa(void)
197{
198	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199}
200
201static void inet_rcu_free_ifa(struct rcu_head *head)
202{
203	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204	if (ifa->ifa_dev)
205		in_dev_put(ifa->ifa_dev);
206	kfree(ifa);
207}
208
/*
 * Release @ifa: the actual in_dev_put() and kfree() are deferred to
 * inet_rcu_free_ifa() via call_rcu().
 */
static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
213
214void in_dev_finish_destroy(struct in_device *idev)
215{
216	struct net_device *dev = idev->dev;
217
218	WARN_ON(idev->ifa_list);
219	WARN_ON(idev->mc_list);
220#ifdef NET_REFCNT_DEBUG
221	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
222	       idev, dev ? dev->name : "NIL");
223#endif
224	dev_put(dev);
225	if (!idev->dead)
226		pr_err("Freeing alive in_device %p\n", idev);
227	else
228		kfree(idev);
229}
230EXPORT_SYMBOL(in_dev_finish_destroy);
231
232static struct in_device *inetdev_init(struct net_device *dev)
233{
234	struct in_device *in_dev;
235
236	ASSERT_RTNL();
237
238	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239	if (!in_dev)
240		goto out;
241	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242			sizeof(in_dev->cnf));
243	in_dev->cnf.sysctl = NULL;
244	in_dev->dev = dev;
245	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246	if (!in_dev->arp_parms)
247		goto out_kfree;
248	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249		dev_disable_lro(dev);
250	/* Reference in_dev->dev */
251	dev_hold(dev);
252	/* Account for reference dev->ip_ptr (below) */
253	in_dev_hold(in_dev);
254
255	devinet_sysctl_register(in_dev);
256	ip_mc_init_dev(in_dev);
257	if (dev->flags & IFF_UP)
258		ip_mc_up(in_dev);
259
260	/* we can receive as soon as ip_ptr is set -- do this last */
261	RCU_INIT_POINTER(dev->ip_ptr, in_dev);
262out:
263	return in_dev;
264out_kfree:
265	kfree(in_dev);
266	in_dev = NULL;
267	goto out;
268}
269
270static void in_dev_rcu_put(struct rcu_head *head)
271{
272	struct in_device *idev = container_of(head, struct in_device, rcu_head);
273	in_dev_put(idev);
274}
275
/*
 * Tear down the IPv4 state of a device.  Requires RTNL.
 *
 * Marks the in_device dead, destroys its multicast state, deletes and
 * frees every address, detaches it from dev->ip_ptr, unregisters its
 * sysctls, releases the ARP parameters and finally schedules the
 * reference drop through RCU.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	/* in_dev_finish_destroy() only frees in_devices marked dead */
	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	/* Always delete the list head; destroy=0 because we free it here */
	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	/* Defer the in_dev_put() to an RCU callback */
	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
302
303int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304{
305	rcu_read_lock();
306	for_primary_ifa(in_dev) {
307		if (inet_ifa_match(a, ifa)) {
308			if (!b || inet_ifa_match(b, ifa)) {
309				rcu_read_unlock();
310				return 1;
311			}
312		}
313	} endfor_ifa(in_dev);
314	rcu_read_unlock();
315	return 0;
316}
317
/*
 * Remove the address *@ifap from @in_dev.  Requires RTNL.
 *
 * @in_dev:  device the address belongs to
 * @ifap:    pointer to the list link that points at the address to delete
 * @destroy: if non-zero the deleted in_ifaddr is freed, otherwise the
 *           caller keeps ownership of it
 * @nlh/@pid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * When a primary address goes away its secondaries on the same subnet
 * are either deleted too, or - if secondary promotion is enabled on the
 * device - the first of them is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track where a promoted address would be re-linked */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip primaries and addresses of other subnets */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes primary */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the successor before promote is re-linked */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move promote right behind the last primary address */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes of the remaining secondaries of the subnet */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
417
/*
 * Delete an address without a triggering netlink request: same as
 * __inet_del_ifa() with a NULL nlmsghdr and pid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
423
/*
 * Link @ifa into the address list of ifa->ifa_dev.  Requires RTNL.
 *
 * @nlh/@pid are passed through to rtmsg_ifa() for the notification.
 *
 * Returns 0 on success (also when the local address is zero, in which
 * case @ifa is simply freed), -EEXIST if the same local address already
 * exists on the subnet, -EINVAL if an address of the same subnet has a
 * different scope.  On every non-inserting path @ifa is freed here, so
 * the caller must not touch it afterwards.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 pid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Start as primary; downgraded below if the subnet is already there */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		/* Insertion slot for a primary: behind the last primary
		 * whose scope value is not smaller than the new one's.
		 */
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		/* Same subnet as an existing address? */
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/* After the loop ifap is the tail link: secondaries are appended,
	 * primaries go to the slot computed above.
	 */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
477
/*
 * Insert an address without a triggering netlink request: same as
 * __inet_insert_ifa() with a NULL nlmsghdr and pid 0.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
482
483static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
484{
485	struct in_device *in_dev = __in_dev_get_rtnl(dev);
486
487	ASSERT_RTNL();
488
489	if (!in_dev) {
490		inet_free_ifa(ifa);
491		return -ENOBUFS;
492	}
493	ipv4_devconf_setall(in_dev);
494	if (ifa->ifa_dev != in_dev) {
495		WARN_ON(ifa->ifa_dev);
496		in_dev_hold(in_dev);
497		ifa->ifa_dev = in_dev;
498	}
499	if (ipv4_is_loopback(ifa->ifa_local))
500		ifa->ifa_scope = RT_SCOPE_HOST;
501	return inet_insert_ifa(ifa);
502}
503
504/* Caller must hold RCU or RTNL :
505 * We dont take a reference on found in_device
506 */
507struct in_device *inetdev_by_index(struct net *net, int ifindex)
508{
509	struct net_device *dev;
510	struct in_device *in_dev = NULL;
511
512	rcu_read_lock();
513	dev = dev_get_by_index_rcu(net, ifindex);
514	if (dev)
515		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
516	rcu_read_unlock();
517	return in_dev;
518}
519EXPORT_SYMBOL(inetdev_by_index);
520
521/* Called only from RTNL semaphored context. No locks. */
522
523struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
524				    __be32 mask)
525{
526	ASSERT_RTNL();
527
528	for_primary_ifa(in_dev) {
529		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
530			return ifa;
531	} endfor_ifa(in_dev);
532	return NULL;
533}
534
535static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
536{
537	struct net *net = sock_net(skb->sk);
538	struct nlattr *tb[IFA_MAX+1];
539	struct in_device *in_dev;
540	struct ifaddrmsg *ifm;
541	struct in_ifaddr *ifa, **ifap;
542	int err = -EINVAL;
543
544	ASSERT_RTNL();
545
546	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
547	if (err < 0)
548		goto errout;
549
550	ifm = nlmsg_data(nlh);
551	in_dev = inetdev_by_index(net, ifm->ifa_index);
552	if (in_dev == NULL) {
553		err = -ENODEV;
554		goto errout;
555	}
556
557	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
558	     ifap = &ifa->ifa_next) {
559		if (tb[IFA_LOCAL] &&
560		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
561			continue;
562
563		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
564			continue;
565
566		if (tb[IFA_ADDRESS] &&
567		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
568		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
569			continue;
570
571		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
572		return 0;
573	}
574
575	err = -EADDRNOTAVAIL;
576errout:
577	return err;
578}
579
580static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
581{
582	struct nlattr *tb[IFA_MAX+1];
583	struct in_ifaddr *ifa;
584	struct ifaddrmsg *ifm;
585	struct net_device *dev;
586	struct in_device *in_dev;
587	int err;
588
589	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
590	if (err < 0)
591		goto errout;
592
593	ifm = nlmsg_data(nlh);
594	err = -EINVAL;
595	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
596		goto errout;
597
598	dev = __dev_get_by_index(net, ifm->ifa_index);
599	err = -ENODEV;
600	if (dev == NULL)
601		goto errout;
602
603	in_dev = __in_dev_get_rtnl(dev);
604	err = -ENOBUFS;
605	if (in_dev == NULL)
606		goto errout;
607
608	ifa = inet_alloc_ifa();
609	if (ifa == NULL)
610		/*
611		 * A potential indev allocation can be left alive, it stays
612		 * assigned to its device and is destroy with it.
613		 */
614		goto errout;
615
616	ipv4_devconf_setall(in_dev);
617	in_dev_hold(in_dev);
618
619	if (tb[IFA_ADDRESS] == NULL)
620		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
621
622	INIT_HLIST_NODE(&ifa->hash);
623	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
624	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
625	ifa->ifa_flags = ifm->ifa_flags;
626	ifa->ifa_scope = ifm->ifa_scope;
627	ifa->ifa_dev = in_dev;
628
629	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
630	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
631
632	if (tb[IFA_BROADCAST])
633		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
634
635	if (tb[IFA_LABEL])
636		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
637	else
638		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
639
640	return ifa;
641
642errout:
643	return ERR_PTR(err);
644}
645
646static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
647{
648	struct net *net = sock_net(skb->sk);
649	struct in_ifaddr *ifa;
650
651	ASSERT_RTNL();
652
653	ifa = rtm_to_ifaddr(net, nlh);
654	if (IS_ERR(ifa))
655		return PTR_ERR(ifa);
656
657	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
658}
659
660/*
661 *	Determine a default network mask, based on the IP address.
662 */
663
664static inline int inet_abc_len(__be32 addr)
665{
666	int rc = -1;	/* Something else, probably a multicast. */
667
668	if (ipv4_is_zeronet(addr))
669		rc = 0;
670	else {
671		__u32 haddr = ntohl(addr);
672
673		if (IN_CLASSA(haddr))
674			rc = 8;
675		else if (IN_CLASSB(haddr))
676			rc = 16;
677		else if (IN_CLASSC(haddr))
678			rc = 24;
679	}
680
681	return rc;
682}
683
684
685int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
686{
687	struct ifreq ifr;
688	struct sockaddr_in sin_orig;
689	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
690	struct in_device *in_dev;
691	struct in_ifaddr **ifap = NULL;
692	struct in_ifaddr *ifa = NULL;
693	struct net_device *dev;
694	char *colon;
695	int ret = -EFAULT;
696	int tryaddrmatch = 0;
697
698	/*
699	 *	Fetch the caller's info block into kernel space
700	 */
701
702	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
703		goto out;
704	ifr.ifr_name[IFNAMSIZ - 1] = 0;
705
706	/* save original address for comparison */
707	memcpy(&sin_orig, sin, sizeof(*sin));
708
709	colon = strchr(ifr.ifr_name, ':');
710	if (colon)
711		*colon = 0;
712
713	dev_load(net, ifr.ifr_name);
714
715	switch (cmd) {
716	case SIOCGIFADDR:	/* Get interface address */
717	case SIOCGIFBRDADDR:	/* Get the broadcast address */
718	case SIOCGIFDSTADDR:	/* Get the destination address */
719	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
720		/* Note that these ioctls will not sleep,
721		   so that we do not impose a lock.
722		   One day we will be forced to put shlock here (I mean SMP)
723		 */
724		tryaddrmatch = (sin_orig.sin_family == AF_INET);
725		memset(sin, 0, sizeof(*sin));
726		sin->sin_family = AF_INET;
727		break;
728
729	case SIOCSIFFLAGS:
730		ret = -EACCES;
731		if (!capable(CAP_NET_ADMIN))
732			goto out;
733		break;
734	case SIOCSIFADDR:	/* Set interface address (and family) */
735	case SIOCSIFBRDADDR:	/* Set the broadcast address */
736	case SIOCSIFDSTADDR:	/* Set the destination address */
737	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
738		ret = -EACCES;
739		if (!capable(CAP_NET_ADMIN))
740			goto out;
741		ret = -EINVAL;
742		if (sin->sin_family != AF_INET)
743			goto out;
744		break;
745	default:
746		ret = -EINVAL;
747		goto out;
748	}
749
750	rtnl_lock();
751
752	ret = -ENODEV;
753	dev = __dev_get_by_name(net, ifr.ifr_name);
754	if (!dev)
755		goto done;
756
757	if (colon)
758		*colon = ':';
759
760	in_dev = __in_dev_get_rtnl(dev);
761	if (in_dev) {
762		if (tryaddrmatch) {
763			/* Matthias Andree */
764			/* compare label and address (4.4BSD style) */
765			/* note: we only do this for a limited set of ioctls
766			   and only if the original address family was AF_INET.
767			   This is checked above. */
768			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
769			     ifap = &ifa->ifa_next) {
770				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
771				    sin_orig.sin_addr.s_addr ==
772							ifa->ifa_local) {
773					break; /* found */
774				}
775			}
776		}
777		/* we didn't get a match, maybe the application is
778		   4.3BSD-style and passed in junk so we fall back to
779		   comparing just the label */
780		if (!ifa) {
781			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
782			     ifap = &ifa->ifa_next)
783				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
784					break;
785		}
786	}
787
788	ret = -EADDRNOTAVAIL;
789	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
790		goto done;
791
792	switch (cmd) {
793	case SIOCGIFADDR:	/* Get interface address */
794		sin->sin_addr.s_addr = ifa->ifa_local;
795		goto rarok;
796
797	case SIOCGIFBRDADDR:	/* Get the broadcast address */
798		sin->sin_addr.s_addr = ifa->ifa_broadcast;
799		goto rarok;
800
801	case SIOCGIFDSTADDR:	/* Get the destination address */
802		sin->sin_addr.s_addr = ifa->ifa_address;
803		goto rarok;
804
805	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
806		sin->sin_addr.s_addr = ifa->ifa_mask;
807		goto rarok;
808
809	case SIOCSIFFLAGS:
810		if (colon) {
811			ret = -EADDRNOTAVAIL;
812			if (!ifa)
813				break;
814			ret = 0;
815			if (!(ifr.ifr_flags & IFF_UP))
816				inet_del_ifa(in_dev, ifap, 1);
817			break;
818		}
819		ret = dev_change_flags(dev, ifr.ifr_flags);
820		break;
821
822	case SIOCSIFADDR:	/* Set interface address (and family) */
823		ret = -EINVAL;
824		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
825			break;
826
827		if (!ifa) {
828			ret = -ENOBUFS;
829			ifa = inet_alloc_ifa();
830			INIT_HLIST_NODE(&ifa->hash);
831			if (!ifa)
832				break;
833			if (colon)
834				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
835			else
836				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837		} else {
838			ret = 0;
839			if (ifa->ifa_local == sin->sin_addr.s_addr)
840				break;
841			inet_del_ifa(in_dev, ifap, 0);
842			ifa->ifa_broadcast = 0;
843			ifa->ifa_scope = 0;
844		}
845
846		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
847
848		if (!(dev->flags & IFF_POINTOPOINT)) {
849			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
850			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
851			if ((dev->flags & IFF_BROADCAST) &&
852			    ifa->ifa_prefixlen < 31)
853				ifa->ifa_broadcast = ifa->ifa_address |
854						     ~ifa->ifa_mask;
855		} else {
856			ifa->ifa_prefixlen = 32;
857			ifa->ifa_mask = inet_make_mask(32);
858		}
859		ret = inet_set_ifa(dev, ifa);
860		break;
861
862	case SIOCSIFBRDADDR:	/* Set the broadcast address */
863		ret = 0;
864		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
865			inet_del_ifa(in_dev, ifap, 0);
866			ifa->ifa_broadcast = sin->sin_addr.s_addr;
867			inet_insert_ifa(ifa);
868		}
869		break;
870
871	case SIOCSIFDSTADDR:	/* Set the destination address */
872		ret = 0;
873		if (ifa->ifa_address == sin->sin_addr.s_addr)
874			break;
875		ret = -EINVAL;
876		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
877			break;
878		ret = 0;
879		inet_del_ifa(in_dev, ifap, 0);
880		ifa->ifa_address = sin->sin_addr.s_addr;
881		inet_insert_ifa(ifa);
882		break;
883
884	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
885
886		/*
887		 *	The mask we set must be legal.
888		 */
889		ret = -EINVAL;
890		if (bad_mask(sin->sin_addr.s_addr, 0))
891			break;
892		ret = 0;
893		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
894			__be32 old_mask = ifa->ifa_mask;
895			inet_del_ifa(in_dev, ifap, 0);
896			ifa->ifa_mask = sin->sin_addr.s_addr;
897			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
898
899			/* See if current broadcast address matches
900			 * with current netmask, then recalculate
901			 * the broadcast address. Otherwise it's a
902			 * funny address, so don't touch it since
903			 * the user seems to know what (s)he's doing...
904			 */
905			if ((dev->flags & IFF_BROADCAST) &&
906			    (ifa->ifa_prefixlen < 31) &&
907			    (ifa->ifa_broadcast ==
908			     (ifa->ifa_local|~old_mask))) {
909				ifa->ifa_broadcast = (ifa->ifa_local |
910						      ~sin->sin_addr.s_addr);
911			}
912			inet_insert_ifa(ifa);
913		}
914		break;
915	}
916done:
917	rtnl_unlock();
918out:
919	return ret;
920rarok:
921	rtnl_unlock();
922	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
923	goto out;
924}
925
926static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
927{
928	struct in_device *in_dev = __in_dev_get_rtnl(dev);
929	struct in_ifaddr *ifa;
930	struct ifreq ifr;
931	int done = 0;
932
933	if (!in_dev)
934		goto out;
935
936	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
937		if (!buf) {
938			done += sizeof(ifr);
939			continue;
940		}
941		if (len < (int) sizeof(ifr))
942			break;
943		memset(&ifr, 0, sizeof(struct ifreq));
944		if (ifa->ifa_label)
945			strcpy(ifr.ifr_name, ifa->ifa_label);
946		else
947			strcpy(ifr.ifr_name, dev->name);
948
949		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
950		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
951								ifa->ifa_local;
952
953		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
954			done = -EFAULT;
955			break;
956		}
957		buf  += sizeof(struct ifreq);
958		len  -= sizeof(struct ifreq);
959		done += sizeof(struct ifreq);
960	}
961out:
962	return done;
963}
964
965__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
966{
967	__be32 addr = 0;
968	struct in_device *in_dev;
969	struct net *net = dev_net(dev);
970
971	rcu_read_lock();
972	in_dev = __in_dev_get_rcu(dev);
973	if (!in_dev)
974		goto no_in_dev;
975
976	for_primary_ifa(in_dev) {
977		if (ifa->ifa_scope > scope)
978			continue;
979		if (!dst || inet_ifa_match(dst, ifa)) {
980			addr = ifa->ifa_local;
981			break;
982		}
983		if (!addr)
984			addr = ifa->ifa_local;
985	} endfor_ifa(in_dev);
986
987	if (addr)
988		goto out_unlock;
989no_in_dev:
990
991	/* Not loopback addresses on loopback should be preferred
992	   in this case. It is importnat that lo is the first interface
993	   in dev_base list.
994	 */
995	for_each_netdev_rcu(net, dev) {
996		in_dev = __in_dev_get_rcu(dev);
997		if (!in_dev)
998			continue;
999
1000		for_primary_ifa(in_dev) {
1001			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1002			    ifa->ifa_scope <= scope) {
1003				addr = ifa->ifa_local;
1004				goto out_unlock;
1005			}
1006		} endfor_ifa(in_dev);
1007	}
1008out_unlock:
1009	rcu_read_unlock();
1010	return addr;
1011}
1012EXPORT_SYMBOL(inet_select_addr);
1013
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks every address of @in_dev tracking two things:
 *   addr - the first local address that equals @local (or any, if
 *          @local is zero) and whose scope value is <= @scope;
 *   same - whether an address was found whose subnet contains @local
 *          and @dst (a zero value matches anything).
 * Returns the selected address only if a subnet match was found as
 * well, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet match already seen: nothing left to find */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1050
1051/*
1052 * Confirm that local IP address exists using wildcards:
1053 * - in_dev: only on this interface, 0=any interface
1054 * - dst: only in the same subnet as dst, 0=any dst
1055 * - local: address, 0=autoselect the local address
1056 * - scope: maximum allowed scope value for the local address
1057 */
1058__be32 inet_confirm_addr(struct in_device *in_dev,
1059			 __be32 dst, __be32 local, int scope)
1060{
1061	__be32 addr = 0;
1062	struct net_device *dev;
1063	struct net *net;
1064
1065	if (scope != RT_SCOPE_LINK)
1066		return confirm_addr_indev(in_dev, dst, local, scope);
1067
1068	net = dev_net(in_dev->dev);
1069	rcu_read_lock();
1070	for_each_netdev_rcu(net, dev) {
1071		in_dev = __in_dev_get_rcu(dev);
1072		if (in_dev) {
1073			addr = confirm_addr_indev(in_dev, dst, local, scope);
1074			if (addr)
1075				break;
1076		}
1077	}
1078	rcu_read_unlock();
1079
1080	return addr;
1081}
1082
1083/*
1084 *	Device notifier
1085 */
1086
/*
 * Add @nb to the IPv4 address notifier chain; returns the result of
 * the chain registration.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1092
/*
 * Remove @nb from the IPv4 address notifier chain; returns the result
 * of the chain unregistration.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1098
1099/* Rename ifa_labels for a device name change. Make some effort to preserve
1100 * existing alias numbering and to create unique labels if possible.
1101*/
1102static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1103{
1104	struct in_ifaddr *ifa;
1105	int named = 0;
1106
1107	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108		char old[IFNAMSIZ], *dot;
1109
1110		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112		if (named++ == 0)
1113			goto skip;
1114		dot = strchr(old, ':');
1115		if (dot == NULL) {
1116			sprintf(old, ":%d", named);
1117			dot = old;
1118		}
1119		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120			strcat(ifa->ifa_label, dot);
1121		else
1122			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123skip:
1124		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1125	}
1126}
1127
/* An MTU below 68 bytes is not enough for IPv4 on the device. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return !(mtu < 68);
}
1132
1133static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134					struct in_device *in_dev)
1135
1136{
1137	struct in_ifaddr *ifa;
1138
1139	for (ifa = in_dev->ifa_list; ifa;
1140	     ifa = ifa->ifa_next) {
1141		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1142			 ifa->ifa_local, dev,
1143			 ifa->ifa_local, NULL,
1144			 dev->dev_addr, NULL);
1145	}
1146}
1147
/*
 * Netdevice notifier callback maintaining the IPv4 per-device state.
 * @ptr is the affected net_device.  Always returns NOTIFY_DONE, except
 * when creating the in_device on NETDEV_REGISTER fails (-ENOMEM is
 * reported through notifier_from_errno()).
 *
 * Called only under RTNL semaphore
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only registration or an MTU change can
	 * create it, every other event is ignored.
	 */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* A device being registered must not have an in_device yet */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback devices get INADDR_LOOPBACK/8 automatically */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* Gratuitous ARP only if arp_notify is enabled on the device */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl entries follow the new name */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1238
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1242
1243static inline size_t inet_nlmsg_size(void)
1244{
1245	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246	       + nla_total_size(4) /* IFA_ADDRESS */
1247	       + nla_total_size(4) /* IFA_LOCAL */
1248	       + nla_total_size(4) /* IFA_BROADCAST */
1249	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1250}
1251
1252static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1253			    u32 pid, u32 seq, int event, unsigned int flags)
1254{
1255	struct ifaddrmsg *ifm;
1256	struct nlmsghdr  *nlh;
1257
1258	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1259	if (nlh == NULL)
1260		return -EMSGSIZE;
1261
1262	ifm = nlmsg_data(nlh);
1263	ifm->ifa_family = AF_INET;
1264	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1265	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1266	ifm->ifa_scope = ifa->ifa_scope;
1267	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1268
1269	if (ifa->ifa_address)
1270		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1271
1272	if (ifa->ifa_local)
1273		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1274
1275	if (ifa->ifa_broadcast)
1276		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1277
1278	if (ifa->ifa_label[0])
1279		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1280
1281	return nlmsg_end(skb, nlh);
1282
1283nla_put_failure:
1284	nlmsg_cancel(skb, nlh);
1285	return -EMSGSIZE;
1286}
1287
/* Netlink dump callback: emit an RTM_NEWADDR message (NLM_F_MULTI) for
 * the IPv4 addresses of the devices in the requesting socket's namespace.
 *
 * The dump is resumable.  The position reached is kept in cb->args:
 *   args[0] - bucket of net->dev_index_head
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited; it is reset to 0
	 * for every following bucket.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			/* Skip devices already dumped. */
			if (idx < s_idx)
				goto cont;
			/* Past the device where the last pass stopped:
			 * start from its first address.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				/* Skip addresses already dumped. */
				if (ip_idx < s_ip_idx)
					continue;
				/* Stop when the message could not be added;
				 * h/idx/ip_idx then describe the address to
				 * resume from.
				 */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1342
1343static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1344		      u32 pid)
1345{
1346	struct sk_buff *skb;
1347	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1348	int err = -ENOBUFS;
1349	struct net *net;
1350
1351	net = dev_net(ifa->ifa_dev->dev);
1352	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1353	if (skb == NULL)
1354		goto errout;
1355
1356	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1357	if (err < 0) {
1358		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1359		WARN_ON(err == -EMSGSIZE);
1360		kfree_skb(skb);
1361		goto errout;
1362	}
1363	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1364	return;
1365errout:
1366	if (err < 0)
1367		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1368}
1369
1370static size_t inet_get_link_af_size(const struct net_device *dev)
1371{
1372	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1373
1374	if (!in_dev)
1375		return 0;
1376
1377	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1378}
1379
1380static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1381{
1382	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1383	struct nlattr *nla;
1384	int i;
1385
1386	if (!in_dev)
1387		return -ENODATA;
1388
1389	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1390	if (nla == NULL)
1391		return -EMSGSIZE;
1392
1393	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1394		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1395
1396	return 0;
1397}
1398
/* Parsing policy for the AF_INET link attributes: IFLA_INET_CONF is a
 * nested attribute.  Used by inet_validate_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1402
1403static int inet_validate_link_af(const struct net_device *dev,
1404				 const struct nlattr *nla)
1405{
1406	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407	int err, rem;
1408
1409	if (dev && !__in_dev_get_rtnl(dev))
1410		return -EAFNOSUPPORT;
1411
1412	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1413	if (err < 0)
1414		return err;
1415
1416	if (tb[IFLA_INET_CONF]) {
1417		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1418			int cfgid = nla_type(a);
1419
1420			if (nla_len(a) < 4)
1421				return -EINVAL;
1422
1423			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1424				return -EINVAL;
1425		}
1426	}
1427
1428	return 0;
1429}
1430
1431static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1432{
1433	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1434	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1435	int rem;
1436
1437	if (!in_dev)
1438		return -EAFNOSUPPORT;
1439
1440	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1441		BUG();
1442
1443	if (tb[IFLA_INET_CONF]) {
1444		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1445			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1446	}
1447
1448	return 0;
1449}
1450
1451#ifdef CONFIG_SYSCTL
1452
1453static void devinet_copy_dflt_conf(struct net *net, int i)
1454{
1455	struct net_device *dev;
1456
1457	rcu_read_lock();
1458	for_each_netdev_rcu(net, dev) {
1459		struct in_device *in_dev;
1460
1461		in_dev = __in_dev_get_rcu(dev);
1462		if (in_dev && !test_bit(i, in_dev->cnf.state))
1463			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1464	}
1465	rcu_read_unlock();
1466}
1467
1468/* called with RTNL locked */
1469static void inet_forward_change(struct net *net)
1470{
1471	struct net_device *dev;
1472	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1473
1474	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1475	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1476
1477	for_each_netdev(net, dev) {
1478		struct in_device *in_dev;
1479		if (on)
1480			dev_disable_lro(dev);
1481		rcu_read_lock();
1482		in_dev = __in_dev_get_rcu(dev);
1483		if (in_dev)
1484			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1485		rcu_read_unlock();
1486	}
1487}
1488
1489static int devinet_conf_proc(ctl_table *ctl, int write,
1490			     void __user *buffer,
1491			     size_t *lenp, loff_t *ppos)
1492{
1493	int old_value = *(int *)ctl->data;
1494	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1495	int new_value = *(int *)ctl->data;
1496
1497	if (write) {
1498		struct ipv4_devconf *cnf = ctl->extra1;
1499		struct net *net = ctl->extra2;
1500		int i = (int *)ctl->data - cnf->data;
1501
1502		set_bit(i, cnf->state);
1503
1504		if (cnf == net->ipv4.devconf_dflt)
1505			devinet_copy_dflt_conf(net, i);
1506		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1507			if ((new_value == 0) && (old_value != 0))
1508				rt_cache_flush(net, 0);
1509	}
1510
1511	return ret;
1512}
1513
1514static int devinet_sysctl_forward(ctl_table *ctl, int write,
1515				  void __user *buffer,
1516				  size_t *lenp, loff_t *ppos)
1517{
1518	int *valp = ctl->data;
1519	int val = *valp;
1520	loff_t pos = *ppos;
1521	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1522
1523	if (write && *valp != val) {
1524		struct net *net = ctl->extra2;
1525
1526		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1527			if (!rtnl_trylock()) {
1528				/* Restore the original values before restarting */
1529				*valp = val;
1530				*ppos = pos;
1531				return restart_syscall();
1532			}
1533			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1534				inet_forward_change(net);
1535			} else if (*valp) {
1536				struct ipv4_devconf *cnf = ctl->extra1;
1537				struct in_device *idev =
1538					container_of(cnf, struct in_device, cnf);
1539				dev_disable_lro(idev->dev);
1540			}
1541			rtnl_unlock();
1542			rt_cache_flush(net, 0);
1543		}
1544	}
1545
1546	return ret;
1547}
1548
1549static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1550				void __user *buffer,
1551				size_t *lenp, loff_t *ppos)
1552{
1553	int *valp = ctl->data;
1554	int val = *valp;
1555	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1556	struct net *net = ctl->extra2;
1557
1558	if (write && *valp != val)
1559		rt_cache_flush(net, 0);
1560
1561	return ret;
1562}
1563
/*
 * Builders for struct ctl_table initializers describing devconf entries.
 *
 * DEVINET_SYSCTL_ENTRY expands to one entry whose .data points at slot
 * IPV4_DEVCONF_<conf> - 1 of the global ipv4_devconf and whose .extra1
 * points at ipv4_devconf itself.  The other macros are shorthands:
 *   RW_ENTRY       - mode 0644, handled by devinet_conf_proc
 *   RO_ENTRY       - mode 0444, handled by devinet_conf_proc
 *   COMPLEX_ENTRY  - mode 0644, caller-supplied handler
 *   FLUSHING_ENTRY - mode 0644, handled by ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(conf, label, perm, handler)		\
	{								\
		.procname	= label,				\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## conf - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= perm,					\
		.proc_handler	= handler,				\
		.extra1		= &ipv4_devconf,			\
	}

#define DEVINET_SYSCTL_RW_ENTRY(conf, label)				\
	DEVINET_SYSCTL_ENTRY(conf, label, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(conf, label)				\
	DEVINET_SYSCTL_ENTRY(conf, label, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(conf, label, handler)		\
	DEVINET_SYSCTL_ENTRY(conf, label, 0644, handler)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(conf, label)			\
	DEVINET_SYSCTL_ENTRY(conf, label, 0644, ipv4_doint_and_flush)
1586
/*
 * Template for the per-devconf sysctl tables.  __devinet_sysctl_register()
 * duplicates it for every registration and re-points the entries at the
 * target struct ipv4_devconf, so the .data/.extra1 values here always
 * refer to the global ipv4_devconf.
 *
 * sysctl_header - handle of the registered copy
 * devinet_vars  - the entries; slots not listed below stay zeroed
 * dev_name      - private copy of the name the table is registered under
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		/* the only read-only (0444) entry */
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* plain read/write integers handled by devinet_conf_proc */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* entries whose change flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1626
1627static int __devinet_sysctl_register(struct net *net, char *dev_name,
1628					struct ipv4_devconf *p)
1629{
1630	int i;
1631	struct devinet_sysctl_table *t;
1632
1633#define DEVINET_CTL_PATH_DEV	3
1634
1635	struct ctl_path devinet_ctl_path[] = {
1636		{ .procname = "net",  },
1637		{ .procname = "ipv4", },
1638		{ .procname = "conf", },
1639		{ /* to be set */ },
1640		{ },
1641	};
1642
1643	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1644	if (!t)
1645		goto out;
1646
1647	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1648		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1649		t->devinet_vars[i].extra1 = p;
1650		t->devinet_vars[i].extra2 = net;
1651	}
1652
1653	/*
1654	 * Make a copy of dev_name, because '.procname' is regarded as const
1655	 * by sysctl and we wouldn't want anyone to change it under our feet
1656	 * (see SIOCSIFNAME).
1657	 */
1658	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1659	if (!t->dev_name)
1660		goto free;
1661
1662	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1663
1664	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1665			t->devinet_vars);
1666	if (!t->sysctl_header)
1667		goto free_procname;
1668
1669	p->sysctl = t;
1670	return 0;
1671
1672free_procname:
1673	kfree(t->dev_name);
1674free:
1675	kfree(t);
1676out:
1677	return -ENOBUFS;
1678}
1679
1680static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1681{
1682	struct devinet_sysctl_table *t = cnf->sysctl;
1683
1684	if (t == NULL)
1685		return;
1686
1687	cnf->sysctl = NULL;
1688	unregister_net_sysctl_table(t->sysctl_header);
1689	kfree(t->dev_name);
1690	kfree(t);
1691}
1692
1693static void devinet_sysctl_register(struct in_device *idev)
1694{
1695	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1696	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1697					&idev->cnf);
1698}
1699
1700static void devinet_sysctl_unregister(struct in_device *idev)
1701{
1702	__devinet_sysctl_unregister(&idev->cnf);
1703	neigh_sysctl_unregister(idev->arp_parms);
1704}
1705
/* Table for the "ip_forward" sysctl; devinet_init_net() registers it
 * under net_ipv4_path.  As written it refers to the FORWARDING slot of
 * the global ipv4_devconf and to init_net; devinet_init_net() makes an
 * adjusted copy for every other namespace.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1719
/* sysctl path "net/ipv4" under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
1725#endif
1726
/* Per-namespace setup of the IPv4 device configuration.
 *
 * init_net uses the global ipv4_devconf / ipv4_devconf_dflt (and the
 * static ctl_forward_entry) directly; any other namespace gets its own
 * kmemdup()ed copies.  With CONFIG_SYSCTL the "all" and "default" devconf
 * tables and the ip_forward entry are registered as well.
 *
 * Returns 0 on success.  On failure everything allocated or registered so
 * far is undone and a negative error is returned: -ENOMEM for allocation
 * or ip_forward registration failures, otherwise the error from
 * __devinet_sysctl_register().
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	/* Namespaces other than init_net start from a copy of the globals. */
	if (!net_eq(net, &init_net)) {
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied ip_forward entry at this namespace. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Unwind in reverse order of setup; the static objects used by
	 * init_net are never passed to kfree().
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1798
/* Per-namespace teardown: unregister the sysctl tables set up by
 * devinet_init_net() and free the namespace's devconf copies.
 *
 * NOTE(review): everything is kfree()d unconditionally, while init_net
 * uses static objects in devinet_init_net(); presumably this is never run
 * for init_net - confirm.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* Fetch the table pointer from the header before the header is
	 * unregistered; the table itself is freed last.
	 */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
	__devinet_sysctl_unregister(net->ipv4.devconf_all);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
1813
/* Per-namespace init/exit hooks; registered by devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1818
/* AF_INET link attribute operations (size, fill, validate, set);
 * registered by devinet_init() through rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
1826
1827void __init devinet_init(void)
1828{
1829	int i;
1830
1831	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1832		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1833
1834	register_pernet_subsys(&devinet_ops);
1835
1836	register_gifconf(PF_INET, inet_gifconf);
1837	register_netdevice_notifier(&ip_netdev_notifier);
1838
1839	rtnl_af_register(&inet_af_ops);
1840
1841	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1842	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1843	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1844}
1845
1846