devinet.c revision 15e473046cb6e5d18a4d0057e61d76315230382b
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <linux/bitops.h>
31#include <linux/capability.h>
32#include <linux/module.h>
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/string.h>
36#include <linux/mm.h>
37#include <linux/socket.h>
38#include <linux/sockios.h>
39#include <linux/in.h>
40#include <linux/errno.h>
41#include <linux/interrupt.h>
42#include <linux/if_addr.h>
43#include <linux/if_ether.h>
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/etherdevice.h>
47#include <linux/skbuff.h>
48#include <linux/init.h>
49#include <linux/notifier.h>
50#include <linux/inetdevice.h>
51#include <linux/igmp.h>
52#include <linux/slab.h>
53#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58
59#include <net/arp.h>
60#include <net/ip.h>
61#include <net/route.h>
62#include <net/ip_fib.h>
63#include <net/rtnetlink.h>
64#include <net/net_namespace.h>
65
66#include "fib_lookup.h"
67
68static struct ipv4_devconf ipv4_devconf = {
69	.data = {
70		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74	},
75};
76
77static struct ipv4_devconf ipv4_devconf_dflt = {
78	.data = {
79		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
84	},
85};
86
/*
 * Read devconf attribute @attr from the default settings hanging off a
 * network namespace (net->ipv4.devconf_dflt).
 *
 * @net is parenthesized so that an arbitrary pointer expression (for
 * example "base + 1" or "cond ? a : b") can be passed without the
 * "->" binding to only part of it.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
89
90static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91	[IFA_LOCAL]     	= { .type = NLA_U32 },
92	[IFA_ADDRESS]   	= { .type = NLA_U32 },
93	[IFA_BROADCAST] 	= { .type = NLA_U32 },
94	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95};
96
97#define IN4_ADDR_HSIZE_SHIFT	8
98#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
99
100static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
101static DEFINE_SPINLOCK(inet_addr_hash_lock);
102
103static u32 inet_addr_hash(struct net *net, __be32 addr)
104{
105	u32 val = (__force u32) addr ^ net_hash_mix(net);
106
107	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
108}
109
110static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
111{
112	u32 hash = inet_addr_hash(net, ifa->ifa_local);
113
114	spin_lock(&inet_addr_hash_lock);
115	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
116	spin_unlock(&inet_addr_hash_lock);
117}
118
119static void inet_hash_remove(struct in_ifaddr *ifa)
120{
121	spin_lock(&inet_addr_hash_lock);
122	hlist_del_init_rcu(&ifa->hash);
123	spin_unlock(&inet_addr_hash_lock);
124}
125
126/**
127 * __ip_dev_find - find the first device with a given source address.
128 * @net: the net namespace
129 * @addr: the source address
130 * @devref: if true, take a reference on the found device
131 *
132 * If a caller uses devref=false, it should be protected by RCU, or RTNL
133 */
134struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
135{
136	u32 hash = inet_addr_hash(net, addr);
137	struct net_device *result = NULL;
138	struct in_ifaddr *ifa;
139	struct hlist_node *node;
140
141	rcu_read_lock();
142	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
143		if (ifa->ifa_local == addr) {
144			struct net_device *dev = ifa->ifa_dev->dev;
145
146			if (!net_eq(dev_net(dev), net))
147				continue;
148			result = dev;
149			break;
150		}
151	}
152	if (!result) {
153		struct flowi4 fl4 = { .daddr = addr };
154		struct fib_result res = { 0 };
155		struct fib_table *local;
156
157		/* Fallback to FIB local table so that communication
158		 * over loopback subnets work.
159		 */
160		local = fib_get_table(net, RT_TABLE_LOCAL);
161		if (local &&
162		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
163		    res.type == RTN_LOCAL)
164			result = FIB_RES_DEV(res);
165	}
166	if (result && devref)
167		dev_hold(result);
168	rcu_read_unlock();
169	return result;
170}
171EXPORT_SYMBOL(__ip_dev_find);
172
173static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
174
175static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
176static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
177			 int destroy);
178#ifdef CONFIG_SYSCTL
179static void devinet_sysctl_register(struct in_device *idev);
180static void devinet_sysctl_unregister(struct in_device *idev);
181#else
182static void devinet_sysctl_register(struct in_device *idev)
183{
184}
185static void devinet_sysctl_unregister(struct in_device *idev)
186{
187}
188#endif
189
190/* Locks all the inet devices. */
191
192static struct in_ifaddr *inet_alloc_ifa(void)
193{
194	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
195}
196
197static void inet_rcu_free_ifa(struct rcu_head *head)
198{
199	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
200	if (ifa->ifa_dev)
201		in_dev_put(ifa->ifa_dev);
202	kfree(ifa);
203}
204
205static void inet_free_ifa(struct in_ifaddr *ifa)
206{
207	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
208}
209
210void in_dev_finish_destroy(struct in_device *idev)
211{
212	struct net_device *dev = idev->dev;
213
214	WARN_ON(idev->ifa_list);
215	WARN_ON(idev->mc_list);
216#ifdef NET_REFCNT_DEBUG
217	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
218#endif
219	dev_put(dev);
220	if (!idev->dead)
221		pr_err("Freeing alive in_device %p\n", idev);
222	else
223		kfree(idev);
224}
225EXPORT_SYMBOL(in_dev_finish_destroy);
226
227static struct in_device *inetdev_init(struct net_device *dev)
228{
229	struct in_device *in_dev;
230
231	ASSERT_RTNL();
232
233	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
234	if (!in_dev)
235		goto out;
236	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
237			sizeof(in_dev->cnf));
238	in_dev->cnf.sysctl = NULL;
239	in_dev->dev = dev;
240	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
241	if (!in_dev->arp_parms)
242		goto out_kfree;
243	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
244		dev_disable_lro(dev);
245	/* Reference in_dev->dev */
246	dev_hold(dev);
247	/* Account for reference dev->ip_ptr (below) */
248	in_dev_hold(in_dev);
249
250	devinet_sysctl_register(in_dev);
251	ip_mc_init_dev(in_dev);
252	if (dev->flags & IFF_UP)
253		ip_mc_up(in_dev);
254
255	/* we can receive as soon as ip_ptr is set -- do this last */
256	rcu_assign_pointer(dev->ip_ptr, in_dev);
257out:
258	return in_dev;
259out_kfree:
260	kfree(in_dev);
261	in_dev = NULL;
262	goto out;
263}
264
265static void in_dev_rcu_put(struct rcu_head *head)
266{
267	struct in_device *idev = container_of(head, struct in_device, rcu_head);
268	in_dev_put(idev);
269}
270
271static void inetdev_destroy(struct in_device *in_dev)
272{
273	struct in_ifaddr *ifa;
274	struct net_device *dev;
275
276	ASSERT_RTNL();
277
278	dev = in_dev->dev;
279
280	in_dev->dead = 1;
281
282	ip_mc_destroy_dev(in_dev);
283
284	while ((ifa = in_dev->ifa_list) != NULL) {
285		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
286		inet_free_ifa(ifa);
287	}
288
289	RCU_INIT_POINTER(dev->ip_ptr, NULL);
290
291	devinet_sysctl_unregister(in_dev);
292	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
293	arp_ifdown(dev);
294
295	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
296}
297
298int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
299{
300	rcu_read_lock();
301	for_primary_ifa(in_dev) {
302		if (inet_ifa_match(a, ifa)) {
303			if (!b || inet_ifa_match(b, ifa)) {
304				rcu_read_unlock();
305				return 1;
306			}
307		}
308	} endfor_ifa(in_dev);
309	rcu_read_unlock();
310	return 0;
311}
312
313static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
314			 int destroy, struct nlmsghdr *nlh, u32 portid)
315{
316	struct in_ifaddr *promote = NULL;
317	struct in_ifaddr *ifa, *ifa1 = *ifap;
318	struct in_ifaddr *last_prim = in_dev->ifa_list;
319	struct in_ifaddr *prev_prom = NULL;
320	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
321
322	ASSERT_RTNL();
323
324	/* 1. Deleting primary ifaddr forces deletion all secondaries
325	 * unless alias promotion is set
326	 **/
327
328	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
329		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
330
331		while ((ifa = *ifap1) != NULL) {
332			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
333			    ifa1->ifa_scope <= ifa->ifa_scope)
334				last_prim = ifa;
335
336			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
337			    ifa1->ifa_mask != ifa->ifa_mask ||
338			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
339				ifap1 = &ifa->ifa_next;
340				prev_prom = ifa;
341				continue;
342			}
343
344			if (!do_promote) {
345				inet_hash_remove(ifa);
346				*ifap1 = ifa->ifa_next;
347
348				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
349				blocking_notifier_call_chain(&inetaddr_chain,
350						NETDEV_DOWN, ifa);
351				inet_free_ifa(ifa);
352			} else {
353				promote = ifa;
354				break;
355			}
356		}
357	}
358
359	/* On promotion all secondaries from subnet are changing
360	 * the primary IP, we must remove all their routes silently
361	 * and later to add them back with new prefsrc. Do this
362	 * while all addresses are on the device list.
363	 */
364	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
365		if (ifa1->ifa_mask == ifa->ifa_mask &&
366		    inet_ifa_match(ifa1->ifa_address, ifa))
367			fib_del_ifaddr(ifa, ifa1);
368	}
369
370	/* 2. Unlink it */
371
372	*ifap = ifa1->ifa_next;
373	inet_hash_remove(ifa1);
374
375	/* 3. Announce address deletion */
376
377	/* Send message first, then call notifier.
378	   At first sight, FIB update triggered by notifier
379	   will refer to already deleted ifaddr, that could confuse
380	   netlink listeners. It is not true: look, gated sees
381	   that route deleted and if it still thinks that ifaddr
382	   is valid, it will try to restore deleted routes... Grr.
383	   So that, this order is correct.
384	 */
385	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
386	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
387
388	if (promote) {
389		struct in_ifaddr *next_sec = promote->ifa_next;
390
391		if (prev_prom) {
392			prev_prom->ifa_next = promote->ifa_next;
393			promote->ifa_next = last_prim->ifa_next;
394			last_prim->ifa_next = promote;
395		}
396
397		promote->ifa_flags &= ~IFA_F_SECONDARY;
398		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
399		blocking_notifier_call_chain(&inetaddr_chain,
400				NETDEV_UP, promote);
401		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
402			if (ifa1->ifa_mask != ifa->ifa_mask ||
403			    !inet_ifa_match(ifa1->ifa_address, ifa))
404					continue;
405			fib_add_ifaddr(ifa);
406		}
407
408	}
409	if (destroy)
410		inet_free_ifa(ifa1);
411}
412
413static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
414			 int destroy)
415{
416	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
417}
418
419static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
420			     u32 portid)
421{
422	struct in_device *in_dev = ifa->ifa_dev;
423	struct in_ifaddr *ifa1, **ifap, **last_primary;
424
425	ASSERT_RTNL();
426
427	if (!ifa->ifa_local) {
428		inet_free_ifa(ifa);
429		return 0;
430	}
431
432	ifa->ifa_flags &= ~IFA_F_SECONDARY;
433	last_primary = &in_dev->ifa_list;
434
435	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
436	     ifap = &ifa1->ifa_next) {
437		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
438		    ifa->ifa_scope <= ifa1->ifa_scope)
439			last_primary = &ifa1->ifa_next;
440		if (ifa1->ifa_mask == ifa->ifa_mask &&
441		    inet_ifa_match(ifa1->ifa_address, ifa)) {
442			if (ifa1->ifa_local == ifa->ifa_local) {
443				inet_free_ifa(ifa);
444				return -EEXIST;
445			}
446			if (ifa1->ifa_scope != ifa->ifa_scope) {
447				inet_free_ifa(ifa);
448				return -EINVAL;
449			}
450			ifa->ifa_flags |= IFA_F_SECONDARY;
451		}
452	}
453
454	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
455		net_srandom(ifa->ifa_local);
456		ifap = last_primary;
457	}
458
459	ifa->ifa_next = *ifap;
460	*ifap = ifa;
461
462	inet_hash_insert(dev_net(in_dev->dev), ifa);
463
464	/* Send message first, then call notifier.
465	   Notifier will trigger FIB update, so that
466	   listeners of netlink will know about new ifaddr */
467	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
468	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
469
470	return 0;
471}
472
473static int inet_insert_ifa(struct in_ifaddr *ifa)
474{
475	return __inet_insert_ifa(ifa, NULL, 0);
476}
477
478static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
479{
480	struct in_device *in_dev = __in_dev_get_rtnl(dev);
481
482	ASSERT_RTNL();
483
484	if (!in_dev) {
485		inet_free_ifa(ifa);
486		return -ENOBUFS;
487	}
488	ipv4_devconf_setall(in_dev);
489	if (ifa->ifa_dev != in_dev) {
490		WARN_ON(ifa->ifa_dev);
491		in_dev_hold(in_dev);
492		ifa->ifa_dev = in_dev;
493	}
494	if (ipv4_is_loopback(ifa->ifa_local))
495		ifa->ifa_scope = RT_SCOPE_HOST;
496	return inet_insert_ifa(ifa);
497}
498
499/* Caller must hold RCU or RTNL :
500 * We dont take a reference on found in_device
501 */
502struct in_device *inetdev_by_index(struct net *net, int ifindex)
503{
504	struct net_device *dev;
505	struct in_device *in_dev = NULL;
506
507	rcu_read_lock();
508	dev = dev_get_by_index_rcu(net, ifindex);
509	if (dev)
510		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
511	rcu_read_unlock();
512	return in_dev;
513}
514EXPORT_SYMBOL(inetdev_by_index);
515
516/* Called only from RTNL semaphored context. No locks. */
517
518struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
519				    __be32 mask)
520{
521	ASSERT_RTNL();
522
523	for_primary_ifa(in_dev) {
524		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
525			return ifa;
526	} endfor_ifa(in_dev);
527	return NULL;
528}
529
530static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
531{
532	struct net *net = sock_net(skb->sk);
533	struct nlattr *tb[IFA_MAX+1];
534	struct in_device *in_dev;
535	struct ifaddrmsg *ifm;
536	struct in_ifaddr *ifa, **ifap;
537	int err = -EINVAL;
538
539	ASSERT_RTNL();
540
541	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
542	if (err < 0)
543		goto errout;
544
545	ifm = nlmsg_data(nlh);
546	in_dev = inetdev_by_index(net, ifm->ifa_index);
547	if (in_dev == NULL) {
548		err = -ENODEV;
549		goto errout;
550	}
551
552	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
553	     ifap = &ifa->ifa_next) {
554		if (tb[IFA_LOCAL] &&
555		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
556			continue;
557
558		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
559			continue;
560
561		if (tb[IFA_ADDRESS] &&
562		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
563		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
564			continue;
565
566		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
567		return 0;
568	}
569
570	err = -EADDRNOTAVAIL;
571errout:
572	return err;
573}
574
575static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
576{
577	struct nlattr *tb[IFA_MAX+1];
578	struct in_ifaddr *ifa;
579	struct ifaddrmsg *ifm;
580	struct net_device *dev;
581	struct in_device *in_dev;
582	int err;
583
584	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
585	if (err < 0)
586		goto errout;
587
588	ifm = nlmsg_data(nlh);
589	err = -EINVAL;
590	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
591		goto errout;
592
593	dev = __dev_get_by_index(net, ifm->ifa_index);
594	err = -ENODEV;
595	if (dev == NULL)
596		goto errout;
597
598	in_dev = __in_dev_get_rtnl(dev);
599	err = -ENOBUFS;
600	if (in_dev == NULL)
601		goto errout;
602
603	ifa = inet_alloc_ifa();
604	if (ifa == NULL)
605		/*
606		 * A potential indev allocation can be left alive, it stays
607		 * assigned to its device and is destroy with it.
608		 */
609		goto errout;
610
611	ipv4_devconf_setall(in_dev);
612	in_dev_hold(in_dev);
613
614	if (tb[IFA_ADDRESS] == NULL)
615		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
616
617	INIT_HLIST_NODE(&ifa->hash);
618	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
619	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
620	ifa->ifa_flags = ifm->ifa_flags;
621	ifa->ifa_scope = ifm->ifa_scope;
622	ifa->ifa_dev = in_dev;
623
624	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
625	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
626
627	if (tb[IFA_BROADCAST])
628		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
629
630	if (tb[IFA_LABEL])
631		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
632	else
633		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
634
635	return ifa;
636
637errout:
638	return ERR_PTR(err);
639}
640
641static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
642{
643	struct net *net = sock_net(skb->sk);
644	struct in_ifaddr *ifa;
645
646	ASSERT_RTNL();
647
648	ifa = rtm_to_ifaddr(net, nlh);
649	if (IS_ERR(ifa))
650		return PTR_ERR(ifa);
651
652	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
653}
654
655/*
656 *	Determine a default network mask, based on the IP address.
657 */
658
659static int inet_abc_len(__be32 addr)
660{
661	int rc = -1;	/* Something else, probably a multicast. */
662
663	if (ipv4_is_zeronet(addr))
664		rc = 0;
665	else {
666		__u32 haddr = ntohl(addr);
667
668		if (IN_CLASSA(haddr))
669			rc = 8;
670		else if (IN_CLASSB(haddr))
671			rc = 16;
672		else if (IN_CLASSC(haddr))
673			rc = 24;
674	}
675
676	return rc;
677}
678
679
680int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
681{
682	struct ifreq ifr;
683	struct sockaddr_in sin_orig;
684	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
685	struct in_device *in_dev;
686	struct in_ifaddr **ifap = NULL;
687	struct in_ifaddr *ifa = NULL;
688	struct net_device *dev;
689	char *colon;
690	int ret = -EFAULT;
691	int tryaddrmatch = 0;
692
693	/*
694	 *	Fetch the caller's info block into kernel space
695	 */
696
697	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
698		goto out;
699	ifr.ifr_name[IFNAMSIZ - 1] = 0;
700
701	/* save original address for comparison */
702	memcpy(&sin_orig, sin, sizeof(*sin));
703
704	colon = strchr(ifr.ifr_name, ':');
705	if (colon)
706		*colon = 0;
707
708	dev_load(net, ifr.ifr_name);
709
710	switch (cmd) {
711	case SIOCGIFADDR:	/* Get interface address */
712	case SIOCGIFBRDADDR:	/* Get the broadcast address */
713	case SIOCGIFDSTADDR:	/* Get the destination address */
714	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
715		/* Note that these ioctls will not sleep,
716		   so that we do not impose a lock.
717		   One day we will be forced to put shlock here (I mean SMP)
718		 */
719		tryaddrmatch = (sin_orig.sin_family == AF_INET);
720		memset(sin, 0, sizeof(*sin));
721		sin->sin_family = AF_INET;
722		break;
723
724	case SIOCSIFFLAGS:
725		ret = -EACCES;
726		if (!capable(CAP_NET_ADMIN))
727			goto out;
728		break;
729	case SIOCSIFADDR:	/* Set interface address (and family) */
730	case SIOCSIFBRDADDR:	/* Set the broadcast address */
731	case SIOCSIFDSTADDR:	/* Set the destination address */
732	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
733		ret = -EACCES;
734		if (!capable(CAP_NET_ADMIN))
735			goto out;
736		ret = -EINVAL;
737		if (sin->sin_family != AF_INET)
738			goto out;
739		break;
740	default:
741		ret = -EINVAL;
742		goto out;
743	}
744
745	rtnl_lock();
746
747	ret = -ENODEV;
748	dev = __dev_get_by_name(net, ifr.ifr_name);
749	if (!dev)
750		goto done;
751
752	if (colon)
753		*colon = ':';
754
755	in_dev = __in_dev_get_rtnl(dev);
756	if (in_dev) {
757		if (tryaddrmatch) {
758			/* Matthias Andree */
759			/* compare label and address (4.4BSD style) */
760			/* note: we only do this for a limited set of ioctls
761			   and only if the original address family was AF_INET.
762			   This is checked above. */
763			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
764			     ifap = &ifa->ifa_next) {
765				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
766				    sin_orig.sin_addr.s_addr ==
767							ifa->ifa_local) {
768					break; /* found */
769				}
770			}
771		}
772		/* we didn't get a match, maybe the application is
773		   4.3BSD-style and passed in junk so we fall back to
774		   comparing just the label */
775		if (!ifa) {
776			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
777			     ifap = &ifa->ifa_next)
778				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
779					break;
780		}
781	}
782
783	ret = -EADDRNOTAVAIL;
784	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
785		goto done;
786
787	switch (cmd) {
788	case SIOCGIFADDR:	/* Get interface address */
789		sin->sin_addr.s_addr = ifa->ifa_local;
790		goto rarok;
791
792	case SIOCGIFBRDADDR:	/* Get the broadcast address */
793		sin->sin_addr.s_addr = ifa->ifa_broadcast;
794		goto rarok;
795
796	case SIOCGIFDSTADDR:	/* Get the destination address */
797		sin->sin_addr.s_addr = ifa->ifa_address;
798		goto rarok;
799
800	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
801		sin->sin_addr.s_addr = ifa->ifa_mask;
802		goto rarok;
803
804	case SIOCSIFFLAGS:
805		if (colon) {
806			ret = -EADDRNOTAVAIL;
807			if (!ifa)
808				break;
809			ret = 0;
810			if (!(ifr.ifr_flags & IFF_UP))
811				inet_del_ifa(in_dev, ifap, 1);
812			break;
813		}
814		ret = dev_change_flags(dev, ifr.ifr_flags);
815		break;
816
817	case SIOCSIFADDR:	/* Set interface address (and family) */
818		ret = -EINVAL;
819		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
820			break;
821
822		if (!ifa) {
823			ret = -ENOBUFS;
824			ifa = inet_alloc_ifa();
825			INIT_HLIST_NODE(&ifa->hash);
826			if (!ifa)
827				break;
828			if (colon)
829				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
830			else
831				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
832		} else {
833			ret = 0;
834			if (ifa->ifa_local == sin->sin_addr.s_addr)
835				break;
836			inet_del_ifa(in_dev, ifap, 0);
837			ifa->ifa_broadcast = 0;
838			ifa->ifa_scope = 0;
839		}
840
841		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
842
843		if (!(dev->flags & IFF_POINTOPOINT)) {
844			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
845			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
846			if ((dev->flags & IFF_BROADCAST) &&
847			    ifa->ifa_prefixlen < 31)
848				ifa->ifa_broadcast = ifa->ifa_address |
849						     ~ifa->ifa_mask;
850		} else {
851			ifa->ifa_prefixlen = 32;
852			ifa->ifa_mask = inet_make_mask(32);
853		}
854		ret = inet_set_ifa(dev, ifa);
855		break;
856
857	case SIOCSIFBRDADDR:	/* Set the broadcast address */
858		ret = 0;
859		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
860			inet_del_ifa(in_dev, ifap, 0);
861			ifa->ifa_broadcast = sin->sin_addr.s_addr;
862			inet_insert_ifa(ifa);
863		}
864		break;
865
866	case SIOCSIFDSTADDR:	/* Set the destination address */
867		ret = 0;
868		if (ifa->ifa_address == sin->sin_addr.s_addr)
869			break;
870		ret = -EINVAL;
871		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
872			break;
873		ret = 0;
874		inet_del_ifa(in_dev, ifap, 0);
875		ifa->ifa_address = sin->sin_addr.s_addr;
876		inet_insert_ifa(ifa);
877		break;
878
879	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
880
881		/*
882		 *	The mask we set must be legal.
883		 */
884		ret = -EINVAL;
885		if (bad_mask(sin->sin_addr.s_addr, 0))
886			break;
887		ret = 0;
888		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
889			__be32 old_mask = ifa->ifa_mask;
890			inet_del_ifa(in_dev, ifap, 0);
891			ifa->ifa_mask = sin->sin_addr.s_addr;
892			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
893
894			/* See if current broadcast address matches
895			 * with current netmask, then recalculate
896			 * the broadcast address. Otherwise it's a
897			 * funny address, so don't touch it since
898			 * the user seems to know what (s)he's doing...
899			 */
900			if ((dev->flags & IFF_BROADCAST) &&
901			    (ifa->ifa_prefixlen < 31) &&
902			    (ifa->ifa_broadcast ==
903			     (ifa->ifa_local|~old_mask))) {
904				ifa->ifa_broadcast = (ifa->ifa_local |
905						      ~sin->sin_addr.s_addr);
906			}
907			inet_insert_ifa(ifa);
908		}
909		break;
910	}
911done:
912	rtnl_unlock();
913out:
914	return ret;
915rarok:
916	rtnl_unlock();
917	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
918	goto out;
919}
920
921static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
922{
923	struct in_device *in_dev = __in_dev_get_rtnl(dev);
924	struct in_ifaddr *ifa;
925	struct ifreq ifr;
926	int done = 0;
927
928	if (!in_dev)
929		goto out;
930
931	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
932		if (!buf) {
933			done += sizeof(ifr);
934			continue;
935		}
936		if (len < (int) sizeof(ifr))
937			break;
938		memset(&ifr, 0, sizeof(struct ifreq));
939		if (ifa->ifa_label)
940			strcpy(ifr.ifr_name, ifa->ifa_label);
941		else
942			strcpy(ifr.ifr_name, dev->name);
943
944		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
945		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
946								ifa->ifa_local;
947
948		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
949			done = -EFAULT;
950			break;
951		}
952		buf  += sizeof(struct ifreq);
953		len  -= sizeof(struct ifreq);
954		done += sizeof(struct ifreq);
955	}
956out:
957	return done;
958}
959
960__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
961{
962	__be32 addr = 0;
963	struct in_device *in_dev;
964	struct net *net = dev_net(dev);
965
966	rcu_read_lock();
967	in_dev = __in_dev_get_rcu(dev);
968	if (!in_dev)
969		goto no_in_dev;
970
971	for_primary_ifa(in_dev) {
972		if (ifa->ifa_scope > scope)
973			continue;
974		if (!dst || inet_ifa_match(dst, ifa)) {
975			addr = ifa->ifa_local;
976			break;
977		}
978		if (!addr)
979			addr = ifa->ifa_local;
980	} endfor_ifa(in_dev);
981
982	if (addr)
983		goto out_unlock;
984no_in_dev:
985
986	/* Not loopback addresses on loopback should be preferred
987	   in this case. It is importnat that lo is the first interface
988	   in dev_base list.
989	 */
990	for_each_netdev_rcu(net, dev) {
991		in_dev = __in_dev_get_rcu(dev);
992		if (!in_dev)
993			continue;
994
995		for_primary_ifa(in_dev) {
996			if (ifa->ifa_scope != RT_SCOPE_LINK &&
997			    ifa->ifa_scope <= scope) {
998				addr = ifa->ifa_local;
999				goto out_unlock;
1000			}
1001		} endfor_ifa(in_dev);
1002	}
1003out_unlock:
1004	rcu_read_unlock();
1005	return addr;
1006}
1007EXPORT_SYMBOL(inet_select_addr);
1008
1009static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1010			      __be32 local, int scope)
1011{
1012	int same = 0;
1013	__be32 addr = 0;
1014
1015	for_ifa(in_dev) {
1016		if (!addr &&
1017		    (local == ifa->ifa_local || !local) &&
1018		    ifa->ifa_scope <= scope) {
1019			addr = ifa->ifa_local;
1020			if (same)
1021				break;
1022		}
1023		if (!same) {
1024			same = (!local || inet_ifa_match(local, ifa)) &&
1025				(!dst || inet_ifa_match(dst, ifa));
1026			if (same && addr) {
1027				if (local || !dst)
1028					break;
1029				/* Is the selected addr into dst subnet? */
1030				if (inet_ifa_match(addr, ifa))
1031					break;
1032				/* No, then can we use new local src? */
1033				if (ifa->ifa_scope <= scope) {
1034					addr = ifa->ifa_local;
1035					break;
1036				}
1037				/* search for large dst subnet for addr */
1038				same = 0;
1039			}
1040		}
1041	} endfor_ifa(in_dev);
1042
1043	return same ? addr : 0;
1044}
1045
1046/*
1047 * Confirm that local IP address exists using wildcards:
1048 * - in_dev: only on this interface, 0=any interface
1049 * - dst: only in the same subnet as dst, 0=any dst
1050 * - local: address, 0=autoselect the local address
1051 * - scope: maximum allowed scope value for the local address
1052 */
1053__be32 inet_confirm_addr(struct in_device *in_dev,
1054			 __be32 dst, __be32 local, int scope)
1055{
1056	__be32 addr = 0;
1057	struct net_device *dev;
1058	struct net *net;
1059
1060	if (scope != RT_SCOPE_LINK)
1061		return confirm_addr_indev(in_dev, dst, local, scope);
1062
1063	net = dev_net(in_dev->dev);
1064	rcu_read_lock();
1065	for_each_netdev_rcu(net, dev) {
1066		in_dev = __in_dev_get_rcu(dev);
1067		if (in_dev) {
1068			addr = confirm_addr_indev(in_dev, dst, local, scope);
1069			if (addr)
1070				break;
1071		}
1072	}
1073	rcu_read_unlock();
1074
1075	return addr;
1076}
1077EXPORT_SYMBOL(inet_confirm_addr);
1078
1079/*
1080 *	Device notifier
1081 */
1082
1083int register_inetaddr_notifier(struct notifier_block *nb)
1084{
1085	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1086}
1087EXPORT_SYMBOL(register_inetaddr_notifier);
1088
1089int unregister_inetaddr_notifier(struct notifier_block *nb)
1090{
1091	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1092}
1093EXPORT_SYMBOL(unregister_inetaddr_notifier);
1094
1095/* Rename ifa_labels for a device name change. Make some effort to preserve
1096 * existing alias numbering and to create unique labels if possible.
1097*/
1098static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1099{
1100	struct in_ifaddr *ifa;
1101	int named = 0;
1102
1103	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1104		char old[IFNAMSIZ], *dot;
1105
1106		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1107		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1108		if (named++ == 0)
1109			goto skip;
1110		dot = strchr(old, ':');
1111		if (dot == NULL) {
1112			sprintf(old, ":%d", named);
1113			dot = old;
1114		}
1115		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1116			strcat(ifa->ifa_label, dot);
1117		else
1118			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1119skip:
1120		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1121	}
1122}
1123
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a
 * device: anything below 68 bytes is rejected.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1128
1129static void inetdev_send_gratuitous_arp(struct net_device *dev,
1130					struct in_device *in_dev)
1131
1132{
1133	struct in_ifaddr *ifa;
1134
1135	for (ifa = in_dev->ifa_list; ifa;
1136	     ifa = ifa->ifa_next) {
1137		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1138			 ifa->ifa_local, dev,
1139			 ifa->ifa_local, NULL,
1140			 dev->dev_addr, NULL);
1141	}
1142}
1143
1144/* Called only under RTNL semaphore */
1145
1146static int inetdev_event(struct notifier_block *this, unsigned long event,
1147			 void *ptr)
1148{
1149	struct net_device *dev = ptr;
1150	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1151
1152	ASSERT_RTNL();
1153
1154	if (!in_dev) {
1155		if (event == NETDEV_REGISTER) {
1156			in_dev = inetdev_init(dev);
1157			if (!in_dev)
1158				return notifier_from_errno(-ENOMEM);
1159			if (dev->flags & IFF_LOOPBACK) {
1160				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1161				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1162			}
1163		} else if (event == NETDEV_CHANGEMTU) {
1164			/* Re-enabling IP */
1165			if (inetdev_valid_mtu(dev->mtu))
1166				in_dev = inetdev_init(dev);
1167		}
1168		goto out;
1169	}
1170
1171	switch (event) {
1172	case NETDEV_REGISTER:
1173		pr_debug("%s: bug\n", __func__);
1174		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1175		break;
1176	case NETDEV_UP:
1177		if (!inetdev_valid_mtu(dev->mtu))
1178			break;
1179		if (dev->flags & IFF_LOOPBACK) {
1180			struct in_ifaddr *ifa = inet_alloc_ifa();
1181
1182			if (ifa) {
1183				INIT_HLIST_NODE(&ifa->hash);
1184				ifa->ifa_local =
1185				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1186				ifa->ifa_prefixlen = 8;
1187				ifa->ifa_mask = inet_make_mask(8);
1188				in_dev_hold(in_dev);
1189				ifa->ifa_dev = in_dev;
1190				ifa->ifa_scope = RT_SCOPE_HOST;
1191				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1192				inet_insert_ifa(ifa);
1193			}
1194		}
1195		ip_mc_up(in_dev);
1196		/* fall through */
1197	case NETDEV_CHANGEADDR:
1198		if (!IN_DEV_ARP_NOTIFY(in_dev))
1199			break;
1200		/* fall through */
1201	case NETDEV_NOTIFY_PEERS:
1202		/* Send gratuitous ARP to notify of link change */
1203		inetdev_send_gratuitous_arp(dev, in_dev);
1204		break;
1205	case NETDEV_DOWN:
1206		ip_mc_down(in_dev);
1207		break;
1208	case NETDEV_PRE_TYPE_CHANGE:
1209		ip_mc_unmap(in_dev);
1210		break;
1211	case NETDEV_POST_TYPE_CHANGE:
1212		ip_mc_remap(in_dev);
1213		break;
1214	case NETDEV_CHANGEMTU:
1215		if (inetdev_valid_mtu(dev->mtu))
1216			break;
1217		/* disable IP when MTU is not enough */
1218	case NETDEV_UNREGISTER:
1219		inetdev_destroy(in_dev);
1220		break;
1221	case NETDEV_CHANGENAME:
1222		/* Do not notify about label change, this event is
1223		 * not interesting to applications using netlink.
1224		 */
1225		inetdev_changename(dev, in_dev);
1226
1227		devinet_sysctl_unregister(in_dev);
1228		devinet_sysctl_register(in_dev);
1229		break;
1230	}
1231out:
1232	return NOTIFY_DONE;
1233}
1234
1235static struct notifier_block ip_netdev_notifier = {
1236	.notifier_call = inetdev_event,
1237};
1238
1239static size_t inet_nlmsg_size(void)
1240{
1241	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1242	       + nla_total_size(4) /* IFA_ADDRESS */
1243	       + nla_total_size(4) /* IFA_LOCAL */
1244	       + nla_total_size(4) /* IFA_BROADCAST */
1245	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1246}
1247
1248static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1249			    u32 portid, u32 seq, int event, unsigned int flags)
1250{
1251	struct ifaddrmsg *ifm;
1252	struct nlmsghdr  *nlh;
1253
1254	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1255	if (nlh == NULL)
1256		return -EMSGSIZE;
1257
1258	ifm = nlmsg_data(nlh);
1259	ifm->ifa_family = AF_INET;
1260	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1261	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1262	ifm->ifa_scope = ifa->ifa_scope;
1263	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1264
1265	if ((ifa->ifa_address &&
1266	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1267	    (ifa->ifa_local &&
1268	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1269	    (ifa->ifa_broadcast &&
1270	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1271	    (ifa->ifa_label[0] &&
1272	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1273		goto nla_put_failure;
1274
1275	return nlmsg_end(skb, nlh);
1276
1277nla_put_failure:
1278	nlmsg_cancel(skb, nlh);
1279	return -EMSGSIZE;
1280}
1281
1282static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1283{
1284	struct net *net = sock_net(skb->sk);
1285	int h, s_h;
1286	int idx, s_idx;
1287	int ip_idx, s_ip_idx;
1288	struct net_device *dev;
1289	struct in_device *in_dev;
1290	struct in_ifaddr *ifa;
1291	struct hlist_head *head;
1292	struct hlist_node *node;
1293
1294	s_h = cb->args[0];
1295	s_idx = idx = cb->args[1];
1296	s_ip_idx = ip_idx = cb->args[2];
1297
1298	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1299		idx = 0;
1300		head = &net->dev_index_head[h];
1301		rcu_read_lock();
1302		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1303			if (idx < s_idx)
1304				goto cont;
1305			if (h > s_h || idx > s_idx)
1306				s_ip_idx = 0;
1307			in_dev = __in_dev_get_rcu(dev);
1308			if (!in_dev)
1309				goto cont;
1310
1311			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1312			     ifa = ifa->ifa_next, ip_idx++) {
1313				if (ip_idx < s_ip_idx)
1314					continue;
1315				if (inet_fill_ifaddr(skb, ifa,
1316					     NETLINK_CB(cb->skb).portid,
1317					     cb->nlh->nlmsg_seq,
1318					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1319					rcu_read_unlock();
1320					goto done;
1321				}
1322			}
1323cont:
1324			idx++;
1325		}
1326		rcu_read_unlock();
1327	}
1328
1329done:
1330	cb->args[0] = h;
1331	cb->args[1] = idx;
1332	cb->args[2] = ip_idx;
1333
1334	return skb->len;
1335}
1336
1337static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1338		      u32 portid)
1339{
1340	struct sk_buff *skb;
1341	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1342	int err = -ENOBUFS;
1343	struct net *net;
1344
1345	net = dev_net(ifa->ifa_dev->dev);
1346	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1347	if (skb == NULL)
1348		goto errout;
1349
1350	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1351	if (err < 0) {
1352		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1353		WARN_ON(err == -EMSGSIZE);
1354		kfree_skb(skb);
1355		goto errout;
1356	}
1357	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1358	return;
1359errout:
1360	if (err < 0)
1361		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1362}
1363
1364static size_t inet_get_link_af_size(const struct net_device *dev)
1365{
1366	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1367
1368	if (!in_dev)
1369		return 0;
1370
1371	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1372}
1373
1374static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1375{
1376	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1377	struct nlattr *nla;
1378	int i;
1379
1380	if (!in_dev)
1381		return -ENODATA;
1382
1383	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1384	if (nla == NULL)
1385		return -EMSGSIZE;
1386
1387	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1388		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1389
1390	return 0;
1391}
1392
1393static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1394	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1395};
1396
1397static int inet_validate_link_af(const struct net_device *dev,
1398				 const struct nlattr *nla)
1399{
1400	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1401	int err, rem;
1402
1403	if (dev && !__in_dev_get_rtnl(dev))
1404		return -EAFNOSUPPORT;
1405
1406	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1407	if (err < 0)
1408		return err;
1409
1410	if (tb[IFLA_INET_CONF]) {
1411		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1412			int cfgid = nla_type(a);
1413
1414			if (nla_len(a) < 4)
1415				return -EINVAL;
1416
1417			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1418				return -EINVAL;
1419		}
1420	}
1421
1422	return 0;
1423}
1424
1425static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1426{
1427	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1428	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1429	int rem;
1430
1431	if (!in_dev)
1432		return -EAFNOSUPPORT;
1433
1434	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1435		BUG();
1436
1437	if (tb[IFLA_INET_CONF]) {
1438		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1439			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1440	}
1441
1442	return 0;
1443}
1444
1445#ifdef CONFIG_SYSCTL
1446
1447static void devinet_copy_dflt_conf(struct net *net, int i)
1448{
1449	struct net_device *dev;
1450
1451	rcu_read_lock();
1452	for_each_netdev_rcu(net, dev) {
1453		struct in_device *in_dev;
1454
1455		in_dev = __in_dev_get_rcu(dev);
1456		if (in_dev && !test_bit(i, in_dev->cnf.state))
1457			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1458	}
1459	rcu_read_unlock();
1460}
1461
1462/* called with RTNL locked */
1463static void inet_forward_change(struct net *net)
1464{
1465	struct net_device *dev;
1466	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1467
1468	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1470
1471	for_each_netdev(net, dev) {
1472		struct in_device *in_dev;
1473		if (on)
1474			dev_disable_lro(dev);
1475		rcu_read_lock();
1476		in_dev = __in_dev_get_rcu(dev);
1477		if (in_dev)
1478			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1479		rcu_read_unlock();
1480	}
1481}
1482
1483static int devinet_conf_proc(ctl_table *ctl, int write,
1484			     void __user *buffer,
1485			     size_t *lenp, loff_t *ppos)
1486{
1487	int old_value = *(int *)ctl->data;
1488	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489	int new_value = *(int *)ctl->data;
1490
1491	if (write) {
1492		struct ipv4_devconf *cnf = ctl->extra1;
1493		struct net *net = ctl->extra2;
1494		int i = (int *)ctl->data - cnf->data;
1495
1496		set_bit(i, cnf->state);
1497
1498		if (cnf == net->ipv4.devconf_dflt)
1499			devinet_copy_dflt_conf(net, i);
1500		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1501		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502			if ((new_value == 0) && (old_value != 0))
1503				rt_cache_flush(net);
1504	}
1505
1506	return ret;
1507}
1508
1509static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510				  void __user *buffer,
1511				  size_t *lenp, loff_t *ppos)
1512{
1513	int *valp = ctl->data;
1514	int val = *valp;
1515	loff_t pos = *ppos;
1516	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1517
1518	if (write && *valp != val) {
1519		struct net *net = ctl->extra2;
1520
1521		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522			if (!rtnl_trylock()) {
1523				/* Restore the original values before restarting */
1524				*valp = val;
1525				*ppos = pos;
1526				return restart_syscall();
1527			}
1528			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529				inet_forward_change(net);
1530			} else if (*valp) {
1531				struct ipv4_devconf *cnf = ctl->extra1;
1532				struct in_device *idev =
1533					container_of(cnf, struct in_device, cnf);
1534				dev_disable_lro(idev->dev);
1535			}
1536			rtnl_unlock();
1537			rt_cache_flush(net);
1538		}
1539	}
1540
1541	return ret;
1542}
1543
1544static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545				void __user *buffer,
1546				size_t *lenp, loff_t *ppos)
1547{
1548	int *valp = ctl->data;
1549	int val = *valp;
1550	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551	struct net *net = ctl->extra2;
1552
1553	if (write && *valp != val)
1554		rt_cache_flush(net);
1555
1556	return ret;
1557}
1558
/*
 * Builders for the entries of the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY is the generic form: it points .data at the slot
 * of the global ipv4_devconf for IPV4_DEVCONF_<id> and sets .extra1 to
 * that devconf.  The remaining macros are shorthands that fix the mode
 * and/or the handler:
 *   RW        0644, devinet_conf_proc
 *   RO        0444, devinet_conf_proc
 *   COMPLEX   0644, caller-supplied handler
 *   FLUSHING  0644, ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(id, label, perms, handler) \
	{ \
		.procname	= label, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## id - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perms, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(id, label) \
	DEVINET_SYSCTL_ENTRY(id, label, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(id, label) \
	DEVINET_SYSCTL_ENTRY(id, label, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(id, label, handler) \
	DEVINET_SYSCTL_ENTRY(id, label, 0644, handler)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(id, label) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(id, label, ipv4_doint_and_flush)
1581
1582static struct devinet_sysctl_table {
1583	struct ctl_table_header *sysctl_header;
1584	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1585} devinet_sysctl = {
1586	.devinet_vars = {
1587		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1588					     devinet_sysctl_forward),
1589		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1590
1591		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1592		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1593		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1594		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1595		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1596		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1597					"accept_source_route"),
1598		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1599		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1600		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1601		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1602		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1603		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1604		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1605		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1606		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1607		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1608		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1609		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1610		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1611
1612		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1613		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1614		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1615					      "force_igmp_version"),
1616		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1617					      "promote_secondaries"),
1618		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1619					      "route_localnet"),
1620	},
1621};
1622
1623static int __devinet_sysctl_register(struct net *net, char *dev_name,
1624					struct ipv4_devconf *p)
1625{
1626	int i;
1627	struct devinet_sysctl_table *t;
1628	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1629
1630	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1631	if (!t)
1632		goto out;
1633
1634	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1635		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1636		t->devinet_vars[i].extra1 = p;
1637		t->devinet_vars[i].extra2 = net;
1638	}
1639
1640	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1641
1642	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1643	if (!t->sysctl_header)
1644		goto free;
1645
1646	p->sysctl = t;
1647	return 0;
1648
1649free:
1650	kfree(t);
1651out:
1652	return -ENOBUFS;
1653}
1654
1655static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1656{
1657	struct devinet_sysctl_table *t = cnf->sysctl;
1658
1659	if (t == NULL)
1660		return;
1661
1662	cnf->sysctl = NULL;
1663	unregister_net_sysctl_table(t->sysctl_header);
1664	kfree(t);
1665}
1666
1667static void devinet_sysctl_register(struct in_device *idev)
1668{
1669	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1670	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1671					&idev->cnf);
1672}
1673
1674static void devinet_sysctl_unregister(struct in_device *idev)
1675{
1676	__devinet_sysctl_unregister(&idev->cnf);
1677	neigh_sysctl_unregister(idev->arp_parms);
1678}
1679
1680static struct ctl_table ctl_forward_entry[] = {
1681	{
1682		.procname	= "ip_forward",
1683		.data		= &ipv4_devconf.data[
1684					IPV4_DEVCONF_FORWARDING - 1],
1685		.maxlen		= sizeof(int),
1686		.mode		= 0644,
1687		.proc_handler	= devinet_sysctl_forward,
1688		.extra1		= &ipv4_devconf,
1689		.extra2		= &init_net,
1690	},
1691	{ },
1692};
1693#endif
1694
1695static __net_init int devinet_init_net(struct net *net)
1696{
1697	int err;
1698	struct ipv4_devconf *all, *dflt;
1699#ifdef CONFIG_SYSCTL
1700	struct ctl_table *tbl = ctl_forward_entry;
1701	struct ctl_table_header *forw_hdr;
1702#endif
1703
1704	err = -ENOMEM;
1705	all = &ipv4_devconf;
1706	dflt = &ipv4_devconf_dflt;
1707
1708	if (!net_eq(net, &init_net)) {
1709		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1710		if (all == NULL)
1711			goto err_alloc_all;
1712
1713		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1714		if (dflt == NULL)
1715			goto err_alloc_dflt;
1716
1717#ifdef CONFIG_SYSCTL
1718		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1719		if (tbl == NULL)
1720			goto err_alloc_ctl;
1721
1722		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1723		tbl[0].extra1 = all;
1724		tbl[0].extra2 = net;
1725#endif
1726	}
1727
1728#ifdef CONFIG_SYSCTL
1729	err = __devinet_sysctl_register(net, "all", all);
1730	if (err < 0)
1731		goto err_reg_all;
1732
1733	err = __devinet_sysctl_register(net, "default", dflt);
1734	if (err < 0)
1735		goto err_reg_dflt;
1736
1737	err = -ENOMEM;
1738	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1739	if (forw_hdr == NULL)
1740		goto err_reg_ctl;
1741	net->ipv4.forw_hdr = forw_hdr;
1742#endif
1743
1744	net->ipv4.devconf_all = all;
1745	net->ipv4.devconf_dflt = dflt;
1746	return 0;
1747
1748#ifdef CONFIG_SYSCTL
1749err_reg_ctl:
1750	__devinet_sysctl_unregister(dflt);
1751err_reg_dflt:
1752	__devinet_sysctl_unregister(all);
1753err_reg_all:
1754	if (tbl != ctl_forward_entry)
1755		kfree(tbl);
1756err_alloc_ctl:
1757#endif
1758	if (dflt != &ipv4_devconf_dflt)
1759		kfree(dflt);
1760err_alloc_dflt:
1761	if (all != &ipv4_devconf)
1762		kfree(all);
1763err_alloc_all:
1764	return err;
1765}
1766
1767static __net_exit void devinet_exit_net(struct net *net)
1768{
1769#ifdef CONFIG_SYSCTL
1770	struct ctl_table *tbl;
1771
1772	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1773	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1774	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1775	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1776	kfree(tbl);
1777#endif
1778	kfree(net->ipv4.devconf_dflt);
1779	kfree(net->ipv4.devconf_all);
1780}
1781
1782static __net_initdata struct pernet_operations devinet_ops = {
1783	.init = devinet_init_net,
1784	.exit = devinet_exit_net,
1785};
1786
1787static struct rtnl_af_ops inet_af_ops = {
1788	.family		  = AF_INET,
1789	.fill_link_af	  = inet_fill_link_af,
1790	.get_link_af_size = inet_get_link_af_size,
1791	.validate_link_af = inet_validate_link_af,
1792	.set_link_af	  = inet_set_link_af,
1793};
1794
1795void __init devinet_init(void)
1796{
1797	int i;
1798
1799	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1800		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1801
1802	register_pernet_subsys(&devinet_ops);
1803
1804	register_gifconf(PF_INET, inet_gifconf);
1805	register_netdevice_notifier(&ip_netdev_notifier);
1806
1807	rtnl_af_register(&inet_af_ops);
1808
1809	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1812}
1813
1814