devinet.c revision 2d230e2b2c3111cf4a11619f60dcd158ae84e3ab
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#include <linux/slab.h>
54#include <linux/hash.h>
55#ifdef CONFIG_SYSCTL
56#include <linux/sysctl.h>
57#endif
58#include <linux/kmod.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66
/*
 * File-local ipv4_devconf instance.  The .data array is indexed by
 * (IPV4_DEVCONF_<attr> - 1); the entries listed here start out as 1, all
 * others are zero-initialised.
 * NOTE(review): how this instance is attached to a namespace is not visible
 * in this part of the file -- confirm against the init code.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
75
/*
 * Second file-local ipv4_devconf instance.  Same initial values as
 * ipv4_devconf above, plus ACCEPT_SOURCE_ROUTE set to 1.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt, which
 * inetdev_init() copies into every new in_device -- confirm against the
 * namespace init code.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
85
/* Access attribute @attr in the default devconf of namespace @net. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
88
/*
 * Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR /
 * RTM_DELADDR handlers in this file: the three addresses are 32-bit values
 * and the label is a string of at most IFNAMSIZ - 1 bytes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
95
/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
 * value.  So if you change this define, make appropriate changes to
 * inet_addr_hash as well.
 */
#define IN4_ADDR_HSIZE	256
/* Hash table of in_ifaddr entries, bucket chosen by inet_addr_hash(). */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
/* Taken around every insertion into / removal from inet_addr_lst[]. */
static DEFINE_SPINLOCK(inet_addr_hash_lock);
103
104static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
105{
106	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
107
108	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
109		(IN4_ADDR_HSIZE - 1));
110}
111
112static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
113{
114	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
115
116	spin_lock(&inet_addr_hash_lock);
117	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
118	spin_unlock(&inet_addr_hash_lock);
119}
120
/*
 * Take @ifa out of the address hash table.  The node is re-initialised by
 * hlist_del_init_rcu(), and the update is serialised by inet_addr_hash_lock.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
127
128/**
129 * __ip_dev_find - find the first device with a given source address.
130 * @net: the net namespace
131 * @addr: the source address
132 * @devref: if true, take a reference on the found device
133 *
134 * If a caller uses devref=false, it should be protected by RCU, or RTNL
135 */
136struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
137{
138	unsigned int hash = inet_addr_hash(net, addr);
139	struct net_device *result = NULL;
140	struct in_ifaddr *ifa;
141	struct hlist_node *node;
142
143	rcu_read_lock();
144	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
145		struct net_device *dev = ifa->ifa_dev->dev;
146
147		if (!net_eq(dev_net(dev), net))
148			continue;
149		if (ifa->ifa_local == addr) {
150			result = dev;
151			break;
152		}
153	}
154	if (result && devref)
155		dev_hold(result);
156	rcu_read_unlock();
157	return result;
158}
159EXPORT_SYMBOL(__ip_dev_find);
160
/* Defined later in this file; announces an address event over netlink. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain called with NETDEV_UP / NETDEV_DOWN for address changes. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
/*
 * Per-device sysctl registration.  Without CONFIG_SYSCTL both helpers are
 * empty inline stubs so callers need no #ifdef.
 */
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
177
178/* Locks all the inet devices. */
179
180static struct in_ifaddr *inet_alloc_ifa(void)
181{
182	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
183}
184
185static void inet_rcu_free_ifa(struct rcu_head *head)
186{
187	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
188	if (ifa->ifa_dev)
189		in_dev_put(ifa->ifa_dev);
190	kfree(ifa);
191}
192
/*
 * Release an in_ifaddr.  The actual freeing is deferred through call_rcu();
 * inet_rcu_free_ifa() runs as the callback.
 */
static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
197
198void in_dev_finish_destroy(struct in_device *idev)
199{
200	struct net_device *dev = idev->dev;
201
202	WARN_ON(idev->ifa_list);
203	WARN_ON(idev->mc_list);
204#ifdef NET_REFCNT_DEBUG
205	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
206	       idev, dev ? dev->name : "NIL");
207#endif
208	dev_put(dev);
209	if (!idev->dead)
210		pr_err("Freeing alive in_device %p\n", idev);
211	else
212		kfree(idev);
213}
214EXPORT_SYMBOL(in_dev_finish_destroy);
215
216static struct in_device *inetdev_init(struct net_device *dev)
217{
218	struct in_device *in_dev;
219
220	ASSERT_RTNL();
221
222	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
223	if (!in_dev)
224		goto out;
225	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
226			sizeof(in_dev->cnf));
227	in_dev->cnf.sysctl = NULL;
228	in_dev->dev = dev;
229	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
230	if (!in_dev->arp_parms)
231		goto out_kfree;
232	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
233		dev_disable_lro(dev);
234	/* Reference in_dev->dev */
235	dev_hold(dev);
236	/* Account for reference dev->ip_ptr (below) */
237	in_dev_hold(in_dev);
238
239	devinet_sysctl_register(in_dev);
240	ip_mc_init_dev(in_dev);
241	if (dev->flags & IFF_UP)
242		ip_mc_up(in_dev);
243
244	/* we can receive as soon as ip_ptr is set -- do this last */
245	rcu_assign_pointer(dev->ip_ptr, in_dev);
246out:
247	return in_dev;
248out_kfree:
249	kfree(in_dev);
250	in_dev = NULL;
251	goto out;
252}
253
254static void in_dev_rcu_put(struct rcu_head *head)
255{
256	struct in_device *idev = container_of(head, struct in_device, rcu_head);
257	in_dev_put(idev);
258}
259
260static void inetdev_destroy(struct in_device *in_dev)
261{
262	struct in_ifaddr *ifa;
263	struct net_device *dev;
264
265	ASSERT_RTNL();
266
267	dev = in_dev->dev;
268
269	in_dev->dead = 1;
270
271	ip_mc_destroy_dev(in_dev);
272
273	while ((ifa = in_dev->ifa_list) != NULL) {
274		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
275		inet_free_ifa(ifa);
276	}
277
278	rcu_assign_pointer(dev->ip_ptr, NULL);
279
280	devinet_sysctl_unregister(in_dev);
281	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
282	arp_ifdown(dev);
283
284	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
285}
286
287int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
288{
289	rcu_read_lock();
290	for_primary_ifa(in_dev) {
291		if (inet_ifa_match(a, ifa)) {
292			if (!b || inet_ifa_match(b, ifa)) {
293				rcu_read_unlock();
294				return 1;
295			}
296		}
297	} endfor_ifa(in_dev);
298	rcu_read_unlock();
299	return 0;
300}
301
/*
 * Unlink one address from a device's address list and announce the removal.
 *
 * @in_dev:  device whose ifa_list contains the address
 * @ifap:    the list link that currently points at the address, i.e. *ifap
 *           is the entry being deleted
 * @destroy: if non-zero the entry is passed to inet_free_ifa() at the end;
 *           otherwise the unlinked entry is left for the caller
 * @nlh:     passed through to rtmsg_ifa()
 * @pid:     passed through to rtmsg_ifa()
 *
 * Caller must hold RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	/* Secondary chosen to replace a deleted primary, if any. */
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	/* Entry after which a promoted address is re-inserted. */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	/* Last entry skipped before reaching the promotion candidate. */
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1's
			 * subnet (not secondary, different mask, or no match).
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: delete this secondary too. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* Relink the promoted entry right after last_prim.  This is
		 * only done when at least one entry was skipped before it
		 * (prev_prom set); otherwise it is left where it is.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the entries after the promoted one that belong to
		 * the deleted address's subnet.
		 */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
400
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * netlink request: passes a NULL nlmsghdr and pid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
406
407static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
408			     u32 pid)
409{
410	struct in_device *in_dev = ifa->ifa_dev;
411	struct in_ifaddr *ifa1, **ifap, **last_primary;
412
413	ASSERT_RTNL();
414
415	if (!ifa->ifa_local) {
416		inet_free_ifa(ifa);
417		return 0;
418	}
419
420	ifa->ifa_flags &= ~IFA_F_SECONDARY;
421	last_primary = &in_dev->ifa_list;
422
423	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
424	     ifap = &ifa1->ifa_next) {
425		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
426		    ifa->ifa_scope <= ifa1->ifa_scope)
427			last_primary = &ifa1->ifa_next;
428		if (ifa1->ifa_mask == ifa->ifa_mask &&
429		    inet_ifa_match(ifa1->ifa_address, ifa)) {
430			if (ifa1->ifa_local == ifa->ifa_local) {
431				inet_free_ifa(ifa);
432				return -EEXIST;
433			}
434			if (ifa1->ifa_scope != ifa->ifa_scope) {
435				inet_free_ifa(ifa);
436				return -EINVAL;
437			}
438			ifa->ifa_flags |= IFA_F_SECONDARY;
439		}
440	}
441
442	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
443		net_srandom(ifa->ifa_local);
444		ifap = last_primary;
445	}
446
447	ifa->ifa_next = *ifap;
448	*ifap = ifa;
449
450	inet_hash_insert(dev_net(in_dev->dev), ifa);
451
452	/* Send message first, then call notifier.
453	   Notifier will trigger FIB update, so that
454	   listeners of netlink will know about new ifaddr */
455	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
456	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
457
458	return 0;
459}
460
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have no
 * netlink request: passes a NULL nlmsghdr and pid 0.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
465
466static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
467{
468	struct in_device *in_dev = __in_dev_get_rtnl(dev);
469
470	ASSERT_RTNL();
471
472	if (!in_dev) {
473		inet_free_ifa(ifa);
474		return -ENOBUFS;
475	}
476	ipv4_devconf_setall(in_dev);
477	if (ifa->ifa_dev != in_dev) {
478		WARN_ON(ifa->ifa_dev);
479		in_dev_hold(in_dev);
480		ifa->ifa_dev = in_dev;
481	}
482	if (ipv4_is_loopback(ifa->ifa_local))
483		ifa->ifa_scope = RT_SCOPE_HOST;
484	return inet_insert_ifa(ifa);
485}
486
487/* Caller must hold RCU or RTNL :
488 * We dont take a reference on found in_device
489 */
490struct in_device *inetdev_by_index(struct net *net, int ifindex)
491{
492	struct net_device *dev;
493	struct in_device *in_dev = NULL;
494
495	rcu_read_lock();
496	dev = dev_get_by_index_rcu(net, ifindex);
497	if (dev)
498		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
499	rcu_read_unlock();
500	return in_dev;
501}
502EXPORT_SYMBOL(inetdev_by_index);
503
504/* Called only from RTNL semaphored context. No locks. */
505
506struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
507				    __be32 mask)
508{
509	ASSERT_RTNL();
510
511	for_primary_ifa(in_dev) {
512		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
513			return ifa;
514	} endfor_ifa(in_dev);
515	return NULL;
516}
517
518static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
519{
520	struct net *net = sock_net(skb->sk);
521	struct nlattr *tb[IFA_MAX+1];
522	struct in_device *in_dev;
523	struct ifaddrmsg *ifm;
524	struct in_ifaddr *ifa, **ifap;
525	int err = -EINVAL;
526
527	ASSERT_RTNL();
528
529	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
530	if (err < 0)
531		goto errout;
532
533	ifm = nlmsg_data(nlh);
534	in_dev = inetdev_by_index(net, ifm->ifa_index);
535	if (in_dev == NULL) {
536		err = -ENODEV;
537		goto errout;
538	}
539
540	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
541	     ifap = &ifa->ifa_next) {
542		if (tb[IFA_LOCAL] &&
543		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
544			continue;
545
546		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
547			continue;
548
549		if (tb[IFA_ADDRESS] &&
550		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
551		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
552			continue;
553
554		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
555		return 0;
556	}
557
558	err = -EADDRNOTAVAIL;
559errout:
560	return err;
561}
562
563static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
564{
565	struct nlattr *tb[IFA_MAX+1];
566	struct in_ifaddr *ifa;
567	struct ifaddrmsg *ifm;
568	struct net_device *dev;
569	struct in_device *in_dev;
570	int err;
571
572	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
573	if (err < 0)
574		goto errout;
575
576	ifm = nlmsg_data(nlh);
577	err = -EINVAL;
578	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
579		goto errout;
580
581	dev = __dev_get_by_index(net, ifm->ifa_index);
582	err = -ENODEV;
583	if (dev == NULL)
584		goto errout;
585
586	in_dev = __in_dev_get_rtnl(dev);
587	err = -ENOBUFS;
588	if (in_dev == NULL)
589		goto errout;
590
591	ifa = inet_alloc_ifa();
592	if (ifa == NULL)
593		/*
594		 * A potential indev allocation can be left alive, it stays
595		 * assigned to its device and is destroy with it.
596		 */
597		goto errout;
598
599	ipv4_devconf_setall(in_dev);
600	in_dev_hold(in_dev);
601
602	if (tb[IFA_ADDRESS] == NULL)
603		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
604
605	INIT_HLIST_NODE(&ifa->hash);
606	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
607	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
608	ifa->ifa_flags = ifm->ifa_flags;
609	ifa->ifa_scope = ifm->ifa_scope;
610	ifa->ifa_dev = in_dev;
611
612	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
613	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
614
615	if (tb[IFA_BROADCAST])
616		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
617
618	if (tb[IFA_LABEL])
619		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
620	else
621		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
622
623	return ifa;
624
625errout:
626	return ERR_PTR(err);
627}
628
629static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
630{
631	struct net *net = sock_net(skb->sk);
632	struct in_ifaddr *ifa;
633
634	ASSERT_RTNL();
635
636	ifa = rtm_to_ifaddr(net, nlh);
637	if (IS_ERR(ifa))
638		return PTR_ERR(ifa);
639
640	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
641}
642
643/*
644 *	Determine a default network mask, based on the IP address.
645 */
646
647static inline int inet_abc_len(__be32 addr)
648{
649	int rc = -1;	/* Something else, probably a multicast. */
650
651	if (ipv4_is_zeronet(addr))
652		rc = 0;
653	else {
654		__u32 haddr = ntohl(addr);
655
656		if (IN_CLASSA(haddr))
657			rc = 8;
658		else if (IN_CLASSB(haddr))
659			rc = 16;
660		else if (IN_CLASSC(haddr))
661			rc = 24;
662	}
663
664	return rc;
665}
666
667
668int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
669{
670	struct ifreq ifr;
671	struct sockaddr_in sin_orig;
672	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
673	struct in_device *in_dev;
674	struct in_ifaddr **ifap = NULL;
675	struct in_ifaddr *ifa = NULL;
676	struct net_device *dev;
677	char *colon;
678	int ret = -EFAULT;
679	int tryaddrmatch = 0;
680
681	/*
682	 *	Fetch the caller's info block into kernel space
683	 */
684
685	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
686		goto out;
687	ifr.ifr_name[IFNAMSIZ - 1] = 0;
688
689	/* save original address for comparison */
690	memcpy(&sin_orig, sin, sizeof(*sin));
691
692	colon = strchr(ifr.ifr_name, ':');
693	if (colon)
694		*colon = 0;
695
696	dev_load(net, ifr.ifr_name);
697
698	switch (cmd) {
699	case SIOCGIFADDR:	/* Get interface address */
700	case SIOCGIFBRDADDR:	/* Get the broadcast address */
701	case SIOCGIFDSTADDR:	/* Get the destination address */
702	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
703		/* Note that these ioctls will not sleep,
704		   so that we do not impose a lock.
705		   One day we will be forced to put shlock here (I mean SMP)
706		 */
707		tryaddrmatch = (sin_orig.sin_family == AF_INET);
708		memset(sin, 0, sizeof(*sin));
709		sin->sin_family = AF_INET;
710		break;
711
712	case SIOCSIFFLAGS:
713		ret = -EACCES;
714		if (!capable(CAP_NET_ADMIN))
715			goto out;
716		break;
717	case SIOCSIFADDR:	/* Set interface address (and family) */
718	case SIOCSIFBRDADDR:	/* Set the broadcast address */
719	case SIOCSIFDSTADDR:	/* Set the destination address */
720	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
721		ret = -EACCES;
722		if (!capable(CAP_NET_ADMIN))
723			goto out;
724		ret = -EINVAL;
725		if (sin->sin_family != AF_INET)
726			goto out;
727		break;
728	default:
729		ret = -EINVAL;
730		goto out;
731	}
732
733	rtnl_lock();
734
735	ret = -ENODEV;
736	dev = __dev_get_by_name(net, ifr.ifr_name);
737	if (!dev)
738		goto done;
739
740	if (colon)
741		*colon = ':';
742
743	in_dev = __in_dev_get_rtnl(dev);
744	if (in_dev) {
745		if (tryaddrmatch) {
746			/* Matthias Andree */
747			/* compare label and address (4.4BSD style) */
748			/* note: we only do this for a limited set of ioctls
749			   and only if the original address family was AF_INET.
750			   This is checked above. */
751			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
752			     ifap = &ifa->ifa_next) {
753				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
754				    sin_orig.sin_addr.s_addr ==
755							ifa->ifa_local) {
756					break; /* found */
757				}
758			}
759		}
760		/* we didn't get a match, maybe the application is
761		   4.3BSD-style and passed in junk so we fall back to
762		   comparing just the label */
763		if (!ifa) {
764			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
765			     ifap = &ifa->ifa_next)
766				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
767					break;
768		}
769	}
770
771	ret = -EADDRNOTAVAIL;
772	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
773		goto done;
774
775	switch (cmd) {
776	case SIOCGIFADDR:	/* Get interface address */
777		sin->sin_addr.s_addr = ifa->ifa_local;
778		goto rarok;
779
780	case SIOCGIFBRDADDR:	/* Get the broadcast address */
781		sin->sin_addr.s_addr = ifa->ifa_broadcast;
782		goto rarok;
783
784	case SIOCGIFDSTADDR:	/* Get the destination address */
785		sin->sin_addr.s_addr = ifa->ifa_address;
786		goto rarok;
787
788	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
789		sin->sin_addr.s_addr = ifa->ifa_mask;
790		goto rarok;
791
792	case SIOCSIFFLAGS:
793		if (colon) {
794			ret = -EADDRNOTAVAIL;
795			if (!ifa)
796				break;
797			ret = 0;
798			if (!(ifr.ifr_flags & IFF_UP))
799				inet_del_ifa(in_dev, ifap, 1);
800			break;
801		}
802		ret = dev_change_flags(dev, ifr.ifr_flags);
803		break;
804
805	case SIOCSIFADDR:	/* Set interface address (and family) */
806		ret = -EINVAL;
807		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
808			break;
809
810		if (!ifa) {
811			ret = -ENOBUFS;
812			ifa = inet_alloc_ifa();
813			INIT_HLIST_NODE(&ifa->hash);
814			if (!ifa)
815				break;
816			if (colon)
817				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
818			else
819				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
820		} else {
821			ret = 0;
822			if (ifa->ifa_local == sin->sin_addr.s_addr)
823				break;
824			inet_del_ifa(in_dev, ifap, 0);
825			ifa->ifa_broadcast = 0;
826			ifa->ifa_scope = 0;
827		}
828
829		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
830
831		if (!(dev->flags & IFF_POINTOPOINT)) {
832			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
833			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
834			if ((dev->flags & IFF_BROADCAST) &&
835			    ifa->ifa_prefixlen < 31)
836				ifa->ifa_broadcast = ifa->ifa_address |
837						     ~ifa->ifa_mask;
838		} else {
839			ifa->ifa_prefixlen = 32;
840			ifa->ifa_mask = inet_make_mask(32);
841		}
842		ret = inet_set_ifa(dev, ifa);
843		break;
844
845	case SIOCSIFBRDADDR:	/* Set the broadcast address */
846		ret = 0;
847		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
848			inet_del_ifa(in_dev, ifap, 0);
849			ifa->ifa_broadcast = sin->sin_addr.s_addr;
850			inet_insert_ifa(ifa);
851		}
852		break;
853
854	case SIOCSIFDSTADDR:	/* Set the destination address */
855		ret = 0;
856		if (ifa->ifa_address == sin->sin_addr.s_addr)
857			break;
858		ret = -EINVAL;
859		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
860			break;
861		ret = 0;
862		inet_del_ifa(in_dev, ifap, 0);
863		ifa->ifa_address = sin->sin_addr.s_addr;
864		inet_insert_ifa(ifa);
865		break;
866
867	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
868
869		/*
870		 *	The mask we set must be legal.
871		 */
872		ret = -EINVAL;
873		if (bad_mask(sin->sin_addr.s_addr, 0))
874			break;
875		ret = 0;
876		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
877			__be32 old_mask = ifa->ifa_mask;
878			inet_del_ifa(in_dev, ifap, 0);
879			ifa->ifa_mask = sin->sin_addr.s_addr;
880			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
881
882			/* See if current broadcast address matches
883			 * with current netmask, then recalculate
884			 * the broadcast address. Otherwise it's a
885			 * funny address, so don't touch it since
886			 * the user seems to know what (s)he's doing...
887			 */
888			if ((dev->flags & IFF_BROADCAST) &&
889			    (ifa->ifa_prefixlen < 31) &&
890			    (ifa->ifa_broadcast ==
891			     (ifa->ifa_local|~old_mask))) {
892				ifa->ifa_broadcast = (ifa->ifa_local |
893						      ~sin->sin_addr.s_addr);
894			}
895			inet_insert_ifa(ifa);
896		}
897		break;
898	}
899done:
900	rtnl_unlock();
901out:
902	return ret;
903rarok:
904	rtnl_unlock();
905	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
906	goto out;
907}
908
909static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
910{
911	struct in_device *in_dev = __in_dev_get_rtnl(dev);
912	struct in_ifaddr *ifa;
913	struct ifreq ifr;
914	int done = 0;
915
916	if (!in_dev)
917		goto out;
918
919	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
920		if (!buf) {
921			done += sizeof(ifr);
922			continue;
923		}
924		if (len < (int) sizeof(ifr))
925			break;
926		memset(&ifr, 0, sizeof(struct ifreq));
927		if (ifa->ifa_label)
928			strcpy(ifr.ifr_name, ifa->ifa_label);
929		else
930			strcpy(ifr.ifr_name, dev->name);
931
932		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
933		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
934								ifa->ifa_local;
935
936		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
937			done = -EFAULT;
938			break;
939		}
940		buf  += sizeof(struct ifreq);
941		len  -= sizeof(struct ifreq);
942		done += sizeof(struct ifreq);
943	}
944out:
945	return done;
946}
947
948__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
949{
950	__be32 addr = 0;
951	struct in_device *in_dev;
952	struct net *net = dev_net(dev);
953
954	rcu_read_lock();
955	in_dev = __in_dev_get_rcu(dev);
956	if (!in_dev)
957		goto no_in_dev;
958
959	for_primary_ifa(in_dev) {
960		if (ifa->ifa_scope > scope)
961			continue;
962		if (!dst || inet_ifa_match(dst, ifa)) {
963			addr = ifa->ifa_local;
964			break;
965		}
966		if (!addr)
967			addr = ifa->ifa_local;
968	} endfor_ifa(in_dev);
969
970	if (addr)
971		goto out_unlock;
972no_in_dev:
973
974	/* Not loopback addresses on loopback should be preferred
975	   in this case. It is importnat that lo is the first interface
976	   in dev_base list.
977	 */
978	for_each_netdev_rcu(net, dev) {
979		in_dev = __in_dev_get_rcu(dev);
980		if (!in_dev)
981			continue;
982
983		for_primary_ifa(in_dev) {
984			if (ifa->ifa_scope != RT_SCOPE_LINK &&
985			    ifa->ifa_scope <= scope) {
986				addr = ifa->ifa_local;
987				goto out_unlock;
988			}
989		} endfor_ifa(in_dev);
990	}
991out_unlock:
992	rcu_read_unlock();
993	return addr;
994}
995EXPORT_SYMBOL(inet_select_addr);
996
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *   addr - the first local address that equals @local (any address if
 *          @local is 0) and whose scope does not exceed @scope;
 *   same - whether some entry matches both @local and @dst via
 *          inet_ifa_match() (a zero @local / @dst matches everything).
 *
 * Returns addr if a "same" entry was found, 0 otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet already confirmed earlier: we are done. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1033
1034/*
1035 * Confirm that local IP address exists using wildcards:
1036 * - in_dev: only on this interface, 0=any interface
1037 * - dst: only in the same subnet as dst, 0=any dst
1038 * - local: address, 0=autoselect the local address
1039 * - scope: maximum allowed scope value for the local address
1040 */
1041__be32 inet_confirm_addr(struct in_device *in_dev,
1042			 __be32 dst, __be32 local, int scope)
1043{
1044	__be32 addr = 0;
1045	struct net_device *dev;
1046	struct net *net;
1047
1048	if (scope != RT_SCOPE_LINK)
1049		return confirm_addr_indev(in_dev, dst, local, scope);
1050
1051	net = dev_net(in_dev->dev);
1052	rcu_read_lock();
1053	for_each_netdev_rcu(net, dev) {
1054		in_dev = __in_dev_get_rcu(dev);
1055		if (in_dev) {
1056			addr = confirm_addr_indev(in_dev, dst, local, scope);
1057			if (addr)
1058				break;
1059		}
1060	}
1061	rcu_read_unlock();
1062
1063	return addr;
1064}
1065
/*
 *	Device notifier
 */

/*
 * Add @nb to inetaddr_chain, the chain this file calls with NETDEV_UP /
 * NETDEV_DOWN when an address is inserted or deleted.  Returns the result
 * of blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1075
/*
 * Remove @nb from inetaddr_chain.  Returns the result of
 * blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1081
1082/* Rename ifa_labels for a device name change. Make some effort to preserve
1083 * existing alias numbering and to create unique labels if possible.
1084*/
1085static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1086{
1087	struct in_ifaddr *ifa;
1088	int named = 0;
1089
1090	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1091		char old[IFNAMSIZ], *dot;
1092
1093		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1094		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1095		if (named++ == 0)
1096			goto skip;
1097		dot = strchr(old, ':');
1098		if (dot == NULL) {
1099			sprintf(old, ":%d", named);
1100			dot = old;
1101		}
1102		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1103			strcat(ifa->ifa_label, dot);
1104		else
1105			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1106skip:
1107		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1108	}
1109}
1110
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device:
 * true for 68 and above, false below.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	return min_mtu <= mtu;
}
1115
1116static void inetdev_send_gratuitous_arp(struct net_device *dev,
1117					struct in_device *in_dev)
1118
1119{
1120	struct in_ifaddr *ifa = in_dev->ifa_list;
1121
1122	if (!ifa)
1123		return;
1124
1125	arp_send(ARPOP_REQUEST, ETH_P_ARP,
1126		 ifa->ifa_local, dev,
1127		 ifa->ifa_local, NULL,
1128		 dev->dev_addr, NULL);
1129}
1130
/*
 * Netdevice notifier callback that keeps the per-device IPv4 state
 * (struct in_device) in step with device events.
 *
 * Called only under RTNL semaphore.
 *
 * @this:  the notifier block this callback was registered through (unused)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event is about
 *
 * Returns NOTIFY_DONE in every case except one: if inetdev_init() fails
 * while handling NETDEV_REGISTER, notifier_from_errno(-ENOMEM) is returned.
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/*
	 * The device has no IPv4 state yet.  Only REGISTER and CHANGEMTU can
	 * create it here; every other event is ignored for such a device.
	 */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			/* Loopback devices start with NOXFRM and NOPOLICY set. */
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			/* NOTE: a failed inetdev_init() is not reported here. */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* IPv4 state already present at registration: unexpected. */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		rcu_assign_pointer(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/*
		 * A loopback device coming up gets 127.0.0.1/8 with host
		 * scope.  If the address cannot be allocated it is silently
		 * skipped.
		 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				/* The address holds a reference on in_dev. */
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce when ARP_NOTIFY is set. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through: tear down exactly as for UNREGISTER */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/*
		 * The sysctl directory is named after the device, so drop
		 * the old entries and register again under the new name.
		 */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1221
/*
 * Notifier block that delivers net-device events to inetdev_event().
 * It is handed to register_netdevice_notifier() in devinet_init().
 */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1225
1226static inline size_t inet_nlmsg_size(void)
1227{
1228	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1229	       + nla_total_size(4) /* IFA_ADDRESS */
1230	       + nla_total_size(4) /* IFA_LOCAL */
1231	       + nla_total_size(4) /* IFA_BROADCAST */
1232	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1233}
1234
1235static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1236			    u32 pid, u32 seq, int event, unsigned int flags)
1237{
1238	struct ifaddrmsg *ifm;
1239	struct nlmsghdr  *nlh;
1240
1241	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1242	if (nlh == NULL)
1243		return -EMSGSIZE;
1244
1245	ifm = nlmsg_data(nlh);
1246	ifm->ifa_family = AF_INET;
1247	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1248	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1249	ifm->ifa_scope = ifa->ifa_scope;
1250	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1251
1252	if (ifa->ifa_address)
1253		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1254
1255	if (ifa->ifa_local)
1256		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1257
1258	if (ifa->ifa_broadcast)
1259		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1260
1261	if (ifa->ifa_label[0])
1262		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1263
1264	return nlmsg_end(skb, nlh);
1265
1266nla_put_failure:
1267	nlmsg_cancel(skb, nlh);
1268	return -EMSGSIZE;
1269}
1270
/*
 * Netlink dump callback: writes one RTM_NEWADDR message (flagged
 * NLM_F_MULTI) per IPv4 address of every device in the socket's network
 * namespace into @skb.
 *
 * The dump is resumable.  The position is kept in @cb->args:
 *   args[0] - bucket of net->dev_index_head being walked
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device's ifa_list
 * On entry these say where to continue; on exit they are updated to where
 * the walk stopped.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;

	/* s_* are the saved start positions from the previous call. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* After the first bucket, later buckets start at device 0. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			/* Skip devices already dumped in a previous call. */
			if (idx < s_idx)
				goto cont;
			/*
			 * The saved address index only applies to the device
			 * we stopped on; any later device starts at address 0.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/*
				 * A result <= 0 stops the dump; h, idx and
				 * ip_idx are saved so the next call retries
				 * this address.
				 */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Record where we stopped for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1325
1326static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1327		      u32 pid)
1328{
1329	struct sk_buff *skb;
1330	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1331	int err = -ENOBUFS;
1332	struct net *net;
1333
1334	net = dev_net(ifa->ifa_dev->dev);
1335	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1336	if (skb == NULL)
1337		goto errout;
1338
1339	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1340	if (err < 0) {
1341		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1342		WARN_ON(err == -EMSGSIZE);
1343		kfree_skb(skb);
1344		goto errout;
1345	}
1346	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1347	return;
1348errout:
1349	if (err < 0)
1350		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1351}
1352
1353static size_t inet_get_link_af_size(const struct net_device *dev)
1354{
1355	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1356
1357	if (!in_dev)
1358		return 0;
1359
1360	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1361}
1362
1363static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1364{
1365	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1366	struct nlattr *nla;
1367	int i;
1368
1369	if (!in_dev)
1370		return -ENODATA;
1371
1372	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1373	if (nla == NULL)
1374		return -EMSGSIZE;
1375
1376	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1377		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1378
1379	return 0;
1380}
1381
/*
 * Netlink attribute policy used by inet_validate_link_af() when parsing
 * the nested AF_INET attributes: IFLA_INET_CONF must itself be a nested
 * attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1385
1386static int inet_validate_link_af(const struct net_device *dev,
1387				 const struct nlattr *nla)
1388{
1389	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1390	int err, rem;
1391
1392	if (dev && !__in_dev_get_rtnl(dev))
1393		return -EAFNOSUPPORT;
1394
1395	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1396	if (err < 0)
1397		return err;
1398
1399	if (tb[IFLA_INET_CONF]) {
1400		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1401			int cfgid = nla_type(a);
1402
1403			if (nla_len(a) < 4)
1404				return -EINVAL;
1405
1406			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1407				return -EINVAL;
1408		}
1409	}
1410
1411	return 0;
1412}
1413
1414static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1415{
1416	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1417	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1418	int rem;
1419
1420	if (!in_dev)
1421		return -EAFNOSUPPORT;
1422
1423	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1424		BUG();
1425
1426	if (tb[IFLA_INET_CONF]) {
1427		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1428			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1429	}
1430
1431	return 0;
1432}
1433
1434#ifdef CONFIG_SYSCTL
1435
1436static void devinet_copy_dflt_conf(struct net *net, int i)
1437{
1438	struct net_device *dev;
1439
1440	rcu_read_lock();
1441	for_each_netdev_rcu(net, dev) {
1442		struct in_device *in_dev;
1443
1444		in_dev = __in_dev_get_rcu(dev);
1445		if (in_dev && !test_bit(i, in_dev->cnf.state))
1446			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1447	}
1448	rcu_read_unlock();
1449}
1450
1451/* called with RTNL locked */
1452static void inet_forward_change(struct net *net)
1453{
1454	struct net_device *dev;
1455	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1456
1457	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1458	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1459
1460	for_each_netdev(net, dev) {
1461		struct in_device *in_dev;
1462		if (on)
1463			dev_disable_lro(dev);
1464		rcu_read_lock();
1465		in_dev = __in_dev_get_rcu(dev);
1466		if (in_dev)
1467			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1468		rcu_read_unlock();
1469	}
1470}
1471
1472static int devinet_conf_proc(ctl_table *ctl, int write,
1473			     void __user *buffer,
1474			     size_t *lenp, loff_t *ppos)
1475{
1476	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1477
1478	if (write) {
1479		struct ipv4_devconf *cnf = ctl->extra1;
1480		struct net *net = ctl->extra2;
1481		int i = (int *)ctl->data - cnf->data;
1482
1483		set_bit(i, cnf->state);
1484
1485		if (cnf == net->ipv4.devconf_dflt)
1486			devinet_copy_dflt_conf(net, i);
1487	}
1488
1489	return ret;
1490}
1491
1492static int devinet_sysctl_forward(ctl_table *ctl, int write,
1493				  void __user *buffer,
1494				  size_t *lenp, loff_t *ppos)
1495{
1496	int *valp = ctl->data;
1497	int val = *valp;
1498	loff_t pos = *ppos;
1499	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1500
1501	if (write && *valp != val) {
1502		struct net *net = ctl->extra2;
1503
1504		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1505			if (!rtnl_trylock()) {
1506				/* Restore the original values before restarting */
1507				*valp = val;
1508				*ppos = pos;
1509				return restart_syscall();
1510			}
1511			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1512				inet_forward_change(net);
1513			} else if (*valp) {
1514				struct ipv4_devconf *cnf = ctl->extra1;
1515				struct in_device *idev =
1516					container_of(cnf, struct in_device, cnf);
1517				dev_disable_lro(idev->dev);
1518			}
1519			rtnl_unlock();
1520			rt_cache_flush(net, 0);
1521		}
1522	}
1523
1524	return ret;
1525}
1526
1527static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1528				void __user *buffer,
1529				size_t *lenp, loff_t *ppos)
1530{
1531	int *valp = ctl->data;
1532	int val = *valp;
1533	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1534	struct net *net = ctl->extra2;
1535
1536	if (write && *valp != val)
1537		rt_cache_flush(net, 0);
1538
1539	return ret;
1540}
1541
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *   name - sysctl file name
 *   mval - file mode
 *   proc - proc handler
 * .data points at slot (IPV4_DEVCONF_<attr> - 1) of the global
 * ipv4_devconf.data[] and .extra1 at ipv4_devconf itself; both are
 * rebased onto the real target by __devinet_sysctl_register().
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1564
/*
 * Template for a per-configuration sysctl table.  It is never registered
 * directly: __devinet_sysctl_register() duplicates it with kmemdup() and
 * re-targets each entry at the ipv4_devconf being registered.
 *
 *   sysctl_header - handle returned by register_net_sysctl_table()
 *   devinet_vars  - the entries; __IPV4_DEVCONF_MAX slots, of which the
 *                   ones not listed below stay zero-initialised
 *                   (presumably the trailing zero slot terminates the
 *                   table - confirm against the sysctl core)
 *   dev_name      - private copy of the directory name, set at
 *                   registration time
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		/* the only read-only entry */
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* plain read-write integers */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* entries whose change flushes the routing cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1604
1605static int __devinet_sysctl_register(struct net *net, char *dev_name,
1606					struct ipv4_devconf *p)
1607{
1608	int i;
1609	struct devinet_sysctl_table *t;
1610
1611#define DEVINET_CTL_PATH_DEV	3
1612
1613	struct ctl_path devinet_ctl_path[] = {
1614		{ .procname = "net",  },
1615		{ .procname = "ipv4", },
1616		{ .procname = "conf", },
1617		{ /* to be set */ },
1618		{ },
1619	};
1620
1621	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1622	if (!t)
1623		goto out;
1624
1625	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1626		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1627		t->devinet_vars[i].extra1 = p;
1628		t->devinet_vars[i].extra2 = net;
1629	}
1630
1631	/*
1632	 * Make a copy of dev_name, because '.procname' is regarded as const
1633	 * by sysctl and we wouldn't want anyone to change it under our feet
1634	 * (see SIOCSIFNAME).
1635	 */
1636	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1637	if (!t->dev_name)
1638		goto free;
1639
1640	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1641
1642	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1643			t->devinet_vars);
1644	if (!t->sysctl_header)
1645		goto free_procname;
1646
1647	p->sysctl = t;
1648	return 0;
1649
1650free_procname:
1651	kfree(t->dev_name);
1652free:
1653	kfree(t);
1654out:
1655	return -ENOBUFS;
1656}
1657
1658static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1659{
1660	struct devinet_sysctl_table *t = cnf->sysctl;
1661
1662	if (t == NULL)
1663		return;
1664
1665	cnf->sysctl = NULL;
1666	unregister_sysctl_table(t->sysctl_header);
1667	kfree(t->dev_name);
1668	kfree(t);
1669}
1670
1671static void devinet_sysctl_register(struct in_device *idev)
1672{
1673	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1674	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1675					&idev->cnf);
1676}
1677
1678static void devinet_sysctl_unregister(struct in_device *idev)
1679{
1680	__devinet_sysctl_unregister(&idev->cnf);
1681	neigh_sysctl_unregister(idev->arp_parms);
1682}
1683
/*
 * Table for the "ip_forward" sysctl.  It refers to the same FORWARDING
 * slot of the global ipv4_devconf and uses the same handler as the
 * "forwarding" devconf entry.  This static instance targets init_net;
 * devinet_init_net() duplicates and re-targets it for other namespaces.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1697
/* sysctl path "net/ipv4", under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
1703#endif
1704
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table).  Every other
 * namespace gets its own kmemdup() copies of them.  With CONFIG_SYSCTL the
 * "all" and "default" conf directories and the ip_forward table are then
 * registered.
 *
 * Returns 0 on success.  On failure returns -ENOMEM, or the error from
 * __devinet_sysctl_register(), after releasing whatever was set up.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	/* Namespaces other than init_net work on private copies. */
	if (!net_eq(net, &init_net)) {
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied ip_forward entry at this namespace. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/*
	 * Error unwinding, in reverse order of setup.  The pointer
	 * comparisons make sure the static init_net objects are never
	 * passed to kfree().
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1776
1777static __net_exit void devinet_exit_net(struct net *net)
1778{
1779#ifdef CONFIG_SYSCTL
1780	struct ctl_table *tbl;
1781
1782	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1783	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1784	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1785	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1786	kfree(tbl);
1787#endif
1788	kfree(net->ipv4.devconf_dflt);
1789	kfree(net->ipv4.devconf_all);
1790}
1791
/*
 * Per-network-namespace hooks for this file; registered with
 * register_pernet_subsys() in devinet_init().
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1796
/*
 * AF_INET handlers for the address-family specific part of rtnetlink link
 * messages; registered with rtnl_af_register() in devinet_init().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
1804
1805void __init devinet_init(void)
1806{
1807	int i;
1808
1809	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1810		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1811
1812	register_pernet_subsys(&devinet_ops);
1813
1814	register_gifconf(PF_INET, inet_gifconf);
1815	register_netdevice_notifier(&ip_netdev_notifier);
1816
1817	rtnl_af_register(&inet_af_ops);
1818
1819	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1820	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1821	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1822}
1823
1824