devinet.c revision 73af614aedd221df8495fc8c9993c50e87f899f2
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <linux/bitops.h>
31#include <linux/capability.h>
32#include <linux/module.h>
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/string.h>
36#include <linux/mm.h>
37#include <linux/socket.h>
38#include <linux/sockios.h>
39#include <linux/in.h>
40#include <linux/errno.h>
41#include <linux/interrupt.h>
42#include <linux/if_addr.h>
43#include <linux/if_ether.h>
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/etherdevice.h>
47#include <linux/skbuff.h>
48#include <linux/init.h>
49#include <linux/notifier.h>
50#include <linux/inetdevice.h>
51#include <linux/igmp.h>
52#include <linux/slab.h>
53#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58#include <linux/netconf.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66#include <net/addrconf.h>
67
68#include "fib_lookup.h"
69
/*
 * Statically initialised devconf table. Each .data slot is addressed as
 * IPV4_DEVCONF_<ATTR> - 1. Redirect acceptance/sending, secure redirects
 * and shared media are switched on, and the IGMPv2/IGMPv3 unsolicited
 * report intervals are set to 10000 ms and 1000 ms.
 * NOTE(review): presumably backs the namespace-wide "all" settings; the
 * hook-up is not visible in this part of the file - confirm.
 */
70static struct ipv4_devconf ipv4_devconf = {
71	.data = {
72		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78	},
79};
80
/*
 * Second statically initialised devconf table. It carries the same values
 * as ipv4_devconf and additionally enables ACCEPT_SOURCE_ROUTE.
 * NOTE(review): inetdev_init() copies dev_net(dev)->ipv4.devconf_dflt into
 * every new in_device; presumably that pointer refers to this table for the
 * initial namespace - confirm against the namespace init code.
 */
81static struct ipv4_devconf ipv4_devconf_dflt = {
82	.data = {
83		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90	},
91};
92
/*
 * Read attribute @attr from the default devconf of namespace @net
 * (net->ipv4.devconf_dflt).
 *
 * @net is parenthesised so that any pointer-valued expression may be passed,
 * not only a plain identifier: without the parentheses "->" would bind to
 * the tail of an argument such as "&x" or "cond ? a : b".
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
/*
 * Netlink attribute policy for IPv4 address messages. It is handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr(). The three address
 * attributes are typed as 32-bit values, IFA_LABEL is a string with .len
 * IFNAMSIZ - 1, and IFA_CACHEINFO has .len sizeof(struct ifa_cacheinfo).
 */
96static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97	[IFA_LOCAL]     	= { .type = NLA_U32 },
98	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102};
103
/*
 * Hash table of configured local addresses: 1 << 8 = 256 buckets, indexed
 * by inet_addr_hash(). inet_hash_insert() and inet_hash_remove() modify the
 * buckets under inet_addr_hash_lock; lookups in this file walk them inside
 * rcu_read_lock() sections.
 */
104#define IN4_ADDR_HSIZE_SHIFT	8
105#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
106
107static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108static DEFINE_SPINLOCK(inet_addr_hash_lock);
109
110static u32 inet_addr_hash(struct net *net, __be32 addr)
111{
112	u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115}
116
117static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118{
119	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121	spin_lock(&inet_addr_hash_lock);
122	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123	spin_unlock(&inet_addr_hash_lock);
124}
125
126static void inet_hash_remove(struct in_ifaddr *ifa)
127{
128	spin_lock(&inet_addr_hash_lock);
129	hlist_del_init_rcu(&ifa->hash);
130	spin_unlock(&inet_addr_hash_lock);
131}
132
133/**
134 * __ip_dev_find - find the first device with a given source address.
135 * @net: the net namespace
136 * @addr: the source address
137 * @devref: if true, take a reference on the found device
138 *
139 * If a caller uses devref=false, it should be protected by RCU, or RTNL
140 */
141struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
142{
143	u32 hash = inet_addr_hash(net, addr);
144	struct net_device *result = NULL;
145	struct in_ifaddr *ifa;
146
147	rcu_read_lock();
148	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
149		if (ifa->ifa_local == addr) {
150			struct net_device *dev = ifa->ifa_dev->dev;
151
152			if (!net_eq(dev_net(dev), net))
153				continue;
154			result = dev;
155			break;
156		}
157	}
158	if (!result) {
159		struct flowi4 fl4 = { .daddr = addr };
160		struct fib_result res = { 0 };
161		struct fib_table *local;
162
163		/* Fallback to FIB local table so that communication
164		 * over loopback subnets work.
165		 */
166		local = fib_get_table(net, RT_TABLE_LOCAL);
167		if (local &&
168		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169		    res.type == RTN_LOCAL)
170			result = FIB_RES_DEV(res);
171	}
172	if (result && devref)
173		dev_hold(result);
174	rcu_read_unlock();
175	return result;
176}
177EXPORT_SYMBOL(__ip_dev_find);
178
/*
 * Forward declaration: emits the netlink message for an address event
 * (called below with RTM_NEWADDR / RTM_DELADDR).
 */
179static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
180
/* Notifier chain called with NETDEV_UP / NETDEV_DOWN when addresses change. */
181static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
182static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
183			 int destroy);
/*
 * Per-device sysctl registration hooks. Without CONFIG_SYSCTL they are
 * empty stubs so callers need no conditional code.
 */
184#ifdef CONFIG_SYSCTL
185static void devinet_sysctl_register(struct in_device *idev);
186static void devinet_sysctl_unregister(struct in_device *idev);
187#else
188static void devinet_sysctl_register(struct in_device *idev)
189{
190}
191static void devinet_sysctl_unregister(struct in_device *idev)
192{
193}
194#endif
195
196/* Locks all the inet devices. */
197
198static struct in_ifaddr *inet_alloc_ifa(void)
199{
200	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
201}
202
203static void inet_rcu_free_ifa(struct rcu_head *head)
204{
205	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
206	if (ifa->ifa_dev)
207		in_dev_put(ifa->ifa_dev);
208	kfree(ifa);
209}
210
211static void inet_free_ifa(struct in_ifaddr *ifa)
212{
213	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
214}
215
216void in_dev_finish_destroy(struct in_device *idev)
217{
218	struct net_device *dev = idev->dev;
219
220	WARN_ON(idev->ifa_list);
221	WARN_ON(idev->mc_list);
222	kfree(rcu_dereference_protected(idev->mc_hash, 1));
223#ifdef NET_REFCNT_DEBUG
224	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
225#endif
226	dev_put(dev);
227	if (!idev->dead)
228		pr_err("Freeing alive in_device %p\n", idev);
229	else
230		kfree(idev);
231}
232EXPORT_SYMBOL(in_dev_finish_destroy);
233
234static struct in_device *inetdev_init(struct net_device *dev)
235{
236	struct in_device *in_dev;
237
238	ASSERT_RTNL();
239
240	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241	if (!in_dev)
242		goto out;
243	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244			sizeof(in_dev->cnf));
245	in_dev->cnf.sysctl = NULL;
246	in_dev->dev = dev;
247	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248	if (!in_dev->arp_parms)
249		goto out_kfree;
250	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251		dev_disable_lro(dev);
252	/* Reference in_dev->dev */
253	dev_hold(dev);
254	/* Account for reference dev->ip_ptr (below) */
255	in_dev_hold(in_dev);
256
257	devinet_sysctl_register(in_dev);
258	ip_mc_init_dev(in_dev);
259	if (dev->flags & IFF_UP)
260		ip_mc_up(in_dev);
261
262	/* we can receive as soon as ip_ptr is set -- do this last */
263	rcu_assign_pointer(dev->ip_ptr, in_dev);
264out:
265	return in_dev;
266out_kfree:
267	kfree(in_dev);
268	in_dev = NULL;
269	goto out;
270}
271
272static void in_dev_rcu_put(struct rcu_head *head)
273{
274	struct in_device *idev = container_of(head, struct in_device, rcu_head);
275	in_dev_put(idev);
276}
277
/*
 * Tear down the IPv4 state attached to a net_device. Must run under RTNL.
 *
 * The in_device is flagged dead, its multicast state is destroyed, every
 * address on ifa_list is unlinked and freed, dev->ip_ptr is cleared, and
 * the sysctl entries and ARP parameters are released. The final
 * in_dev_put() is deferred through call_rcu() (see in_dev_rcu_put()).
 */
278static void inetdev_destroy(struct in_device *in_dev)
279{
280	struct in_ifaddr *ifa;
281	struct net_device *dev;
282
283	ASSERT_RTNL();
284
285	dev = in_dev->dev;
286
287	in_dev->dead = 1;
288
289	ip_mc_destroy_dev(in_dev);
290
	/* Always remove the list head; destroy=0, so free it ourselves. */
291	while ((ifa = in_dev->ifa_list) != NULL) {
292		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
293		inet_free_ifa(ifa);
294	}
295
296	RCU_INIT_POINTER(dev->ip_ptr, NULL);
297
298	devinet_sysctl_unregister(in_dev);
299	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
300	arp_ifdown(dev);
301
302	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
303}
304
305int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
306{
307	rcu_read_lock();
308	for_primary_ifa(in_dev) {
309		if (inet_ifa_match(a, ifa)) {
310			if (!b || inet_ifa_match(b, ifa)) {
311				rcu_read_unlock();
312				return 1;
313			}
314		}
315	} endfor_ifa(in_dev);
316	rcu_read_unlock();
317	return 0;
318}
319
/*
 * Remove the address *ifap from in_dev's address list. Caller holds RTNL.
 *
 * @in_dev:  device the address belongs to
 * @ifap:    link (in_dev->ifa_list or some ifa_next) that points at the
 *           address to remove
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh, @portid: forwarded to rtmsg_ifa() for the netlink notifications
 *
 * When the address is a primary, the secondaries that follow it in the same
 * subnet are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is
 * set, the first of them is promoted to primary.
 */
320static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
321			 int destroy, struct nlmsghdr *nlh, u32 portid)
322{
323	struct in_ifaddr *promote = NULL;
324	struct in_ifaddr *ifa, *ifa1 = *ifap;
325	struct in_ifaddr *last_prim = in_dev->ifa_list;
326	struct in_ifaddr *prev_prom = NULL;
327	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
328
329	ASSERT_RTNL();
330
331	/* 1. Deleting primary ifaddr forces deletion all secondaries
332	 * unless alias promotion is set
333	 **/
334
335	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
336		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
337
338		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary the promoted address may follow. */
339			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
340			    ifa1->ifa_scope <= ifa->ifa_scope)
341				last_prim = ifa;
342
			/* Skip entries that are not secondaries of ifa1's subnet. */
343			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
344			    ifa1->ifa_mask != ifa->ifa_mask ||
345			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
346				ifap1 = &ifa->ifa_next;
347				prev_prom = ifa;
348				continue;
349			}
350
351			if (!do_promote) {
352				inet_hash_remove(ifa);
353				*ifap1 = ifa->ifa_next;
354
355				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
356				blocking_notifier_call_chain(&inetaddr_chain,
357						NETDEV_DOWN, ifa);
358				inet_free_ifa(ifa);
359			} else {
360				promote = ifa;
361				break;
362			}
363		}
364	}
365
366	/* On promotion all secondaries from subnet are changing
367	 * the primary IP, we must remove all their routes silently
368	 * and later to add them back with new prefsrc. Do this
369	 * while all addresses are on the device list.
370	 */
371	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
372		if (ifa1->ifa_mask == ifa->ifa_mask &&
373		    inet_ifa_match(ifa1->ifa_address, ifa))
374			fib_del_ifaddr(ifa, ifa1);
375	}
376
377	/* 2. Unlink it */
378
379	*ifap = ifa1->ifa_next;
380	inet_hash_remove(ifa1);
381
382	/* 3. Announce address deletion */
383
384	/* Send message first, then call notifier.
385	   At first sight, FIB update triggered by notifier
386	   will refer to already deleted ifaddr, that could confuse
387	   netlink listeners. It is not true: look, gated sees
388	   that route deleted and if it still thinks that ifaddr
389	   is valid, it will try to restore deleted routes... Grr.
390	   So that, this order is correct.
391	 */
392	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
393	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
394
395	if (promote) {
396		struct in_ifaddr *next_sec = promote->ifa_next;
397
		/* Move the promoted address right behind the last primary. */
398		if (prev_prom) {
399			prev_prom->ifa_next = promote->ifa_next;
400			promote->ifa_next = last_prim->ifa_next;
401			last_prim->ifa_next = promote;
402		}
403
404		promote->ifa_flags &= ~IFA_F_SECONDARY;
405		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
406		blocking_notifier_call_chain(&inetaddr_chain,
407				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of the subnet. */
408		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
409			if (ifa1->ifa_mask != ifa->ifa_mask ||
410			    !inet_ifa_match(ifa1->ifa_address, ifa))
411					continue;
412			fib_add_ifaddr(ifa);
413		}
414
415	}
416	if (destroy)
417		inet_free_ifa(ifa1);
418}
419
420static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
421			 int destroy)
422{
423	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
424}
425
/* Forward declaration so the delayed work item below can name its handler. */
426static void check_lifetime(struct work_struct *work);
427
/*
 * Work item running check_lifetime(). It is rescheduled when an address is
 * inserted or updated, and by check_lifetime() itself.
 */
428static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
429
/*
 * Link @ifa into the address list of ifa->ifa_dev. Caller holds RTNL.
 *
 * @ifa:  address to insert; ifa->ifa_dev must already be set
 * @nlh, @portid: forwarded to rtmsg_ifa() for the RTM_NEWADDR notification
 *
 * An address whose subnet (mask and prefix) already exists on the device is
 * flagged IFA_F_SECONDARY and appended at the tail of the list; a primary
 * is placed after the last primary tracked by last_primary.
 *
 * Returns 0 on success, and also when ifa_local is zero (the address is
 * then simply freed). Returns -EEXIST when the same local address already
 * exists in that subnet and -EINVAL when the scope differs from an existing
 * address of the subnet; @ifa is freed in both error cases.
 */
430static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
431			     u32 portid)
432{
433	struct in_device *in_dev = ifa->ifa_dev;
434	struct in_ifaddr *ifa1, **ifap, **last_primary;
435
436	ASSERT_RTNL();
437
438	if (!ifa->ifa_local) {
439		inet_free_ifa(ifa);
440		return 0;
441	}
442
443	ifa->ifa_flags &= ~IFA_F_SECONDARY;
444	last_primary = &in_dev->ifa_list;
445
	/* After this loop ifap points at the terminating NULL link. */
446	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
447	     ifap = &ifa1->ifa_next) {
448		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
449		    ifa->ifa_scope <= ifa1->ifa_scope)
450			last_primary = &ifa1->ifa_next;
451		if (ifa1->ifa_mask == ifa->ifa_mask &&
452		    inet_ifa_match(ifa1->ifa_address, ifa)) {
453			if (ifa1->ifa_local == ifa->ifa_local) {
454				inet_free_ifa(ifa);
455				return -EEXIST;
456			}
457			if (ifa1->ifa_scope != ifa->ifa_scope) {
458				inet_free_ifa(ifa);
459				return -EINVAL;
460			}
461			ifa->ifa_flags |= IFA_F_SECONDARY;
462		}
463	}
464
465	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
466		net_srandom(ifa->ifa_local);
467		ifap = last_primary;
468	}
469
470	ifa->ifa_next = *ifap;
471	*ifap = ifa;
472
473	inet_hash_insert(dev_net(in_dev->dev), ifa);
474
	/* Run the lifetime check right away so the new address is considered. */
475	cancel_delayed_work(&check_lifetime_work);
476	schedule_delayed_work(&check_lifetime_work, 0);
477
478	/* Send message first, then call notifier.
479	   Notifier will trigger FIB update, so that
480	   listeners of netlink will know about new ifaddr */
481	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
482	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
483
484	return 0;
485}
486
487static int inet_insert_ifa(struct in_ifaddr *ifa)
488{
489	return __inet_insert_ifa(ifa, NULL, 0);
490}
491
492static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
493{
494	struct in_device *in_dev = __in_dev_get_rtnl(dev);
495
496	ASSERT_RTNL();
497
498	if (!in_dev) {
499		inet_free_ifa(ifa);
500		return -ENOBUFS;
501	}
502	ipv4_devconf_setall(in_dev);
503	if (ifa->ifa_dev != in_dev) {
504		WARN_ON(ifa->ifa_dev);
505		in_dev_hold(in_dev);
506		ifa->ifa_dev = in_dev;
507	}
508	if (ipv4_is_loopback(ifa->ifa_local))
509		ifa->ifa_scope = RT_SCOPE_HOST;
510	return inet_insert_ifa(ifa);
511}
512
513/* Caller must hold RCU or RTNL :
514 * We dont take a reference on found in_device
515 */
516struct in_device *inetdev_by_index(struct net *net, int ifindex)
517{
518	struct net_device *dev;
519	struct in_device *in_dev = NULL;
520
521	rcu_read_lock();
522	dev = dev_get_by_index_rcu(net, ifindex);
523	if (dev)
524		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
525	rcu_read_unlock();
526	return in_dev;
527}
528EXPORT_SYMBOL(inetdev_by_index);
529
530/* Called only from RTNL semaphored context. No locks. */
531
532struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
533				    __be32 mask)
534{
535	ASSERT_RTNL();
536
537	for_primary_ifa(in_dev) {
538		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
539			return ifa;
540	} endfor_ifa(in_dev);
541	return NULL;
542}
543
544static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
545{
546	struct net *net = sock_net(skb->sk);
547	struct nlattr *tb[IFA_MAX+1];
548	struct in_device *in_dev;
549	struct ifaddrmsg *ifm;
550	struct in_ifaddr *ifa, **ifap;
551	int err = -EINVAL;
552
553	ASSERT_RTNL();
554
555	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
556	if (err < 0)
557		goto errout;
558
559	ifm = nlmsg_data(nlh);
560	in_dev = inetdev_by_index(net, ifm->ifa_index);
561	if (in_dev == NULL) {
562		err = -ENODEV;
563		goto errout;
564	}
565
566	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
567	     ifap = &ifa->ifa_next) {
568		if (tb[IFA_LOCAL] &&
569		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
570			continue;
571
572		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
573			continue;
574
575		if (tb[IFA_ADDRESS] &&
576		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
577		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
578			continue;
579
580		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
581		return 0;
582	}
583
584	err = -EADDRNOTAVAIL;
585errout:
586	return err;
587}
588
/* Lifetime value meaning "never expires"; check_lifetime() skips such lifetimes. */
589#define INFINITY_LIFE_TIME	0xFFFFFFFF
590
/*
 * Delayed-work handler that ages non-permanent addresses.
 *
 * Every hash bucket is first scanned under rcu_read_lock() to find out
 * whether any address has expired or needs deprecating, and to compute the
 * earliest time something will. Only when a change is needed is the bucket
 * walked a second time under rtnl_lock(): addresses past their valid
 * lifetime are deleted, addresses past their preferred lifetime get
 * IFA_F_DEPRECATED and an RTM_NEWADDR message. Finally the work re-arms
 * itself, no sooner than ADDRCONF_TIMER_FUZZ_MAX jiffies from now.
 */
591static void check_lifetime(struct work_struct *work)
592{
593	unsigned long now, next, next_sec, next_sched;
594	struct in_ifaddr *ifa;
595	struct hlist_node *n;
596	int i;
597
598	now = jiffies;
599	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
600
601	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
602		bool change_needed = false;
603
		/* Pass 1: read-only scan, no RTNL needed. */
604		rcu_read_lock();
605		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
606			unsigned long age;
607
608			if (ifa->ifa_flags & IFA_F_PERMANENT)
609				continue;
610
611			/* We try to batch several events at once. */
612			age = (now - ifa->ifa_tstamp +
613			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
614
615			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
616			    age >= ifa->ifa_valid_lft) {
617				change_needed = true;
618			} else if (ifa->ifa_preferred_lft ==
619				   INFINITY_LIFE_TIME) {
620				continue;
621			} else if (age >= ifa->ifa_preferred_lft) {
622				if (time_before(ifa->ifa_tstamp +
623						ifa->ifa_valid_lft * HZ, next))
624					next = ifa->ifa_tstamp +
625					       ifa->ifa_valid_lft * HZ;
626
627				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
628					change_needed = true;
629			} else if (time_before(ifa->ifa_tstamp +
630					       ifa->ifa_preferred_lft * HZ,
631					       next)) {
632				next = ifa->ifa_tstamp +
633				       ifa->ifa_preferred_lft * HZ;
634			}
635		}
636		rcu_read_unlock();
637		if (!change_needed)
638			continue;
		/* Pass 2: apply the changes with RTNL held. */
639		rtnl_lock();
640		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
641			unsigned long age;
642
643			if (ifa->ifa_flags & IFA_F_PERMANENT)
644				continue;
645
646			/* We try to batch several events at once. */
647			age = (now - ifa->ifa_tstamp +
648			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
649
650			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
651			    age >= ifa->ifa_valid_lft) {
652				struct in_ifaddr **ifap;
653
				/* Locate the link pointing at ifa; inet_del_ifa() needs it. */
654				for (ifap = &ifa->ifa_dev->ifa_list;
655				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
656					if (*ifap == ifa) {
657						inet_del_ifa(ifa->ifa_dev,
658							     ifap, 1);
659						break;
660					}
661				}
662			} else if (ifa->ifa_preferred_lft !=
663				   INFINITY_LIFE_TIME &&
664				   age >= ifa->ifa_preferred_lft &&
665				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
666				ifa->ifa_flags |= IFA_F_DEPRECATED;
667				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
668			}
669		}
670		rtnl_unlock();
671	}
672
673	next_sec = round_jiffies_up(next);
674	next_sched = next;
675
676	/* If rounded timeout is accurate enough, accept it. */
677	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
678		next_sched = next_sec;
679
680	now = jiffies;
681	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
682	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
683		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
684
685	schedule_delayed_work(&check_lifetime_work, next_sched - now);
686}
687
688static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
689			     __u32 prefered_lft)
690{
691	unsigned long timeout;
692
693	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
694
695	timeout = addrconf_timeout_fixup(valid_lft, HZ);
696	if (addrconf_finite_timeout(timeout))
697		ifa->ifa_valid_lft = timeout;
698	else
699		ifa->ifa_flags |= IFA_F_PERMANENT;
700
701	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
702	if (addrconf_finite_timeout(timeout)) {
703		if (timeout == 0)
704			ifa->ifa_flags |= IFA_F_DEPRECATED;
705		ifa->ifa_preferred_lft = timeout;
706	}
707	ifa->ifa_tstamp = jiffies;
708	if (!ifa->ifa_cstamp)
709		ifa->ifa_cstamp = ifa->ifa_tstamp;
710}
711
/*
 * Build a new in_ifaddr from the netlink message @nlh (used by
 * inet_rtm_newaddr()).
 *
 * @net: namespace in which ifm->ifa_index is resolved
 * @nlh: netlink message carrying a struct ifaddrmsg and IFA_* attributes
 * @pvalid_lft, @pprefered_lft: only written when IFA_CACHEINFO is present
 *
 * Returns the new address, holding a reference on its in_device, or an
 * ERR_PTR(): the nlmsg_parse() error, -EINVAL (prefix length > 32, no
 * IFA_LOCAL, or invalid cacheinfo), -ENODEV (no such device) or -ENOBUFS
 * (device without in_device, or allocation failure).
 */
712static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
713				       __u32 *pvalid_lft, __u32 *pprefered_lft)
714{
715	struct nlattr *tb[IFA_MAX+1];
716	struct in_ifaddr *ifa;
717	struct ifaddrmsg *ifm;
718	struct net_device *dev;
719	struct in_device *in_dev;
720	int err;
721
722	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
723	if (err < 0)
724		goto errout;
725
726	ifm = nlmsg_data(nlh);
727	err = -EINVAL;
728	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
729		goto errout;
730
731	dev = __dev_get_by_index(net, ifm->ifa_index);
732	err = -ENODEV;
733	if (dev == NULL)
734		goto errout;
735
736	in_dev = __in_dev_get_rtnl(dev);
737	err = -ENOBUFS;
738	if (in_dev == NULL)
739		goto errout;
740
741	ifa = inet_alloc_ifa();
742	if (ifa == NULL)
743		/*
744		 * A potential indev allocation can be left alive, it stays
745		 * assigned to its device and is destroyed with it.
746		 */
747		goto errout;
748
749	ipv4_devconf_setall(in_dev);
	/* Reference owned by ifa->ifa_dev; dropped in inet_rcu_free_ifa(). */
750	in_dev_hold(in_dev);
751
	/* Without an explicit IFA_ADDRESS the local address is used for it. */
752	if (tb[IFA_ADDRESS] == NULL)
753		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
754
755	INIT_HLIST_NODE(&ifa->hash);
756	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
757	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
758	ifa->ifa_flags = ifm->ifa_flags;
759	ifa->ifa_scope = ifm->ifa_scope;
760	ifa->ifa_dev = in_dev;
761
762	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
763	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
764
765	if (tb[IFA_BROADCAST])
766		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
767
768	if (tb[IFA_LABEL])
769		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
770	else
771		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
772
773	if (tb[IFA_CACHEINFO]) {
774		struct ifa_cacheinfo *ci;
775
776		ci = nla_data(tb[IFA_CACHEINFO]);
		/* Reject a zero valid lifetime or preferred > valid. */
777		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
778			err = -EINVAL;
779			goto errout_free;
780		}
781		*pvalid_lft = ci->ifa_valid;
782		*pprefered_lft = ci->ifa_prefered;
783	}
784
785	return ifa;
786
787errout_free:
788	inet_free_ifa(ifa);
789errout:
790	return ERR_PTR(err);
791}
792
793static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
794{
795	struct in_device *in_dev = ifa->ifa_dev;
796	struct in_ifaddr *ifa1, **ifap;
797
798	if (!ifa->ifa_local)
799		return NULL;
800
801	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
802	     ifap = &ifa1->ifa_next) {
803		if (ifa1->ifa_mask == ifa->ifa_mask &&
804		    inet_ifa_match(ifa1->ifa_address, ifa) &&
805		    ifa1->ifa_local == ifa->ifa_local)
806			return ifa1;
807	}
808	return NULL;
809}
810
811static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
812{
813	struct net *net = sock_net(skb->sk);
814	struct in_ifaddr *ifa;
815	struct in_ifaddr *ifa_existing;
816	__u32 valid_lft = INFINITY_LIFE_TIME;
817	__u32 prefered_lft = INFINITY_LIFE_TIME;
818
819	ASSERT_RTNL();
820
821	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
822	if (IS_ERR(ifa))
823		return PTR_ERR(ifa);
824
825	ifa_existing = find_matching_ifa(ifa);
826	if (!ifa_existing) {
827		/* It would be best to check for !NLM_F_CREATE here but
828		 * userspace alreay relies on not having to provide this.
829		 */
830		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
831		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
832	} else {
833		inet_free_ifa(ifa);
834
835		if (nlh->nlmsg_flags & NLM_F_EXCL ||
836		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
837			return -EEXIST;
838		ifa = ifa_existing;
839		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
840		cancel_delayed_work(&check_lifetime_work);
841		schedule_delayed_work(&check_lifetime_work, 0);
842		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
843		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
844	}
845	return 0;
846}
847
848/*
849 *	Determine a default network mask, based on the IP address.
850 */
851
852static int inet_abc_len(__be32 addr)
853{
854	int rc = -1;	/* Something else, probably a multicast. */
855
856	if (ipv4_is_zeronet(addr))
857		rc = 0;
858	else {
859		__u32 haddr = ntohl(addr);
860
861		if (IN_CLASSA(haddr))
862			rc = 8;
863		else if (IN_CLASSB(haddr))
864			rc = 16;
865		else if (IN_CLASSC(haddr))
866			rc = 24;
867	}
868
869	return rc;
870}
871
872
/*
 * Handle the SIOC[GS]IF{ADDR,BRDADDR,DSTADDR,NETMASK} and SIOCSIFFLAGS
 * ioctls for IPv4.
 *
 * @net: namespace used for the device lookup and the capability check
 * @cmd: ioctl number; anything not listed above yields -EINVAL
 * @arg: user pointer to a struct ifreq (read, and written back for the
 *       "get" commands)
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM (set command without CAP_NET_ADMIN), -EINVAL, -ENODEV (no such
 * device), -EADDRNOTAVAIL (no matching address), -ENOBUFS, or whatever
 * dev_change_flags() / inet_set_ifa() return.
 *
 * An interface name of the form "dev:alias" selects an address by label;
 * the part before the colon names the device.
 */
873int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
874{
875	struct ifreq ifr;
876	struct sockaddr_in sin_orig;
877	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
878	struct in_device *in_dev;
879	struct in_ifaddr **ifap = NULL;
880	struct in_ifaddr *ifa = NULL;
881	struct net_device *dev;
882	char *colon;
883	int ret = -EFAULT;
884	int tryaddrmatch = 0;
885
886	/*
887	 *	Fetch the caller's info block into kernel space
888	 */
889
890	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
891		goto out;
892	ifr.ifr_name[IFNAMSIZ - 1] = 0;
893
894	/* save original address for comparison */
895	memcpy(&sin_orig, sin, sizeof(*sin));
896
	/* Cut the name at the colon so only the device part is looked up. */
897	colon = strchr(ifr.ifr_name, ':');
898	if (colon)
899		*colon = 0;
900
901	dev_load(net, ifr.ifr_name);
902
903	switch (cmd) {
904	case SIOCGIFADDR:	/* Get interface address */
905	case SIOCGIFBRDADDR:	/* Get the broadcast address */
906	case SIOCGIFDSTADDR:	/* Get the destination address */
907	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
908		/* Note that these ioctls will not sleep,
909		   so that we do not impose a lock.
910		   One day we will be forced to put shlock here (I mean SMP)
911		 */
912		tryaddrmatch = (sin_orig.sin_family == AF_INET);
913		memset(sin, 0, sizeof(*sin));
914		sin->sin_family = AF_INET;
915		break;
916
917	case SIOCSIFFLAGS:
918		ret = -EPERM;
919		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
920			goto out;
921		break;
922	case SIOCSIFADDR:	/* Set interface address (and family) */
923	case SIOCSIFBRDADDR:	/* Set the broadcast address */
924	case SIOCSIFDSTADDR:	/* Set the destination address */
925	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
926		ret = -EPERM;
927		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
928			goto out;
929		ret = -EINVAL;
930		if (sin->sin_family != AF_INET)
931			goto out;
932		break;
933	default:
934		ret = -EINVAL;
935		goto out;
936	}
937
938	rtnl_lock();
939
940	ret = -ENODEV;
941	dev = __dev_get_by_name(net, ifr.ifr_name);
942	if (!dev)
943		goto done;
944
	/* Restore the full "dev:alias" name for the label comparisons below. */
945	if (colon)
946		*colon = ':';
947
948	in_dev = __in_dev_get_rtnl(dev);
949	if (in_dev) {
950		if (tryaddrmatch) {
951			/* Matthias Andree */
952			/* compare label and address (4.4BSD style) */
953			/* note: we only do this for a limited set of ioctls
954			   and only if the original address family was AF_INET.
955			   This is checked above. */
956			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
957			     ifap = &ifa->ifa_next) {
958				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
959				    sin_orig.sin_addr.s_addr ==
960							ifa->ifa_local) {
961					break; /* found */
962				}
963			}
964		}
965		/* we didn't get a match, maybe the application is
966		   4.3BSD-style and passed in junk so we fall back to
967		   comparing just the label */
968		if (!ifa) {
969			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
970			     ifap = &ifa->ifa_next)
971				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
972					break;
973		}
974	}
975
	/* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an address. */
976	ret = -EADDRNOTAVAIL;
977	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
978		goto done;
979
980	switch (cmd) {
981	case SIOCGIFADDR:	/* Get interface address */
982		sin->sin_addr.s_addr = ifa->ifa_local;
983		goto rarok;
984
985	case SIOCGIFBRDADDR:	/* Get the broadcast address */
986		sin->sin_addr.s_addr = ifa->ifa_broadcast;
987		goto rarok;
988
989	case SIOCGIFDSTADDR:	/* Get the destination address */
990		sin->sin_addr.s_addr = ifa->ifa_address;
991		goto rarok;
992
993	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
994		sin->sin_addr.s_addr = ifa->ifa_mask;
995		goto rarok;
996
997	case SIOCSIFFLAGS:
		/* On an alias, clearing IFF_UP deletes that address only. */
998		if (colon) {
999			ret = -EADDRNOTAVAIL;
1000			if (!ifa)
1001				break;
1002			ret = 0;
1003			if (!(ifr.ifr_flags & IFF_UP))
1004				inet_del_ifa(in_dev, ifap, 1);
1005			break;
1006		}
1007		ret = dev_change_flags(dev, ifr.ifr_flags);
1008		break;
1009
1010	case SIOCSIFADDR:	/* Set interface address (and family) */
1011		ret = -EINVAL;
1012		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1013			break;
1014
1015		if (!ifa) {
1016			ret = -ENOBUFS;
1017			ifa = inet_alloc_ifa();
1018			if (!ifa)
1019				break;
1020			INIT_HLIST_NODE(&ifa->hash);
1021			if (colon)
1022				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1023			else
1024				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1025		} else {
1026			ret = 0;
1027			if (ifa->ifa_local == sin->sin_addr.s_addr)
1028				break;
			/* Unlink without freeing; the entry is reused below. */
1029			inet_del_ifa(in_dev, ifap, 0);
1030			ifa->ifa_broadcast = 0;
1031			ifa->ifa_scope = 0;
1032		}
1033
1034		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1035
1036		if (!(dev->flags & IFF_POINTOPOINT)) {
1037			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1038			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1039			if ((dev->flags & IFF_BROADCAST) &&
1040			    ifa->ifa_prefixlen < 31)
1041				ifa->ifa_broadcast = ifa->ifa_address |
1042						     ~ifa->ifa_mask;
1043		} else {
1044			ifa->ifa_prefixlen = 32;
1045			ifa->ifa_mask = inet_make_mask(32);
1046		}
1047		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1048		ret = inet_set_ifa(dev, ifa);
1049		break;
1050
1051	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1052		ret = 0;
1053		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1054			inet_del_ifa(in_dev, ifap, 0);
1055			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1056			inet_insert_ifa(ifa);
1057		}
1058		break;
1059
1060	case SIOCSIFDSTADDR:	/* Set the destination address */
1061		ret = 0;
1062		if (ifa->ifa_address == sin->sin_addr.s_addr)
1063			break;
1064		ret = -EINVAL;
1065		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1066			break;
1067		ret = 0;
1068		inet_del_ifa(in_dev, ifap, 0);
1069		ifa->ifa_address = sin->sin_addr.s_addr;
1070		inet_insert_ifa(ifa);
1071		break;
1072
1073	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1074
1075		/*
1076		 *	The mask we set must be legal.
1077		 */
1078		ret = -EINVAL;
1079		if (bad_mask(sin->sin_addr.s_addr, 0))
1080			break;
1081		ret = 0;
1082		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1083			__be32 old_mask = ifa->ifa_mask;
1084			inet_del_ifa(in_dev, ifap, 0);
1085			ifa->ifa_mask = sin->sin_addr.s_addr;
1086			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1087
1088			/* See if current broadcast address matches
1089			 * with current netmask, then recalculate
1090			 * the broadcast address. Otherwise it's a
1091			 * funny address, so don't touch it since
1092			 * the user seems to know what (s)he's doing...
1093			 */
1094			if ((dev->flags & IFF_BROADCAST) &&
1095			    (ifa->ifa_prefixlen < 31) &&
1096			    (ifa->ifa_broadcast ==
1097			     (ifa->ifa_local|~old_mask))) {
1098				ifa->ifa_broadcast = (ifa->ifa_local |
1099						      ~sin->sin_addr.s_addr);
1100			}
1101			inet_insert_ifa(ifa);
1102		}
1103		break;
1104	}
1105done:
1106	rtnl_unlock();
1107out:
1108	return ret;
	/* "get" commands: drop RTNL first, then copy the result back to the user. */
1109rarok:
1110	rtnl_unlock();
1111	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1112	goto out;
1113}
1114
1115static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1116{
1117	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1118	struct in_ifaddr *ifa;
1119	struct ifreq ifr;
1120	int done = 0;
1121
1122	if (!in_dev)
1123		goto out;
1124
1125	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1126		if (!buf) {
1127			done += sizeof(ifr);
1128			continue;
1129		}
1130		if (len < (int) sizeof(ifr))
1131			break;
1132		memset(&ifr, 0, sizeof(struct ifreq));
1133		strcpy(ifr.ifr_name, ifa->ifa_label);
1134
1135		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1136		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1137								ifa->ifa_local;
1138
1139		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1140			done = -EFAULT;
1141			break;
1142		}
1143		buf  += sizeof(struct ifreq);
1144		len  -= sizeof(struct ifreq);
1145		done += sizeof(struct ifreq);
1146	}
1147out:
1148	return done;
1149}
1150
1151__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1152{
1153	__be32 addr = 0;
1154	struct in_device *in_dev;
1155	struct net *net = dev_net(dev);
1156
1157	rcu_read_lock();
1158	in_dev = __in_dev_get_rcu(dev);
1159	if (!in_dev)
1160		goto no_in_dev;
1161
1162	for_primary_ifa(in_dev) {
1163		if (ifa->ifa_scope > scope)
1164			continue;
1165		if (!dst || inet_ifa_match(dst, ifa)) {
1166			addr = ifa->ifa_local;
1167			break;
1168		}
1169		if (!addr)
1170			addr = ifa->ifa_local;
1171	} endfor_ifa(in_dev);
1172
1173	if (addr)
1174		goto out_unlock;
1175no_in_dev:
1176
1177	/* Not loopback addresses on loopback should be preferred
1178	   in this case. It is importnat that lo is the first interface
1179	   in dev_base list.
1180	 */
1181	for_each_netdev_rcu(net, dev) {
1182		in_dev = __in_dev_get_rcu(dev);
1183		if (!in_dev)
1184			continue;
1185
1186		for_primary_ifa(in_dev) {
1187			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1188			    ifa->ifa_scope <= scope) {
1189				addr = ifa->ifa_local;
1190				goto out_unlock;
1191			}
1192		} endfor_ifa(in_dev);
1193	}
1194out_unlock:
1195	rcu_read_unlock();
1196	return addr;
1197}
1198EXPORT_SYMBOL(inet_select_addr);
1199
1200static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1201			      __be32 local, int scope)
1202{
1203	int same = 0;
1204	__be32 addr = 0;
1205
1206	for_ifa(in_dev) {
1207		if (!addr &&
1208		    (local == ifa->ifa_local || !local) &&
1209		    ifa->ifa_scope <= scope) {
1210			addr = ifa->ifa_local;
1211			if (same)
1212				break;
1213		}
1214		if (!same) {
1215			same = (!local || inet_ifa_match(local, ifa)) &&
1216				(!dst || inet_ifa_match(dst, ifa));
1217			if (same && addr) {
1218				if (local || !dst)
1219					break;
1220				/* Is the selected addr into dst subnet? */
1221				if (inet_ifa_match(addr, ifa))
1222					break;
1223				/* No, then can we use new local src? */
1224				if (ifa->ifa_scope <= scope) {
1225					addr = ifa->ifa_local;
1226					break;
1227				}
1228				/* search for large dst subnet for addr */
1229				same = 0;
1230			}
1231		}
1232	} endfor_ifa(in_dev);
1233
1234	return same ? addr : 0;
1235}
1236
1237/*
1238 * Confirm that local IP address exists using wildcards:
1239 * - in_dev: only on this interface, 0=any interface
1240 * - dst: only in the same subnet as dst, 0=any dst
1241 * - local: address, 0=autoselect the local address
1242 * - scope: maximum allowed scope value for the local address
1243 */
1244__be32 inet_confirm_addr(struct in_device *in_dev,
1245			 __be32 dst, __be32 local, int scope)
1246{
1247	__be32 addr = 0;
1248	struct net_device *dev;
1249	struct net *net;
1250
1251	if (scope != RT_SCOPE_LINK)
1252		return confirm_addr_indev(in_dev, dst, local, scope);
1253
1254	net = dev_net(in_dev->dev);
1255	rcu_read_lock();
1256	for_each_netdev_rcu(net, dev) {
1257		in_dev = __in_dev_get_rcu(dev);
1258		if (in_dev) {
1259			addr = confirm_addr_indev(in_dev, dst, local, scope);
1260			if (addr)
1261				break;
1262		}
1263	}
1264	rcu_read_unlock();
1265
1266	return addr;
1267}
1268EXPORT_SYMBOL(inet_confirm_addr);
1269
1270/*
1271 *	Device notifier
1272 */
1273
1274int register_inetaddr_notifier(struct notifier_block *nb)
1275{
1276	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1277}
1278EXPORT_SYMBOL(register_inetaddr_notifier);
1279
1280int unregister_inetaddr_notifier(struct notifier_block *nb)
1281{
1282	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1283}
1284EXPORT_SYMBOL(unregister_inetaddr_notifier);
1285
1286/* Rename ifa_labels for a device name change. Make some effort to preserve
1287 * existing alias numbering and to create unique labels if possible.
1288*/
1289static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1290{
1291	struct in_ifaddr *ifa;
1292	int named = 0;
1293
1294	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1295		char old[IFNAMSIZ], *dot;
1296
1297		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1298		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1299		if (named++ == 0)
1300			goto skip;
1301		dot = strchr(old, ':');
1302		if (dot == NULL) {
1303			sprintf(old, ":%d", named);
1304			dot = old;
1305		}
1306		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1307			strcat(ifa->ifa_label, dot);
1308		else
1309			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1310skip:
1311		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1312	}
1313}
1314
/* An MTU below 68 bytes is treated as too small to run IP on the device. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1319
1320static void inetdev_send_gratuitous_arp(struct net_device *dev,
1321					struct in_device *in_dev)
1322
1323{
1324	struct in_ifaddr *ifa;
1325
1326	for (ifa = in_dev->ifa_list; ifa;
1327	     ifa = ifa->ifa_next) {
1328		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1329			 ifa->ifa_local, dev,
1330			 ifa->ifa_local, NULL,
1331			 dev->dev_addr, NULL);
1332	}
1333}
1334
1335/* Called only under RTNL semaphore */
1336
1337static int inetdev_event(struct notifier_block *this, unsigned long event,
1338			 void *ptr)
1339{
1340	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1341	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1342
1343	ASSERT_RTNL();
1344
1345	if (!in_dev) {
1346		if (event == NETDEV_REGISTER) {
1347			in_dev = inetdev_init(dev);
1348			if (!in_dev)
1349				return notifier_from_errno(-ENOMEM);
1350			if (dev->flags & IFF_LOOPBACK) {
1351				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1352				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1353			}
1354		} else if (event == NETDEV_CHANGEMTU) {
1355			/* Re-enabling IP */
1356			if (inetdev_valid_mtu(dev->mtu))
1357				in_dev = inetdev_init(dev);
1358		}
1359		goto out;
1360	}
1361
1362	switch (event) {
1363	case NETDEV_REGISTER:
1364		pr_debug("%s: bug\n", __func__);
1365		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1366		break;
1367	case NETDEV_UP:
1368		if (!inetdev_valid_mtu(dev->mtu))
1369			break;
1370		if (dev->flags & IFF_LOOPBACK) {
1371			struct in_ifaddr *ifa = inet_alloc_ifa();
1372
1373			if (ifa) {
1374				INIT_HLIST_NODE(&ifa->hash);
1375				ifa->ifa_local =
1376				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1377				ifa->ifa_prefixlen = 8;
1378				ifa->ifa_mask = inet_make_mask(8);
1379				in_dev_hold(in_dev);
1380				ifa->ifa_dev = in_dev;
1381				ifa->ifa_scope = RT_SCOPE_HOST;
1382				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1383				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1384						 INFINITY_LIFE_TIME);
1385				inet_insert_ifa(ifa);
1386			}
1387		}
1388		ip_mc_up(in_dev);
1389		/* fall through */
1390	case NETDEV_CHANGEADDR:
1391		if (!IN_DEV_ARP_NOTIFY(in_dev))
1392			break;
1393		/* fall through */
1394	case NETDEV_NOTIFY_PEERS:
1395		/* Send gratuitous ARP to notify of link change */
1396		inetdev_send_gratuitous_arp(dev, in_dev);
1397		break;
1398	case NETDEV_DOWN:
1399		ip_mc_down(in_dev);
1400		break;
1401	case NETDEV_PRE_TYPE_CHANGE:
1402		ip_mc_unmap(in_dev);
1403		break;
1404	case NETDEV_POST_TYPE_CHANGE:
1405		ip_mc_remap(in_dev);
1406		break;
1407	case NETDEV_CHANGEMTU:
1408		if (inetdev_valid_mtu(dev->mtu))
1409			break;
1410		/* disable IP when MTU is not enough */
1411	case NETDEV_UNREGISTER:
1412		inetdev_destroy(in_dev);
1413		break;
1414	case NETDEV_CHANGENAME:
1415		/* Do not notify about label change, this event is
1416		 * not interesting to applications using netlink.
1417		 */
1418		inetdev_changename(dev, in_dev);
1419
1420		devinet_sysctl_unregister(in_dev);
1421		devinet_sysctl_register(in_dev);
1422		break;
1423	}
1424out:
1425	return NOTIFY_DONE;
1426}
1427
/*
 * Notifier block routing netdevice events to inetdev_event(); registered
 * with register_netdevice_notifier() in devinet_init().
 */
1428static struct notifier_block ip_netdev_notifier = {
1429	.notifier_call = inetdev_event,
1430};
1431
1432static size_t inet_nlmsg_size(void)
1433{
1434	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1435	       + nla_total_size(4) /* IFA_ADDRESS */
1436	       + nla_total_size(4) /* IFA_LOCAL */
1437	       + nla_total_size(4) /* IFA_BROADCAST */
1438	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1439}
1440
1441static inline u32 cstamp_delta(unsigned long cstamp)
1442{
1443	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1444}
1445
1446static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1447			 unsigned long tstamp, u32 preferred, u32 valid)
1448{
1449	struct ifa_cacheinfo ci;
1450
1451	ci.cstamp = cstamp_delta(cstamp);
1452	ci.tstamp = cstamp_delta(tstamp);
1453	ci.ifa_prefered = preferred;
1454	ci.ifa_valid = valid;
1455
1456	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1457}
1458
1459static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1460			    u32 portid, u32 seq, int event, unsigned int flags)
1461{
1462	struct ifaddrmsg *ifm;
1463	struct nlmsghdr  *nlh;
1464	u32 preferred, valid;
1465
1466	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1467	if (nlh == NULL)
1468		return -EMSGSIZE;
1469
1470	ifm = nlmsg_data(nlh);
1471	ifm->ifa_family = AF_INET;
1472	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1473	ifm->ifa_flags = ifa->ifa_flags;
1474	ifm->ifa_scope = ifa->ifa_scope;
1475	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1476
1477	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1478		preferred = ifa->ifa_preferred_lft;
1479		valid = ifa->ifa_valid_lft;
1480		if (preferred != INFINITY_LIFE_TIME) {
1481			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1482
1483			if (preferred > tval)
1484				preferred -= tval;
1485			else
1486				preferred = 0;
1487			if (valid != INFINITY_LIFE_TIME) {
1488				if (valid > tval)
1489					valid -= tval;
1490				else
1491					valid = 0;
1492			}
1493		}
1494	} else {
1495		preferred = INFINITY_LIFE_TIME;
1496		valid = INFINITY_LIFE_TIME;
1497	}
1498	if ((ifa->ifa_address &&
1499	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1500	    (ifa->ifa_local &&
1501	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1502	    (ifa->ifa_broadcast &&
1503	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1504	    (ifa->ifa_label[0] &&
1505	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1506	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1507			  preferred, valid))
1508		goto nla_put_failure;
1509
1510	return nlmsg_end(skb, nlh);
1511
1512nla_put_failure:
1513	nlmsg_cancel(skb, nlh);
1514	return -EMSGSIZE;
1515}
1516
1517static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1518{
1519	struct net *net = sock_net(skb->sk);
1520	int h, s_h;
1521	int idx, s_idx;
1522	int ip_idx, s_ip_idx;
1523	struct net_device *dev;
1524	struct in_device *in_dev;
1525	struct in_ifaddr *ifa;
1526	struct hlist_head *head;
1527
1528	s_h = cb->args[0];
1529	s_idx = idx = cb->args[1];
1530	s_ip_idx = ip_idx = cb->args[2];
1531
1532	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1533		idx = 0;
1534		head = &net->dev_index_head[h];
1535		rcu_read_lock();
1536		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1537			  net->dev_base_seq;
1538		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1539			if (idx < s_idx)
1540				goto cont;
1541			if (h > s_h || idx > s_idx)
1542				s_ip_idx = 0;
1543			in_dev = __in_dev_get_rcu(dev);
1544			if (!in_dev)
1545				goto cont;
1546
1547			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1548			     ifa = ifa->ifa_next, ip_idx++) {
1549				if (ip_idx < s_ip_idx)
1550					continue;
1551				if (inet_fill_ifaddr(skb, ifa,
1552					     NETLINK_CB(cb->skb).portid,
1553					     cb->nlh->nlmsg_seq,
1554					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1555					rcu_read_unlock();
1556					goto done;
1557				}
1558				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1559			}
1560cont:
1561			idx++;
1562		}
1563		rcu_read_unlock();
1564	}
1565
1566done:
1567	cb->args[0] = h;
1568	cb->args[1] = idx;
1569	cb->args[2] = ip_idx;
1570
1571	return skb->len;
1572}
1573
1574static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1575		      u32 portid)
1576{
1577	struct sk_buff *skb;
1578	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1579	int err = -ENOBUFS;
1580	struct net *net;
1581
1582	net = dev_net(ifa->ifa_dev->dev);
1583	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1584	if (skb == NULL)
1585		goto errout;
1586
1587	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1588	if (err < 0) {
1589		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1590		WARN_ON(err == -EMSGSIZE);
1591		kfree_skb(skb);
1592		goto errout;
1593	}
1594	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1595	return;
1596errout:
1597	if (err < 0)
1598		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1599}
1600
1601static size_t inet_get_link_af_size(const struct net_device *dev)
1602{
1603	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1604
1605	if (!in_dev)
1606		return 0;
1607
1608	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1609}
1610
1611static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1612{
1613	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1614	struct nlattr *nla;
1615	int i;
1616
1617	if (!in_dev)
1618		return -ENODATA;
1619
1620	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1621	if (nla == NULL)
1622		return -EMSGSIZE;
1623
1624	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1625		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1626
1627	return 0;
1628}
1629
/*
 * Netlink policy for the IFLA_INET_* attributes, used by
 * inet_validate_link_af(): IFLA_INET_CONF must be a nested attribute.
 */
1630static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1631	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1632};
1633
1634static int inet_validate_link_af(const struct net_device *dev,
1635				 const struct nlattr *nla)
1636{
1637	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1638	int err, rem;
1639
1640	if (dev && !__in_dev_get_rtnl(dev))
1641		return -EAFNOSUPPORT;
1642
1643	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1644	if (err < 0)
1645		return err;
1646
1647	if (tb[IFLA_INET_CONF]) {
1648		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1649			int cfgid = nla_type(a);
1650
1651			if (nla_len(a) < 4)
1652				return -EINVAL;
1653
1654			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1655				return -EINVAL;
1656		}
1657	}
1658
1659	return 0;
1660}
1661
1662static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1663{
1664	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1665	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1666	int rem;
1667
1668	if (!in_dev)
1669		return -EAFNOSUPPORT;
1670
1671	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1672		BUG();
1673
1674	if (tb[IFLA_INET_CONF]) {
1675		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1676			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1677	}
1678
1679	return 0;
1680}
1681
1682static int inet_netconf_msgsize_devconf(int type)
1683{
1684	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1685		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1686
1687	/* type -1 is used for ALL */
1688	if (type == -1 || type == NETCONFA_FORWARDING)
1689		size += nla_total_size(4);
1690	if (type == -1 || type == NETCONFA_RP_FILTER)
1691		size += nla_total_size(4);
1692	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1693		size += nla_total_size(4);
1694
1695	return size;
1696}
1697
1698static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1699				     struct ipv4_devconf *devconf, u32 portid,
1700				     u32 seq, int event, unsigned int flags,
1701				     int type)
1702{
1703	struct nlmsghdr  *nlh;
1704	struct netconfmsg *ncm;
1705
1706	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1707			flags);
1708	if (nlh == NULL)
1709		return -EMSGSIZE;
1710
1711	ncm = nlmsg_data(nlh);
1712	ncm->ncm_family = AF_INET;
1713
1714	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1715		goto nla_put_failure;
1716
1717	/* type -1 is used for ALL */
1718	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1719	    nla_put_s32(skb, NETCONFA_FORWARDING,
1720			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1721		goto nla_put_failure;
1722	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1723	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1724			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1725		goto nla_put_failure;
1726	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1727	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1728			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1729		goto nla_put_failure;
1730
1731	return nlmsg_end(skb, nlh);
1732
1733nla_put_failure:
1734	nlmsg_cancel(skb, nlh);
1735	return -EMSGSIZE;
1736}
1737
1738void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1739				 struct ipv4_devconf *devconf)
1740{
1741	struct sk_buff *skb;
1742	int err = -ENOBUFS;
1743
1744	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1745	if (skb == NULL)
1746		goto errout;
1747
1748	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1749					RTM_NEWNETCONF, 0, type);
1750	if (err < 0) {
1751		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1752		WARN_ON(err == -EMSGSIZE);
1753		kfree_skb(skb);
1754		goto errout;
1755	}
1756	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1757	return;
1758errout:
1759	if (err < 0)
1760		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1761}
1762
/*
 * Netlink policy applied by inet_netconf_get_devconf() when parsing a
 * request; each listed NETCONFA_* attribute is given a length of
 * sizeof(int).
 */
1763static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1764	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1765	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1766	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1767};
1768
1769static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1770				    struct nlmsghdr *nlh)
1771{
1772	struct net *net = sock_net(in_skb->sk);
1773	struct nlattr *tb[NETCONFA_MAX+1];
1774	struct netconfmsg *ncm;
1775	struct sk_buff *skb;
1776	struct ipv4_devconf *devconf;
1777	struct in_device *in_dev;
1778	struct net_device *dev;
1779	int ifindex;
1780	int err;
1781
1782	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1783			  devconf_ipv4_policy);
1784	if (err < 0)
1785		goto errout;
1786
1787	err = EINVAL;
1788	if (!tb[NETCONFA_IFINDEX])
1789		goto errout;
1790
1791	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1792	switch (ifindex) {
1793	case NETCONFA_IFINDEX_ALL:
1794		devconf = net->ipv4.devconf_all;
1795		break;
1796	case NETCONFA_IFINDEX_DEFAULT:
1797		devconf = net->ipv4.devconf_dflt;
1798		break;
1799	default:
1800		dev = __dev_get_by_index(net, ifindex);
1801		if (dev == NULL)
1802			goto errout;
1803		in_dev = __in_dev_get_rtnl(dev);
1804		if (in_dev == NULL)
1805			goto errout;
1806		devconf = &in_dev->cnf;
1807		break;
1808	}
1809
1810	err = -ENOBUFS;
1811	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1812	if (skb == NULL)
1813		goto errout;
1814
1815	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1816					NETLINK_CB(in_skb).portid,
1817					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1818					-1);
1819	if (err < 0) {
1820		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1821		WARN_ON(err == -EMSGSIZE);
1822		kfree_skb(skb);
1823		goto errout;
1824	}
1825	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1826errout:
1827	return err;
1828}
1829
1830static int inet_netconf_dump_devconf(struct sk_buff *skb,
1831				     struct netlink_callback *cb)
1832{
1833	struct net *net = sock_net(skb->sk);
1834	int h, s_h;
1835	int idx, s_idx;
1836	struct net_device *dev;
1837	struct in_device *in_dev;
1838	struct hlist_head *head;
1839
1840	s_h = cb->args[0];
1841	s_idx = idx = cb->args[1];
1842
1843	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1844		idx = 0;
1845		head = &net->dev_index_head[h];
1846		rcu_read_lock();
1847		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1848			  net->dev_base_seq;
1849		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1850			if (idx < s_idx)
1851				goto cont;
1852			in_dev = __in_dev_get_rcu(dev);
1853			if (!in_dev)
1854				goto cont;
1855
1856			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1857						      &in_dev->cnf,
1858						      NETLINK_CB(cb->skb).portid,
1859						      cb->nlh->nlmsg_seq,
1860						      RTM_NEWNETCONF,
1861						      NLM_F_MULTI,
1862						      -1) <= 0) {
1863				rcu_read_unlock();
1864				goto done;
1865			}
1866			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1867cont:
1868			idx++;
1869		}
1870		rcu_read_unlock();
1871	}
1872	if (h == NETDEV_HASHENTRIES) {
1873		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1874					      net->ipv4.devconf_all,
1875					      NETLINK_CB(cb->skb).portid,
1876					      cb->nlh->nlmsg_seq,
1877					      RTM_NEWNETCONF, NLM_F_MULTI,
1878					      -1) <= 0)
1879			goto done;
1880		else
1881			h++;
1882	}
1883	if (h == NETDEV_HASHENTRIES + 1) {
1884		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1885					      net->ipv4.devconf_dflt,
1886					      NETLINK_CB(cb->skb).portid,
1887					      cb->nlh->nlmsg_seq,
1888					      RTM_NEWNETCONF, NLM_F_MULTI,
1889					      -1) <= 0)
1890			goto done;
1891		else
1892			h++;
1893	}
1894done:
1895	cb->args[0] = h;
1896	cb->args[1] = idx;
1897
1898	return skb->len;
1899}
1900
1901#ifdef CONFIG_SYSCTL
1902
1903static void devinet_copy_dflt_conf(struct net *net, int i)
1904{
1905	struct net_device *dev;
1906
1907	rcu_read_lock();
1908	for_each_netdev_rcu(net, dev) {
1909		struct in_device *in_dev;
1910
1911		in_dev = __in_dev_get_rcu(dev);
1912		if (in_dev && !test_bit(i, in_dev->cnf.state))
1913			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1914	}
1915	rcu_read_unlock();
1916}
1917
1918/* called with RTNL locked */
1919static void inet_forward_change(struct net *net)
1920{
1921	struct net_device *dev;
1922	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1923
1924	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1925	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1926	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1927				    NETCONFA_IFINDEX_ALL,
1928				    net->ipv4.devconf_all);
1929	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1930				    NETCONFA_IFINDEX_DEFAULT,
1931				    net->ipv4.devconf_dflt);
1932
1933	for_each_netdev(net, dev) {
1934		struct in_device *in_dev;
1935		if (on)
1936			dev_disable_lro(dev);
1937		rcu_read_lock();
1938		in_dev = __in_dev_get_rcu(dev);
1939		if (in_dev) {
1940			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1941			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1942						    dev->ifindex, &in_dev->cnf);
1943		}
1944		rcu_read_unlock();
1945	}
1946}
1947
1948static int devinet_conf_proc(struct ctl_table *ctl, int write,
1949			     void __user *buffer,
1950			     size_t *lenp, loff_t *ppos)
1951{
1952	int old_value = *(int *)ctl->data;
1953	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1954	int new_value = *(int *)ctl->data;
1955
1956	if (write) {
1957		struct ipv4_devconf *cnf = ctl->extra1;
1958		struct net *net = ctl->extra2;
1959		int i = (int *)ctl->data - cnf->data;
1960
1961		set_bit(i, cnf->state);
1962
1963		if (cnf == net->ipv4.devconf_dflt)
1964			devinet_copy_dflt_conf(net, i);
1965		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1966		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1967			if ((new_value == 0) && (old_value != 0))
1968				rt_cache_flush(net);
1969		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1970		    new_value != old_value) {
1971			int ifindex;
1972
1973			if (cnf == net->ipv4.devconf_dflt)
1974				ifindex = NETCONFA_IFINDEX_DEFAULT;
1975			else if (cnf == net->ipv4.devconf_all)
1976				ifindex = NETCONFA_IFINDEX_ALL;
1977			else {
1978				struct in_device *idev =
1979					container_of(cnf, struct in_device,
1980						     cnf);
1981				ifindex = idev->dev->ifindex;
1982			}
1983			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1984						    ifindex, cnf);
1985		}
1986	}
1987
1988	return ret;
1989}
1990
1991static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1992				  void __user *buffer,
1993				  size_t *lenp, loff_t *ppos)
1994{
1995	int *valp = ctl->data;
1996	int val = *valp;
1997	loff_t pos = *ppos;
1998	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1999
2000	if (write && *valp != val) {
2001		struct net *net = ctl->extra2;
2002
2003		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2004			if (!rtnl_trylock()) {
2005				/* Restore the original values before restarting */
2006				*valp = val;
2007				*ppos = pos;
2008				return restart_syscall();
2009			}
2010			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2011				inet_forward_change(net);
2012			} else {
2013				struct ipv4_devconf *cnf = ctl->extra1;
2014				struct in_device *idev =
2015					container_of(cnf, struct in_device, cnf);
2016				if (*valp)
2017					dev_disable_lro(idev->dev);
2018				inet_netconf_notify_devconf(net,
2019							    NETCONFA_FORWARDING,
2020							    idev->dev->ifindex,
2021							    cnf);
2022			}
2023			rtnl_unlock();
2024			rt_cache_flush(net);
2025		} else
2026			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2027						    NETCONFA_IFINDEX_DEFAULT,
2028						    net->ipv4.devconf_dflt);
2029	}
2030
2031	return ret;
2032}
2033
2034static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2035				void __user *buffer,
2036				size_t *lenp, loff_t *ppos)
2037{
2038	int *valp = ctl->data;
2039	int val = *valp;
2040	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2041	struct net *net = ctl->extra2;
2042
2043	if (write && *valp != val)
2044		rt_cache_flush(net);
2045
2046	return ret;
2047}
2048
/*
 * Build one ctl_table initializer for configuration value
 * IPV4_DEVCONF_<attr>: file name @name, mode @mval, handler @proc.
 * .data and .extra1 point into the global ipv4_devconf template;
 * __devinet_sysctl_register() rebases them onto the real configuration.
 */
2049#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2050	{ \
2051		.procname	= name, \
2052		.data		= ipv4_devconf.data + \
2053				  IPV4_DEVCONF_ ## attr - 1, \
2054		.maxlen		= sizeof(int), \
2055		.mode		= mval, \
2056		.proc_handler	= proc, \
2057		.extra1		= &ipv4_devconf, \
2058	}
2059
/* Mode 0644 entry handled by devinet_conf_proc() */
2060#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2061	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2062
/* Mode 0444 entry handled by devinet_conf_proc() */
2063#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2064	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2065
/* Mode 0644 entry with a caller-supplied handler */
2066#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2067	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2068
/* Mode 0644 entry handled by ipv4_doint_and_flush() */
2069#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2070	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2071
/*
 * Template sysctl table for one IPv4 configuration directory.
 * __devinet_sysctl_register() duplicates it with kmemdup() and points the
 * copy's entries at a concrete struct ipv4_devconf.  The array is sized
 * __IPV4_DEVCONF_MAX; entries not initialized below stay zeroed.
 */
2072static struct devinet_sysctl_table {
2073	struct ctl_table_header *sysctl_header;
2074	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2075} devinet_sysctl = {
2076	.devinet_vars = {
2077		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2078					     devinet_sysctl_forward),
2079		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2080
2081		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2082		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2083		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2084		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2085		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2086		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2087					"accept_source_route"),
2088		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2089		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2090		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2091		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2092		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2093		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2094		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2095		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2096		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2097		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2098		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2099		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2100		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2101		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2102					"force_igmp_version"),
2103		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2104					"igmpv2_unsolicited_report_interval"),
2105		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2106					"igmpv3_unsolicited_report_interval"),
2107
		/* Entries below use ipv4_doint_and_flush() as their handler */
2108		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2109		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2110		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2111					      "promote_secondaries"),
2112		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2113					      "route_localnet"),
2114	},
2115};
2116
2117static int __devinet_sysctl_register(struct net *net, char *dev_name,
2118					struct ipv4_devconf *p)
2119{
2120	int i;
2121	struct devinet_sysctl_table *t;
2122	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2123
2124	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2125	if (!t)
2126		goto out;
2127
2128	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2129		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2130		t->devinet_vars[i].extra1 = p;
2131		t->devinet_vars[i].extra2 = net;
2132	}
2133
2134	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2135
2136	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2137	if (!t->sysctl_header)
2138		goto free;
2139
2140	p->sysctl = t;
2141	return 0;
2142
2143free:
2144	kfree(t);
2145out:
2146	return -ENOBUFS;
2147}
2148
2149static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2150{
2151	struct devinet_sysctl_table *t = cnf->sysctl;
2152
2153	if (t == NULL)
2154		return;
2155
2156	cnf->sysctl = NULL;
2157	unregister_net_sysctl_table(t->sysctl_header);
2158	kfree(t);
2159}
2160
2161static void devinet_sysctl_register(struct in_device *idev)
2162{
2163	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2164	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2165					&idev->cnf);
2166}
2167
2168static void devinet_sysctl_unregister(struct in_device *idev)
2169{
2170	__devinet_sysctl_unregister(&idev->cnf);
2171	neigh_sysctl_unregister(idev->arp_parms);
2172}
2173
/*
 * Table with the single "ip_forward" entry that devinet_init_net()
 * registers under "net/ipv4".  As written it refers to the global
 * ipv4_devconf and init_net; for other namespaces devinet_init_net()
 * duplicates it and redirects .data, .extra1 and .extra2.
 */
2174static struct ctl_table ctl_forward_entry[] = {
2175	{
2176		.procname	= "ip_forward",
2177		.data		= &ipv4_devconf.data[
2178					IPV4_DEVCONF_FORWARDING - 1],
2179		.maxlen		= sizeof(int),
2180		.mode		= 0644,
2181		.proc_handler	= devinet_sysctl_forward,
2182		.extra1		= &ipv4_devconf,
2183		.extra2		= &init_net,
2184	},
2185	{ },
2186};
2187#endif
2188
/*
 * Per-namespace init: set up the "all" and "default" IPv4 device
 * configurations for @net and, with CONFIG_SYSCTL, their sysctl entries
 * plus "net/ipv4/ip_forward".
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry directly; every other namespace gets kmemdup()
 * copies of them.
 *
 * Returns 0 on success.  On failure everything set up so far is undone
 * and a negative errno is returned: -ENOMEM for allocation or ip_forward
 * registration failures, otherwise the __devinet_sysctl_register() error.
 */
2189static __net_init int devinet_init_net(struct net *net)
2190{
2191	int err;
2192	struct ipv4_devconf *all, *dflt;
2193#ifdef CONFIG_SYSCTL
2194	struct ctl_table *tbl = ctl_forward_entry;
2195	struct ctl_table_header *forw_hdr;
2196#endif
2197
2198	err = -ENOMEM;
2199	all = &ipv4_devconf;
2200	dflt = &ipv4_devconf_dflt;
2201
	/* Namespaces other than init_net get private copies */
2202	if (!net_eq(net, &init_net)) {
2203		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2204		if (all == NULL)
2205			goto err_alloc_all;
2206
2207		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2208		if (dflt == NULL)
2209			goto err_alloc_dflt;
2210
2211#ifdef CONFIG_SYSCTL
2212		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2213		if (tbl == NULL)
2214			goto err_alloc_ctl;
2215
		/* Point the copied ip_forward entry at this namespace */
2216		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2217		tbl[0].extra1 = all;
2218		tbl[0].extra2 = net;
2219#endif
2220	}
2221
2222#ifdef CONFIG_SYSCTL
2223	err = __devinet_sysctl_register(net, "all", all);
2224	if (err < 0)
2225		goto err_reg_all;
2226
2227	err = __devinet_sysctl_register(net, "default", dflt);
2228	if (err < 0)
2229		goto err_reg_dflt;
2230
2231	err = -ENOMEM;
2232	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2233	if (forw_hdr == NULL)
2234		goto err_reg_ctl;
2235	net->ipv4.forw_hdr = forw_hdr;
2236#endif
2237
2238	net->ipv4.devconf_all = all;
2239	net->ipv4.devconf_dflt = dflt;
2240	return 0;
2241
	/* Error unwinding, in reverse order of setup.  The static
	 * templates used by init_net are never freed.
	 */
2242#ifdef CONFIG_SYSCTL
2243err_reg_ctl:
2244	__devinet_sysctl_unregister(dflt);
2245err_reg_dflt:
2246	__devinet_sysctl_unregister(all);
2247err_reg_all:
2248	if (tbl != ctl_forward_entry)
2249		kfree(tbl);
2250err_alloc_ctl:
2251#endif
2252	if (dflt != &ipv4_devconf_dflt)
2253		kfree(dflt);
2254err_alloc_dflt:
2255	if (all != &ipv4_devconf)
2256		kfree(all);
2257err_alloc_all:
2258	return err;
2259}
2260
2261static __net_exit void devinet_exit_net(struct net *net)
2262{
2263#ifdef CONFIG_SYSCTL
2264	struct ctl_table *tbl;
2265
2266	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2267	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2268	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2269	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2270	kfree(tbl);
2271#endif
2272	kfree(net->ipv4.devconf_dflt);
2273	kfree(net->ipv4.devconf_all);
2274}
2275
/*
 * Per-namespace hooks, registered by devinet_init() with
 * register_pernet_subsys().
 */
2276static __net_initdata struct pernet_operations devinet_ops = {
2277	.init = devinet_init_net,
2278	.exit = devinet_exit_net,
2279};
2280
/*
 * AF_INET link attribute operations, registered by devinet_init() with
 * rtnl_af_register().
 */
2281static struct rtnl_af_ops inet_af_ops = {
2282	.family		  = AF_INET,
2283	.fill_link_af	  = inet_fill_link_af,
2284	.get_link_af_size = inet_get_link_af_size,
2285	.validate_link_af = inet_validate_link_af,
2286	.set_link_af	  = inet_set_link_af,
2287};
2288
/*
 * Boot-time initialization of the IPv4 device layer: initialize the
 * address hash table heads, then register the per-namespace operations,
 * the PF_INET gifconf handler, the netdevice notifier, the lifetime
 * checking work, the AF_INET link attribute ops and the rtnetlink
 * message handlers.
 */
2289void __init devinet_init(void)
2290{
2291	int i;
2292
2293	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2294		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2295
2296	register_pernet_subsys(&devinet_ops);
2297
2298	register_gifconf(PF_INET, inet_gifconf);
2299	register_netdevice_notifier(&ip_netdev_notifier);
2300
	/* Queue check_lifetime_work with a delay of 0 */
2301	schedule_delayed_work(&check_lifetime_work, 0);
2302
2303	rtnl_af_register(&inet_af_ops);
2304
	/* RTM_GETADDR and RTM_GETNETCONF also get dump callbacks */
2305	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2306	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2307	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2308	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2309		      inet_netconf_dump_devconf, NULL);
2310}
2311
2312