devinet.c revision 20e61da7ffcfd84a1b6f797e745608572e5bc218
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <linux/bitops.h>
31#include <linux/capability.h>
32#include <linux/module.h>
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/string.h>
36#include <linux/mm.h>
37#include <linux/socket.h>
38#include <linux/sockios.h>
39#include <linux/in.h>
40#include <linux/errno.h>
41#include <linux/interrupt.h>
42#include <linux/if_addr.h>
43#include <linux/if_ether.h>
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/etherdevice.h>
47#include <linux/skbuff.h>
48#include <linux/init.h>
49#include <linux/notifier.h>
50#include <linux/inetdevice.h>
51#include <linux/igmp.h>
52#include <linux/slab.h>
53#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58#include <linux/netconf.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66#include <net/addrconf.h>
67
68#include "fib_lookup.h"
69
70static struct ipv4_devconf ipv4_devconf = {
71	.data = {
72		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78	},
79};
80
81static struct ipv4_devconf ipv4_devconf_dflt = {
82	.data = {
83		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90	},
91};
92
93#define IPV4_DEVCONF_DFLT(net, attr) \
94	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95
96static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97	[IFA_LOCAL]     	= { .type = NLA_U32 },
98	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102	[IFA_FLAGS]		= { .type = NLA_U32 },
103};
104
105#define IN4_ADDR_HSIZE_SHIFT	8
106#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107
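/*
 * All IPv4 addresses in the system, hashed by local address mixed with
 * the owning netns.  __ip_dev_find() uses this table to map a source
 * address back to its device without walking every interface.
 */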
108static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110static u32 inet_addr_hash(struct net *net, __be32 addr)
111{
112	u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115}
116
117static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118{
119	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121	ASSERT_RTNL();
122	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123}
124
125static void inet_hash_remove(struct in_ifaddr *ifa)
126{
127	ASSERT_RTNL();
128	hlist_del_init_rcu(&ifa->hash);
129}
130
131/**
132 * __ip_dev_find - find the first device with a given source address.
133 * @net: the net namespace
134 * @addr: the source address
135 * @devref: if true, take a reference on the found device
136 *
137 * If a caller uses devref=false, it should be protected by RCU or RTNL.
138 */
139struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140{
141	u32 hash = inet_addr_hash(net, addr);
142	struct net_device *result = NULL;
143	struct in_ifaddr *ifa;
144
145	rcu_read_lock();
146	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147		if (ifa->ifa_local == addr) {
148			struct net_device *dev = ifa->ifa_dev->dev;
149
150			if (!net_eq(dev_net(dev), net))
151				continue;
152			result = dev;
153			break;
154		}
155	}
156	if (!result) {
157		struct flowi4 fl4 = { .daddr = addr };
158		struct fib_result res = { 0 };
159		struct fib_table *local;
160
161		/* Fall back to the FIB local table so that communication
162		 * over loopback subnets works.
163		 */
164		local = fib_get_table(net, RT_TABLE_LOCAL);
165		if (local &&
166		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167		    res.type == RTN_LOCAL)
168			result = FIB_RES_DEV(res);
169	}
170	if (result && devref)
171		dev_hold(result);
172	rcu_read_unlock();
173	return result;
174}
175EXPORT_SYMBOL(__ip_dev_find);
176
177static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
179static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181			 int destroy);
182#ifdef CONFIG_SYSCTL
183static int devinet_sysctl_register(struct in_device *idev);
184static void devinet_sysctl_unregister(struct in_device *idev);
185#else
186static int devinet_sysctl_register(struct in_device *idev)
187{
188	return 0;
189}
190static void devinet_sysctl_unregister(struct in_device *idev)
191{
192}
193#endif
194
195/* in_ifaddr allocation and freeing helpers. */
196
197static struct in_ifaddr *inet_alloc_ifa(void)
198{
199	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200}
201
202static void inet_rcu_free_ifa(struct rcu_head *head)
203{
204	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205	if (ifa->ifa_dev)
206		in_dev_put(ifa->ifa_dev);
207	kfree(ifa);
208}
209
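/*
 * Freeing is deferred through an RCU grace period so that lockless
 * readers walking the hash or the per-device address list under
 * rcu_read_lock() never dereference a freed in_ifaddr.
 */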
210static void inet_free_ifa(struct in_ifaddr *ifa)
211{
212	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213}
214
215void in_dev_finish_destroy(struct in_device *idev)
216{
217	struct net_device *dev = idev->dev;
218
219	WARN_ON(idev->ifa_list);
220	WARN_ON(idev->mc_list);
221	kfree(rcu_dereference_protected(idev->mc_hash, 1));
222#ifdef NET_REFCNT_DEBUG
223	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224#endif
225	dev_put(dev);
226	if (!idev->dead)
227		pr_err("Freeing alive in_device %p\n", idev);
228	else
229		kfree(idev);
230}
231EXPORT_SYMBOL(in_dev_finish_destroy);
232
233static struct in_device *inetdev_init(struct net_device *dev)
234{
235	struct in_device *in_dev;
236	int err = -ENOMEM;
237
238	ASSERT_RTNL();
239
240	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241	if (!in_dev)
242		goto out;
243	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244			sizeof(in_dev->cnf));
245	in_dev->cnf.sysctl = NULL;
246	in_dev->dev = dev;
247	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248	if (!in_dev->arp_parms)
249		goto out_kfree;
250	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251		dev_disable_lro(dev);
252	/* Reference in_dev->dev */
253	dev_hold(dev);
254	/* Account for reference dev->ip_ptr (below) */
255	in_dev_hold(in_dev);
256
257	err = devinet_sysctl_register(in_dev);
258	if (err) {
259		in_dev->dead = 1;
260		in_dev_put(in_dev);
261		in_dev = NULL;
262		goto out;
263	}
264	ip_mc_init_dev(in_dev);
265	if (dev->flags & IFF_UP)
266		ip_mc_up(in_dev);
267
268	/* we can receive as soon as ip_ptr is set -- do this last */
269	rcu_assign_pointer(dev->ip_ptr, in_dev);
270out:
271	return in_dev ?: ERR_PTR(err);
272out_kfree:
273	kfree(in_dev);
274	in_dev = NULL;
275	goto out;
276}
277
278static void in_dev_rcu_put(struct rcu_head *head)
279{
280	struct in_device *idev = container_of(head, struct in_device, rcu_head);
281	in_dev_put(idev);
282}
283
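/*
 * Tear down all IPv4 state attached to a device: multicast state, every
 * remaining in_ifaddr, the sysctl tree and the ARP parameters.  Called
 * under RTNL when the device is unregistered or its MTU becomes too
 * small to carry IP.
 */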
284static void inetdev_destroy(struct in_device *in_dev)
285{
286	struct in_ifaddr *ifa;
287	struct net_device *dev;
288
289	ASSERT_RTNL();
290
291	dev = in_dev->dev;
292
293	in_dev->dead = 1;
294
295	ip_mc_destroy_dev(in_dev);
296
297	while ((ifa = in_dev->ifa_list) != NULL) {
298		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299		inet_free_ifa(ifa);
300	}
301
302	RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304	devinet_sysctl_unregister(in_dev);
305	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306	arp_ifdown(dev);
307
308	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309}
310
311int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312{
313	rcu_read_lock();
314	for_primary_ifa(in_dev) {
315		if (inet_ifa_match(a, ifa)) {
316			if (!b || inet_ifa_match(b, ifa)) {
317				rcu_read_unlock();
318				return 1;
319			}
320		}
321	} endfor_ifa(in_dev);
322	rcu_read_unlock();
323	return 0;
324}
325
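/*
 * Unlink *ifap from the device.  If it is a primary address, its
 * secondaries are either deleted as well or, when promote_secondaries
 * is enabled, one of them is promoted to primary and the others are
 * rewired behind it.  RTM_DELADDR/RTM_NEWADDR messages and notifier
 * calls are issued accordingly.
 */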
326static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327			 int destroy, struct nlmsghdr *nlh, u32 portid)
328{
329	struct in_ifaddr *promote = NULL;
330	struct in_ifaddr *ifa, *ifa1 = *ifap;
331	struct in_ifaddr *last_prim = in_dev->ifa_list;
332	struct in_ifaddr *prev_prom = NULL;
333	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334
335	ASSERT_RTNL();
336
337	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
338	 * unless alias promotion is set.
339	 */
340
341	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
343
344		while ((ifa = *ifap1) != NULL) {
345			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346			    ifa1->ifa_scope <= ifa->ifa_scope)
347				last_prim = ifa;
348
349			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350			    ifa1->ifa_mask != ifa->ifa_mask ||
351			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
352				ifap1 = &ifa->ifa_next;
353				prev_prom = ifa;
354				continue;
355			}
356
357			if (!do_promote) {
358				inet_hash_remove(ifa);
359				*ifap1 = ifa->ifa_next;
360
361				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362				blocking_notifier_call_chain(&inetaddr_chain,
363						NETDEV_DOWN, ifa);
364				inet_free_ifa(ifa);
365			} else {
366				promote = ifa;
367				break;
368			}
369		}
370	}
371
372	/* On promotion all secondaries from the subnet change
373	 * their primary IP; we must remove all their routes silently
374	 * and later add them back with the new prefsrc. Do this
375	 * while all addresses are still on the device list.
376	 */
377	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378		if (ifa1->ifa_mask == ifa->ifa_mask &&
379		    inet_ifa_match(ifa1->ifa_address, ifa))
380			fib_del_ifaddr(ifa, ifa1);
381	}
382
383	/* 2. Unlink it */
384
385	*ifap = ifa1->ifa_next;
386	inet_hash_remove(ifa1);
387
388	/* 3. Announce address deletion */
389
390	/* Send the message first, then call the notifier.
391	   At first sight, the FIB update triggered by the notifier
392	   will refer to an already deleted ifaddr, which could confuse
393	   netlink listeners. It is not true: look, gated sees
394	   the route deleted and, if it still thinks the ifaddr
395	   is valid, it will try to restore the deleted routes... Grr.
396	   So this order is correct.
397	 */
398	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
400
401	if (promote) {
402		struct in_ifaddr *next_sec = promote->ifa_next;
403
404		if (prev_prom) {
405			prev_prom->ifa_next = promote->ifa_next;
406			promote->ifa_next = last_prim->ifa_next;
407			last_prim->ifa_next = promote;
408		}
409
410		promote->ifa_flags &= ~IFA_F_SECONDARY;
411		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412		blocking_notifier_call_chain(&inetaddr_chain,
413				NETDEV_UP, promote);
414		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415			if (ifa1->ifa_mask != ifa->ifa_mask ||
416			    !inet_ifa_match(ifa1->ifa_address, ifa))
417				continue;
418			fib_add_ifaddr(ifa);
419		}
420
421	}
422	if (destroy)
423		inet_free_ifa(ifa1);
424}
425
426static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
427			 int destroy)
428{
429	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
430}
431
432static void check_lifetime(struct work_struct *work);
433
434static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435
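/*
 * Insert ifa into its device's address list.  An address that falls
 * into the subnet of an existing primary on the same device becomes a
 * secondary (IFA_F_SECONDARY); otherwise it is inserted among the
 * primaries.  Adding an exact duplicate returns -EEXIST; a subnet match
 * with a different scope returns -EINVAL.
 */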
436static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437			     u32 portid)
438{
439	struct in_device *in_dev = ifa->ifa_dev;
440	struct in_ifaddr *ifa1, **ifap, **last_primary;
441
442	ASSERT_RTNL();
443
444	if (!ifa->ifa_local) {
445		inet_free_ifa(ifa);
446		return 0;
447	}
448
449	ifa->ifa_flags &= ~IFA_F_SECONDARY;
450	last_primary = &in_dev->ifa_list;
451
452	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453	     ifap = &ifa1->ifa_next) {
454		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455		    ifa->ifa_scope <= ifa1->ifa_scope)
456			last_primary = &ifa1->ifa_next;
457		if (ifa1->ifa_mask == ifa->ifa_mask &&
458		    inet_ifa_match(ifa1->ifa_address, ifa)) {
459			if (ifa1->ifa_local == ifa->ifa_local) {
460				inet_free_ifa(ifa);
461				return -EEXIST;
462			}
463			if (ifa1->ifa_scope != ifa->ifa_scope) {
464				inet_free_ifa(ifa);
465				return -EINVAL;
466			}
467			ifa->ifa_flags |= IFA_F_SECONDARY;
468		}
469	}
470
471	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472		prandom_seed((__force u32) ifa->ifa_local);
473		ifap = last_primary;
474	}
475
476	ifa->ifa_next = *ifap;
477	*ifap = ifa;
478
479	inet_hash_insert(dev_net(in_dev->dev), ifa);
480
481	cancel_delayed_work(&check_lifetime_work);
482	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483
484	/* Send the message first, then call the notifier.
485	   The notifier will trigger the FIB update, so that
486	   netlink listeners will know about the new ifaddr. */
487	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489
490	return 0;
491}
492
493static int inet_insert_ifa(struct in_ifaddr *ifa)
494{
495	return __inet_insert_ifa(ifa, NULL, 0);
496}
497
498static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499{
500	struct in_device *in_dev = __in_dev_get_rtnl(dev);
501
502	ASSERT_RTNL();
503
504	if (!in_dev) {
505		inet_free_ifa(ifa);
506		return -ENOBUFS;
507	}
508	ipv4_devconf_setall(in_dev);
509	neigh_parms_data_state_setall(in_dev->arp_parms);
510	if (ifa->ifa_dev != in_dev) {
511		WARN_ON(ifa->ifa_dev);
512		in_dev_hold(in_dev);
513		ifa->ifa_dev = in_dev;
514	}
515	if (ipv4_is_loopback(ifa->ifa_local))
516		ifa->ifa_scope = RT_SCOPE_HOST;
517	return inet_insert_ifa(ifa);
518}
519
520/* Caller must hold RCU or RTNL:
521 * we don't take a reference on the found in_device.
522 */
523struct in_device *inetdev_by_index(struct net *net, int ifindex)
524{
525	struct net_device *dev;
526	struct in_device *in_dev = NULL;
527
528	rcu_read_lock();
529	dev = dev_get_by_index_rcu(net, ifindex);
530	if (dev)
531		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532	rcu_read_unlock();
533	return in_dev;
534}
535EXPORT_SYMBOL(inetdev_by_index);
536
537/* Called only with the RTNL lock held. No other locks are taken. */
538
539struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540				    __be32 mask)
541{
542	ASSERT_RTNL();
543
544	for_primary_ifa(in_dev) {
545		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546			return ifa;
547	} endfor_ifa(in_dev);
548	return NULL;
549}
550
551static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
552{
553	struct net *net = sock_net(skb->sk);
554	struct nlattr *tb[IFA_MAX+1];
555	struct in_device *in_dev;
556	struct ifaddrmsg *ifm;
557	struct in_ifaddr *ifa, **ifap;
558	int err = -EINVAL;
559
560	ASSERT_RTNL();
561
562	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
563	if (err < 0)
564		goto errout;
565
566	ifm = nlmsg_data(nlh);
567	in_dev = inetdev_by_index(net, ifm->ifa_index);
568	if (in_dev == NULL) {
569		err = -ENODEV;
570		goto errout;
571	}
572
573	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
574	     ifap = &ifa->ifa_next) {
575		if (tb[IFA_LOCAL] &&
576		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
577			continue;
578
579		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
580			continue;
581
582		if (tb[IFA_ADDRESS] &&
583		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
584		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
585			continue;
586
587		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
588		return 0;
589	}
590
591	err = -EADDRNOTAVAIL;
592errout:
593	return err;
594}
595
596#define INFINITY_LIFE_TIME	0xFFFFFFFF
597
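/*
 * Periodic worker handling address lifetimes.  A first pass over the
 * hash is done under RCU only to see whether anything expired; only
 * then is RTNL taken to delete addresses whose valid lifetime ran out
 * and to mark those past their preferred lifetime IFA_F_DEPRECATED.
 * The work re-arms itself for the next closest expiry.
 */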
598static void check_lifetime(struct work_struct *work)
599{
600	unsigned long now, next, next_sec, next_sched;
601	struct in_ifaddr *ifa;
602	struct hlist_node *n;
603	int i;
604
605	now = jiffies;
606	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
607
608	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
609		bool change_needed = false;
610
611		rcu_read_lock();
612		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
613			unsigned long age;
614
615			if (ifa->ifa_flags & IFA_F_PERMANENT)
616				continue;
617
618			/* We try to batch several events at once. */
619			age = (now - ifa->ifa_tstamp +
620			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
621
622			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
623			    age >= ifa->ifa_valid_lft) {
624				change_needed = true;
625			} else if (ifa->ifa_preferred_lft ==
626				   INFINITY_LIFE_TIME) {
627				continue;
628			} else if (age >= ifa->ifa_preferred_lft) {
629				if (time_before(ifa->ifa_tstamp +
630						ifa->ifa_valid_lft * HZ, next))
631					next = ifa->ifa_tstamp +
632					       ifa->ifa_valid_lft * HZ;
633
634				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
635					change_needed = true;
636			} else if (time_before(ifa->ifa_tstamp +
637					       ifa->ifa_preferred_lft * HZ,
638					       next)) {
639				next = ifa->ifa_tstamp +
640				       ifa->ifa_preferred_lft * HZ;
641			}
642		}
643		rcu_read_unlock();
644		if (!change_needed)
645			continue;
646		rtnl_lock();
647		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
648			unsigned long age;
649
650			if (ifa->ifa_flags & IFA_F_PERMANENT)
651				continue;
652
653			/* We try to batch several events at once. */
654			age = (now - ifa->ifa_tstamp +
655			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
656
657			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
658			    age >= ifa->ifa_valid_lft) {
659				struct in_ifaddr **ifap;
660
661				for (ifap = &ifa->ifa_dev->ifa_list;
662				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
663					if (*ifap == ifa) {
664						inet_del_ifa(ifa->ifa_dev,
665							     ifap, 1);
666						break;
667					}
668				}
669			} else if (ifa->ifa_preferred_lft !=
670				   INFINITY_LIFE_TIME &&
671				   age >= ifa->ifa_preferred_lft &&
672				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
673				ifa->ifa_flags |= IFA_F_DEPRECATED;
674				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
675			}
676		}
677		rtnl_unlock();
678	}
679
680	next_sec = round_jiffies_up(next);
681	next_sched = next;
682
683	/* If rounded timeout is accurate enough, accept it. */
684	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
685		next_sched = next_sec;
686
687	now = jiffies;
688	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
689	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
690		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
691
692	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
693			next_sched - now);
694}
695
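/*
 * Convert the user-supplied lifetimes (seconds, INFINITY_LIFE_TIME for
 * "forever") into jiffies-based state: an infinite valid lifetime makes
 * the address IFA_F_PERMANENT, a zero preferred lifetime marks it
 * IFA_F_DEPRECATED immediately.
 */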
696static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
697			     __u32 prefered_lft)
698{
699	unsigned long timeout;
700
701	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
702
703	timeout = addrconf_timeout_fixup(valid_lft, HZ);
704	if (addrconf_finite_timeout(timeout))
705		ifa->ifa_valid_lft = timeout;
706	else
707		ifa->ifa_flags |= IFA_F_PERMANENT;
708
709	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
710	if (addrconf_finite_timeout(timeout)) {
711		if (timeout == 0)
712			ifa->ifa_flags |= IFA_F_DEPRECATED;
713		ifa->ifa_preferred_lft = timeout;
714	}
715	ifa->ifa_tstamp = jiffies;
716	if (!ifa->ifa_cstamp)
717		ifa->ifa_cstamp = ifa->ifa_tstamp;
718}
719
720static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
721				       __u32 *pvalid_lft, __u32 *pprefered_lft)
722{
723	struct nlattr *tb[IFA_MAX+1];
724	struct in_ifaddr *ifa;
725	struct ifaddrmsg *ifm;
726	struct net_device *dev;
727	struct in_device *in_dev;
728	int err;
729
730	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
731	if (err < 0)
732		goto errout;
733
734	ifm = nlmsg_data(nlh);
735	err = -EINVAL;
736	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
737		goto errout;
738
739	dev = __dev_get_by_index(net, ifm->ifa_index);
740	err = -ENODEV;
741	if (dev == NULL)
742		goto errout;
743
744	in_dev = __in_dev_get_rtnl(dev);
745	err = -ENOBUFS;
746	if (in_dev == NULL)
747		goto errout;
748
749	ifa = inet_alloc_ifa();
750	if (ifa == NULL)
751		/*
752		 * A potential in_dev allocation can be left alive; it stays
753		 * assigned to its device and is destroyed with it.
754		 */
755		goto errout;
756
757	ipv4_devconf_setall(in_dev);
758	neigh_parms_data_state_setall(in_dev->arp_parms);
759	in_dev_hold(in_dev);
760
761	if (tb[IFA_ADDRESS] == NULL)
762		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
763
764	INIT_HLIST_NODE(&ifa->hash);
765	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
766	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
767	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
768					 ifm->ifa_flags;
769	ifa->ifa_scope = ifm->ifa_scope;
770	ifa->ifa_dev = in_dev;
771
772	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
773	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
774
775	if (tb[IFA_BROADCAST])
776		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
777
778	if (tb[IFA_LABEL])
779		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
780	else
781		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
782
783	if (tb[IFA_CACHEINFO]) {
784		struct ifa_cacheinfo *ci;
785
786		ci = nla_data(tb[IFA_CACHEINFO]);
787		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
788			err = -EINVAL;
789			goto errout_free;
790		}
791		*pvalid_lft = ci->ifa_valid;
792		*pprefered_lft = ci->ifa_prefered;
793	}
794
795	return ifa;
796
797errout_free:
798	inet_free_ifa(ifa);
799errout:
800	return ERR_PTR(err);
801}
802
803static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
804{
805	struct in_device *in_dev = ifa->ifa_dev;
806	struct in_ifaddr *ifa1, **ifap;
807
808	if (!ifa->ifa_local)
809		return NULL;
810
811	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
812	     ifap = &ifa1->ifa_next) {
813		if (ifa1->ifa_mask == ifa->ifa_mask &&
814		    inet_ifa_match(ifa1->ifa_address, ifa) &&
815		    ifa1->ifa_local == ifa->ifa_local)
816			return ifa1;
817	}
818	return NULL;
819}
820
821static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
822{
823	struct net *net = sock_net(skb->sk);
824	struct in_ifaddr *ifa;
825	struct in_ifaddr *ifa_existing;
826	__u32 valid_lft = INFINITY_LIFE_TIME;
827	__u32 prefered_lft = INFINITY_LIFE_TIME;
828
829	ASSERT_RTNL();
830
831	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
832	if (IS_ERR(ifa))
833		return PTR_ERR(ifa);
834
835	ifa_existing = find_matching_ifa(ifa);
836	if (!ifa_existing) {
837		/* It would be best to check for !NLM_F_CREATE here but
838		 * userspace already relies on not having to provide this.
839		 */
840		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
841		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
842	} else {
843		inet_free_ifa(ifa);
844
845		if (nlh->nlmsg_flags & NLM_F_EXCL ||
846		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
847			return -EEXIST;
848		ifa = ifa_existing;
849		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
850		cancel_delayed_work(&check_lifetime_work);
851		queue_delayed_work(system_power_efficient_wq,
852				&check_lifetime_work, 0);
853		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
854		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
855	}
856	return 0;
857}
858
859/*
860 *	Determine a default network mask, based on the IP address.
861 */
862
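/*
 * For example: 10.1.2.3 is class A and yields 8, 172.16.1.2 is class B
 * and yields 16, 192.168.1.2 is class C and yields 24; 0.0.0.0 yields 0
 * and multicast/reserved addresses yield -1.
 */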
863static int inet_abc_len(__be32 addr)
864{
865	int rc = -1;	/* Something else, probably a multicast. */
866
867	if (ipv4_is_zeronet(addr))
868		rc = 0;
869	else {
870		__u32 haddr = ntohl(addr);
871
872		if (IN_CLASSA(haddr))
873			rc = 8;
874		else if (IN_CLASSB(haddr))
875			rc = 16;
876		else if (IN_CLASSC(haddr))
877			rc = 24;
878	}
879
880	return rc;
881}
882
883
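/*
 * Handler for the classic SIOCGIFADDR/SIOCSIFADDR family of ioctls.
 * The ifreq name may carry a "label:alias" suffix; for the GET ioctls
 * the alias is first matched on both label and address (4.4BSD style),
 * falling back to label-only matching, before dispatching on the
 * command.
 */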
884int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
885{
886	struct ifreq ifr;
887	struct sockaddr_in sin_orig;
888	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
889	struct in_device *in_dev;
890	struct in_ifaddr **ifap = NULL;
891	struct in_ifaddr *ifa = NULL;
892	struct net_device *dev;
893	char *colon;
894	int ret = -EFAULT;
895	int tryaddrmatch = 0;
896
897	/*
898	 *	Fetch the caller's info block into kernel space
899	 */
900
901	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
902		goto out;
903	ifr.ifr_name[IFNAMSIZ - 1] = 0;
904
905	/* save original address for comparison */
906	memcpy(&sin_orig, sin, sizeof(*sin));
907
908	colon = strchr(ifr.ifr_name, ':');
909	if (colon)
910		*colon = 0;
911
912	dev_load(net, ifr.ifr_name);
913
914	switch (cmd) {
915	case SIOCGIFADDR:	/* Get interface address */
916	case SIOCGIFBRDADDR:	/* Get the broadcast address */
917	case SIOCGIFDSTADDR:	/* Get the destination address */
918	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
919		/* Note that these ioctls will not sleep,
920		   so that we do not impose a lock.
921		   One day we will be forced to put a shared lock here (I mean SMP).
922		 */
923		tryaddrmatch = (sin_orig.sin_family == AF_INET);
924		memset(sin, 0, sizeof(*sin));
925		sin->sin_family = AF_INET;
926		break;
927
928	case SIOCSIFFLAGS:
929		ret = -EPERM;
930		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
931			goto out;
932		break;
933	case SIOCSIFADDR:	/* Set interface address (and family) */
934	case SIOCSIFBRDADDR:	/* Set the broadcast address */
935	case SIOCSIFDSTADDR:	/* Set the destination address */
936	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
937		ret = -EPERM;
938		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
939			goto out;
940		ret = -EINVAL;
941		if (sin->sin_family != AF_INET)
942			goto out;
943		break;
944	default:
945		ret = -EINVAL;
946		goto out;
947	}
948
949	rtnl_lock();
950
951	ret = -ENODEV;
952	dev = __dev_get_by_name(net, ifr.ifr_name);
953	if (!dev)
954		goto done;
955
956	if (colon)
957		*colon = ':';
958
959	in_dev = __in_dev_get_rtnl(dev);
960	if (in_dev) {
961		if (tryaddrmatch) {
962			/* Matthias Andree */
963			/* compare label and address (4.4BSD style) */
964			/* note: we only do this for a limited set of ioctls
965			   and only if the original address family was AF_INET.
966			   This is checked above. */
967			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
968			     ifap = &ifa->ifa_next) {
969				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
970				    sin_orig.sin_addr.s_addr ==
971							ifa->ifa_local) {
972					break; /* found */
973				}
974			}
975		}
976		/* We didn't get a match; maybe the application is
977		   4.3BSD-style and passed in junk, so we fall back to
978		   comparing just the label. */
979		if (!ifa) {
980			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
981			     ifap = &ifa->ifa_next)
982				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
983					break;
984		}
985	}
986
987	ret = -EADDRNOTAVAIL;
988	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
989		goto done;
990
991	switch (cmd) {
992	case SIOCGIFADDR:	/* Get interface address */
993		sin->sin_addr.s_addr = ifa->ifa_local;
994		goto rarok;
995
996	case SIOCGIFBRDADDR:	/* Get the broadcast address */
997		sin->sin_addr.s_addr = ifa->ifa_broadcast;
998		goto rarok;
999
1000	case SIOCGIFDSTADDR:	/* Get the destination address */
1001		sin->sin_addr.s_addr = ifa->ifa_address;
1002		goto rarok;
1003
1004	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1005		sin->sin_addr.s_addr = ifa->ifa_mask;
1006		goto rarok;
1007
1008	case SIOCSIFFLAGS:
1009		if (colon) {
1010			ret = -EADDRNOTAVAIL;
1011			if (!ifa)
1012				break;
1013			ret = 0;
1014			if (!(ifr.ifr_flags & IFF_UP))
1015				inet_del_ifa(in_dev, ifap, 1);
1016			break;
1017		}
1018		ret = dev_change_flags(dev, ifr.ifr_flags);
1019		break;
1020
1021	case SIOCSIFADDR:	/* Set interface address (and family) */
1022		ret = -EINVAL;
1023		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1024			break;
1025
1026		if (!ifa) {
1027			ret = -ENOBUFS;
1028			ifa = inet_alloc_ifa();
1029			if (!ifa)
1030				break;
1031			INIT_HLIST_NODE(&ifa->hash);
1032			if (colon)
1033				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1034			else
1035				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1036		} else {
1037			ret = 0;
1038			if (ifa->ifa_local == sin->sin_addr.s_addr)
1039				break;
1040			inet_del_ifa(in_dev, ifap, 0);
1041			ifa->ifa_broadcast = 0;
1042			ifa->ifa_scope = 0;
1043		}
1044
1045		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1046
1047		if (!(dev->flags & IFF_POINTOPOINT)) {
1048			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1049			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1050			if ((dev->flags & IFF_BROADCAST) &&
1051			    ifa->ifa_prefixlen < 31)
1052				ifa->ifa_broadcast = ifa->ifa_address |
1053						     ~ifa->ifa_mask;
1054		} else {
1055			ifa->ifa_prefixlen = 32;
1056			ifa->ifa_mask = inet_make_mask(32);
1057		}
1058		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1059		ret = inet_set_ifa(dev, ifa);
1060		break;
1061
1062	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1063		ret = 0;
1064		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1065			inet_del_ifa(in_dev, ifap, 0);
1066			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1067			inet_insert_ifa(ifa);
1068		}
1069		break;
1070
1071	case SIOCSIFDSTADDR:	/* Set the destination address */
1072		ret = 0;
1073		if (ifa->ifa_address == sin->sin_addr.s_addr)
1074			break;
1075		ret = -EINVAL;
1076		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1077			break;
1078		ret = 0;
1079		inet_del_ifa(in_dev, ifap, 0);
1080		ifa->ifa_address = sin->sin_addr.s_addr;
1081		inet_insert_ifa(ifa);
1082		break;
1083
1084	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1085
1086		/*
1087		 *	The mask we set must be legal.
1088		 */
1089		ret = -EINVAL;
1090		if (bad_mask(sin->sin_addr.s_addr, 0))
1091			break;
1092		ret = 0;
1093		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1094			__be32 old_mask = ifa->ifa_mask;
1095			inet_del_ifa(in_dev, ifap, 0);
1096			ifa->ifa_mask = sin->sin_addr.s_addr;
1097			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1098
1099			/* If the current broadcast address matches
1100			 * the one derived from the old netmask, recalculate
1101			 * the broadcast address. Otherwise it's a
1102			 * funny address, so don't touch it since
1103			 * the user seems to know what (s)he's doing...
1104			 */
1105			if ((dev->flags & IFF_BROADCAST) &&
1106			    (ifa->ifa_prefixlen < 31) &&
1107			    (ifa->ifa_broadcast ==
1108			     (ifa->ifa_local|~old_mask))) {
1109				ifa->ifa_broadcast = (ifa->ifa_local |
1110						      ~sin->sin_addr.s_addr);
1111			}
1112			inet_insert_ifa(ifa);
1113		}
1114		break;
1115	}
1116done:
1117	rtnl_unlock();
1118out:
1119	return ret;
1120rarok:
1121	rtnl_unlock();
1122	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1123	goto out;
1124}
1125
1126static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1127{
1128	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1129	struct in_ifaddr *ifa;
1130	struct ifreq ifr;
1131	int done = 0;
1132
1133	if (!in_dev)
1134		goto out;
1135
1136	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1137		if (!buf) {
1138			done += sizeof(ifr);
1139			continue;
1140		}
1141		if (len < (int) sizeof(ifr))
1142			break;
1143		memset(&ifr, 0, sizeof(struct ifreq));
1144		strcpy(ifr.ifr_name, ifa->ifa_label);
1145
1146		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1147		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1148								ifa->ifa_local;
1149
1150		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1151			done = -EFAULT;
1152			break;
1153		}
1154		buf  += sizeof(struct ifreq);
1155		len  -= sizeof(struct ifreq);
1156		done += sizeof(struct ifreq);
1157	}
1158out:
1159	return done;
1160}
1161
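/*
 * Choose a source address on @dev for talking to @dst: prefer a primary
 * address in the same subnet whose scope does not exceed @scope, and if
 * the device has none, fall back to a suitable primary address on any
 * other device.
 */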
1162__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1163{
1164	__be32 addr = 0;
1165	struct in_device *in_dev;
1166	struct net *net = dev_net(dev);
1167
1168	rcu_read_lock();
1169	in_dev = __in_dev_get_rcu(dev);
1170	if (!in_dev)
1171		goto no_in_dev;
1172
1173	for_primary_ifa(in_dev) {
1174		if (ifa->ifa_scope > scope)
1175			continue;
1176		if (!dst || inet_ifa_match(dst, ifa)) {
1177			addr = ifa->ifa_local;
1178			break;
1179		}
1180		if (!addr)
1181			addr = ifa->ifa_local;
1182	} endfor_ifa(in_dev);
1183
1184	if (addr)
1185		goto out_unlock;
1186no_in_dev:
1187
1188	/* Non-loopback addresses on loopback should be preferred
1189	   in this case. It is important that lo is the first interface
1190	   in the dev_base list.
1191	 */
1192	for_each_netdev_rcu(net, dev) {
1193		in_dev = __in_dev_get_rcu(dev);
1194		if (!in_dev)
1195			continue;
1196
1197		for_primary_ifa(in_dev) {
1198			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1199			    ifa->ifa_scope <= scope) {
1200				addr = ifa->ifa_local;
1201				goto out_unlock;
1202			}
1203		} endfor_ifa(in_dev);
1204	}
1205out_unlock:
1206	rcu_read_unlock();
1207	return addr;
1208}
1209EXPORT_SYMBOL(inet_select_addr);
1210
1211static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1212			      __be32 local, int scope)
1213{
1214	int same = 0;
1215	__be32 addr = 0;
1216
1217	for_ifa(in_dev) {
1218		if (!addr &&
1219		    (local == ifa->ifa_local || !local) &&
1220		    ifa->ifa_scope <= scope) {
1221			addr = ifa->ifa_local;
1222			if (same)
1223				break;
1224		}
1225		if (!same) {
1226			same = (!local || inet_ifa_match(local, ifa)) &&
1227				(!dst || inet_ifa_match(dst, ifa));
1228			if (same && addr) {
1229				if (local || !dst)
1230					break;
1231				/* Is the selected addr in the dst subnet? */
1232				if (inet_ifa_match(addr, ifa))
1233					break;
1234				/* No, then can we use new local src? */
1235				if (ifa->ifa_scope <= scope) {
1236					addr = ifa->ifa_local;
1237					break;
1238				}
1239				/* search for large dst subnet for addr */
1240				same = 0;
1241			}
1242		}
1243	} endfor_ifa(in_dev);
1244
1245	return same ? addr : 0;
1246}
1247
1248/*
1249 * Confirm that local IP address exists using wildcards:
1250 * - net: netns to check, cannot be NULL
1251 * - in_dev: only on this interface, NULL=any interface
1252 * - dst: only in the same subnet as dst, 0=any dst
1253 * - local: address, 0=autoselect the local address
1254 * - scope: maximum allowed scope value for the local address
1255 */
1256__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1257			 __be32 dst, __be32 local, int scope)
1258{
1259	__be32 addr = 0;
1260	struct net_device *dev;
1261
1262	if (in_dev != NULL)
1263		return confirm_addr_indev(in_dev, dst, local, scope);
1264
1265	rcu_read_lock();
1266	for_each_netdev_rcu(net, dev) {
1267		in_dev = __in_dev_get_rcu(dev);
1268		if (in_dev) {
1269			addr = confirm_addr_indev(in_dev, dst, local, scope);
1270			if (addr)
1271				break;
1272		}
1273	}
1274	rcu_read_unlock();
1275
1276	return addr;
1277}
1278EXPORT_SYMBOL(inet_confirm_addr);
1279
1280/*
1281 *	Device notifier
1282 */
1283
1284int register_inetaddr_notifier(struct notifier_block *nb)
1285{
1286	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1287}
1288EXPORT_SYMBOL(register_inetaddr_notifier);
1289
1290int unregister_inetaddr_notifier(struct notifier_block *nb)
1291{
1292	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1293}
1294EXPORT_SYMBOL(unregister_inetaddr_notifier);
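
/*
 * Typical use of the inetaddr notifier chain from another subsystem
 * (illustrative sketch only; the callback and block names below are
 * hypothetical):
 *
 *	static int my_inetaddr_event(struct notifier_block *nb,
 *				     unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_info("%s: %pI4 added\n", ifa->ifa_label,
 *				&ifa->ifa_local);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_inetaddr_event,
 *	};
 *	...
 *	register_inetaddr_notifier(&my_nb);
 */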
1295
1296/* Rename ifa_labels for a device name change. Make some effort to preserve
1297 * existing alias numbering and to create unique labels if possible.
1298*/
1299static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1300{
1301	struct in_ifaddr *ifa;
1302	int named = 0;
1303
1304	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1305		char old[IFNAMSIZ], *dot;
1306
1307		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1308		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1309		if (named++ == 0)
1310			goto skip;
1311		dot = strchr(old, ':');
1312		if (dot == NULL) {
1313			sprintf(old, ":%d", named);
1314			dot = old;
1315		}
1316		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1317			strcat(ifa->ifa_label, dot);
1318		else
1319			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1320skip:
1321		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1322	}
1323}
1324
1325static bool inetdev_valid_mtu(unsigned int mtu)
1326{
1327	return mtu >= 68;
1328}
1329
1330static void inetdev_send_gratuitous_arp(struct net_device *dev,
1331					struct in_device *in_dev)
1332
1333{
1334	struct in_ifaddr *ifa;
1335
1336	for (ifa = in_dev->ifa_list; ifa;
1337	     ifa = ifa->ifa_next) {
1338		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1339			 ifa->ifa_local, dev,
1340			 ifa->ifa_local, NULL,
1341			 dev->dev_addr, NULL);
1342	}
1343}
1344
1345/* Called only under RTNL semaphore */
1346
1347static int inetdev_event(struct notifier_block *this, unsigned long event,
1348			 void *ptr)
1349{
1350	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1351	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1352
1353	ASSERT_RTNL();
1354
1355	if (!in_dev) {
1356		if (event == NETDEV_REGISTER) {
1357			in_dev = inetdev_init(dev);
1358			if (IS_ERR(in_dev))
1359				return notifier_from_errno(PTR_ERR(in_dev));
1360			if (dev->flags & IFF_LOOPBACK) {
1361				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1362				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1363			}
1364		} else if (event == NETDEV_CHANGEMTU) {
1365			/* Re-enabling IP */
1366			if (inetdev_valid_mtu(dev->mtu))
1367				in_dev = inetdev_init(dev);
1368		}
1369		goto out;
1370	}
1371
1372	switch (event) {
1373	case NETDEV_REGISTER:
1374		pr_debug("%s: bug\n", __func__);
1375		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1376		break;
1377	case NETDEV_UP:
1378		if (!inetdev_valid_mtu(dev->mtu))
1379			break;
1380		if (dev->flags & IFF_LOOPBACK) {
1381			struct in_ifaddr *ifa = inet_alloc_ifa();
1382
1383			if (ifa) {
1384				INIT_HLIST_NODE(&ifa->hash);
1385				ifa->ifa_local =
1386				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1387				ifa->ifa_prefixlen = 8;
1388				ifa->ifa_mask = inet_make_mask(8);
1389				in_dev_hold(in_dev);
1390				ifa->ifa_dev = in_dev;
1391				ifa->ifa_scope = RT_SCOPE_HOST;
1392				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1393				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1394						 INFINITY_LIFE_TIME);
1395				ipv4_devconf_setall(in_dev);
1396				neigh_parms_data_state_setall(in_dev->arp_parms);
1397				inet_insert_ifa(ifa);
1398			}
1399		}
1400		ip_mc_up(in_dev);
1401		/* fall through */
1402	case NETDEV_CHANGEADDR:
1403		if (!IN_DEV_ARP_NOTIFY(in_dev))
1404			break;
1405		/* fall through */
1406	case NETDEV_NOTIFY_PEERS:
1407		/* Send gratuitous ARP to notify of link change */
1408		inetdev_send_gratuitous_arp(dev, in_dev);
1409		break;
1410	case NETDEV_DOWN:
1411		ip_mc_down(in_dev);
1412		break;
1413	case NETDEV_PRE_TYPE_CHANGE:
1414		ip_mc_unmap(in_dev);
1415		break;
1416	case NETDEV_POST_TYPE_CHANGE:
1417		ip_mc_remap(in_dev);
1418		break;
1419	case NETDEV_CHANGEMTU:
1420		if (inetdev_valid_mtu(dev->mtu))
1421			break;
1422		/* MTU is too small: disable IP on this device, fall through */
1423	case NETDEV_UNREGISTER:
1424		inetdev_destroy(in_dev);
1425		break;
1426	case NETDEV_CHANGENAME:
1427		/* Do not notify about label change, this event is
1428		 * not interesting to applications using netlink.
1429		 */
1430		inetdev_changename(dev, in_dev);
1431
1432		devinet_sysctl_unregister(in_dev);
1433		devinet_sysctl_register(in_dev);
1434		break;
1435	}
1436out:
1437	return NOTIFY_DONE;
1438}
1439
1440static struct notifier_block ip_netdev_notifier = {
1441	.notifier_call = inetdev_event,
1442};
1443
1444static size_t inet_nlmsg_size(void)
1445{
1446	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1447	       + nla_total_size(4) /* IFA_ADDRESS */
1448	       + nla_total_size(4) /* IFA_LOCAL */
1449	       + nla_total_size(4) /* IFA_BROADCAST */
1450	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1451	       + nla_total_size(4)  /* IFA_FLAGS */
1452	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1453}
1454
1455static inline u32 cstamp_delta(unsigned long cstamp)
1456{
1457	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1458}
1459
1460static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1461			 unsigned long tstamp, u32 preferred, u32 valid)
1462{
1463	struct ifa_cacheinfo ci;
1464
1465	ci.cstamp = cstamp_delta(cstamp);
1466	ci.tstamp = cstamp_delta(tstamp);
1467	ci.ifa_prefered = preferred;
1468	ci.ifa_valid = valid;
1469
1470	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1471}
1472
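/*
 * Fill one RTM_NEWADDR/RTM_DELADDR message for @ifa.  For non-permanent
 * addresses the remaining preferred/valid lifetimes are recomputed from
 * the jiffies timestamps before being put into IFA_CACHEINFO.
 */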
1473static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1474			    u32 portid, u32 seq, int event, unsigned int flags)
1475{
1476	struct ifaddrmsg *ifm;
1477	struct nlmsghdr  *nlh;
1478	u32 preferred, valid;
1479
1480	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1481	if (nlh == NULL)
1482		return -EMSGSIZE;
1483
1484	ifm = nlmsg_data(nlh);
1485	ifm->ifa_family = AF_INET;
1486	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1487	ifm->ifa_flags = ifa->ifa_flags;
1488	ifm->ifa_scope = ifa->ifa_scope;
1489	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1490
1491	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1492		preferred = ifa->ifa_preferred_lft;
1493		valid = ifa->ifa_valid_lft;
1494		if (preferred != INFINITY_LIFE_TIME) {
1495			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1496
1497			if (preferred > tval)
1498				preferred -= tval;
1499			else
1500				preferred = 0;
1501			if (valid != INFINITY_LIFE_TIME) {
1502				if (valid > tval)
1503					valid -= tval;
1504				else
1505					valid = 0;
1506			}
1507		}
1508	} else {
1509		preferred = INFINITY_LIFE_TIME;
1510		valid = INFINITY_LIFE_TIME;
1511	}
1512	if ((ifa->ifa_address &&
1513	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1514	    (ifa->ifa_local &&
1515	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1516	    (ifa->ifa_broadcast &&
1517	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1518	    (ifa->ifa_label[0] &&
1519	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1520	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1521	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1522			  preferred, valid))
1523		goto nla_put_failure;
1524
1525	return nlmsg_end(skb, nlh);
1526
1527nla_put_failure:
1528	nlmsg_cancel(skb, nlh);
1529	return -EMSGSIZE;
1530}
1531
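/*
 * RTM_GETADDR dump callback.  cb->args[] records the hash bucket, device
 * index and address index reached so far, so an interrupted dump can be
 * resumed, and cb->seq lets userspace detect changes between chunks.
 */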
1532static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1533{
1534	struct net *net = sock_net(skb->sk);
1535	int h, s_h;
1536	int idx, s_idx;
1537	int ip_idx, s_ip_idx;
1538	struct net_device *dev;
1539	struct in_device *in_dev;
1540	struct in_ifaddr *ifa;
1541	struct hlist_head *head;
1542
1543	s_h = cb->args[0];
1544	s_idx = idx = cb->args[1];
1545	s_ip_idx = ip_idx = cb->args[2];
1546
1547	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1548		idx = 0;
1549		head = &net->dev_index_head[h];
1550		rcu_read_lock();
1551		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1552			  net->dev_base_seq;
1553		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1554			if (idx < s_idx)
1555				goto cont;
1556			if (h > s_h || idx > s_idx)
1557				s_ip_idx = 0;
1558			in_dev = __in_dev_get_rcu(dev);
1559			if (!in_dev)
1560				goto cont;
1561
1562			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1563			     ifa = ifa->ifa_next, ip_idx++) {
1564				if (ip_idx < s_ip_idx)
1565					continue;
1566				if (inet_fill_ifaddr(skb, ifa,
1567					     NETLINK_CB(cb->skb).portid,
1568					     cb->nlh->nlmsg_seq,
1569					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1570					rcu_read_unlock();
1571					goto done;
1572				}
1573				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1574			}
1575cont:
1576			idx++;
1577		}
1578		rcu_read_unlock();
1579	}
1580
1581done:
1582	cb->args[0] = h;
1583	cb->args[1] = idx;
1584	cb->args[2] = ip_idx;
1585
1586	return skb->len;
1587}
1588
1589static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1590		      u32 portid)
1591{
1592	struct sk_buff *skb;
1593	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1594	int err = -ENOBUFS;
1595	struct net *net;
1596
1597	net = dev_net(ifa->ifa_dev->dev);
1598	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1599	if (skb == NULL)
1600		goto errout;
1601
1602	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1603	if (err < 0) {
1604		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1605		WARN_ON(err == -EMSGSIZE);
1606		kfree_skb(skb);
1607		goto errout;
1608	}
1609	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1610	return;
1611errout:
1612	if (err < 0)
1613		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1614}
1615
1616static size_t inet_get_link_af_size(const struct net_device *dev)
1617{
1618	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1619
1620	if (!in_dev)
1621		return 0;
1622
1623	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1624}
1625
1626static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1627{
1628	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1629	struct nlattr *nla;
1630	int i;
1631
1632	if (!in_dev)
1633		return -ENODATA;
1634
1635	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1636	if (nla == NULL)
1637		return -EMSGSIZE;
1638
1639	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1640		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1641
1642	return 0;
1643}
1644
1645static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1646	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1647};
1648
1649static int inet_validate_link_af(const struct net_device *dev,
1650				 const struct nlattr *nla)
1651{
1652	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1653	int err, rem;
1654
1655	if (dev && !__in_dev_get_rtnl(dev))
1656		return -EAFNOSUPPORT;
1657
1658	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1659	if (err < 0)
1660		return err;
1661
1662	if (tb[IFLA_INET_CONF]) {
1663		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1664			int cfgid = nla_type(a);
1665
1666			if (nla_len(a) < 4)
1667				return -EINVAL;
1668
1669			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1670				return -EINVAL;
1671		}
1672	}
1673
1674	return 0;
1675}
1676
1677static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1678{
1679	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1680	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1681	int rem;
1682
1683	if (!in_dev)
1684		return -EAFNOSUPPORT;
1685
1686	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1687		BUG();
1688
1689	if (tb[IFLA_INET_CONF]) {
1690		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1691			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1692	}
1693
1694	return 0;
1695}
1696
1697static int inet_netconf_msgsize_devconf(int type)
1698{
1699	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1700		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1701
1702	/* type -1 is used for ALL */
1703	if (type == -1 || type == NETCONFA_FORWARDING)
1704		size += nla_total_size(4);
1705	if (type == -1 || type == NETCONFA_RP_FILTER)
1706		size += nla_total_size(4);
1707	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1708		size += nla_total_size(4);
1709	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1710		size += nla_total_size(4);
1711
1712	return size;
1713}
1714
1715static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1716				     struct ipv4_devconf *devconf, u32 portid,
1717				     u32 seq, int event, unsigned int flags,
1718				     int type)
1719{
1720	struct nlmsghdr  *nlh;
1721	struct netconfmsg *ncm;
1722
1723	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1724			flags);
1725	if (nlh == NULL)
1726		return -EMSGSIZE;
1727
1728	ncm = nlmsg_data(nlh);
1729	ncm->ncm_family = AF_INET;
1730
1731	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1732		goto nla_put_failure;
1733
1734	/* type -1 is used for ALL */
1735	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1736	    nla_put_s32(skb, NETCONFA_FORWARDING,
1737			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1738		goto nla_put_failure;
1739	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1740	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1741			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1742		goto nla_put_failure;
1743	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1744	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1745			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1746		goto nla_put_failure;
1747	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1748	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1749			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1750		goto nla_put_failure;
1751
1752	return nlmsg_end(skb, nlh);
1753
1754nla_put_failure:
1755	nlmsg_cancel(skb, nlh);
1756	return -EMSGSIZE;
1757}
1758
1759void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1760				 struct ipv4_devconf *devconf)
1761{
1762	struct sk_buff *skb;
1763	int err = -ENOBUFS;
1764
1765	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1766	if (skb == NULL)
1767		goto errout;
1768
1769	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1770					RTM_NEWNETCONF, 0, type);
1771	if (err < 0) {
1772		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1773		WARN_ON(err == -EMSGSIZE);
1774		kfree_skb(skb);
1775		goto errout;
1776	}
1777	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1778	return;
1779errout:
1780	if (err < 0)
1781		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1782}
1783
1784static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1785	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1786	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1787	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1788	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1789};
1790
1791static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1792				    struct nlmsghdr *nlh)
1793{
1794	struct net *net = sock_net(in_skb->sk);
1795	struct nlattr *tb[NETCONFA_MAX+1];
1796	struct netconfmsg *ncm;
1797	struct sk_buff *skb;
1798	struct ipv4_devconf *devconf;
1799	struct in_device *in_dev;
1800	struct net_device *dev;
1801	int ifindex;
1802	int err;
1803
1804	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1805			  devconf_ipv4_policy);
1806	if (err < 0)
1807		goto errout;
1808
1809	err = -EINVAL;
1810	if (!tb[NETCONFA_IFINDEX])
1811		goto errout;
1812
1813	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1814	switch (ifindex) {
1815	case NETCONFA_IFINDEX_ALL:
1816		devconf = net->ipv4.devconf_all;
1817		break;
1818	case NETCONFA_IFINDEX_DEFAULT:
1819		devconf = net->ipv4.devconf_dflt;
1820		break;
1821	default:
1822		dev = __dev_get_by_index(net, ifindex);
1823		if (dev == NULL)
1824			goto errout;
1825		in_dev = __in_dev_get_rtnl(dev);
1826		if (in_dev == NULL)
1827			goto errout;
1828		devconf = &in_dev->cnf;
1829		break;
1830	}
1831
1832	err = -ENOBUFS;
1833	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1834	if (skb == NULL)
1835		goto errout;
1836
1837	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1838					NETLINK_CB(in_skb).portid,
1839					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1840					-1);
1841	if (err < 0) {
1842		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1843		WARN_ON(err == -EMSGSIZE);
1844		kfree_skb(skb);
1845		goto errout;
1846	}
1847	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1848errout:
1849	return err;
1850}
1851
1852static int inet_netconf_dump_devconf(struct sk_buff *skb,
1853				     struct netlink_callback *cb)
1854{
1855	struct net *net = sock_net(skb->sk);
1856	int h, s_h;
1857	int idx, s_idx;
1858	struct net_device *dev;
1859	struct in_device *in_dev;
1860	struct hlist_head *head;
1861
1862	s_h = cb->args[0];
1863	s_idx = idx = cb->args[1];
1864
1865	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1866		idx = 0;
1867		head = &net->dev_index_head[h];
1868		rcu_read_lock();
1869		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1870			  net->dev_base_seq;
1871		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1872			if (idx < s_idx)
1873				goto cont;
1874			in_dev = __in_dev_get_rcu(dev);
1875			if (!in_dev)
1876				goto cont;
1877
1878			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1879						      &in_dev->cnf,
1880						      NETLINK_CB(cb->skb).portid,
1881						      cb->nlh->nlmsg_seq,
1882						      RTM_NEWNETCONF,
1883						      NLM_F_MULTI,
1884						      -1) <= 0) {
1885				rcu_read_unlock();
1886				goto done;
1887			}
1888			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1889cont:
1890			idx++;
1891		}
1892		rcu_read_unlock();
1893	}
1894	if (h == NETDEV_HASHENTRIES) {
1895		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1896					      net->ipv4.devconf_all,
1897					      NETLINK_CB(cb->skb).portid,
1898					      cb->nlh->nlmsg_seq,
1899					      RTM_NEWNETCONF, NLM_F_MULTI,
1900					      -1) <= 0)
1901			goto done;
1902		else
1903			h++;
1904	}
1905	if (h == NETDEV_HASHENTRIES + 1) {
1906		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1907					      net->ipv4.devconf_dflt,
1908					      NETLINK_CB(cb->skb).portid,
1909					      cb->nlh->nlmsg_seq,
1910					      RTM_NEWNETCONF, NLM_F_MULTI,
1911					      -1) <= 0)
1912			goto done;
1913		else
1914			h++;
1915	}
1916done:
1917	cb->args[0] = h;
1918	cb->args[1] = idx;
1919
1920	return skb->len;
1921}
1922
1923#ifdef CONFIG_SYSCTL
1924
1925static void devinet_copy_dflt_conf(struct net *net, int i)
1926{
1927	struct net_device *dev;
1928
1929	rcu_read_lock();
1930	for_each_netdev_rcu(net, dev) {
1931		struct in_device *in_dev;
1932
1933		in_dev = __in_dev_get_rcu(dev);
1934		if (in_dev && !test_bit(i, in_dev->cnf.state))
1935			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1936	}
1937	rcu_read_unlock();
1938}
1939
1940/* called with RTNL locked */
1941static void inet_forward_change(struct net *net)
1942{
1943	struct net_device *dev;
1944	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1945
1946	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1947	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1948	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1949				    NETCONFA_IFINDEX_ALL,
1950				    net->ipv4.devconf_all);
1951	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1952				    NETCONFA_IFINDEX_DEFAULT,
1953				    net->ipv4.devconf_dflt);
1954
1955	for_each_netdev(net, dev) {
1956		struct in_device *in_dev;
1957		if (on)
1958			dev_disable_lro(dev);
1959		rcu_read_lock();
1960		in_dev = __in_dev_get_rcu(dev);
1961		if (in_dev) {
1962			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1963			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1964						    dev->ifindex, &in_dev->cnf);
1965		}
1966		rcu_read_unlock();
1967	}
1968}
1969
1970static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1971{
1972	if (cnf == net->ipv4.devconf_dflt)
1973		return NETCONFA_IFINDEX_DEFAULT;
1974	else if (cnf == net->ipv4.devconf_all)
1975		return NETCONFA_IFINDEX_ALL;
1976	else {
1977		struct in_device *idev
1978			= container_of(cnf, struct in_device, cnf);
1979		return idev->dev->ifindex;
1980	}
1981}
1982
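/*
 * Generic sysctl handler for devconf entries.  Besides storing the value
 * it marks the option as explicitly set for this interface, propagates
 * writes to "default" onto interfaces that never overrode the option,
 * flushes the route cache where needed and emits netconf notifications
 * for rp_filter and proxy_arp changes.
 */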
1983static int devinet_conf_proc(struct ctl_table *ctl, int write,
1984			     void __user *buffer,
1985			     size_t *lenp, loff_t *ppos)
1986{
1987	int old_value = *(int *)ctl->data;
1988	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1989	int new_value = *(int *)ctl->data;
1990
1991	if (write) {
1992		struct ipv4_devconf *cnf = ctl->extra1;
1993		struct net *net = ctl->extra2;
1994		int i = (int *)ctl->data - cnf->data;
1995		int ifindex;
1996
1997		set_bit(i, cnf->state);
1998
1999		if (cnf == net->ipv4.devconf_dflt)
2000			devinet_copy_dflt_conf(net, i);
2001		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2002		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2003			if ((new_value == 0) && (old_value != 0))
2004				rt_cache_flush(net);
2005
2006		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2007		    new_value != old_value) {
2008			ifindex = devinet_conf_ifindex(net, cnf);
2009			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2010						    ifindex, cnf);
2011		}
2012		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2013		    new_value != old_value) {
2014			ifindex = devinet_conf_ifindex(net, cnf);
2015			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2016						    ifindex, cnf);
2017		}
2018	}
2019
2020	return ret;
2021}
2022
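/*
 * Handler for conf/<dev>/forwarding and the legacy ip_forward sysctl.
 * Everything except the namespace default needs the RTNL; if it cannot be
 * taken without sleeping, the original value and file position are restored
 * and the syscall is restarted.  Writes to "all" go through
 * inet_forward_change(), per-device writes disable LRO when enabling and
 * send a netconf notification, and the route cache is flushed afterwards.
 */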
2023static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2024				  void __user *buffer,
2025				  size_t *lenp, loff_t *ppos)
2026{
2027	int *valp = ctl->data;
2028	int val = *valp;
2029	loff_t pos = *ppos;
2030	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2031
2032	if (write && *valp != val) {
2033		struct net *net = ctl->extra2;
2034
2035		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2036			if (!rtnl_trylock()) {
2037				/* Restore the original values before restarting */
2038				*valp = val;
2039				*ppos = pos;
2040				return restart_syscall();
2041			}
2042			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2043				inet_forward_change(net);
2044			} else {
2045				struct ipv4_devconf *cnf = ctl->extra1;
2046				struct in_device *idev =
2047					container_of(cnf, struct in_device, cnf);
2048				if (*valp)
2049					dev_disable_lro(idev->dev);
2050				inet_netconf_notify_devconf(net,
2051							    NETCONFA_FORWARDING,
2052							    idev->dev->ifindex,
2053							    cnf);
2054			}
2055			rtnl_unlock();
2056			rt_cache_flush(net);
2057		} else
2058			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2059						    NETCONFA_IFINDEX_DEFAULT,
2060						    net->ipv4.devconf_dflt);
2061	}
2062
2063	return ret;
2064}
2065
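/* Plain integer handler that flushes the route cache when the value changes. */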
2066static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2067				void __user *buffer,
2068				size_t *lenp, loff_t *ppos)
2069{
2070	int *valp = ctl->data;
2071	int val = *valp;
2072	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2073	struct net *net = ctl->extra2;
2074
2075	if (write && *valp != val)
2076		rt_cache_flush(net);
2077
2078	return ret;
2079}
2080
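/*
 * Template helpers for the per-interface conf/ sysctl entries.  Each entry
 * initially points into the static ipv4_devconf template; the pointers are
 * rebased onto the actual per-netns/per-device ipv4_devconf by
 * __devinet_sysctl_register() before the table is registered.
 */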
2081#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2082	{ \
2083		.procname	= name, \
2084		.data		= ipv4_devconf.data + \
2085				  IPV4_DEVCONF_ ## attr - 1, \
2086		.maxlen		= sizeof(int), \
2087		.mode		= mval, \
2088		.proc_handler	= proc, \
2089		.extra1		= &ipv4_devconf, \
2090	}
2091
2092#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2093	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2094
2095#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2096	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2097
2098#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2099	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2100
2101#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2102	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2103
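/* Master copy of the table; the final slot stays zeroed as the sentinel. */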
2104static struct devinet_sysctl_table {
2105	struct ctl_table_header *sysctl_header;
2106	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2107} devinet_sysctl = {
2108	.devinet_vars = {
2109		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2110					     devinet_sysctl_forward),
2111		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2112
2113		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2114		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2115		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2116		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2117		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2118		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2119					"accept_source_route"),
2120		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2121		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2122		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2123		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2124		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2125		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2126		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2127		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2128		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2129		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2130		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2131		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2132		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2133		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2134					"force_igmp_version"),
2135		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2136					"igmpv2_unsolicited_report_interval"),
2137		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2138					"igmpv3_unsolicited_report_interval"),
2139
2140		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2141		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2142		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2143					      "promote_secondaries"),
2144		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2145					      "route_localnet"),
2146	},
2147};
2148
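/*
 * Duplicate the template table, shift every .data pointer by the byte
 * offset between the target devconf and the static template, and register
 * the result under net/ipv4/conf/<dev_name>.
 */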
2149static int __devinet_sysctl_register(struct net *net, char *dev_name,
2150					struct ipv4_devconf *p)
2151{
2152	int i;
2153	struct devinet_sysctl_table *t;
2154	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2155
2156	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2157	if (!t)
2158		goto out;
2159
2160	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2161		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2162		t->devinet_vars[i].extra1 = p;
2163		t->devinet_vars[i].extra2 = net;
2164	}
2165
2166	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2167
2168	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2169	if (!t->sysctl_header)
2170		goto free;
2171
2172	p->sysctl = t;
2173	return 0;
2174
2175free:
2176	kfree(t);
2177out:
2178	return -ENOBUFS;
2179}
2180
2181static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2182{
2183	struct devinet_sysctl_table *t = cnf->sysctl;
2184
2185	if (t == NULL)
2186		return;
2187
2188	cnf->sysctl = NULL;
2189	unregister_net_sysctl_table(t->sysctl_header);
2190	kfree(t);
2191}
2192
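/*
 * Register the per-device sysctls: reject interface names that would
 * collide with the fixed "all"/"default" conf directories, register the
 * neighbour (ARP) tables first, and unwind them if registering the devinet
 * table fails.
 */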
2193static int devinet_sysctl_register(struct in_device *idev)
2194{
2195	int err;
2196
2197	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2198		return -EINVAL;
2199
2200	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2201	if (err)
2202		return err;
2203	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2204					&idev->cnf);
2205	if (err)
2206		neigh_sysctl_unregister(idev->arp_parms);
2207	return err;
2208}
2209
2210static void devinet_sysctl_unregister(struct in_device *idev)
2211{
2212	__devinet_sysctl_unregister(&idev->cnf);
2213	neigh_sysctl_unregister(idev->arp_parms);
2214}
2215
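/*
 * The legacy /proc/sys/net/ipv4/ip_forward knob; it shares its storage and
 * handler with conf/all/forwarding and is rebased to each namespace's
 * devconf copy in devinet_init_net().
 */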
2216static struct ctl_table ctl_forward_entry[] = {
2217	{
2218		.procname	= "ip_forward",
2219		.data		= &ipv4_devconf.data[
2220					IPV4_DEVCONF_FORWARDING - 1],
2221		.maxlen		= sizeof(int),
2222		.mode		= 0644,
2223		.proc_handler	= devinet_sysctl_forward,
2224		.extra1		= &ipv4_devconf,
2225		.extra2		= &init_net,
2226	},
2227	{ },
2228};
2229#endif
2230
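/*
 * Per-namespace setup: the initial namespace uses the static "all" and
 * "default" templates directly, every other namespace gets kmemdup'ed
 * copies (including its own ip_forward ctl_table).  The conf/all,
 * conf/default and ip_forward sysctls are then registered and the devconf
 * pointers published in net->ipv4.
 */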
2231static __net_init int devinet_init_net(struct net *net)
2232{
2233	int err;
2234	struct ipv4_devconf *all, *dflt;
2235#ifdef CONFIG_SYSCTL
2236	struct ctl_table *tbl = ctl_forward_entry;
2237	struct ctl_table_header *forw_hdr;
2238#endif
2239
2240	err = -ENOMEM;
2241	all = &ipv4_devconf;
2242	dflt = &ipv4_devconf_dflt;
2243
2244	if (!net_eq(net, &init_net)) {
2245		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2246		if (all == NULL)
2247			goto err_alloc_all;
2248
2249		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2250		if (dflt == NULL)
2251			goto err_alloc_dflt;
2252
2253#ifdef CONFIG_SYSCTL
2254		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2255		if (tbl == NULL)
2256			goto err_alloc_ctl;
2257
2258		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2259		tbl[0].extra1 = all;
2260		tbl[0].extra2 = net;
2261#endif
2262	}
2263
2264#ifdef CONFIG_SYSCTL
2265	err = __devinet_sysctl_register(net, "all", all);
2266	if (err < 0)
2267		goto err_reg_all;
2268
2269	err = __devinet_sysctl_register(net, "default", dflt);
2270	if (err < 0)
2271		goto err_reg_dflt;
2272
2273	err = -ENOMEM;
2274	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2275	if (forw_hdr == NULL)
2276		goto err_reg_ctl;
2277	net->ipv4.forw_hdr = forw_hdr;
2278#endif
2279
2280	net->ipv4.devconf_all = all;
2281	net->ipv4.devconf_dflt = dflt;
2282	return 0;
2283
2284#ifdef CONFIG_SYSCTL
2285err_reg_ctl:
2286	__devinet_sysctl_unregister(dflt);
2287err_reg_dflt:
2288	__devinet_sysctl_unregister(all);
2289err_reg_all:
2290	if (tbl != ctl_forward_entry)
2291		kfree(tbl);
2292err_alloc_ctl:
2293#endif
2294	if (dflt != &ipv4_devconf_dflt)
2295		kfree(dflt);
2296err_alloc_dflt:
2297	if (all != &ipv4_devconf)
2298		kfree(all);
2299err_alloc_all:
2300	return err;
2301}
2302
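/* Per-namespace teardown: unregister the sysctls, then free the namespace's copies. */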
2303static __net_exit void devinet_exit_net(struct net *net)
2304{
2305#ifdef CONFIG_SYSCTL
2306	struct ctl_table *tbl;
2307
2308	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2309	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2310	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2311	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2312	kfree(tbl);
2313#endif
2314	kfree(net->ipv4.devconf_dflt);
2315	kfree(net->ipv4.devconf_all);
2316}
2317
2318static __net_initdata struct pernet_operations devinet_ops = {
2319	.init = devinet_init_net,
2320	.exit = devinet_exit_net,
2321};
2322
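/* IFLA_AF_SPEC handlers for AF_INET used by the rtnetlink link code. */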
2323static struct rtnl_af_ops inet_af_ops = {
2324	.family		  = AF_INET,
2325	.fill_link_af	  = inet_fill_link_af,
2326	.get_link_af_size = inet_get_link_af_size,
2327	.validate_link_af = inet_validate_link_af,
2328	.set_link_af	  = inet_set_link_af,
2329};
2330
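/*
 * Boot-time initialisation: address hash table, pernet operations,
 * SIOCGIFCONF handler, netdevice notifier, the address lifetime worker,
 * the AF_INET link attribute ops and the rtnetlink handlers for address
 * and netconf messages.
 */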
2331void __init devinet_init(void)
2332{
2333	int i;
2334
2335	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2336		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2337
2338	register_pernet_subsys(&devinet_ops);
2339
2340	register_gifconf(PF_INET, inet_gifconf);
2341	register_netdevice_notifier(&ip_netdev_notifier);
2342
2343	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2344
2345	rtnl_af_register(&inet_af_ops);
2346
2347	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2348	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2349	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2350	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2351		      inet_netconf_dump_devconf, NULL);
2352}
2353