devinet.c revision dfd1582d1e4d117f46df720679d595f984ef902a
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <linux/bitops.h>
31#include <linux/capability.h>
32#include <linux/module.h>
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/string.h>
36#include <linux/mm.h>
37#include <linux/socket.h>
38#include <linux/sockios.h>
39#include <linux/in.h>
40#include <linux/errno.h>
41#include <linux/interrupt.h>
42#include <linux/if_addr.h>
43#include <linux/if_ether.h>
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/etherdevice.h>
47#include <linux/skbuff.h>
48#include <linux/init.h>
49#include <linux/notifier.h>
50#include <linux/inetdevice.h>
51#include <linux/igmp.h>
52#include <linux/slab.h>
53#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58#include <linux/netconf.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66#include <net/addrconf.h>
67
68#include "fib_lookup.h"
69
70static struct ipv4_devconf ipv4_devconf = {
71	.data = {
72		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78	},
79};
80
81static struct ipv4_devconf ipv4_devconf_dflt = {
82	.data = {
83		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90	},
91};
92
93#define IPV4_DEVCONF_DFLT(net, attr) \
94	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95
96static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97	[IFA_LOCAL]     	= { .type = NLA_U32 },
98	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102	[IFA_FLAGS]		= { .type = NLA_U32 },
103};
104
105#define IN4_ADDR_HSIZE_SHIFT	8
106#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107
108static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109static DEFINE_SPINLOCK(inet_addr_hash_lock);
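/* Every configured local address is hashed into inet_addr_lst so that
 * __ip_dev_find() can map an address back to its device without walking
 * all interfaces.  Insertions and removals are serialized by
 * inet_addr_hash_lock; lookups run under RCU.
 */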
110
111static u32 inet_addr_hash(struct net *net, __be32 addr)
112{
113	u32 val = (__force u32) addr ^ net_hash_mix(net);
114
115	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116}
117
118static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119{
120	u32 hash = inet_addr_hash(net, ifa->ifa_local);
121
122	spin_lock(&inet_addr_hash_lock);
123	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124	spin_unlock(&inet_addr_hash_lock);
125}
126
127static void inet_hash_remove(struct in_ifaddr *ifa)
128{
129	spin_lock(&inet_addr_hash_lock);
130	hlist_del_init_rcu(&ifa->hash);
131	spin_unlock(&inet_addr_hash_lock);
132}
133
134/**
135 * __ip_dev_find - find the first device with a given source address.
136 * @net: the net namespace
137 * @addr: the source address
138 * @devref: if true, take a reference on the found device
139 *
140 * If a caller uses devref=false, it should be protected by RCU, or RTNL
141 */
142struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
143{
144	u32 hash = inet_addr_hash(net, addr);
145	struct net_device *result = NULL;
146	struct in_ifaddr *ifa;
147
148	rcu_read_lock();
149	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
150		if (ifa->ifa_local == addr) {
151			struct net_device *dev = ifa->ifa_dev->dev;
152
153			if (!net_eq(dev_net(dev), net))
154				continue;
155			result = dev;
156			break;
157		}
158	}
159	if (!result) {
160		struct flowi4 fl4 = { .daddr = addr };
161		struct fib_result res = { 0 };
162		struct fib_table *local;
163
164		/* Fall back to the FIB local table so that communication
165		 * over loopback subnets works.
166		 */
167		local = fib_get_table(net, RT_TABLE_LOCAL);
168		if (local &&
169		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170		    res.type == RTN_LOCAL)
171			result = FIB_RES_DEV(res);
172	}
173	if (result && devref)
174		dev_hold(result);
175	rcu_read_unlock();
176	return result;
177}
178EXPORT_SYMBOL(__ip_dev_find);
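/* Editorial sketch (hypothetical helper, not part of the original file):
 * one way a caller might use __ip_dev_find().  With devref == true the
 * function takes a reference that the caller must drop with dev_put();
 * with devref == false the caller must hold RCU or the RTNL for as long
 * as it uses the returned pointer.
 */
#if 0
static bool example_ipv4_addr_is_local(struct net *net, __be32 addr)
{
	struct net_device *dev = __ip_dev_find(net, addr, true);

	if (!dev)
		return false;
	dev_put(dev);	/* drop the reference taken with devref == true */
	return true;
}
#endif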
179
180static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
181
182static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
183static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
184			 int destroy);
185#ifdef CONFIG_SYSCTL
186static void devinet_sysctl_register(struct in_device *idev);
187static void devinet_sysctl_unregister(struct in_device *idev);
188#else
189static void devinet_sysctl_register(struct in_device *idev)
190{
191}
192static void devinet_sysctl_unregister(struct in_device *idev)
193{
194}
195#endif
196
197/* Locks all the inet devices. */
198
199static struct in_ifaddr *inet_alloc_ifa(void)
200{
201	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
202}
203
204static void inet_rcu_free_ifa(struct rcu_head *head)
205{
206	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
207	if (ifa->ifa_dev)
208		in_dev_put(ifa->ifa_dev);
209	kfree(ifa);
210}
211
212static void inet_free_ifa(struct in_ifaddr *ifa)
213{
214	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
215}
216
217void in_dev_finish_destroy(struct in_device *idev)
218{
219	struct net_device *dev = idev->dev;
220
221	WARN_ON(idev->ifa_list);
222	WARN_ON(idev->mc_list);
223	kfree(rcu_dereference_protected(idev->mc_hash, 1));
224#ifdef NET_REFCNT_DEBUG
225	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
226#endif
227	dev_put(dev);
228	if (!idev->dead)
229		pr_err("Freeing alive in_device %p\n", idev);
230	else
231		kfree(idev);
232}
233EXPORT_SYMBOL(in_dev_finish_destroy);
234
235static struct in_device *inetdev_init(struct net_device *dev)
236{
237	struct in_device *in_dev;
238
239	ASSERT_RTNL();
240
241	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
242	if (!in_dev)
243		goto out;
244	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245			sizeof(in_dev->cnf));
246	in_dev->cnf.sysctl = NULL;
247	in_dev->dev = dev;
248	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249	if (!in_dev->arp_parms)
250		goto out_kfree;
251	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252		dev_disable_lro(dev);
253	/* Reference in_dev->dev */
254	dev_hold(dev);
255	/* Account for reference dev->ip_ptr (below) */
256	in_dev_hold(in_dev);
257
258	devinet_sysctl_register(in_dev);
259	ip_mc_init_dev(in_dev);
260	if (dev->flags & IFF_UP)
261		ip_mc_up(in_dev);
262
263	/* we can receive as soon as ip_ptr is set -- do this last */
264	rcu_assign_pointer(dev->ip_ptr, in_dev);
265out:
266	return in_dev;
267out_kfree:
268	kfree(in_dev);
269	in_dev = NULL;
270	goto out;
271}
272
273static void in_dev_rcu_put(struct rcu_head *head)
274{
275	struct in_device *idev = container_of(head, struct in_device, rcu_head);
276	in_dev_put(idev);
277}
278
279static void inetdev_destroy(struct in_device *in_dev)
280{
281	struct in_ifaddr *ifa;
282	struct net_device *dev;
283
284	ASSERT_RTNL();
285
286	dev = in_dev->dev;
287
288	in_dev->dead = 1;
289
290	ip_mc_destroy_dev(in_dev);
291
292	while ((ifa = in_dev->ifa_list) != NULL) {
293		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
294		inet_free_ifa(ifa);
295	}
296
297	RCU_INIT_POINTER(dev->ip_ptr, NULL);
298
299	devinet_sysctl_unregister(in_dev);
300	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
301	arp_ifdown(dev);
302
303	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
304}
305
306int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
307{
308	rcu_read_lock();
309	for_primary_ifa(in_dev) {
310		if (inet_ifa_match(a, ifa)) {
311			if (!b || inet_ifa_match(b, ifa)) {
312				rcu_read_unlock();
313				return 1;
314			}
315		}
316	} endfor_ifa(in_dev);
317	rcu_read_unlock();
318	return 0;
319}
320
321static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322			 int destroy, struct nlmsghdr *nlh, u32 portid)
323{
324	struct in_ifaddr *promote = NULL;
325	struct in_ifaddr *ifa, *ifa1 = *ifap;
326	struct in_ifaddr *last_prim = in_dev->ifa_list;
327	struct in_ifaddr *prev_prom = NULL;
328	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
329
330	ASSERT_RTNL();
331
332	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
333	 * unless alias promotion is set.
334	 */
335
336	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
337		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
338
339		while ((ifa = *ifap1) != NULL) {
340			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
341			    ifa1->ifa_scope <= ifa->ifa_scope)
342				last_prim = ifa;
343
344			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
345			    ifa1->ifa_mask != ifa->ifa_mask ||
346			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
347				ifap1 = &ifa->ifa_next;
348				prev_prom = ifa;
349				continue;
350			}
351
352			if (!do_promote) {
353				inet_hash_remove(ifa);
354				*ifap1 = ifa->ifa_next;
355
356				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
357				blocking_notifier_call_chain(&inetaddr_chain,
358						NETDEV_DOWN, ifa);
359				inet_free_ifa(ifa);
360			} else {
361				promote = ifa;
362				break;
363			}
364		}
365	}
366
367	/* On promotion all secondaries from the subnet change their
368	 * primary IP; we must remove all their routes silently and
369	 * later add them back with the new prefsrc. Do this while all
370	 * addresses are still on the device list.
371	 */
372	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
373		if (ifa1->ifa_mask == ifa->ifa_mask &&
374		    inet_ifa_match(ifa1->ifa_address, ifa))
375			fib_del_ifaddr(ifa, ifa1);
376	}
377
378	/* 2. Unlink it */
379
380	*ifap = ifa1->ifa_next;
381	inet_hash_remove(ifa1);
382
383	/* 3. Announce address deletion */
384
385	/* Send message first, then call notifier.
386	   At first sight, the FIB update triggered by the notifier
387	   will refer to an already deleted ifaddr, which could confuse
388	   netlink listeners. It is not true: gated sees that the route
389	   was deleted and, if it still thinks the ifaddr is valid, it
390	   will try to restore the deleted routes... Grr.
391	   So this order is correct.
392	 */
393	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
394	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
395
396	if (promote) {
397		struct in_ifaddr *next_sec = promote->ifa_next;
398
399		if (prev_prom) {
400			prev_prom->ifa_next = promote->ifa_next;
401			promote->ifa_next = last_prim->ifa_next;
402			last_prim->ifa_next = promote;
403		}
404
405		promote->ifa_flags &= ~IFA_F_SECONDARY;
406		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
407		blocking_notifier_call_chain(&inetaddr_chain,
408				NETDEV_UP, promote);
409		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
410			if (ifa1->ifa_mask != ifa->ifa_mask ||
411			    !inet_ifa_match(ifa1->ifa_address, ifa))
412					continue;
413			fib_add_ifaddr(ifa);
414		}
415
416	}
417	if (destroy)
418		inet_free_ifa(ifa1);
419}
420
421static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
422			 int destroy)
423{
424	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
425}
426
427static void check_lifetime(struct work_struct *work);
428
429static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
430
431static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
432			     u32 portid)
433{
434	struct in_device *in_dev = ifa->ifa_dev;
435	struct in_ifaddr *ifa1, **ifap, **last_primary;
436
437	ASSERT_RTNL();
438
439	if (!ifa->ifa_local) {
440		inet_free_ifa(ifa);
441		return 0;
442	}
443
444	ifa->ifa_flags &= ~IFA_F_SECONDARY;
445	last_primary = &in_dev->ifa_list;
446
447	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
448	     ifap = &ifa1->ifa_next) {
449		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
450		    ifa->ifa_scope <= ifa1->ifa_scope)
451			last_primary = &ifa1->ifa_next;
452		if (ifa1->ifa_mask == ifa->ifa_mask &&
453		    inet_ifa_match(ifa1->ifa_address, ifa)) {
454			if (ifa1->ifa_local == ifa->ifa_local) {
455				inet_free_ifa(ifa);
456				return -EEXIST;
457			}
458			if (ifa1->ifa_scope != ifa->ifa_scope) {
459				inet_free_ifa(ifa);
460				return -EINVAL;
461			}
462			ifa->ifa_flags |= IFA_F_SECONDARY;
463		}
464	}
465
466	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
467		net_srandom(ifa->ifa_local);
468		ifap = last_primary;
469	}
470
471	ifa->ifa_next = *ifap;
472	*ifap = ifa;
473
474	inet_hash_insert(dev_net(in_dev->dev), ifa);
475
476	cancel_delayed_work(&check_lifetime_work);
477	schedule_delayed_work(&check_lifetime_work, 0);
478
479	/* Send message first, then call notifier.
480	   The notifier will trigger a FIB update, so netlink
481	   listeners will know about the new ifaddr. */
482	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
483	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
484
485	return 0;
486}
487
488static int inet_insert_ifa(struct in_ifaddr *ifa)
489{
490	return __inet_insert_ifa(ifa, NULL, 0);
491}
492
493static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
494{
495	struct in_device *in_dev = __in_dev_get_rtnl(dev);
496
497	ASSERT_RTNL();
498
499	if (!in_dev) {
500		inet_free_ifa(ifa);
501		return -ENOBUFS;
502	}
503	ipv4_devconf_setall(in_dev);
504	neigh_parms_data_state_setall(in_dev->arp_parms);
505	if (ifa->ifa_dev != in_dev) {
506		WARN_ON(ifa->ifa_dev);
507		in_dev_hold(in_dev);
508		ifa->ifa_dev = in_dev;
509	}
510	if (ipv4_is_loopback(ifa->ifa_local))
511		ifa->ifa_scope = RT_SCOPE_HOST;
512	return inet_insert_ifa(ifa);
513}
514
515/* Caller must hold RCU or RTNL:
516 * we don't take a reference on the found in_device.
517 */
518struct in_device *inetdev_by_index(struct net *net, int ifindex)
519{
520	struct net_device *dev;
521	struct in_device *in_dev = NULL;
522
523	rcu_read_lock();
524	dev = dev_get_by_index_rcu(net, ifindex);
525	if (dev)
526		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
527	rcu_read_unlock();
528	return in_dev;
529}
530EXPORT_SYMBOL(inetdev_by_index);
531
532/* Called only from RTNL-locked context. No locks taken. */
533
534struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
535				    __be32 mask)
536{
537	ASSERT_RTNL();
538
539	for_primary_ifa(in_dev) {
540		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
541			return ifa;
542	} endfor_ifa(in_dev);
543	return NULL;
544}
545
546static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
547{
548	struct net *net = sock_net(skb->sk);
549	struct nlattr *tb[IFA_MAX+1];
550	struct in_device *in_dev;
551	struct ifaddrmsg *ifm;
552	struct in_ifaddr *ifa, **ifap;
553	int err = -EINVAL;
554
555	ASSERT_RTNL();
556
557	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
558	if (err < 0)
559		goto errout;
560
561	ifm = nlmsg_data(nlh);
562	in_dev = inetdev_by_index(net, ifm->ifa_index);
563	if (in_dev == NULL) {
564		err = -ENODEV;
565		goto errout;
566	}
567
568	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
569	     ifap = &ifa->ifa_next) {
570		if (tb[IFA_LOCAL] &&
571		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
572			continue;
573
574		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
575			continue;
576
577		if (tb[IFA_ADDRESS] &&
578		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
579		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
580			continue;
581
582		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
583		return 0;
584	}
585
586	err = -EADDRNOTAVAIL;
587errout:
588	return err;
589}
590
591#define INFINITY_LIFE_TIME	0xFFFFFFFF
592
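/* Expire address lifetimes: a first pass over each hash bucket under RCU
 * only checks whether any non-permanent address needs attention; only then
 * is a second pass made under the RTNL to delete addresses whose valid
 * lifetime ran out and to mark the rest IFA_F_DEPRECATED once their
 * preferred lifetime passes.  The work then reschedules itself for the
 * earliest upcoming deadline.
 */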
593static void check_lifetime(struct work_struct *work)
594{
595	unsigned long now, next, next_sec, next_sched;
596	struct in_ifaddr *ifa;
597	struct hlist_node *n;
598	int i;
599
600	now = jiffies;
601	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
602
603	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
604		bool change_needed = false;
605
606		rcu_read_lock();
607		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
608			unsigned long age;
609
610			if (ifa->ifa_flags & IFA_F_PERMANENT)
611				continue;
612
613			/* We try to batch several events at once. */
614			age = (now - ifa->ifa_tstamp +
615			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
616
617			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
618			    age >= ifa->ifa_valid_lft) {
619				change_needed = true;
620			} else if (ifa->ifa_preferred_lft ==
621				   INFINITY_LIFE_TIME) {
622				continue;
623			} else if (age >= ifa->ifa_preferred_lft) {
624				if (time_before(ifa->ifa_tstamp +
625						ifa->ifa_valid_lft * HZ, next))
626					next = ifa->ifa_tstamp +
627					       ifa->ifa_valid_lft * HZ;
628
629				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
630					change_needed = true;
631			} else if (time_before(ifa->ifa_tstamp +
632					       ifa->ifa_preferred_lft * HZ,
633					       next)) {
634				next = ifa->ifa_tstamp +
635				       ifa->ifa_preferred_lft * HZ;
636			}
637		}
638		rcu_read_unlock();
639		if (!change_needed)
640			continue;
641		rtnl_lock();
642		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
643			unsigned long age;
644
645			if (ifa->ifa_flags & IFA_F_PERMANENT)
646				continue;
647
648			/* We try to batch several events at once. */
649			age = (now - ifa->ifa_tstamp +
650			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
651
652			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
653			    age >= ifa->ifa_valid_lft) {
654				struct in_ifaddr **ifap;
655
656				for (ifap = &ifa->ifa_dev->ifa_list;
657				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
658					if (*ifap == ifa) {
659						inet_del_ifa(ifa->ifa_dev,
660							     ifap, 1);
661						break;
662					}
663				}
664			} else if (ifa->ifa_preferred_lft !=
665				   INFINITY_LIFE_TIME &&
666				   age >= ifa->ifa_preferred_lft &&
667				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
668				ifa->ifa_flags |= IFA_F_DEPRECATED;
669				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
670			}
671		}
672		rtnl_unlock();
673	}
674
675	next_sec = round_jiffies_up(next);
676	next_sched = next;
677
678	/* If rounded timeout is accurate enough, accept it. */
679	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
680		next_sched = next_sec;
681
682	now = jiffies;
683	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
684	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
685		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
686
687	schedule_delayed_work(&check_lifetime_work, next_sched - now);
688}
689
690static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
691			     __u32 prefered_lft)
692{
693	unsigned long timeout;
694
695	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
696
697	timeout = addrconf_timeout_fixup(valid_lft, HZ);
698	if (addrconf_finite_timeout(timeout))
699		ifa->ifa_valid_lft = timeout;
700	else
701		ifa->ifa_flags |= IFA_F_PERMANENT;
702
703	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
704	if (addrconf_finite_timeout(timeout)) {
705		if (timeout == 0)
706			ifa->ifa_flags |= IFA_F_DEPRECATED;
707		ifa->ifa_preferred_lft = timeout;
708	}
709	ifa->ifa_tstamp = jiffies;
710	if (!ifa->ifa_cstamp)
711		ifa->ifa_cstamp = ifa->ifa_tstamp;
712}
713
714static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
715				       __u32 *pvalid_lft, __u32 *pprefered_lft)
716{
717	struct nlattr *tb[IFA_MAX+1];
718	struct in_ifaddr *ifa;
719	struct ifaddrmsg *ifm;
720	struct net_device *dev;
721	struct in_device *in_dev;
722	int err;
723
724	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
725	if (err < 0)
726		goto errout;
727
728	ifm = nlmsg_data(nlh);
729	err = -EINVAL;
730	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
731		goto errout;
732
733	dev = __dev_get_by_index(net, ifm->ifa_index);
734	err = -ENODEV;
735	if (dev == NULL)
736		goto errout;
737
738	in_dev = __in_dev_get_rtnl(dev);
739	err = -ENOBUFS;
740	if (in_dev == NULL)
741		goto errout;
742
743	ifa = inet_alloc_ifa();
744	if (ifa == NULL)
745		/*
746		 * A potential in_dev allocation can be left alive; it stays
747		 * assigned to its device and is destroyed with it.
748		 */
749		goto errout;
750
751	ipv4_devconf_setall(in_dev);
752	neigh_parms_data_state_setall(in_dev->arp_parms);
753	in_dev_hold(in_dev);
754
755	if (tb[IFA_ADDRESS] == NULL)
756		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
757
758	INIT_HLIST_NODE(&ifa->hash);
759	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
760	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
761	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
762					 ifm->ifa_flags;
763	ifa->ifa_scope = ifm->ifa_scope;
764	ifa->ifa_dev = in_dev;
765
766	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
767	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
768
769	if (tb[IFA_BROADCAST])
770		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
771
772	if (tb[IFA_LABEL])
773		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
774	else
775		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
776
777	if (tb[IFA_CACHEINFO]) {
778		struct ifa_cacheinfo *ci;
779
780		ci = nla_data(tb[IFA_CACHEINFO]);
781		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
782			err = -EINVAL;
783			goto errout_free;
784		}
785		*pvalid_lft = ci->ifa_valid;
786		*pprefered_lft = ci->ifa_prefered;
787	}
788
789	return ifa;
790
791errout_free:
792	inet_free_ifa(ifa);
793errout:
794	return ERR_PTR(err);
795}
796
797static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
798{
799	struct in_device *in_dev = ifa->ifa_dev;
800	struct in_ifaddr *ifa1, **ifap;
801
802	if (!ifa->ifa_local)
803		return NULL;
804
805	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
806	     ifap = &ifa1->ifa_next) {
807		if (ifa1->ifa_mask == ifa->ifa_mask &&
808		    inet_ifa_match(ifa1->ifa_address, ifa) &&
809		    ifa1->ifa_local == ifa->ifa_local)
810			return ifa1;
811	}
812	return NULL;
813}
814
815static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
816{
817	struct net *net = sock_net(skb->sk);
818	struct in_ifaddr *ifa;
819	struct in_ifaddr *ifa_existing;
820	__u32 valid_lft = INFINITY_LIFE_TIME;
821	__u32 prefered_lft = INFINITY_LIFE_TIME;
822
823	ASSERT_RTNL();
824
825	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
826	if (IS_ERR(ifa))
827		return PTR_ERR(ifa);
828
829	ifa_existing = find_matching_ifa(ifa);
830	if (!ifa_existing) {
831		/* It would be best to check for !NLM_F_CREATE here but
832		 * userspace already relies on not having to provide this.
833		 */
834		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
835		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
836	} else {
837		inet_free_ifa(ifa);
838
839		if (nlh->nlmsg_flags & NLM_F_EXCL ||
840		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
841			return -EEXIST;
842		ifa = ifa_existing;
843		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
844		cancel_delayed_work(&check_lifetime_work);
845		schedule_delayed_work(&check_lifetime_work, 0);
846		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
847		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
848	}
849	return 0;
850}
851
852/*
853 *	Determine a default network mask, based on the IP address.
854 */
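/* For example (editorial note): 10.1.2.3 falls in class A and yields 8,
 * 172.16.0.1 in class B yields 16, 192.168.1.1 in class C yields 24,
 * anything in 0.0.0.0/8 yields 0, and class D/E addresses (e.g. multicast)
 * yield -1.
 */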
855
856static int inet_abc_len(__be32 addr)
857{
858	int rc = -1;	/* Something else, probably a multicast. */
859
860	if (ipv4_is_zeronet(addr))
861		rc = 0;
862	else {
863		__u32 haddr = ntohl(addr);
864
865		if (IN_CLASSA(haddr))
866			rc = 8;
867		else if (IN_CLASSB(haddr))
868			rc = 16;
869		else if (IN_CLASSC(haddr))
870			rc = 24;
871	}
872
873	return rc;
874}
875
876
877int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
878{
879	struct ifreq ifr;
880	struct sockaddr_in sin_orig;
881	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
882	struct in_device *in_dev;
883	struct in_ifaddr **ifap = NULL;
884	struct in_ifaddr *ifa = NULL;
885	struct net_device *dev;
886	char *colon;
887	int ret = -EFAULT;
888	int tryaddrmatch = 0;
889
890	/*
891	 *	Fetch the caller's info block into kernel space
892	 */
893
894	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
895		goto out;
896	ifr.ifr_name[IFNAMSIZ - 1] = 0;
897
898	/* save original address for comparison */
899	memcpy(&sin_orig, sin, sizeof(*sin));
900
901	colon = strchr(ifr.ifr_name, ':');
902	if (colon)
903		*colon = 0;
904
905	dev_load(net, ifr.ifr_name);
906
907	switch (cmd) {
908	case SIOCGIFADDR:	/* Get interface address */
909	case SIOCGIFBRDADDR:	/* Get the broadcast address */
910	case SIOCGIFDSTADDR:	/* Get the destination address */
911	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
912		/* Note that these ioctls will not sleep,
913		   so that we do not impose a lock.
914		   One day we will be forced to put shlock here (I mean SMP)
915		 */
916		tryaddrmatch = (sin_orig.sin_family == AF_INET);
917		memset(sin, 0, sizeof(*sin));
918		sin->sin_family = AF_INET;
919		break;
920
921	case SIOCSIFFLAGS:
922		ret = -EPERM;
923		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
924			goto out;
925		break;
926	case SIOCSIFADDR:	/* Set interface address (and family) */
927	case SIOCSIFBRDADDR:	/* Set the broadcast address */
928	case SIOCSIFDSTADDR:	/* Set the destination address */
929	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
930		ret = -EPERM;
931		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
932			goto out;
933		ret = -EINVAL;
934		if (sin->sin_family != AF_INET)
935			goto out;
936		break;
937	default:
938		ret = -EINVAL;
939		goto out;
940	}
941
942	rtnl_lock();
943
944	ret = -ENODEV;
945	dev = __dev_get_by_name(net, ifr.ifr_name);
946	if (!dev)
947		goto done;
948
949	if (colon)
950		*colon = ':';
951
952	in_dev = __in_dev_get_rtnl(dev);
953	if (in_dev) {
954		if (tryaddrmatch) {
955			/* Matthias Andree */
956			/* compare label and address (4.4BSD style) */
957			/* note: we only do this for a limited set of ioctls
958			   and only if the original address family was AF_INET.
959			   This is checked above. */
960			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
961			     ifap = &ifa->ifa_next) {
962				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
963				    sin_orig.sin_addr.s_addr ==
964							ifa->ifa_local) {
965					break; /* found */
966				}
967			}
968		}
969		/* we didn't get a match, maybe the application is
970		   4.3BSD-style and passed in junk so we fall back to
971		   comparing just the label */
972		if (!ifa) {
973			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
974			     ifap = &ifa->ifa_next)
975				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
976					break;
977		}
978	}
979
980	ret = -EADDRNOTAVAIL;
981	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
982		goto done;
983
984	switch (cmd) {
985	case SIOCGIFADDR:	/* Get interface address */
986		sin->sin_addr.s_addr = ifa->ifa_local;
987		goto rarok;
988
989	case SIOCGIFBRDADDR:	/* Get the broadcast address */
990		sin->sin_addr.s_addr = ifa->ifa_broadcast;
991		goto rarok;
992
993	case SIOCGIFDSTADDR:	/* Get the destination address */
994		sin->sin_addr.s_addr = ifa->ifa_address;
995		goto rarok;
996
997	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
998		sin->sin_addr.s_addr = ifa->ifa_mask;
999		goto rarok;
1000
1001	case SIOCSIFFLAGS:
1002		if (colon) {
1003			ret = -EADDRNOTAVAIL;
1004			if (!ifa)
1005				break;
1006			ret = 0;
1007			if (!(ifr.ifr_flags & IFF_UP))
1008				inet_del_ifa(in_dev, ifap, 1);
1009			break;
1010		}
1011		ret = dev_change_flags(dev, ifr.ifr_flags);
1012		break;
1013
1014	case SIOCSIFADDR:	/* Set interface address (and family) */
1015		ret = -EINVAL;
1016		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1017			break;
1018
1019		if (!ifa) {
1020			ret = -ENOBUFS;
1021			ifa = inet_alloc_ifa();
1022			if (!ifa)
1023				break;
1024			INIT_HLIST_NODE(&ifa->hash);
1025			if (colon)
1026				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1027			else
1028				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1029		} else {
1030			ret = 0;
1031			if (ifa->ifa_local == sin->sin_addr.s_addr)
1032				break;
1033			inet_del_ifa(in_dev, ifap, 0);
1034			ifa->ifa_broadcast = 0;
1035			ifa->ifa_scope = 0;
1036		}
1037
1038		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1039
1040		if (!(dev->flags & IFF_POINTOPOINT)) {
1041			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1042			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1043			if ((dev->flags & IFF_BROADCAST) &&
1044			    ifa->ifa_prefixlen < 31)
1045				ifa->ifa_broadcast = ifa->ifa_address |
1046						     ~ifa->ifa_mask;
1047		} else {
1048			ifa->ifa_prefixlen = 32;
1049			ifa->ifa_mask = inet_make_mask(32);
1050		}
1051		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1052		ret = inet_set_ifa(dev, ifa);
1053		break;
1054
1055	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1056		ret = 0;
1057		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1058			inet_del_ifa(in_dev, ifap, 0);
1059			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1060			inet_insert_ifa(ifa);
1061		}
1062		break;
1063
1064	case SIOCSIFDSTADDR:	/* Set the destination address */
1065		ret = 0;
1066		if (ifa->ifa_address == sin->sin_addr.s_addr)
1067			break;
1068		ret = -EINVAL;
1069		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1070			break;
1071		ret = 0;
1072		inet_del_ifa(in_dev, ifap, 0);
1073		ifa->ifa_address = sin->sin_addr.s_addr;
1074		inet_insert_ifa(ifa);
1075		break;
1076
1077	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1078
1079		/*
1080		 *	The mask we set must be legal.
1081		 */
1082		ret = -EINVAL;
1083		if (bad_mask(sin->sin_addr.s_addr, 0))
1084			break;
1085		ret = 0;
1086		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1087			__be32 old_mask = ifa->ifa_mask;
1088			inet_del_ifa(in_dev, ifap, 0);
1089			ifa->ifa_mask = sin->sin_addr.s_addr;
1090			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1091
1092			/* See if the current broadcast address matches
1093			 * the old netmask; if so, recalculate the
1094			 * broadcast address. Otherwise it's a funny
1095			 * address, so don't touch it since the user
1096			 * seems to know what (s)he's doing...
1097			 */
1098			if ((dev->flags & IFF_BROADCAST) &&
1099			    (ifa->ifa_prefixlen < 31) &&
1100			    (ifa->ifa_broadcast ==
1101			     (ifa->ifa_local|~old_mask))) {
1102				ifa->ifa_broadcast = (ifa->ifa_local |
1103						      ~sin->sin_addr.s_addr);
1104			}
1105			inet_insert_ifa(ifa);
1106		}
1107		break;
1108	}
1109done:
1110	rtnl_unlock();
1111out:
1112	return ret;
1113rarok:
1114	rtnl_unlock();
1115	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1116	goto out;
1117}
1118
1119static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1120{
1121	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1122	struct in_ifaddr *ifa;
1123	struct ifreq ifr;
1124	int done = 0;
1125
1126	if (!in_dev)
1127		goto out;
1128
1129	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1130		if (!buf) {
1131			done += sizeof(ifr);
1132			continue;
1133		}
1134		if (len < (int) sizeof(ifr))
1135			break;
1136		memset(&ifr, 0, sizeof(struct ifreq));
1137		strcpy(ifr.ifr_name, ifa->ifa_label);
1138
1139		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1140		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1141								ifa->ifa_local;
1142
1143		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1144			done = -EFAULT;
1145			break;
1146		}
1147		buf  += sizeof(struct ifreq);
1148		len  -= sizeof(struct ifreq);
1149		done += sizeof(struct ifreq);
1150	}
1151out:
1152	return done;
1153}
1154
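/* Pick a primary address of @dev whose scope value does not exceed @scope,
 * preferring one on the same subnet as @dst; if @dev has none, fall back
 * to scanning every device in the namespace for a suitable address of
 * non-link scope (which is why it matters that lo comes first in the
 * dev_base list).
 */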
1155__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1156{
1157	__be32 addr = 0;
1158	struct in_device *in_dev;
1159	struct net *net = dev_net(dev);
1160
1161	rcu_read_lock();
1162	in_dev = __in_dev_get_rcu(dev);
1163	if (!in_dev)
1164		goto no_in_dev;
1165
1166	for_primary_ifa(in_dev) {
1167		if (ifa->ifa_scope > scope)
1168			continue;
1169		if (!dst || inet_ifa_match(dst, ifa)) {
1170			addr = ifa->ifa_local;
1171			break;
1172		}
1173		if (!addr)
1174			addr = ifa->ifa_local;
1175	} endfor_ifa(in_dev);
1176
1177	if (addr)
1178		goto out_unlock;
1179no_in_dev:
1180
1181	/* Non-loopback addresses on the loopback device should be
1182	   preferred in this case. It is important that lo is the first
1183	   interface in the dev_base list.
1184	 */
1185	for_each_netdev_rcu(net, dev) {
1186		in_dev = __in_dev_get_rcu(dev);
1187		if (!in_dev)
1188			continue;
1189
1190		for_primary_ifa(in_dev) {
1191			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1192			    ifa->ifa_scope <= scope) {
1193				addr = ifa->ifa_local;
1194				goto out_unlock;
1195			}
1196		} endfor_ifa(in_dev);
1197	}
1198out_unlock:
1199	rcu_read_unlock();
1200	return addr;
1201}
1202EXPORT_SYMBOL(inet_select_addr);
1203
1204static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1205			      __be32 local, int scope)
1206{
1207	int same = 0;
1208	__be32 addr = 0;
1209
1210	for_ifa(in_dev) {
1211		if (!addr &&
1212		    (local == ifa->ifa_local || !local) &&
1213		    ifa->ifa_scope <= scope) {
1214			addr = ifa->ifa_local;
1215			if (same)
1216				break;
1217		}
1218		if (!same) {
1219			same = (!local || inet_ifa_match(local, ifa)) &&
1220				(!dst || inet_ifa_match(dst, ifa));
1221			if (same && addr) {
1222				if (local || !dst)
1223					break;
1224				/* Is the selected addr in the dst subnet? */
1225				if (inet_ifa_match(addr, ifa))
1226					break;
1227				/* No, then can we use new local src? */
1228				if (ifa->ifa_scope <= scope) {
1229					addr = ifa->ifa_local;
1230					break;
1231				}
1232				/* search for large dst subnet for addr */
1233				same = 0;
1234			}
1235		}
1236	} endfor_ifa(in_dev);
1237
1238	return same ? addr : 0;
1239}
1240
1241/*
1242 * Confirm that local IP address exists using wildcards:
1243 * - net: netns to check, cannot be NULL
1244 * - in_dev: only on this interface, NULL=any interface
1245 * - dst: only in the same subnet as dst, 0=any dst
1246 * - local: address, 0=autoselect the local address
1247 * - scope: maximum allowed scope value for the local address
1248 */
1249__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1250			 __be32 dst, __be32 local, int scope)
1251{
1252	__be32 addr = 0;
1253	struct net_device *dev;
1254
1255	if (in_dev != NULL)
1256		return confirm_addr_indev(in_dev, dst, local, scope);
1257
1258	rcu_read_lock();
1259	for_each_netdev_rcu(net, dev) {
1260		in_dev = __in_dev_get_rcu(dev);
1261		if (in_dev) {
1262			addr = confirm_addr_indev(in_dev, dst, local, scope);
1263			if (addr)
1264				break;
1265		}
1266	}
1267	rcu_read_unlock();
1268
1269	return addr;
1270}
1271EXPORT_SYMBOL(inet_confirm_addr);
1272
1273/*
1274 *	Device notifier
1275 */
1276
1277int register_inetaddr_notifier(struct notifier_block *nb)
1278{
1279	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1280}
1281EXPORT_SYMBOL(register_inetaddr_notifier);
1282
1283int unregister_inetaddr_notifier(struct notifier_block *nb)
1284{
1285	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1286}
1287EXPORT_SYMBOL(unregister_inetaddr_notifier);
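/* Editorial sketch (hypothetical, not part of the original file): a minimal
 * user of the notifier chain above.  Callbacks are invoked under the RTNL
 * and receive the affected struct in_ifaddr as the notifier data for the
 * NETDEV_UP / NETDEV_DOWN events raised in this file.
 */
#if 0
static int example_inetaddr_event(struct notifier_block *nb,
				  unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = ptr;

	if (event == NETDEV_UP)
		pr_info("address %pI4 added with label %s\n",
			&ifa->ifa_local, ifa->ifa_label);
	return NOTIFY_DONE;
}

static struct notifier_block example_inetaddr_nb = {
	.notifier_call = example_inetaddr_event,
};
/* registered somewhere with register_inetaddr_notifier(&example_inetaddr_nb); */
#endif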
1288
1289/* Rename ifa_labels for a device name change. Make some effort to preserve
1290 * existing alias numbering and to create unique labels if possible.
1291*/
1292static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1293{
1294	struct in_ifaddr *ifa;
1295	int named = 0;
1296
1297	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1298		char old[IFNAMSIZ], *dot;
1299
1300		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1301		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1302		if (named++ == 0)
1303			goto skip;
1304		dot = strchr(old, ':');
1305		if (dot == NULL) {
1306			sprintf(old, ":%d", named);
1307			dot = old;
1308		}
1309		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1310			strcat(ifa->ifa_label, dot);
1311		else
1312			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1313skip:
1314		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1315	}
1316}
1317
1318static bool inetdev_valid_mtu(unsigned int mtu)
1319{
1320	return mtu >= 68;	/* 68 = minimum IPv4 MTU (RFC 791) */
1321}
1322
1323static void inetdev_send_gratuitous_arp(struct net_device *dev,
1324					struct in_device *in_dev)
1325
1326{
1327	struct in_ifaddr *ifa;
1328
1329	for (ifa = in_dev->ifa_list; ifa;
1330	     ifa = ifa->ifa_next) {
1331		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1332			 ifa->ifa_local, dev,
1333			 ifa->ifa_local, NULL,
1334			 dev->dev_addr, NULL);
1335	}
1336}
1337
1338/* Called only under RTNL semaphore */
1339
1340static int inetdev_event(struct notifier_block *this, unsigned long event,
1341			 void *ptr)
1342{
1343	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1344	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1345
1346	ASSERT_RTNL();
1347
1348	if (!in_dev) {
1349		if (event == NETDEV_REGISTER) {
1350			in_dev = inetdev_init(dev);
1351			if (!in_dev)
1352				return notifier_from_errno(-ENOMEM);
1353			if (dev->flags & IFF_LOOPBACK) {
1354				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1355				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1356			}
1357		} else if (event == NETDEV_CHANGEMTU) {
1358			/* Re-enabling IP */
1359			if (inetdev_valid_mtu(dev->mtu))
1360				in_dev = inetdev_init(dev);
1361		}
1362		goto out;
1363	}
1364
1365	switch (event) {
1366	case NETDEV_REGISTER:
1367		pr_debug("%s: bug\n", __func__);
1368		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1369		break;
1370	case NETDEV_UP:
1371		if (!inetdev_valid_mtu(dev->mtu))
1372			break;
1373		if (dev->flags & IFF_LOOPBACK) {
1374			struct in_ifaddr *ifa = inet_alloc_ifa();
1375
1376			if (ifa) {
1377				INIT_HLIST_NODE(&ifa->hash);
1378				ifa->ifa_local =
1379				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1380				ifa->ifa_prefixlen = 8;
1381				ifa->ifa_mask = inet_make_mask(8);
1382				in_dev_hold(in_dev);
1383				ifa->ifa_dev = in_dev;
1384				ifa->ifa_scope = RT_SCOPE_HOST;
1385				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1386				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1387						 INFINITY_LIFE_TIME);
1388				ipv4_devconf_setall(in_dev);
1389				neigh_parms_data_state_setall(in_dev->arp_parms);
1390				inet_insert_ifa(ifa);
1391			}
1392		}
1393		ip_mc_up(in_dev);
1394		/* fall through */
1395	case NETDEV_CHANGEADDR:
1396		if (!IN_DEV_ARP_NOTIFY(in_dev))
1397			break;
1398		/* fall through */
1399	case NETDEV_NOTIFY_PEERS:
1400		/* Send gratuitous ARP to notify of link change */
1401		inetdev_send_gratuitous_arp(dev, in_dev);
1402		break;
1403	case NETDEV_DOWN:
1404		ip_mc_down(in_dev);
1405		break;
1406	case NETDEV_PRE_TYPE_CHANGE:
1407		ip_mc_unmap(in_dev);
1408		break;
1409	case NETDEV_POST_TYPE_CHANGE:
1410		ip_mc_remap(in_dev);
1411		break;
1412	case NETDEV_CHANGEMTU:
1413		if (inetdev_valid_mtu(dev->mtu))
1414			break;
1415		/* disable IP when MTU is not enough */
1416	case NETDEV_UNREGISTER:
1417		inetdev_destroy(in_dev);
1418		break;
1419	case NETDEV_CHANGENAME:
1420		/* Do not notify about label change, this event is
1421		 * not interesting to applications using netlink.
1422		 */
1423		inetdev_changename(dev, in_dev);
1424
1425		devinet_sysctl_unregister(in_dev);
1426		devinet_sysctl_register(in_dev);
1427		break;
1428	}
1429out:
1430	return NOTIFY_DONE;
1431}
1432
1433static struct notifier_block ip_netdev_notifier = {
1434	.notifier_call = inetdev_event,
1435};
1436
1437static size_t inet_nlmsg_size(void)
1438{
1439	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1440	       + nla_total_size(4) /* IFA_ADDRESS */
1441	       + nla_total_size(4) /* IFA_LOCAL */
1442	       + nla_total_size(4) /* IFA_BROADCAST */
1443	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1444	       + nla_total_size(4);  /* IFA_FLAGS */
1445}
1446
1447static inline u32 cstamp_delta(unsigned long cstamp)
1448{
1449	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1450}
1451
1452static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1453			 unsigned long tstamp, u32 preferred, u32 valid)
1454{
1455	struct ifa_cacheinfo ci;
1456
1457	ci.cstamp = cstamp_delta(cstamp);
1458	ci.tstamp = cstamp_delta(tstamp);
1459	ci.ifa_prefered = preferred;
1460	ci.ifa_valid = valid;
1461
1462	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1463}
1464
1465static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1466			    u32 portid, u32 seq, int event, unsigned int flags)
1467{
1468	struct ifaddrmsg *ifm;
1469	struct nlmsghdr  *nlh;
1470	u32 preferred, valid;
1471
1472	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1473	if (nlh == NULL)
1474		return -EMSGSIZE;
1475
1476	ifm = nlmsg_data(nlh);
1477	ifm->ifa_family = AF_INET;
1478	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1479	ifm->ifa_flags = ifa->ifa_flags;
1480	ifm->ifa_scope = ifa->ifa_scope;
1481	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1482
1483	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1484		preferred = ifa->ifa_preferred_lft;
1485		valid = ifa->ifa_valid_lft;
1486		if (preferred != INFINITY_LIFE_TIME) {
1487			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1488
1489			if (preferred > tval)
1490				preferred -= tval;
1491			else
1492				preferred = 0;
1493			if (valid != INFINITY_LIFE_TIME) {
1494				if (valid > tval)
1495					valid -= tval;
1496				else
1497					valid = 0;
1498			}
1499		}
1500	} else {
1501		preferred = INFINITY_LIFE_TIME;
1502		valid = INFINITY_LIFE_TIME;
1503	}
1504	if ((ifa->ifa_address &&
1505	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1506	    (ifa->ifa_local &&
1507	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1508	    (ifa->ifa_broadcast &&
1509	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1510	    (ifa->ifa_label[0] &&
1511	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1512	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1513	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1514			  preferred, valid))
1515		goto nla_put_failure;
1516
1517	return nlmsg_end(skb, nlh);
1518
1519nla_put_failure:
1520	nlmsg_cancel(skb, nlh);
1521	return -EMSGSIZE;
1522}
1523
1524static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1525{
1526	struct net *net = sock_net(skb->sk);
1527	int h, s_h;
1528	int idx, s_idx;
1529	int ip_idx, s_ip_idx;
1530	struct net_device *dev;
1531	struct in_device *in_dev;
1532	struct in_ifaddr *ifa;
1533	struct hlist_head *head;
1534
1535	s_h = cb->args[0];
1536	s_idx = idx = cb->args[1];
1537	s_ip_idx = ip_idx = cb->args[2];
1538
1539	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1540		idx = 0;
1541		head = &net->dev_index_head[h];
1542		rcu_read_lock();
1543		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1544			  net->dev_base_seq;
1545		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1546			if (idx < s_idx)
1547				goto cont;
1548			if (h > s_h || idx > s_idx)
1549				s_ip_idx = 0;
1550			in_dev = __in_dev_get_rcu(dev);
1551			if (!in_dev)
1552				goto cont;
1553
1554			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1555			     ifa = ifa->ifa_next, ip_idx++) {
1556				if (ip_idx < s_ip_idx)
1557					continue;
1558				if (inet_fill_ifaddr(skb, ifa,
1559					     NETLINK_CB(cb->skb).portid,
1560					     cb->nlh->nlmsg_seq,
1561					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1562					rcu_read_unlock();
1563					goto done;
1564				}
1565				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1566			}
1567cont:
1568			idx++;
1569		}
1570		rcu_read_unlock();
1571	}
1572
1573done:
1574	cb->args[0] = h;
1575	cb->args[1] = idx;
1576	cb->args[2] = ip_idx;
1577
1578	return skb->len;
1579}
1580
1581static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1582		      u32 portid)
1583{
1584	struct sk_buff *skb;
1585	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1586	int err = -ENOBUFS;
1587	struct net *net;
1588
1589	net = dev_net(ifa->ifa_dev->dev);
1590	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1591	if (skb == NULL)
1592		goto errout;
1593
1594	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1595	if (err < 0) {
1596		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1597		WARN_ON(err == -EMSGSIZE);
1598		kfree_skb(skb);
1599		goto errout;
1600	}
1601	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1602	return;
1603errout:
1604	if (err < 0)
1605		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1606}
1607
1608static size_t inet_get_link_af_size(const struct net_device *dev)
1609{
1610	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1611
1612	if (!in_dev)
1613		return 0;
1614
1615	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1616}
1617
1618static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1619{
1620	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1621	struct nlattr *nla;
1622	int i;
1623
1624	if (!in_dev)
1625		return -ENODATA;
1626
1627	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1628	if (nla == NULL)
1629		return -EMSGSIZE;
1630
1631	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1632		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1633
1634	return 0;
1635}
1636
1637static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1638	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1639};
1640
1641static int inet_validate_link_af(const struct net_device *dev,
1642				 const struct nlattr *nla)
1643{
1644	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1645	int err, rem;
1646
1647	if (dev && !__in_dev_get_rtnl(dev))
1648		return -EAFNOSUPPORT;
1649
1650	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1651	if (err < 0)
1652		return err;
1653
1654	if (tb[IFLA_INET_CONF]) {
1655		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1656			int cfgid = nla_type(a);
1657
1658			if (nla_len(a) < 4)
1659				return -EINVAL;
1660
1661			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1662				return -EINVAL;
1663		}
1664	}
1665
1666	return 0;
1667}
1668
1669static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1670{
1671	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1672	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1673	int rem;
1674
1675	if (!in_dev)
1676		return -EAFNOSUPPORT;
1677
1678	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1679		BUG();
1680
1681	if (tb[IFLA_INET_CONF]) {
1682		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1683			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1684	}
1685
1686	return 0;
1687}
1688
1689static int inet_netconf_msgsize_devconf(int type)
1690{
1691	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1692		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1693
1694	/* type -1 is used for ALL */
1695	if (type == -1 || type == NETCONFA_FORWARDING)
1696		size += nla_total_size(4);
1697	if (type == -1 || type == NETCONFA_RP_FILTER)
1698		size += nla_total_size(4);
1699	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1700		size += nla_total_size(4);
1701	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1702		size += nla_total_size(4);
1703
1704	return size;
1705}
1706
1707static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1708				     struct ipv4_devconf *devconf, u32 portid,
1709				     u32 seq, int event, unsigned int flags,
1710				     int type)
1711{
1712	struct nlmsghdr  *nlh;
1713	struct netconfmsg *ncm;
1714
1715	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1716			flags);
1717	if (nlh == NULL)
1718		return -EMSGSIZE;
1719
1720	ncm = nlmsg_data(nlh);
1721	ncm->ncm_family = AF_INET;
1722
1723	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1724		goto nla_put_failure;
1725
1726	/* type -1 is used for ALL */
1727	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1728	    nla_put_s32(skb, NETCONFA_FORWARDING,
1729			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1730		goto nla_put_failure;
1731	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1732	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1733			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1734		goto nla_put_failure;
1735	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1736	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1737			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1738		goto nla_put_failure;
1739	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1740	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1741			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1742		goto nla_put_failure;
1743
1744	return nlmsg_end(skb, nlh);
1745
1746nla_put_failure:
1747	nlmsg_cancel(skb, nlh);
1748	return -EMSGSIZE;
1749}
1750
1751void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1752				 struct ipv4_devconf *devconf)
1753{
1754	struct sk_buff *skb;
1755	int err = -ENOBUFS;
1756
1757	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1758	if (skb == NULL)
1759		goto errout;
1760
1761	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1762					RTM_NEWNETCONF, 0, type);
1763	if (err < 0) {
1764		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1765		WARN_ON(err == -EMSGSIZE);
1766		kfree_skb(skb);
1767		goto errout;
1768	}
1769	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1770	return;
1771errout:
1772	if (err < 0)
1773		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1774}
1775
1776static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1777	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1778	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1779	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1780	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1781};
1782
1783static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1784				    struct nlmsghdr *nlh)
1785{
1786	struct net *net = sock_net(in_skb->sk);
1787	struct nlattr *tb[NETCONFA_MAX+1];
1788	struct netconfmsg *ncm;
1789	struct sk_buff *skb;
1790	struct ipv4_devconf *devconf;
1791	struct in_device *in_dev;
1792	struct net_device *dev;
1793	int ifindex;
1794	int err;
1795
1796	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1797			  devconf_ipv4_policy);
1798	if (err < 0)
1799		goto errout;
1800
1801	err = -EINVAL;
1802	if (!tb[NETCONFA_IFINDEX])
1803		goto errout;
1804
1805	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1806	switch (ifindex) {
1807	case NETCONFA_IFINDEX_ALL:
1808		devconf = net->ipv4.devconf_all;
1809		break;
1810	case NETCONFA_IFINDEX_DEFAULT:
1811		devconf = net->ipv4.devconf_dflt;
1812		break;
1813	default:
1814		dev = __dev_get_by_index(net, ifindex);
1815		if (dev == NULL)
1816			goto errout;
1817		in_dev = __in_dev_get_rtnl(dev);
1818		if (in_dev == NULL)
1819			goto errout;
1820		devconf = &in_dev->cnf;
1821		break;
1822	}
1823
1824	err = -ENOBUFS;
1825	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1826	if (skb == NULL)
1827		goto errout;
1828
1829	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1830					NETLINK_CB(in_skb).portid,
1831					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1832					-1);
1833	if (err < 0) {
1834		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1835		WARN_ON(err == -EMSGSIZE);
1836		kfree_skb(skb);
1837		goto errout;
1838	}
1839	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1840errout:
1841	return err;
1842}
1843
1844static int inet_netconf_dump_devconf(struct sk_buff *skb,
1845				     struct netlink_callback *cb)
1846{
1847	struct net *net = sock_net(skb->sk);
1848	int h, s_h;
1849	int idx, s_idx;
1850	struct net_device *dev;
1851	struct in_device *in_dev;
1852	struct hlist_head *head;
1853
1854	s_h = cb->args[0];
1855	s_idx = idx = cb->args[1];
1856
1857	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1858		idx = 0;
1859		head = &net->dev_index_head[h];
1860		rcu_read_lock();
1861		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1862			  net->dev_base_seq;
1863		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1864			if (idx < s_idx)
1865				goto cont;
1866			in_dev = __in_dev_get_rcu(dev);
1867			if (!in_dev)
1868				goto cont;
1869
1870			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1871						      &in_dev->cnf,
1872						      NETLINK_CB(cb->skb).portid,
1873						      cb->nlh->nlmsg_seq,
1874						      RTM_NEWNETCONF,
1875						      NLM_F_MULTI,
1876						      -1) <= 0) {
1877				rcu_read_unlock();
1878				goto done;
1879			}
1880			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1881cont:
1882			idx++;
1883		}
1884		rcu_read_unlock();
1885	}
1886	if (h == NETDEV_HASHENTRIES) {
1887		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1888					      net->ipv4.devconf_all,
1889					      NETLINK_CB(cb->skb).portid,
1890					      cb->nlh->nlmsg_seq,
1891					      RTM_NEWNETCONF, NLM_F_MULTI,
1892					      -1) <= 0)
1893			goto done;
1894		else
1895			h++;
1896	}
1897	if (h == NETDEV_HASHENTRIES + 1) {
1898		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1899					      net->ipv4.devconf_dflt,
1900					      NETLINK_CB(cb->skb).portid,
1901					      cb->nlh->nlmsg_seq,
1902					      RTM_NEWNETCONF, NLM_F_MULTI,
1903					      -1) <= 0)
1904			goto done;
1905		else
1906			h++;
1907	}
1908done:
1909	cb->args[0] = h;
1910	cb->args[1] = idx;
1911
1912	return skb->len;
1913}
1914
1915#ifdef CONFIG_SYSCTL
1916
1917static void devinet_copy_dflt_conf(struct net *net, int i)
1918{
1919	struct net_device *dev;
1920
1921	rcu_read_lock();
1922	for_each_netdev_rcu(net, dev) {
1923		struct in_device *in_dev;
1924
1925		in_dev = __in_dev_get_rcu(dev);
1926		if (in_dev && !test_bit(i, in_dev->cnf.state))
1927			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1928	}
1929	rcu_read_unlock();
1930}
1931
1932/* called with RTNL locked */
1933static void inet_forward_change(struct net *net)
1934{
1935	struct net_device *dev;
1936	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1937
1938	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1939	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1940	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941				    NETCONFA_IFINDEX_ALL,
1942				    net->ipv4.devconf_all);
1943	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944				    NETCONFA_IFINDEX_DEFAULT,
1945				    net->ipv4.devconf_dflt);
1946
1947	for_each_netdev(net, dev) {
1948		struct in_device *in_dev;
1949		if (on)
1950			dev_disable_lro(dev);
1951		rcu_read_lock();
1952		in_dev = __in_dev_get_rcu(dev);
1953		if (in_dev) {
1954			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1955			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1956						    dev->ifindex, &in_dev->cnf);
1957		}
1958		rcu_read_unlock();
1959	}
1960}
1961
1962static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1963{
1964	if (cnf == net->ipv4.devconf_dflt)
1965		return NETCONFA_IFINDEX_DEFAULT;
1966	else if (cnf == net->ipv4.devconf_all)
1967		return NETCONFA_IFINDEX_ALL;
1968	else {
1969		struct in_device *idev
1970			= container_of(cnf, struct in_device, cnf);
1971		return idev->dev->ifindex;
1972	}
1973}
1974
1975static int devinet_conf_proc(struct ctl_table *ctl, int write,
1976			     void __user *buffer,
1977			     size_t *lenp, loff_t *ppos)
1978{
1979	int old_value = *(int *)ctl->data;
1980	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1981	int new_value = *(int *)ctl->data;
1982
1983	if (write) {
1984		struct ipv4_devconf *cnf = ctl->extra1;
1985		struct net *net = ctl->extra2;
1986		int i = (int *)ctl->data - cnf->data;
1987		int ifindex;
1988
1989		set_bit(i, cnf->state);
1990
1991		if (cnf == net->ipv4.devconf_dflt)
1992			devinet_copy_dflt_conf(net, i);
1993		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1994		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1995			if ((new_value == 0) && (old_value != 0))
1996				rt_cache_flush(net);
1997
1998		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1999		    new_value != old_value) {
2000			ifindex = devinet_conf_ifindex(net, cnf);
2001			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2002						    ifindex, cnf);
2003		}
2004		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2005		    new_value != old_value) {
2006			ifindex = devinet_conf_ifindex(net, cnf);
2007			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2008						    ifindex, cnf);
2009		}
2010	}
2011
2012	return ret;
2013}
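
/*
 * devinet_conf_proc() is the generic handler behind most of the per-option
 * sysctls declared below.  The option index is recovered from the distance
 * between ctl->data and cnf->data, the explicit write is recorded in
 * cnf->state, default changes are propagated to unconfigured devices, the
 * route cache is flushed when accept_local or route_localnet is cleared,
 * and rp_filter/proxy_arp changes are announced over netconf.  For
 * illustration (hypothetical device name, not part of this file):
 *
 *	echo 2 > /proc/sys/net/ipv4/conf/eth0/rp_filter
 *
 * ends up here with i == IPV4_DEVCONF_RP_FILTER - 1 and triggers an
 * RTM_NEWNETCONF notification for that interface.
 */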
2014
2015static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2016				  void __user *buffer,
2017				  size_t *lenp, loff_t *ppos)
2018{
2019	int *valp = ctl->data;
2020	int val = *valp;
2021	loff_t pos = *ppos;
2022	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2023
2024	if (write && *valp != val) {
2025		struct net *net = ctl->extra2;
2026
2027		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2028			if (!rtnl_trylock()) {
2029				/* Restore the original values before restarting */
2030				*valp = val;
2031				*ppos = pos;
2032				return restart_syscall();
2033			}
2034			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2035				inet_forward_change(net);
2036			} else {
2037				struct ipv4_devconf *cnf = ctl->extra1;
2038				struct in_device *idev =
2039					container_of(cnf, struct in_device, cnf);
2040				if (*valp)
2041					dev_disable_lro(idev->dev);
2042				inet_netconf_notify_devconf(net,
2043							    NETCONFA_FORWARDING,
2044							    idev->dev->ifindex,
2045							    cnf);
2046			}
2047			rtnl_unlock();
2048			rt_cache_flush(net);
2049		} else
2050			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2051						    NETCONFA_IFINDEX_DEFAULT,
2052						    net->ipv4.devconf_dflt);
2053	}
2054
2055	return ret;
2056}
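
/*
 * devinet_sysctl_forward() needs the RTNL to walk devices but runs in
 * sysctl/proc context, so it only tries rtnl_trylock(): on contention it
 * restores the old value and file position and restarts the syscall rather
 * than blocking.  Changing conf.all.forwarding funnels into
 * inet_forward_change(); a per-device change disables LRO when enabling and
 * notifies netconf for that ifindex; only the "default" entry skips the
 * RTNL/route-cache work and merely sends a notification.
 */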
2057
2058static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2059				void __user *buffer,
2060				size_t *lenp, loff_t *ppos)
2061{
2062	int *valp = ctl->data;
2063	int val = *valp;
2064	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2065	struct net *net = ctl->extra2;
2066
2067	if (write && *valp != val)
2068		rt_cache_flush(net);
2069
2070	return ret;
2071}
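
/*
 * ipv4_doint_and_flush() backs the DEVINET_SYSCTL_FLUSHING_ENTRY options
 * (disable_xfrm, disable_policy, promote_secondaries, route_localnet):
 * any change of value invalidates cached routing decisions, so a modified
 * write flushes the per-netns route cache unconditionally.
 */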
2072
2073#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2074	{ \
2075		.procname	= name, \
2076		.data		= ipv4_devconf.data + \
2077				  IPV4_DEVCONF_ ## attr - 1, \
2078		.maxlen		= sizeof(int), \
2079		.mode		= mval, \
2080		.proc_handler	= proc, \
2081		.extra1		= &ipv4_devconf, \
2082	}
2083
2084#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2085	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2086
2087#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2088	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2089
2090#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2091	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2092
2093#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2094	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
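
/*
 * For illustration, DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter") expands
 * roughly to:
 *
 *	{
 *		.procname	= "rp_filter",
 *		.data		= ipv4_devconf.data + IPV4_DEVCONF_RP_FILTER - 1,
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= devinet_conf_proc,
 *		.extra1		= &ipv4_devconf,
 *	}
 *
 * i.e. every entry initially points at the static template;
 * __devinet_sysctl_register() later redirects .data, .extra1 and .extra2 to
 * the per-netns/per-device copy it is registering.
 */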
2095
2096static struct devinet_sysctl_table {
2097	struct ctl_table_header *sysctl_header;
2098	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2099} devinet_sysctl = {
2100	.devinet_vars = {
2101		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2102					     devinet_sysctl_forward),
2103		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2104
2105		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2106		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2107		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2108		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2109		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2110		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2111					"accept_source_route"),
2112		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2113		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2114		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2115		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2116		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2117		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2118		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2119		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2120		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2121		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2122		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2123		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2124		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2125		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2126					"force_igmp_version"),
2127		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2128					"igmpv2_unsolicited_report_interval"),
2129		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2130					"igmpv3_unsolicited_report_interval"),
2131
2132		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2133		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2134		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2135					      "promote_secondaries"),
2136		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2137					      "route_localnet"),
2138	},
2139};
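
/*
 * The template table is sized __IPV4_DEVCONF_MAX so that at least the last
 * slot stays zeroed and terminates the ctl_table, which is why the
 * registration loop below stops at ARRAY_SIZE() - 1.  The entries surface as
 * /proc/sys/net/ipv4/conf/{all,default,<ifname>}/<procname>.
 */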
2140
2141static int __devinet_sysctl_register(struct net *net, char *dev_name,
2142					struct ipv4_devconf *p)
2143{
2144	int i;
2145	struct devinet_sysctl_table *t;
2146	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2147
2148	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2149	if (!t)
2150		goto out;
2151
2152	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2153		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2154		t->devinet_vars[i].extra1 = p;
2155		t->devinet_vars[i].extra2 = net;
2156	}
2157
2158	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2159
2160	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2161	if (!t->sysctl_header)
2162		goto free;
2163
2164	p->sysctl = t;
2165	return 0;
2166
2167free:
2168	kfree(t);
2169out:
2170	return -ENOBUFS;
2171}
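
/*
 * __devinet_sysctl_register() duplicates the template, rebases every entry's
 * .data pointer by the byte offset between the target devconf block and the
 * static ipv4_devconf it was initialized against, stores the block and netns
 * in .extra1/.extra2 for the handlers, and registers the result under
 * "net/ipv4/conf/<dev_name>".  Any failure is reported as -ENOBUFS.
 */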
2172
2173static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2174{
2175	struct devinet_sysctl_table *t = cnf->sysctl;
2176
2177	if (t == NULL)
2178		return;
2179
2180	cnf->sysctl = NULL;
2181	unregister_net_sysctl_table(t->sysctl_header);
2182	kfree(t);
2183}
2184
2185static void devinet_sysctl_register(struct in_device *idev)
2186{
2187	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2188	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2189					&idev->cnf);
2190}
2191
2192static void devinet_sysctl_unregister(struct in_device *idev)
2193{
2194	__devinet_sysctl_unregister(&idev->cnf);
2195	neigh_sysctl_unregister(idev->arp_parms);
2196}
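
/*
 * devinet_sysctl_register()/devinet_sysctl_unregister() pair the per-device
 * IPv4 tables with the neighbour (ARP) sysctls and tear them down in reverse
 * order; note that the return value of __devinet_sysctl_register() is not
 * checked on this path.
 */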
2197
2198static struct ctl_table ctl_forward_entry[] = {
2199	{
2200		.procname	= "ip_forward",
2201		.data		= &ipv4_devconf.data[
2202					IPV4_DEVCONF_FORWARDING - 1],
2203		.maxlen		= sizeof(int),
2204		.mode		= 0644,
2205		.proc_handler	= devinet_sysctl_forward,
2206		.extra1		= &ipv4_devconf,
2207		.extra2		= &init_net,
2208	},
2209	{ },
2210};
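
/*
 * ctl_forward_entry provides the legacy net.ipv4.ip_forward knob.  It shares
 * its .data with conf.all.forwarding and goes through the same
 * devinet_sysctl_forward() handler; devinet_init_net() repoints .data,
 * .extra1 and .extra2 at the per-netns copy for namespaces other than
 * init_net.
 */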
2211#endif
2212
2213static __net_init int devinet_init_net(struct net *net)
2214{
2215	int err;
2216	struct ipv4_devconf *all, *dflt;
2217#ifdef CONFIG_SYSCTL
2218	struct ctl_table *tbl = ctl_forward_entry;
2219	struct ctl_table_header *forw_hdr;
2220#endif
2221
2222	err = -ENOMEM;
2223	all = &ipv4_devconf;
2224	dflt = &ipv4_devconf_dflt;
2225
2226	if (!net_eq(net, &init_net)) {
2227		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2228		if (all == NULL)
2229			goto err_alloc_all;
2230
2231		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2232		if (dflt == NULL)
2233			goto err_alloc_dflt;
2234
2235#ifdef CONFIG_SYSCTL
2236		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2237		if (tbl == NULL)
2238			goto err_alloc_ctl;
2239
2240		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2241		tbl[0].extra1 = all;
2242		tbl[0].extra2 = net;
2243#endif
2244	}
2245
2246#ifdef CONFIG_SYSCTL
2247	err = __devinet_sysctl_register(net, "all", all);
2248	if (err < 0)
2249		goto err_reg_all;
2250
2251	err = __devinet_sysctl_register(net, "default", dflt);
2252	if (err < 0)
2253		goto err_reg_dflt;
2254
2255	err = -ENOMEM;
2256	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2257	if (forw_hdr == NULL)
2258		goto err_reg_ctl;
2259	net->ipv4.forw_hdr = forw_hdr;
2260#endif
2261
2262	net->ipv4.devconf_all = all;
2263	net->ipv4.devconf_dflt = dflt;
2264	return 0;
2265
2266#ifdef CONFIG_SYSCTL
2267err_reg_ctl:
2268	__devinet_sysctl_unregister(dflt);
2269err_reg_dflt:
2270	__devinet_sysctl_unregister(all);
2271err_reg_all:
2272	if (tbl != ctl_forward_entry)
2273		kfree(tbl);
2274err_alloc_ctl:
2275#endif
2276	if (dflt != &ipv4_devconf_dflt)
2277		kfree(dflt);
2278err_alloc_dflt:
2279	if (all != &ipv4_devconf)
2280		kfree(all);
2281err_alloc_all:
2282	return err;
2283}
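
/*
 * devinet_init_net(): the initial namespace keeps using the static
 * ipv4_devconf/ipv4_devconf_dflt templates (and the static
 * ctl_forward_entry), while every other namespace works on kmemdup()ed
 * copies so that its sysctls are independent.  The error labels unwind in
 * exactly the reverse order of the registrations above.
 */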
2284
2285static __net_exit void devinet_exit_net(struct net *net)
2286{
2287#ifdef CONFIG_SYSCTL
2288	struct ctl_table *tbl;
2289
2290	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2291	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2292	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2293	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2294	kfree(tbl);
2295#endif
2296	kfree(net->ipv4.devconf_dflt);
2297	kfree(net->ipv4.devconf_all);
2298}
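
/*
 * devinet_exit_net() mirrors the setup: sysctl tables first, then the
 * devconf blocks.  The unconditional kfree() of devconf_all/devconf_dflt is
 * safe on the assumption that this exit hook never runs for init_net (the
 * initial namespace is not dismantled at runtime), so only the duplicated
 * per-netns blocks are ever freed here.
 */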
2299
2300static __net_initdata struct pernet_operations devinet_ops = {
2301	.init = devinet_init_net,
2302	.exit = devinet_exit_net,
2303};
2304
2305static struct rtnl_af_ops inet_af_ops = {
2306	.family		  = AF_INET,
2307	.fill_link_af	  = inet_fill_link_af,
2308	.get_link_af_size = inet_get_link_af_size,
2309	.validate_link_af = inet_validate_link_af,
2310	.set_link_af	  = inet_set_link_af,
2311};
2312
2313void __init devinet_init(void)
2314{
2315	int i;
2316
2317	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2318		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2319
2320	register_pernet_subsys(&devinet_ops);
2321
2322	register_gifconf(PF_INET, inet_gifconf);
2323	register_netdevice_notifier(&ip_netdev_notifier);
2324
2325	schedule_delayed_work(&check_lifetime_work, 0);
2326
2327	rtnl_af_register(&inet_af_ops);
2328
2329	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2330	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2331	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2332	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2333		      inet_netconf_dump_devconf, NULL);
2334}
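
/*
 * devinet_init() wires everything up at boot: the address hash table, the
 * per-netns setup above, the legacy SIOCGIFCONF helper, the netdevice
 * notifier, the periodic address-lifetime worker and the AF_INET
 * rtnetlink/netconf message handlers (a NULL doit or dumpit simply means
 * that kind of handler is not provided for the operation).  For illustration
 * only (userspace tool usage, not part of this file), `ip -4 addr show` is
 * served by the RTM_GETADDR dump registered here, i.e. inet_dump_ifaddr().
 */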
2335