devinet.c revision b67bfe0d42cac56c512dd5da4b1b347a23f4b70a
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <linux/bitops.h>
31#include <linux/capability.h>
32#include <linux/module.h>
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/string.h>
36#include <linux/mm.h>
37#include <linux/socket.h>
38#include <linux/sockios.h>
39#include <linux/in.h>
40#include <linux/errno.h>
41#include <linux/interrupt.h>
42#include <linux/if_addr.h>
43#include <linux/if_ether.h>
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/etherdevice.h>
47#include <linux/skbuff.h>
48#include <linux/init.h>
49#include <linux/notifier.h>
50#include <linux/inetdevice.h>
51#include <linux/igmp.h>
52#include <linux/slab.h>
53#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58#include <linux/netconf.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66#include <net/addrconf.h>
67
68#include "fib_lookup.h"
69
/* IPv4 device configuration block with the four knobs listed below preset
 * to 1; every other .data slot is zero-initialized. Slots are indexed by
 * the IPV4_DEVCONF_* constant minus one.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
78
/* Second configuration block: same presets as ipv4_devconf plus
 * ACCEPT_SOURCE_ROUTE enabled. Presumably the template behind the
 * per-namespace devconf_dflt pointer -- confirm where it is assigned.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
88
/* Access attribute @attr in the default devconf of namespace @net.
 * @net is parenthesized so that an arbitrary pointer expression can be
 * passed without operator-precedence surprises.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91
/* Netlink attribute policy handed to nlmsg_parse() for address messages:
 * the three address attributes are 32-bit values, the label is a string
 * of at most IFNAMSIZ - 1 bytes, and the cache info attribute is sized
 * as struct ifa_cacheinfo.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
};
99
/* Hash table of all in_ifaddr entries, keyed by local address (see
 * inet_addr_hash()). It has 2^IN4_ADDR_HSIZE_SHIFT buckets.
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
/* Taken around insertions into and removals from inet_addr_lst. */
static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106static u32 inet_addr_hash(struct net *net, __be32 addr)
107{
108	u32 val = (__force u32) addr ^ net_hash_mix(net);
109
110	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111}
112
113static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114{
115	u32 hash = inet_addr_hash(net, ifa->ifa_local);
116
117	spin_lock(&inet_addr_hash_lock);
118	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119	spin_unlock(&inet_addr_hash_lock);
120}
121
/* Unlink @ifa from the address hash table under inet_addr_hash_lock,
 * using the RCU-aware delete-and-reinitialize list helper.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
128
129/**
130 * __ip_dev_find - find the first device with a given source address.
131 * @net: the net namespace
132 * @addr: the source address
133 * @devref: if true, take a reference on the found device
134 *
135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
136 */
137struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138{
139	u32 hash = inet_addr_hash(net, addr);
140	struct net_device *result = NULL;
141	struct in_ifaddr *ifa;
142
143	rcu_read_lock();
144	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145		if (ifa->ifa_local == addr) {
146			struct net_device *dev = ifa->ifa_dev->dev;
147
148			if (!net_eq(dev_net(dev), net))
149				continue;
150			result = dev;
151			break;
152		}
153	}
154	if (!result) {
155		struct flowi4 fl4 = { .daddr = addr };
156		struct fib_result res = { 0 };
157		struct fib_table *local;
158
159		/* Fallback to FIB local table so that communication
160		 * over loopback subnets work.
161		 */
162		local = fib_get_table(net, RT_TABLE_LOCAL);
163		if (local &&
164		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165		    res.type == RTN_LOCAL)
166			result = FIB_RES_DEV(res);
167	}
168	if (result && devref)
169		dev_hold(result);
170	rcu_read_unlock();
171	return result;
172}
173EXPORT_SYMBOL(__ip_dev_find);
174
/* Forward declaration: sends an address netlink message for @event. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when an address is inserted or removed.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the sysctl hooks compile to empty stubs. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
191
192/* Locks all the inet devices. */
193
194static struct in_ifaddr *inet_alloc_ifa(void)
195{
196	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197}
198
199static void inet_rcu_free_ifa(struct rcu_head *head)
200{
201	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202	if (ifa->ifa_dev)
203		in_dev_put(ifa->ifa_dev);
204	kfree(ifa);
205}
206
/* Release @ifa: the actual free (and the in_device reference drop) is
 * deferred to inet_rcu_free_ifa() via call_rcu().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
211
212void in_dev_finish_destroy(struct in_device *idev)
213{
214	struct net_device *dev = idev->dev;
215
216	WARN_ON(idev->ifa_list);
217	WARN_ON(idev->mc_list);
218#ifdef NET_REFCNT_DEBUG
219	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220#endif
221	dev_put(dev);
222	if (!idev->dead)
223		pr_err("Freeing alive in_device %p\n", idev);
224	else
225		kfree(idev);
226}
227EXPORT_SYMBOL(in_dev_finish_destroy);
228
229static struct in_device *inetdev_init(struct net_device *dev)
230{
231	struct in_device *in_dev;
232
233	ASSERT_RTNL();
234
235	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236	if (!in_dev)
237		goto out;
238	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239			sizeof(in_dev->cnf));
240	in_dev->cnf.sysctl = NULL;
241	in_dev->dev = dev;
242	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243	if (!in_dev->arp_parms)
244		goto out_kfree;
245	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246		dev_disable_lro(dev);
247	/* Reference in_dev->dev */
248	dev_hold(dev);
249	/* Account for reference dev->ip_ptr (below) */
250	in_dev_hold(in_dev);
251
252	devinet_sysctl_register(in_dev);
253	ip_mc_init_dev(in_dev);
254	if (dev->flags & IFF_UP)
255		ip_mc_up(in_dev);
256
257	/* we can receive as soon as ip_ptr is set -- do this last */
258	rcu_assign_pointer(dev->ip_ptr, in_dev);
259out:
260	return in_dev;
261out_kfree:
262	kfree(in_dev);
263	in_dev = NULL;
264	goto out;
265}
266
/* RCU callback queued by inetdev_destroy(): drops one reference on the
 * in_device that embeds @head.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
272
273static void inetdev_destroy(struct in_device *in_dev)
274{
275	struct in_ifaddr *ifa;
276	struct net_device *dev;
277
278	ASSERT_RTNL();
279
280	dev = in_dev->dev;
281
282	in_dev->dead = 1;
283
284	ip_mc_destroy_dev(in_dev);
285
286	while ((ifa = in_dev->ifa_list) != NULL) {
287		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288		inet_free_ifa(ifa);
289	}
290
291	RCU_INIT_POINTER(dev->ip_ptr, NULL);
292
293	devinet_sysctl_unregister(in_dev);
294	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295	arp_ifdown(dev);
296
297	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298}
299
300int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301{
302	rcu_read_lock();
303	for_primary_ifa(in_dev) {
304		if (inet_ifa_match(a, ifa)) {
305			if (!b || inet_ifa_match(b, ifa)) {
306				rcu_read_unlock();
307				return 1;
308			}
309		}
310	} endfor_ifa(in_dev);
311	rcu_read_unlock();
312	return 0;
313}
314
/*
 * Remove the address *@ifap from @in_dev's address list and hash table,
 * announcing the removal via netlink and the inetaddr notifier chain.
 *
 * @in_dev:  device the address belongs to
 * @ifap:    pointer to the list link that currently points at the address
 * @destroy: if non-zero, the removed address is also freed
 * @nlh:     originating netlink request passed on to rtmsg_ifa() (may be NULL)
 * @portid:  netlink port id passed on to rtmsg_ifa()
 *
 * When a primary address is removed, its secondaries in the same subnet
 * are either deleted as well or, if secondary promotion is enabled on the
 * device, the first of them is promoted to primary. Caller holds RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary the promoted address
			 * will later be re-linked behind.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Not a secondary of ifa1's subnet: step over it. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the successor before promote is re-linked. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted address directly after last_prim. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining addresses of the subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
414
/* Convenience wrapper around __inet_del_ifa() for callers that have no
 * originating netlink request (nlh = NULL, portid = 0).
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
420
/* Address lifetime worker; defined further down in this file. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424
425static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426			     u32 portid)
427{
428	struct in_device *in_dev = ifa->ifa_dev;
429	struct in_ifaddr *ifa1, **ifap, **last_primary;
430
431	ASSERT_RTNL();
432
433	if (!ifa->ifa_local) {
434		inet_free_ifa(ifa);
435		return 0;
436	}
437
438	ifa->ifa_flags &= ~IFA_F_SECONDARY;
439	last_primary = &in_dev->ifa_list;
440
441	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442	     ifap = &ifa1->ifa_next) {
443		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444		    ifa->ifa_scope <= ifa1->ifa_scope)
445			last_primary = &ifa1->ifa_next;
446		if (ifa1->ifa_mask == ifa->ifa_mask &&
447		    inet_ifa_match(ifa1->ifa_address, ifa)) {
448			if (ifa1->ifa_local == ifa->ifa_local) {
449				inet_free_ifa(ifa);
450				return -EEXIST;
451			}
452			if (ifa1->ifa_scope != ifa->ifa_scope) {
453				inet_free_ifa(ifa);
454				return -EINVAL;
455			}
456			ifa->ifa_flags |= IFA_F_SECONDARY;
457		}
458	}
459
460	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461		net_srandom(ifa->ifa_local);
462		ifap = last_primary;
463	}
464
465	ifa->ifa_next = *ifap;
466	*ifap = ifa;
467
468	inet_hash_insert(dev_net(in_dev->dev), ifa);
469
470	cancel_delayed_work(&check_lifetime_work);
471	schedule_delayed_work(&check_lifetime_work, 0);
472
473	/* Send message first, then call notifier.
474	   Notifier will trigger FIB update, so that
475	   listeners of netlink will know about new ifaddr */
476	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478
479	return 0;
480}
481
/* Convenience wrapper around __inet_insert_ifa() for callers that have
 * no originating netlink request (nlh = NULL, portid = 0).
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
486
487static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488{
489	struct in_device *in_dev = __in_dev_get_rtnl(dev);
490
491	ASSERT_RTNL();
492
493	if (!in_dev) {
494		inet_free_ifa(ifa);
495		return -ENOBUFS;
496	}
497	ipv4_devconf_setall(in_dev);
498	if (ifa->ifa_dev != in_dev) {
499		WARN_ON(ifa->ifa_dev);
500		in_dev_hold(in_dev);
501		ifa->ifa_dev = in_dev;
502	}
503	if (ipv4_is_loopback(ifa->ifa_local))
504		ifa->ifa_scope = RT_SCOPE_HOST;
505	return inet_insert_ifa(ifa);
506}
507
508/* Caller must hold RCU or RTNL :
509 * We dont take a reference on found in_device
510 */
511struct in_device *inetdev_by_index(struct net *net, int ifindex)
512{
513	struct net_device *dev;
514	struct in_device *in_dev = NULL;
515
516	rcu_read_lock();
517	dev = dev_get_by_index_rcu(net, ifindex);
518	if (dev)
519		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520	rcu_read_unlock();
521	return in_dev;
522}
523EXPORT_SYMBOL(inetdev_by_index);
524
525/* Called only from RTNL semaphored context. No locks. */
526
527struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528				    __be32 mask)
529{
530	ASSERT_RTNL();
531
532	for_primary_ifa(in_dev) {
533		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534			return ifa;
535	} endfor_ifa(in_dev);
536	return NULL;
537}
538
539static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
540{
541	struct net *net = sock_net(skb->sk);
542	struct nlattr *tb[IFA_MAX+1];
543	struct in_device *in_dev;
544	struct ifaddrmsg *ifm;
545	struct in_ifaddr *ifa, **ifap;
546	int err = -EINVAL;
547
548	ASSERT_RTNL();
549
550	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551	if (err < 0)
552		goto errout;
553
554	ifm = nlmsg_data(nlh);
555	in_dev = inetdev_by_index(net, ifm->ifa_index);
556	if (in_dev == NULL) {
557		err = -ENODEV;
558		goto errout;
559	}
560
561	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562	     ifap = &ifa->ifa_next) {
563		if (tb[IFA_LOCAL] &&
564		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565			continue;
566
567		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568			continue;
569
570		if (tb[IFA_ADDRESS] &&
571		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573			continue;
574
575		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576		return 0;
577	}
578
579	err = -EADDRNOTAVAIL;
580errout:
581	return err;
582}
583
/* Lifetime value meaning "never expires" for valid/preferred lifetimes. */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
585
586static void check_lifetime(struct work_struct *work)
587{
588	unsigned long now, next, next_sec, next_sched;
589	struct in_ifaddr *ifa;
590	int i;
591
592	now = jiffies;
593	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
594
595	rcu_read_lock();
596	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
598			unsigned long age;
599
600			if (ifa->ifa_flags & IFA_F_PERMANENT)
601				continue;
602
603			/* We try to batch several events at once. */
604			age = (now - ifa->ifa_tstamp +
605			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
606
607			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608			    age >= ifa->ifa_valid_lft) {
609				struct in_ifaddr **ifap ;
610
611				rtnl_lock();
612				for (ifap = &ifa->ifa_dev->ifa_list;
613				     *ifap != NULL; ifap = &ifa->ifa_next) {
614					if (*ifap == ifa)
615						inet_del_ifa(ifa->ifa_dev,
616							     ifap, 1);
617				}
618				rtnl_unlock();
619			} else if (ifa->ifa_preferred_lft ==
620				   INFINITY_LIFE_TIME) {
621				continue;
622			} else if (age >= ifa->ifa_preferred_lft) {
623				if (time_before(ifa->ifa_tstamp +
624						ifa->ifa_valid_lft * HZ, next))
625					next = ifa->ifa_tstamp +
626					       ifa->ifa_valid_lft * HZ;
627
628				if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
629					ifa->ifa_flags |= IFA_F_DEPRECATED;
630					rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
631				}
632			} else if (time_before(ifa->ifa_tstamp +
633					       ifa->ifa_preferred_lft * HZ,
634					       next)) {
635				next = ifa->ifa_tstamp +
636				       ifa->ifa_preferred_lft * HZ;
637			}
638		}
639	}
640	rcu_read_unlock();
641
642	next_sec = round_jiffies_up(next);
643	next_sched = next;
644
645	/* If rounded timeout is accurate enough, accept it. */
646	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
647		next_sched = next_sec;
648
649	now = jiffies;
650	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
651	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
652		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
653
654	schedule_delayed_work(&check_lifetime_work, next_sched - now);
655}
656
657static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
658			     __u32 prefered_lft)
659{
660	unsigned long timeout;
661
662	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
663
664	timeout = addrconf_timeout_fixup(valid_lft, HZ);
665	if (addrconf_finite_timeout(timeout))
666		ifa->ifa_valid_lft = timeout;
667	else
668		ifa->ifa_flags |= IFA_F_PERMANENT;
669
670	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
671	if (addrconf_finite_timeout(timeout)) {
672		if (timeout == 0)
673			ifa->ifa_flags |= IFA_F_DEPRECATED;
674		ifa->ifa_preferred_lft = timeout;
675	}
676	ifa->ifa_tstamp = jiffies;
677	if (!ifa->ifa_cstamp)
678		ifa->ifa_cstamp = ifa->ifa_tstamp;
679}
680
681static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
682				       __u32 *pvalid_lft, __u32 *pprefered_lft)
683{
684	struct nlattr *tb[IFA_MAX+1];
685	struct in_ifaddr *ifa;
686	struct ifaddrmsg *ifm;
687	struct net_device *dev;
688	struct in_device *in_dev;
689	int err;
690
691	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
692	if (err < 0)
693		goto errout;
694
695	ifm = nlmsg_data(nlh);
696	err = -EINVAL;
697	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
698		goto errout;
699
700	dev = __dev_get_by_index(net, ifm->ifa_index);
701	err = -ENODEV;
702	if (dev == NULL)
703		goto errout;
704
705	in_dev = __in_dev_get_rtnl(dev);
706	err = -ENOBUFS;
707	if (in_dev == NULL)
708		goto errout;
709
710	ifa = inet_alloc_ifa();
711	if (ifa == NULL)
712		/*
713		 * A potential indev allocation can be left alive, it stays
714		 * assigned to its device and is destroy with it.
715		 */
716		goto errout;
717
718	ipv4_devconf_setall(in_dev);
719	in_dev_hold(in_dev);
720
721	if (tb[IFA_ADDRESS] == NULL)
722		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
723
724	INIT_HLIST_NODE(&ifa->hash);
725	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
726	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
727	ifa->ifa_flags = ifm->ifa_flags;
728	ifa->ifa_scope = ifm->ifa_scope;
729	ifa->ifa_dev = in_dev;
730
731	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
732	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
733
734	if (tb[IFA_BROADCAST])
735		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
736
737	if (tb[IFA_LABEL])
738		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
739	else
740		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
741
742	if (tb[IFA_CACHEINFO]) {
743		struct ifa_cacheinfo *ci;
744
745		ci = nla_data(tb[IFA_CACHEINFO]);
746		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
747			err = -EINVAL;
748			goto errout;
749		}
750		*pvalid_lft = ci->ifa_valid;
751		*pprefered_lft = ci->ifa_prefered;
752	}
753
754	return ifa;
755
756errout:
757	return ERR_PTR(err);
758}
759
760static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
761{
762	struct in_device *in_dev = ifa->ifa_dev;
763	struct in_ifaddr *ifa1, **ifap;
764
765	if (!ifa->ifa_local)
766		return NULL;
767
768	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
769	     ifap = &ifa1->ifa_next) {
770		if (ifa1->ifa_mask == ifa->ifa_mask &&
771		    inet_ifa_match(ifa1->ifa_address, ifa) &&
772		    ifa1->ifa_local == ifa->ifa_local)
773			return ifa1;
774	}
775	return NULL;
776}
777
778static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
779{
780	struct net *net = sock_net(skb->sk);
781	struct in_ifaddr *ifa;
782	struct in_ifaddr *ifa_existing;
783	__u32 valid_lft = INFINITY_LIFE_TIME;
784	__u32 prefered_lft = INFINITY_LIFE_TIME;
785
786	ASSERT_RTNL();
787
788	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
789	if (IS_ERR(ifa))
790		return PTR_ERR(ifa);
791
792	ifa_existing = find_matching_ifa(ifa);
793	if (!ifa_existing) {
794		/* It would be best to check for !NLM_F_CREATE here but
795		 * userspace alreay relies on not having to provide this.
796		 */
797		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
798		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
799	} else {
800		inet_free_ifa(ifa);
801
802		if (nlh->nlmsg_flags & NLM_F_EXCL ||
803		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
804			return -EEXIST;
805
806		set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
807	}
808	return 0;
809}
810
811/*
812 *	Determine a default network mask, based on the IP address.
813 */
814
815static int inet_abc_len(__be32 addr)
816{
817	int rc = -1;	/* Something else, probably a multicast. */
818
819	if (ipv4_is_zeronet(addr))
820		rc = 0;
821	else {
822		__u32 haddr = ntohl(addr);
823
824		if (IN_CLASSA(haddr))
825			rc = 8;
826		else if (IN_CLASSB(haddr))
827			rc = 16;
828		else if (IN_CLASSC(haddr))
829			rc = 24;
830	}
831
832	return rc;
833}
834
835
/*
 * Handle the IPv4 interface-address ioctls (SIOCGIF*ADDR / SIOCGIFNETMASK,
 * their SIOCSIF* setters, and SIOCSIFFLAGS).
 *
 * @net: namespace the request applies to
 * @cmd: ioctl number
 * @arg: user pointer to a struct ifreq
 *
 * The ifreq name may carry an alias suffix ("eth0:1"); the part before the
 * colon selects the device, the full name is matched against address
 * labels. Get requests copy the updated ifreq back to user space.
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM (set request without CAP_NET_ADMIN), -EINVAL (unknown cmd, wrong
 * address family, or illegal address/mask), -ENODEV (no such device),
 * -EADDRNOTAVAIL (no matching address), -ENOBUFS (allocation failure).
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Force termination: the name comes from user space. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut off the alias suffix to look up the device. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* Prepare the reply; the address is filled in below. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full (aliased) name for label matching. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* On an alias name, clearing IFF_UP deletes just that
		 * address; without an alias the device flags change.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; it is re-inserted below. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		/* Derive a classful mask (and broadcast address) unless
		 * the device is point-to-point, which gets a /32.
		 */
		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Unlink, modify, re-insert. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* Get requests: drop RTNL, then copy the reply to user space. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1077
1078static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1079{
1080	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1081	struct in_ifaddr *ifa;
1082	struct ifreq ifr;
1083	int done = 0;
1084
1085	if (!in_dev)
1086		goto out;
1087
1088	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1089		if (!buf) {
1090			done += sizeof(ifr);
1091			continue;
1092		}
1093		if (len < (int) sizeof(ifr))
1094			break;
1095		memset(&ifr, 0, sizeof(struct ifreq));
1096		if (ifa->ifa_label)
1097			strcpy(ifr.ifr_name, ifa->ifa_label);
1098		else
1099			strcpy(ifr.ifr_name, dev->name);
1100
1101		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1102		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1103								ifa->ifa_local;
1104
1105		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1106			done = -EFAULT;
1107			break;
1108		}
1109		buf  += sizeof(struct ifreq);
1110		len  -= sizeof(struct ifreq);
1111		done += sizeof(struct ifreq);
1112	}
1113out:
1114	return done;
1115}
1116
1117__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1118{
1119	__be32 addr = 0;
1120	struct in_device *in_dev;
1121	struct net *net = dev_net(dev);
1122
1123	rcu_read_lock();
1124	in_dev = __in_dev_get_rcu(dev);
1125	if (!in_dev)
1126		goto no_in_dev;
1127
1128	for_primary_ifa(in_dev) {
1129		if (ifa->ifa_scope > scope)
1130			continue;
1131		if (!dst || inet_ifa_match(dst, ifa)) {
1132			addr = ifa->ifa_local;
1133			break;
1134		}
1135		if (!addr)
1136			addr = ifa->ifa_local;
1137	} endfor_ifa(in_dev);
1138
1139	if (addr)
1140		goto out_unlock;
1141no_in_dev:
1142
1143	/* Not loopback addresses on loopback should be preferred
1144	   in this case. It is importnat that lo is the first interface
1145	   in dev_base list.
1146	 */
1147	for_each_netdev_rcu(net, dev) {
1148		in_dev = __in_dev_get_rcu(dev);
1149		if (!in_dev)
1150			continue;
1151
1152		for_primary_ifa(in_dev) {
1153			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1154			    ifa->ifa_scope <= scope) {
1155				addr = ifa->ifa_local;
1156				goto out_unlock;
1157			}
1158		} endfor_ifa(in_dev);
1159	}
1160out_unlock:
1161	rcu_read_unlock();
1162	return addr;
1163}
1164EXPORT_SYMBOL(inet_select_addr);
1165
1166static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1167			      __be32 local, int scope)
1168{
1169	int same = 0;
1170	__be32 addr = 0;
1171
1172	for_ifa(in_dev) {
1173		if (!addr &&
1174		    (local == ifa->ifa_local || !local) &&
1175		    ifa->ifa_scope <= scope) {
1176			addr = ifa->ifa_local;
1177			if (same)
1178				break;
1179		}
1180		if (!same) {
1181			same = (!local || inet_ifa_match(local, ifa)) &&
1182				(!dst || inet_ifa_match(dst, ifa));
1183			if (same && addr) {
1184				if (local || !dst)
1185					break;
1186				/* Is the selected addr into dst subnet? */
1187				if (inet_ifa_match(addr, ifa))
1188					break;
1189				/* No, then can we use new local src? */
1190				if (ifa->ifa_scope <= scope) {
1191					addr = ifa->ifa_local;
1192					break;
1193				}
1194				/* search for large dst subnet for addr */
1195				same = 0;
1196			}
1197		}
1198	} endfor_ifa(in_dev);
1199
1200	return same ? addr : 0;
1201}
1202
1203/*
1204 * Confirm that local IP address exists using wildcards:
1205 * - in_dev: only on this interface, 0=any interface
1206 * - dst: only in the same subnet as dst, 0=any dst
1207 * - local: address, 0=autoselect the local address
1208 * - scope: maximum allowed scope value for the local address
1209 */
1210__be32 inet_confirm_addr(struct in_device *in_dev,
1211			 __be32 dst, __be32 local, int scope)
1212{
1213	__be32 addr = 0;
1214	struct net_device *dev;
1215	struct net *net;
1216
1217	if (scope != RT_SCOPE_LINK)
1218		return confirm_addr_indev(in_dev, dst, local, scope);
1219
1220	net = dev_net(in_dev->dev);
1221	rcu_read_lock();
1222	for_each_netdev_rcu(net, dev) {
1223		in_dev = __in_dev_get_rcu(dev);
1224		if (in_dev) {
1225			addr = confirm_addr_indev(in_dev, dst, local, scope);
1226			if (addr)
1227				break;
1228		}
1229	}
1230	rcu_read_unlock();
1231
1232	return addr;
1233}
1234EXPORT_SYMBOL(inet_confirm_addr);
1235
1236/*
1237 *	Device notifier
1238 */
1239
1240int register_inetaddr_notifier(struct notifier_block *nb)
1241{
1242	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1243}
1244EXPORT_SYMBOL(register_inetaddr_notifier);
1245
1246int unregister_inetaddr_notifier(struct notifier_block *nb)
1247{
1248	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1249}
1250EXPORT_SYMBOL(unregister_inetaddr_notifier);
1251
1252/* Rename ifa_labels for a device name change. Make some effort to preserve
1253 * existing alias numbering and to create unique labels if possible.
1254*/
1255static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1256{
1257	struct in_ifaddr *ifa;
1258	int named = 0;
1259
1260	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1261		char old[IFNAMSIZ], *dot;
1262
1263		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1264		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1265		if (named++ == 0)
1266			goto skip;
1267		dot = strchr(old, ':');
1268		if (dot == NULL) {
1269			sprintf(old, ":%d", named);
1270			dot = old;
1271		}
1272		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1273			strcat(ifa->ifa_label, dot);
1274		else
1275			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1276skip:
1277		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1278	}
1279}
1280
/*
 * Tell whether @mtu is large enough for IPv4 to run on a device.
 * Anything below 68 bytes is rejected (68 octets is the datagram size
 * RFC 791 requires every module to be able to forward).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1285
1286static void inetdev_send_gratuitous_arp(struct net_device *dev,
1287					struct in_device *in_dev)
1288
1289{
1290	struct in_ifaddr *ifa;
1291
1292	for (ifa = in_dev->ifa_list; ifa;
1293	     ifa = ifa->ifa_next) {
1294		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1295			 ifa->ifa_local, dev,
1296			 ifa->ifa_local, NULL,
1297			 dev->dev_addr, NULL);
1298	}
1299}
1300
1301/* Called only under RTNL semaphore */
1302
1303static int inetdev_event(struct notifier_block *this, unsigned long event,
1304			 void *ptr)
1305{
1306	struct net_device *dev = ptr;
1307	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1308
1309	ASSERT_RTNL();
1310
1311	if (!in_dev) {
1312		if (event == NETDEV_REGISTER) {
1313			in_dev = inetdev_init(dev);
1314			if (!in_dev)
1315				return notifier_from_errno(-ENOMEM);
1316			if (dev->flags & IFF_LOOPBACK) {
1317				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1318				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1319			}
1320		} else if (event == NETDEV_CHANGEMTU) {
1321			/* Re-enabling IP */
1322			if (inetdev_valid_mtu(dev->mtu))
1323				in_dev = inetdev_init(dev);
1324		}
1325		goto out;
1326	}
1327
1328	switch (event) {
1329	case NETDEV_REGISTER:
1330		pr_debug("%s: bug\n", __func__);
1331		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1332		break;
1333	case NETDEV_UP:
1334		if (!inetdev_valid_mtu(dev->mtu))
1335			break;
1336		if (dev->flags & IFF_LOOPBACK) {
1337			struct in_ifaddr *ifa = inet_alloc_ifa();
1338
1339			if (ifa) {
1340				INIT_HLIST_NODE(&ifa->hash);
1341				ifa->ifa_local =
1342				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1343				ifa->ifa_prefixlen = 8;
1344				ifa->ifa_mask = inet_make_mask(8);
1345				in_dev_hold(in_dev);
1346				ifa->ifa_dev = in_dev;
1347				ifa->ifa_scope = RT_SCOPE_HOST;
1348				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1349				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1350						 INFINITY_LIFE_TIME);
1351				inet_insert_ifa(ifa);
1352			}
1353		}
1354		ip_mc_up(in_dev);
1355		/* fall through */
1356	case NETDEV_CHANGEADDR:
1357		if (!IN_DEV_ARP_NOTIFY(in_dev))
1358			break;
1359		/* fall through */
1360	case NETDEV_NOTIFY_PEERS:
1361		/* Send gratuitous ARP to notify of link change */
1362		inetdev_send_gratuitous_arp(dev, in_dev);
1363		break;
1364	case NETDEV_DOWN:
1365		ip_mc_down(in_dev);
1366		break;
1367	case NETDEV_PRE_TYPE_CHANGE:
1368		ip_mc_unmap(in_dev);
1369		break;
1370	case NETDEV_POST_TYPE_CHANGE:
1371		ip_mc_remap(in_dev);
1372		break;
1373	case NETDEV_CHANGEMTU:
1374		if (inetdev_valid_mtu(dev->mtu))
1375			break;
1376		/* disable IP when MTU is not enough */
1377	case NETDEV_UNREGISTER:
1378		inetdev_destroy(in_dev);
1379		break;
1380	case NETDEV_CHANGENAME:
1381		/* Do not notify about label change, this event is
1382		 * not interesting to applications using netlink.
1383		 */
1384		inetdev_changename(dev, in_dev);
1385
1386		devinet_sysctl_unregister(in_dev);
1387		devinet_sysctl_register(in_dev);
1388		break;
1389	}
1390out:
1391	return NOTIFY_DONE;
1392}
1393
1394static struct notifier_block ip_netdev_notifier = {
1395	.notifier_call = inetdev_event,
1396};
1397
1398static size_t inet_nlmsg_size(void)
1399{
1400	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1401	       + nla_total_size(4) /* IFA_ADDRESS */
1402	       + nla_total_size(4) /* IFA_LOCAL */
1403	       + nla_total_size(4) /* IFA_BROADCAST */
1404	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1405}
1406
1407static inline u32 cstamp_delta(unsigned long cstamp)
1408{
1409	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1410}
1411
1412static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1413			 unsigned long tstamp, u32 preferred, u32 valid)
1414{
1415	struct ifa_cacheinfo ci;
1416
1417	ci.cstamp = cstamp_delta(cstamp);
1418	ci.tstamp = cstamp_delta(tstamp);
1419	ci.ifa_prefered = preferred;
1420	ci.ifa_valid = valid;
1421
1422	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1423}
1424
1425static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1426			    u32 portid, u32 seq, int event, unsigned int flags)
1427{
1428	struct ifaddrmsg *ifm;
1429	struct nlmsghdr  *nlh;
1430	u32 preferred, valid;
1431
1432	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1433	if (nlh == NULL)
1434		return -EMSGSIZE;
1435
1436	ifm = nlmsg_data(nlh);
1437	ifm->ifa_family = AF_INET;
1438	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1439	ifm->ifa_flags = ifa->ifa_flags;
1440	ifm->ifa_scope = ifa->ifa_scope;
1441	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1442
1443	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1444		preferred = ifa->ifa_preferred_lft;
1445		valid = ifa->ifa_valid_lft;
1446		if (preferred != INFINITY_LIFE_TIME) {
1447			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1448
1449			if (preferred > tval)
1450				preferred -= tval;
1451			else
1452				preferred = 0;
1453			if (valid != INFINITY_LIFE_TIME) {
1454				if (valid > tval)
1455					valid -= tval;
1456				else
1457					valid = 0;
1458			}
1459		}
1460	} else {
1461		preferred = INFINITY_LIFE_TIME;
1462		valid = INFINITY_LIFE_TIME;
1463	}
1464	if ((ifa->ifa_address &&
1465	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1466	    (ifa->ifa_local &&
1467	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1468	    (ifa->ifa_broadcast &&
1469	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1470	    (ifa->ifa_label[0] &&
1471	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1472	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1473			  preferred, valid))
1474		goto nla_put_failure;
1475
1476	return nlmsg_end(skb, nlh);
1477
1478nla_put_failure:
1479	nlmsg_cancel(skb, nlh);
1480	return -EMSGSIZE;
1481}
1482
1483static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1484{
1485	struct net *net = sock_net(skb->sk);
1486	int h, s_h;
1487	int idx, s_idx;
1488	int ip_idx, s_ip_idx;
1489	struct net_device *dev;
1490	struct in_device *in_dev;
1491	struct in_ifaddr *ifa;
1492	struct hlist_head *head;
1493
1494	s_h = cb->args[0];
1495	s_idx = idx = cb->args[1];
1496	s_ip_idx = ip_idx = cb->args[2];
1497
1498	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1499		idx = 0;
1500		head = &net->dev_index_head[h];
1501		rcu_read_lock();
1502		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1503			if (idx < s_idx)
1504				goto cont;
1505			if (h > s_h || idx > s_idx)
1506				s_ip_idx = 0;
1507			in_dev = __in_dev_get_rcu(dev);
1508			if (!in_dev)
1509				goto cont;
1510
1511			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1512			     ifa = ifa->ifa_next, ip_idx++) {
1513				if (ip_idx < s_ip_idx)
1514					continue;
1515				if (inet_fill_ifaddr(skb, ifa,
1516					     NETLINK_CB(cb->skb).portid,
1517					     cb->nlh->nlmsg_seq,
1518					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1519					rcu_read_unlock();
1520					goto done;
1521				}
1522			}
1523cont:
1524			idx++;
1525		}
1526		rcu_read_unlock();
1527	}
1528
1529done:
1530	cb->args[0] = h;
1531	cb->args[1] = idx;
1532	cb->args[2] = ip_idx;
1533
1534	return skb->len;
1535}
1536
1537static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1538		      u32 portid)
1539{
1540	struct sk_buff *skb;
1541	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1542	int err = -ENOBUFS;
1543	struct net *net;
1544
1545	net = dev_net(ifa->ifa_dev->dev);
1546	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1547	if (skb == NULL)
1548		goto errout;
1549
1550	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1551	if (err < 0) {
1552		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1553		WARN_ON(err == -EMSGSIZE);
1554		kfree_skb(skb);
1555		goto errout;
1556	}
1557	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1558	return;
1559errout:
1560	if (err < 0)
1561		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1562}
1563
1564static size_t inet_get_link_af_size(const struct net_device *dev)
1565{
1566	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1567
1568	if (!in_dev)
1569		return 0;
1570
1571	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1572}
1573
1574static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1575{
1576	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1577	struct nlattr *nla;
1578	int i;
1579
1580	if (!in_dev)
1581		return -ENODATA;
1582
1583	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1584	if (nla == NULL)
1585		return -EMSGSIZE;
1586
1587	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1588		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1589
1590	return 0;
1591}
1592
1593static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1594	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1595};
1596
1597static int inet_validate_link_af(const struct net_device *dev,
1598				 const struct nlattr *nla)
1599{
1600	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1601	int err, rem;
1602
1603	if (dev && !__in_dev_get_rtnl(dev))
1604		return -EAFNOSUPPORT;
1605
1606	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1607	if (err < 0)
1608		return err;
1609
1610	if (tb[IFLA_INET_CONF]) {
1611		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1612			int cfgid = nla_type(a);
1613
1614			if (nla_len(a) < 4)
1615				return -EINVAL;
1616
1617			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1618				return -EINVAL;
1619		}
1620	}
1621
1622	return 0;
1623}
1624
1625static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1626{
1627	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1628	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1629	int rem;
1630
1631	if (!in_dev)
1632		return -EAFNOSUPPORT;
1633
1634	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1635		BUG();
1636
1637	if (tb[IFLA_INET_CONF]) {
1638		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1639			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1640	}
1641
1642	return 0;
1643}
1644
1645static int inet_netconf_msgsize_devconf(int type)
1646{
1647	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1648		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1649
1650	/* type -1 is used for ALL */
1651	if (type == -1 || type == NETCONFA_FORWARDING)
1652		size += nla_total_size(4);
1653	if (type == -1 || type == NETCONFA_RP_FILTER)
1654		size += nla_total_size(4);
1655	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1656		size += nla_total_size(4);
1657
1658	return size;
1659}
1660
1661static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1662				     struct ipv4_devconf *devconf, u32 portid,
1663				     u32 seq, int event, unsigned int flags,
1664				     int type)
1665{
1666	struct nlmsghdr  *nlh;
1667	struct netconfmsg *ncm;
1668
1669	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1670			flags);
1671	if (nlh == NULL)
1672		return -EMSGSIZE;
1673
1674	ncm = nlmsg_data(nlh);
1675	ncm->ncm_family = AF_INET;
1676
1677	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1678		goto nla_put_failure;
1679
1680	/* type -1 is used for ALL */
1681	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1682	    nla_put_s32(skb, NETCONFA_FORWARDING,
1683			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1684		goto nla_put_failure;
1685	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1686	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1687			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1688		goto nla_put_failure;
1689	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1690	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1691			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1692		goto nla_put_failure;
1693
1694	return nlmsg_end(skb, nlh);
1695
1696nla_put_failure:
1697	nlmsg_cancel(skb, nlh);
1698	return -EMSGSIZE;
1699}
1700
1701void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1702				 struct ipv4_devconf *devconf)
1703{
1704	struct sk_buff *skb;
1705	int err = -ENOBUFS;
1706
1707	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1708	if (skb == NULL)
1709		goto errout;
1710
1711	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1712					RTM_NEWNETCONF, 0, type);
1713	if (err < 0) {
1714		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1715		WARN_ON(err == -EMSGSIZE);
1716		kfree_skb(skb);
1717		goto errout;
1718	}
1719	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1720	return;
1721errout:
1722	if (err < 0)
1723		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1724}
1725
1726static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1727	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1728	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1729	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1730};
1731
1732static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1733				    struct nlmsghdr *nlh,
1734				    void *arg)
1735{
1736	struct net *net = sock_net(in_skb->sk);
1737	struct nlattr *tb[NETCONFA_MAX+1];
1738	struct netconfmsg *ncm;
1739	struct sk_buff *skb;
1740	struct ipv4_devconf *devconf;
1741	struct in_device *in_dev;
1742	struct net_device *dev;
1743	int ifindex;
1744	int err;
1745
1746	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1747			  devconf_ipv4_policy);
1748	if (err < 0)
1749		goto errout;
1750
1751	err = EINVAL;
1752	if (!tb[NETCONFA_IFINDEX])
1753		goto errout;
1754
1755	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1756	switch (ifindex) {
1757	case NETCONFA_IFINDEX_ALL:
1758		devconf = net->ipv4.devconf_all;
1759		break;
1760	case NETCONFA_IFINDEX_DEFAULT:
1761		devconf = net->ipv4.devconf_dflt;
1762		break;
1763	default:
1764		dev = __dev_get_by_index(net, ifindex);
1765		if (dev == NULL)
1766			goto errout;
1767		in_dev = __in_dev_get_rtnl(dev);
1768		if (in_dev == NULL)
1769			goto errout;
1770		devconf = &in_dev->cnf;
1771		break;
1772	}
1773
1774	err = -ENOBUFS;
1775	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1776	if (skb == NULL)
1777		goto errout;
1778
1779	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1780					NETLINK_CB(in_skb).portid,
1781					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1782					-1);
1783	if (err < 0) {
1784		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1785		WARN_ON(err == -EMSGSIZE);
1786		kfree_skb(skb);
1787		goto errout;
1788	}
1789	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1790errout:
1791	return err;
1792}
1793
1794#ifdef CONFIG_SYSCTL
1795
1796static void devinet_copy_dflt_conf(struct net *net, int i)
1797{
1798	struct net_device *dev;
1799
1800	rcu_read_lock();
1801	for_each_netdev_rcu(net, dev) {
1802		struct in_device *in_dev;
1803
1804		in_dev = __in_dev_get_rcu(dev);
1805		if (in_dev && !test_bit(i, in_dev->cnf.state))
1806			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1807	}
1808	rcu_read_unlock();
1809}
1810
1811/* called with RTNL locked */
1812static void inet_forward_change(struct net *net)
1813{
1814	struct net_device *dev;
1815	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1816
1817	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1818	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1819	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1820				    NETCONFA_IFINDEX_ALL,
1821				    net->ipv4.devconf_all);
1822	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1823				    NETCONFA_IFINDEX_DEFAULT,
1824				    net->ipv4.devconf_dflt);
1825
1826	for_each_netdev(net, dev) {
1827		struct in_device *in_dev;
1828		if (on)
1829			dev_disable_lro(dev);
1830		rcu_read_lock();
1831		in_dev = __in_dev_get_rcu(dev);
1832		if (in_dev) {
1833			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1834			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1835						    dev->ifindex, &in_dev->cnf);
1836		}
1837		rcu_read_unlock();
1838	}
1839}
1840
1841static int devinet_conf_proc(ctl_table *ctl, int write,
1842			     void __user *buffer,
1843			     size_t *lenp, loff_t *ppos)
1844{
1845	int old_value = *(int *)ctl->data;
1846	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1847	int new_value = *(int *)ctl->data;
1848
1849	if (write) {
1850		struct ipv4_devconf *cnf = ctl->extra1;
1851		struct net *net = ctl->extra2;
1852		int i = (int *)ctl->data - cnf->data;
1853
1854		set_bit(i, cnf->state);
1855
1856		if (cnf == net->ipv4.devconf_dflt)
1857			devinet_copy_dflt_conf(net, i);
1858		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1859		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1860			if ((new_value == 0) && (old_value != 0))
1861				rt_cache_flush(net);
1862		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1863		    new_value != old_value) {
1864			int ifindex;
1865
1866			if (cnf == net->ipv4.devconf_dflt)
1867				ifindex = NETCONFA_IFINDEX_DEFAULT;
1868			else if (cnf == net->ipv4.devconf_all)
1869				ifindex = NETCONFA_IFINDEX_ALL;
1870			else {
1871				struct in_device *idev =
1872					container_of(cnf, struct in_device,
1873						     cnf);
1874				ifindex = idev->dev->ifindex;
1875			}
1876			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1877						    ifindex, cnf);
1878		}
1879	}
1880
1881	return ret;
1882}
1883
1884static int devinet_sysctl_forward(ctl_table *ctl, int write,
1885				  void __user *buffer,
1886				  size_t *lenp, loff_t *ppos)
1887{
1888	int *valp = ctl->data;
1889	int val = *valp;
1890	loff_t pos = *ppos;
1891	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1892
1893	if (write && *valp != val) {
1894		struct net *net = ctl->extra2;
1895
1896		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1897			if (!rtnl_trylock()) {
1898				/* Restore the original values before restarting */
1899				*valp = val;
1900				*ppos = pos;
1901				return restart_syscall();
1902			}
1903			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1904				inet_forward_change(net);
1905			} else {
1906				struct ipv4_devconf *cnf = ctl->extra1;
1907				struct in_device *idev =
1908					container_of(cnf, struct in_device, cnf);
1909				if (*valp)
1910					dev_disable_lro(idev->dev);
1911				inet_netconf_notify_devconf(net,
1912							    NETCONFA_FORWARDING,
1913							    idev->dev->ifindex,
1914							    cnf);
1915			}
1916			rtnl_unlock();
1917			rt_cache_flush(net);
1918		} else
1919			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1920						    NETCONFA_IFINDEX_DEFAULT,
1921						    net->ipv4.devconf_dflt);
1922	}
1923
1924	return ret;
1925}
1926
1927static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1928				void __user *buffer,
1929				size_t *lenp, loff_t *ppos)
1930{
1931	int *valp = ctl->data;
1932	int val = *valp;
1933	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1934	struct net *net = ctl->extra2;
1935
1936	if (write && *valp != val)
1937		rt_cache_flush(net);
1938
1939	return ret;
1940}
1941
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf and .extra1 at ipv4_devconf itself; both are rebased onto
 * the real target by __devinet_sysctl_register().
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[ \
					IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry (mode 0644) handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry (mode 0444) handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry (mode 0644) with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1964
1965static struct devinet_sysctl_table {
1966	struct ctl_table_header *sysctl_header;
1967	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1968} devinet_sysctl = {
1969	.devinet_vars = {
1970		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1971					     devinet_sysctl_forward),
1972		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1973
1974		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1975		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1976		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1977		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1978		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1979		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1980					"accept_source_route"),
1981		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1982		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1983		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1984		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1985		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1986		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1987		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1988		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1989		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1990		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1991		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1992		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1993		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1994
1995		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1996		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1997		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1998					      "force_igmp_version"),
1999		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2000					      "promote_secondaries"),
2001		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2002					      "route_localnet"),
2003	},
2004};
2005
2006static int __devinet_sysctl_register(struct net *net, char *dev_name,
2007					struct ipv4_devconf *p)
2008{
2009	int i;
2010	struct devinet_sysctl_table *t;
2011	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2012
2013	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2014	if (!t)
2015		goto out;
2016
2017	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2018		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2019		t->devinet_vars[i].extra1 = p;
2020		t->devinet_vars[i].extra2 = net;
2021	}
2022
2023	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2024
2025	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2026	if (!t->sysctl_header)
2027		goto free;
2028
2029	p->sysctl = t;
2030	return 0;
2031
2032free:
2033	kfree(t);
2034out:
2035	return -ENOBUFS;
2036}
2037
2038static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2039{
2040	struct devinet_sysctl_table *t = cnf->sysctl;
2041
2042	if (t == NULL)
2043		return;
2044
2045	cnf->sysctl = NULL;
2046	unregister_net_sysctl_table(t->sysctl_header);
2047	kfree(t);
2048}
2049
2050static void devinet_sysctl_register(struct in_device *idev)
2051{
2052	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2053	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2054					&idev->cnf);
2055}
2056
2057static void devinet_sysctl_unregister(struct in_device *idev)
2058{
2059	__devinet_sysctl_unregister(&idev->cnf);
2060	neigh_sysctl_unregister(idev->arp_parms);
2061}
2062
2063static struct ctl_table ctl_forward_entry[] = {
2064	{
2065		.procname	= "ip_forward",
2066		.data		= &ipv4_devconf.data[
2067					IPV4_DEVCONF_FORWARDING - 1],
2068		.maxlen		= sizeof(int),
2069		.mode		= 0644,
2070		.proc_handler	= devinet_sysctl_forward,
2071		.extra1		= &ipv4_devconf,
2072		.extra2		= &init_net,
2073	},
2074	{ },
2075};
2076#endif
2077
2078static __net_init int devinet_init_net(struct net *net)
2079{
2080	int err;
2081	struct ipv4_devconf *all, *dflt;
2082#ifdef CONFIG_SYSCTL
2083	struct ctl_table *tbl = ctl_forward_entry;
2084	struct ctl_table_header *forw_hdr;
2085#endif
2086
2087	err = -ENOMEM;
2088	all = &ipv4_devconf;
2089	dflt = &ipv4_devconf_dflt;
2090
2091	if (!net_eq(net, &init_net)) {
2092		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2093		if (all == NULL)
2094			goto err_alloc_all;
2095
2096		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2097		if (dflt == NULL)
2098			goto err_alloc_dflt;
2099
2100#ifdef CONFIG_SYSCTL
2101		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2102		if (tbl == NULL)
2103			goto err_alloc_ctl;
2104
2105		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2106		tbl[0].extra1 = all;
2107		tbl[0].extra2 = net;
2108#endif
2109	}
2110
2111#ifdef CONFIG_SYSCTL
2112	err = __devinet_sysctl_register(net, "all", all);
2113	if (err < 0)
2114		goto err_reg_all;
2115
2116	err = __devinet_sysctl_register(net, "default", dflt);
2117	if (err < 0)
2118		goto err_reg_dflt;
2119
2120	err = -ENOMEM;
2121	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2122	if (forw_hdr == NULL)
2123		goto err_reg_ctl;
2124	net->ipv4.forw_hdr = forw_hdr;
2125#endif
2126
2127	net->ipv4.devconf_all = all;
2128	net->ipv4.devconf_dflt = dflt;
2129	return 0;
2130
2131#ifdef CONFIG_SYSCTL
2132err_reg_ctl:
2133	__devinet_sysctl_unregister(dflt);
2134err_reg_dflt:
2135	__devinet_sysctl_unregister(all);
2136err_reg_all:
2137	if (tbl != ctl_forward_entry)
2138		kfree(tbl);
2139err_alloc_ctl:
2140#endif
2141	if (dflt != &ipv4_devconf_dflt)
2142		kfree(dflt);
2143err_alloc_dflt:
2144	if (all != &ipv4_devconf)
2145		kfree(all);
2146err_alloc_all:
2147	return err;
2148}
2149
2150static __net_exit void devinet_exit_net(struct net *net)
2151{
2152#ifdef CONFIG_SYSCTL
2153	struct ctl_table *tbl;
2154
2155	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2156	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2157	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2158	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2159	kfree(tbl);
2160#endif
2161	kfree(net->ipv4.devconf_dflt);
2162	kfree(net->ipv4.devconf_all);
2163}
2164
2165static __net_initdata struct pernet_operations devinet_ops = {
2166	.init = devinet_init_net,
2167	.exit = devinet_exit_net,
2168};
2169
2170static struct rtnl_af_ops inet_af_ops = {
2171	.family		  = AF_INET,
2172	.fill_link_af	  = inet_fill_link_af,
2173	.get_link_af_size = inet_get_link_af_size,
2174	.validate_link_af = inet_validate_link_af,
2175	.set_link_af	  = inet_set_link_af,
2176};
2177
2178void __init devinet_init(void)
2179{
2180	int i;
2181
2182	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2183		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2184
2185	register_pernet_subsys(&devinet_ops);
2186
2187	register_gifconf(PF_INET, inet_gifconf);
2188	register_netdevice_notifier(&ip_netdev_notifier);
2189
2190	schedule_delayed_work(&check_lifetime_work, 0);
2191
2192	rtnl_af_register(&inet_af_ops);
2193
2194	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2195	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2196	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2197	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2198		      NULL, NULL);
2199}
2200
2201