fib_frontend.c revision 8cced9eff1d413c28efac9c5ac5a75793c9251cf
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug helper: forwards its arguments to printk() at KERN_DEBUG level. */
50#define FFprint(a...) printk(KERN_DEBUG a)
51
52#ifndef CONFIG_IP_MULTIPLE_TABLES
53
54static int __net_init fib4_rules_init(struct net *net)
55{
56	struct fib_table *local_table, *main_table;
57
58	local_table = fib_hash_init(RT_TABLE_LOCAL);
59	if (local_table == NULL)
60		return -ENOMEM;
61
62	main_table  = fib_hash_init(RT_TABLE_MAIN);
63	if (main_table == NULL)
64		goto fail;
65
66	hlist_add_head_rcu(&local_table->tb_hlist,
67				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
68	hlist_add_head_rcu(&main_table->tb_hlist,
69				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
70	return 0;
71
72fail:
73	kfree(local_table);
74	return -ENOMEM;
75}
76#else
77
78struct fib_table *fib_new_table(struct net *net, u32 id)
79{
80	struct fib_table *tb;
81	unsigned int h;
82
83	if (id == 0)
84		id = RT_TABLE_MAIN;
85	tb = fib_get_table(net, id);
86	if (tb)
87		return tb;
88	tb = fib_hash_init(id);
89	if (!tb)
90		return NULL;
91	h = id & (FIB_TABLE_HASHSZ - 1);
92	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
93	return tb;
94}
95
96struct fib_table *fib_get_table(struct net *net, u32 id)
97{
98	struct fib_table *tb;
99	struct hlist_node *node;
100	struct hlist_head *head;
101	unsigned int h;
102
103	if (id == 0)
104		id = RT_TABLE_MAIN;
105	h = id & (FIB_TABLE_HASHSZ - 1);
106
107	rcu_read_lock();
108	head = &net->ipv4.fib_table_hash[h];
109	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
110		if (tb->tb_id == id) {
111			rcu_read_unlock();
112			return tb;
113		}
114	}
115	rcu_read_unlock();
116	return NULL;
117}
118#endif /* CONFIG_IP_MULTIPLE_TABLES */
119
120static void fib_flush(struct net *net)
121{
122	int flushed = 0;
123	struct fib_table *tb;
124	struct hlist_node *node;
125	struct hlist_head *head;
126	unsigned int h;
127
128	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
129		head = &net->ipv4.fib_table_hash[h];
130		hlist_for_each_entry(tb, node, head, tb_hlist)
131			flushed += tb->tb_flush(tb);
132	}
133
134	if (flushed)
135		rt_cache_flush(-1);
136}
137
138/*
139 *	Find the first device with a given source address.
140 */
141
142struct net_device * ip_dev_find(__be32 addr)
143{
144	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
145	struct fib_result res;
146	struct net_device *dev = NULL;
147	struct fib_table *local_table;
148
149#ifdef CONFIG_IP_MULTIPLE_TABLES
150	res.r = NULL;
151#endif
152
153	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
154	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
155		return NULL;
156	if (res.type != RTN_LOCAL)
157		goto out;
158	dev = FIB_RES_DEV(res);
159
160	if (dev)
161		dev_hold(dev);
162out:
163	fib_res_put(&res);
164	return dev;
165}
166
167/*
168 * Find address type as if only "dev" was present in the system. If
169 * dev is NULL then all interfaces are taken into consideration.
170 */
171static inline unsigned __inet_dev_addr_type(struct net *net,
172					    const struct net_device *dev,
173					    __be32 addr)
174{
175	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
176	struct fib_result	res;
177	unsigned ret = RTN_BROADCAST;
178	struct fib_table *local_table;
179
180	if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
181		return RTN_BROADCAST;
182	if (ipv4_is_multicast(addr))
183		return RTN_MULTICAST;
184
185#ifdef CONFIG_IP_MULTIPLE_TABLES
186	res.r = NULL;
187#endif
188
189	local_table = fib_get_table(net, RT_TABLE_LOCAL);
190	if (local_table) {
191		ret = RTN_UNICAST;
192		if (!local_table->tb_lookup(local_table, &fl, &res)) {
193			if (!dev || dev == res.fi->fib_dev)
194				ret = res.type;
195			fib_res_put(&res);
196		}
197	}
198	return ret;
199}
200
201unsigned int inet_addr_type(struct net *net, __be32 addr)
202{
203	return __inet_dev_addr_type(net, NULL, addr);
204}
205
206unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
207				__be32 addr)
208{
209       return __inet_dev_addr_type(net, dev, addr);
210}
211
212/* Given (packet source, input interface) and optional (dst, oif, tos):
213   - (main) check, that source is valid i.e. not broadcast or our local
214     address.
215   - figure out what "logical" interface this packet arrived
216     and calculate "specific destination" address.
217   - check, that packet arrived from expected physical interface.
218 */
219
/*
 * fib_validate_source - validate the source address of a received packet.
 * @src:      packet source address (used as the lookup destination)
 * @dst:      packet destination address (used as the lookup source)
 * @tos:      TOS value placed in the lookup key
 * @oif:      stored in the .iif field of the lookup key
 * @dev:      device the packet arrived on
 * @spec_dst: out: specific destination address chosen for the packet
 * @itag:     out: passed to fib_combine_itag(), or zeroed on the last-resort path
 *
 * Returns -EINVAL when @dev has no in_device, when the reverse route is not
 * RTN_UNICAST, or when reverse-path filtering is enabled and the reverse
 * route does not use @dev.  Otherwise returns the value of
 * "nh_scope >= RT_SCOPE_HOST" for the matching nexthop, or 0 when no route
 * matched.
 */
220int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
221			struct net_device *dev, __be32 *spec_dst, u32 *itag)
222{
223	struct in_device *in_dev;
224	struct flowi fl = { .nl_u = { .ip4_u =
225				      { .daddr = src,
226					.saddr = dst,
227					.tos = tos } },
228			    .iif = oif };
229	struct fib_result res;
230	int no_addr, rpf;
231	int ret;
232
233	no_addr = rpf = 0;
	/* Snapshot the two in_device properties needed later under RCU. */
234	rcu_read_lock();
235	in_dev = __in_dev_get_rcu(dev);
236	if (in_dev) {
237		no_addr = in_dev->ifa_list == NULL;
238		rpf = IN_DEV_RPFILTER(in_dev);
239	}
240	rcu_read_unlock();
241
	/* in_dev is only compared against NULL here, never dereferenced. */
242	if (in_dev == NULL)
243		goto e_inval;
244
	/* First lookup: reverse route without constraining the output device. */
245	if (fib_lookup(&fl, &res))
246		goto last_resort;
247	if (res.type != RTN_UNICAST)
248		goto e_inval_res;
249	*spec_dst = FIB_RES_PREFSRC(res);
250	fib_combine_itag(itag, &res);
	/* Accept when the reverse route leaves via @dev (or is multipath). */
251#ifdef CONFIG_IP_ROUTE_MULTIPATH
252	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
253#else
254	if (FIB_RES_DEV(res) == dev)
255#endif
256	{
257		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
258		fib_res_put(&res);
259		return ret;
260	}
261	fib_res_put(&res);
262	if (no_addr)
263		goto last_resort;
264	if (rpf)
265		goto e_inval;
	/* Second lookup: same key, now restricted to routes out of @dev. */
266	fl.oif = dev->ifindex;
267
268	ret = 0;
269	if (fib_lookup(&fl, &res) == 0) {
270		if (res.type == RTN_UNICAST) {
271			*spec_dst = FIB_RES_PREFSRC(res);
272			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
273		}
274		fib_res_put(&res);
275	}
276	return ret;
277
	/* No usable reverse route: pick an address on @dev unless rpf forbids it. */
278last_resort:
279	if (rpf)
280		goto e_inval;
281	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
282	*itag = 0;
283	return 0;
284
285e_inval_res:
286	fib_res_put(&res);
287e_inval:
288	return -EINVAL;
289}
290
291static inline __be32 sk_extract_addr(struct sockaddr *addr)
292{
293	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
294}
295
296static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
297{
298	struct nlattr *nla;
299
300	nla = (struct nlattr *) ((char *) mx + len);
301	nla->nla_type = type;
302	nla->nla_len = nla_attr_size(4);
303	*(u32 *) nla_data(nla) = value;
304
305	return len + nla_total_size(4);
306}
307
/*
 * rtentry_to_fib_config - translate an ioctl struct rtentry into a fib_config.
 * @net: namespace recorded in cfg->fc_nlinfo.nl_net and used for lookups
 * @cmd: SIOCADDRT or SIOCDELRT as passed by ip_rt_ioctl()
 * @rt:  route description; rt->rt_dev, when set, is read with copy_from_user()
 * @cfg: output, fully zeroed before being filled in
 *
 * Returns 0 on success or a negative errno: -EAFNOSUPPORT (wrong address
 * family), -EINVAL (bad mask, or RTF_GATEWAY without a gateway on add),
 * -EFAULT (device name copy failed), -ENODEV (device or label not found),
 * -ENOMEM (metrics buffer allocation failed).
 *
 * On success cfg->fc_mx may point to a kzalloc()ed buffer; ip_rt_ioctl()
 * frees it with kfree().
 */
308static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
309				 struct fib_config *cfg)
310{
311	__be32 addr;
312	int plen;
313
314	memset(cfg, 0, sizeof(*cfg));
315	cfg->fc_nlinfo.nl_net = net;
316
317	if (rt->rt_dst.sa_family != AF_INET)
318		return -EAFNOSUPPORT;
319
320	/*
321	 * Check mask for validity:
322	 * a) it must be contiguous.
323	 * b) destination must have all host bits clear.
324	 * c) if application forgot to set correct family (AF_INET),
325	 *    reject request unless it is absolutely clear i.e.
326	 *    both family and mask are zero.
327	 */
	/* Host routes keep the default /32; others derive it from rt_genmask. */
328	plen = 32;
329	addr = sk_extract_addr(&rt->rt_dst);
330	if (!(rt->rt_flags & RTF_HOST)) {
331		__be32 mask = sk_extract_addr(&rt->rt_genmask);
332
333		if (rt->rt_genmask.sa_family != AF_INET) {
334			if (mask || rt->rt_genmask.sa_family)
335				return -EAFNOSUPPORT;
336		}
337
338		if (bad_mask(mask, addr))
339			return -EINVAL;
340
341		plen = inet_mask_len(mask);
342	}
343
344	cfg->fc_dst_len = plen;
345	cfg->fc_dst = addr;
346
347	if (cmd != SIOCDELRT) {
348		cfg->fc_nlflags = NLM_F_CREATE;
349		cfg->fc_protocol = RTPROT_BOOT;
350	}
351
	/* A non-zero rt_metric maps to priority rt_metric - 1. */
352	if (rt->rt_metric)
353		cfg->fc_priority = rt->rt_metric - 1;
354
	/* Reject routes need no device or gateway processing. */
355	if (rt->rt_flags & RTF_REJECT) {
356		cfg->fc_scope = RT_SCOPE_HOST;
357		cfg->fc_type = RTN_UNREACHABLE;
358		return 0;
359	}
360
	/* RT_SCOPE_NOWHERE acts as "not decided yet"; see the fix-up below. */
361	cfg->fc_scope = RT_SCOPE_NOWHERE;
362	cfg->fc_type = RTN_UNICAST;
363
364	if (rt->rt_dev) {
365		char *colon;
366		struct net_device *dev;
367		char devname[IFNAMSIZ];
368
369		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
370			return -EFAULT;
371
372		devname[IFNAMSIZ-1] = 0;
		/* Cut the name at ':' so only the part before it names the device. */
373		colon = strchr(devname, ':');
374		if (colon)
375			*colon = 0;
376		dev = __dev_get_by_name(net, devname);
377		if (!dev)
378			return -ENODEV;
379		cfg->fc_oif = dev->ifindex;
380		if (colon) {
381			struct in_ifaddr *ifa;
382			struct in_device *in_dev = __in_dev_get_rtnl(dev);
383			if (!in_dev)
384				return -ENODEV;
			/* Restore the full "name:suffix" string and match it as an address label. */
385			*colon = ':';
386			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
387				if (strcmp(ifa->ifa_label, devname) == 0)
388					break;
389			if (ifa == NULL)
390				return -ENODEV;
391			cfg->fc_prefsrc = ifa->ifa_local;
392		}
393	}
394
395	addr = sk_extract_addr(&rt->rt_gateway);
396	if (rt->rt_gateway.sa_family == AF_INET && addr) {
397		cfg->fc_gw = addr;
398		if (rt->rt_flags & RTF_GATEWAY &&
399		    inet_addr_type(net, addr) == RTN_UNICAST)
400			cfg->fc_scope = RT_SCOPE_UNIVERSE;
401	}
402
	/* Deletion stops here: the remaining checks and metrics apply to adds only. */
403	if (cmd == SIOCDELRT)
404		return 0;
405
406	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
407		return -EINVAL;
408
409	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
410		cfg->fc_scope = RT_SCOPE_LINK;
411
412	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
413		struct nlattr *mx;
414		int len = 0;
415
		/* Room for at most three 4-byte attributes (ADVMSS, WINDOW, RTT). */
416		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
417		if (mx == NULL)
418			return -ENOMEM;
419
		/* NOTE(review): the 40 presumably stands for IP + TCP header size - confirm. */
420		if (rt->rt_flags & RTF_MTU)
421			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
422
423		if (rt->rt_flags & RTF_WINDOW)
424			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
425
		/* NOTE(review): the << 3 presumably converts to the unit RTAX_RTT expects - confirm. */
426		if (rt->rt_flags & RTF_IRTT)
427			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
428
429		cfg->fc_mx = mx;
430		cfg->fc_mx_len = len;
431	}
432
433	return 0;
434}
435
436/*
437 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
438 */
439
440int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
441{
442	struct fib_config cfg;
443	struct rtentry rt;
444	int err;
445
446	switch (cmd) {
447	case SIOCADDRT:		/* Add a route */
448	case SIOCDELRT:		/* Delete a route */
449		if (!capable(CAP_NET_ADMIN))
450			return -EPERM;
451
452		if (copy_from_user(&rt, arg, sizeof(rt)))
453			return -EFAULT;
454
455		rtnl_lock();
456		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
457		if (err == 0) {
458			struct fib_table *tb;
459
460			if (cmd == SIOCDELRT) {
461				tb = fib_get_table(net, cfg.fc_table);
462				if (tb)
463					err = tb->tb_delete(tb, &cfg);
464				else
465					err = -ESRCH;
466			} else {
467				tb = fib_new_table(net, cfg.fc_table);
468				if (tb)
469					err = tb->tb_insert(tb, &cfg);
470				else
471					err = -ENOBUFS;
472			}
473
474			/* allocated by rtentry_to_fib_config() */
475			kfree(cfg.fc_mx);
476		}
477		rtnl_unlock();
478		return err;
479	}
480	return -EINVAL;
481}
482
483const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
484	[RTA_DST]		= { .type = NLA_U32 },
485	[RTA_SRC]		= { .type = NLA_U32 },
486	[RTA_IIF]		= { .type = NLA_U32 },
487	[RTA_OIF]		= { .type = NLA_U32 },
488	[RTA_GATEWAY]		= { .type = NLA_U32 },
489	[RTA_PRIORITY]		= { .type = NLA_U32 },
490	[RTA_PREFSRC]		= { .type = NLA_U32 },
491	[RTA_METRICS]		= { .type = NLA_NESTED },
492	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
493	[RTA_PROTOINFO]		= { .type = NLA_U32 },
494	[RTA_FLOW]		= { .type = NLA_U32 },
495};
496
497static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
498			    struct nlmsghdr *nlh, struct fib_config *cfg)
499{
500	struct nlattr *attr;
501	int err, remaining;
502	struct rtmsg *rtm;
503
504	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
505	if (err < 0)
506		goto errout;
507
508	memset(cfg, 0, sizeof(*cfg));
509
510	rtm = nlmsg_data(nlh);
511	cfg->fc_dst_len = rtm->rtm_dst_len;
512	cfg->fc_tos = rtm->rtm_tos;
513	cfg->fc_table = rtm->rtm_table;
514	cfg->fc_protocol = rtm->rtm_protocol;
515	cfg->fc_scope = rtm->rtm_scope;
516	cfg->fc_type = rtm->rtm_type;
517	cfg->fc_flags = rtm->rtm_flags;
518	cfg->fc_nlflags = nlh->nlmsg_flags;
519
520	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
521	cfg->fc_nlinfo.nlh = nlh;
522	cfg->fc_nlinfo.nl_net = net;
523
524	if (cfg->fc_type > RTN_MAX) {
525		err = -EINVAL;
526		goto errout;
527	}
528
529	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
530		switch (nla_type(attr)) {
531		case RTA_DST:
532			cfg->fc_dst = nla_get_be32(attr);
533			break;
534		case RTA_OIF:
535			cfg->fc_oif = nla_get_u32(attr);
536			break;
537		case RTA_GATEWAY:
538			cfg->fc_gw = nla_get_be32(attr);
539			break;
540		case RTA_PRIORITY:
541			cfg->fc_priority = nla_get_u32(attr);
542			break;
543		case RTA_PREFSRC:
544			cfg->fc_prefsrc = nla_get_be32(attr);
545			break;
546		case RTA_METRICS:
547			cfg->fc_mx = nla_data(attr);
548			cfg->fc_mx_len = nla_len(attr);
549			break;
550		case RTA_MULTIPATH:
551			cfg->fc_mp = nla_data(attr);
552			cfg->fc_mp_len = nla_len(attr);
553			break;
554		case RTA_FLOW:
555			cfg->fc_flow = nla_get_u32(attr);
556			break;
557		case RTA_TABLE:
558			cfg->fc_table = nla_get_u32(attr);
559			break;
560		}
561	}
562
563	return 0;
564errout:
565	return err;
566}
567
568static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
569{
570	struct net *net = skb->sk->sk_net;
571	struct fib_config cfg;
572	struct fib_table *tb;
573	int err;
574
575	err = rtm_to_fib_config(net, skb, nlh, &cfg);
576	if (err < 0)
577		goto errout;
578
579	tb = fib_get_table(net, cfg.fc_table);
580	if (tb == NULL) {
581		err = -ESRCH;
582		goto errout;
583	}
584
585	err = tb->tb_delete(tb, &cfg);
586errout:
587	return err;
588}
589
590static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
591{
592	struct net *net = skb->sk->sk_net;
593	struct fib_config cfg;
594	struct fib_table *tb;
595	int err;
596
597	err = rtm_to_fib_config(net, skb, nlh, &cfg);
598	if (err < 0)
599		goto errout;
600
601	tb = fib_new_table(net, cfg.fc_table);
602	if (tb == NULL) {
603		err = -ENOBUFS;
604		goto errout;
605	}
606
607	err = tb->tb_insert(tb, &cfg);
608errout:
609	return err;
610}
611
612static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
613{
614	struct net *net = skb->sk->sk_net;
615	unsigned int h, s_h;
616	unsigned int e = 0, s_e;
617	struct fib_table *tb;
618	struct hlist_node *node;
619	struct hlist_head *head;
620	int dumped = 0;
621
622	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
623	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
624		return ip_rt_dump(skb, cb);
625
626	s_h = cb->args[0];
627	s_e = cb->args[1];
628
629	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
630		e = 0;
631		head = &net->ipv4.fib_table_hash[h];
632		hlist_for_each_entry(tb, node, head, tb_hlist) {
633			if (e < s_e)
634				goto next;
635			if (dumped)
636				memset(&cb->args[2], 0, sizeof(cb->args) -
637						 2 * sizeof(cb->args[0]));
638			if (tb->tb_dump(tb, skb, cb) < 0)
639				goto out;
640			dumped = 1;
641next:
642			e++;
643		}
644	}
645out:
646	cb->args[1] = e;
647	cb->args[0] = h;
648
649	return skb->len;
650}
651
652/* Prepare and feed intra-kernel routing request.
653   Really, it should be netlink message, but :-( netlink
654   can be not configured, so that we feed it directly
655   to fib engine. It is legal, because all events occur
656   only when netlink is already locked.
657 */
658
659static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
660{
661	struct net *net = ifa->ifa_dev->dev->nd_net;
662	struct fib_table *tb;
663	struct fib_config cfg = {
664		.fc_protocol = RTPROT_KERNEL,
665		.fc_type = type,
666		.fc_dst = dst,
667		.fc_dst_len = dst_len,
668		.fc_prefsrc = ifa->ifa_local,
669		.fc_oif = ifa->ifa_dev->dev->ifindex,
670		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
671		.fc_nlinfo = {
672			.nl_net = net,
673		},
674	};
675
676	if (type == RTN_UNICAST)
677		tb = fib_new_table(net, RT_TABLE_MAIN);
678	else
679		tb = fib_new_table(net, RT_TABLE_LOCAL);
680
681	if (tb == NULL)
682		return;
683
684	cfg.fc_table = tb->tb_id;
685
686	if (type != RTN_LOCAL)
687		cfg.fc_scope = RT_SCOPE_LINK;
688	else
689		cfg.fc_scope = RT_SCOPE_HOST;
690
691	if (cmd == RTM_NEWROUTE)
692		tb->tb_insert(tb, &cfg);
693	else
694		tb->tb_delete(tb, &cfg);
695}
696
697void fib_add_ifaddr(struct in_ifaddr *ifa)
698{
699	struct in_device *in_dev = ifa->ifa_dev;
700	struct net_device *dev = in_dev->dev;
701	struct in_ifaddr *prim = ifa;
702	__be32 mask = ifa->ifa_mask;
703	__be32 addr = ifa->ifa_local;
704	__be32 prefix = ifa->ifa_address&mask;
705
706	if (ifa->ifa_flags&IFA_F_SECONDARY) {
707		prim = inet_ifa_byprefix(in_dev, prefix, mask);
708		if (prim == NULL) {
709			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
710			return;
711		}
712	}
713
714	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
715
716	if (!(dev->flags&IFF_UP))
717		return;
718
719	/* Add broadcast address, if it is explicitly assigned. */
720	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
721		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
722
723	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
724	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
725		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
726			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
727
728		/* Add network specific broadcasts, when it takes a sense */
729		if (ifa->ifa_prefixlen < 31) {
730			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
731			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
732		}
733	}
734}
735
/*
 * fib_del_ifaddr - remove the routes that were installed for address @ifa.
 *
 * For a primary address the prefix route is deleted first.  The device's
 * address list is then scanned, and the local and broadcast /32 routes are
 * deleted only when no address on the list still uses the same value.
 * When the local address is gone for good, stray nexthops are synced down
 * and the FIB is flushed if fib_sync_down() reports work to do.
 *
 * For a secondary address whose primary cannot be found, a debug message is
 * printed and nothing is deleted.
 */
736static void fib_del_ifaddr(struct in_ifaddr *ifa)
737{
738	struct in_device *in_dev = ifa->ifa_dev;
739	struct net_device *dev = in_dev->dev;
740	struct in_ifaddr *ifa1;
741	struct in_ifaddr *prim = ifa;
	/* brd: address with all host bits set; any: address with all host bits clear. */
742	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
743	__be32 any = ifa->ifa_address&ifa->ifa_mask;
	/* Bits in "ok": set when some listed address still needs that route. */
744#define LOCAL_OK	1
745#define BRD_OK		2
746#define BRD0_OK		4
747#define BRD1_OK		8
748	unsigned ok = 0;
749
750	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
751		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
752			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
753	else {
754		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
755		if (prim == NULL) {
756			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
757			return;
758		}
759	}
760
761	/* Deletion is more complicated than add.
762	   We should take care of not to delete too much :-)
763
764	   Scan address list to be sure that addresses are really gone.
765	 */
766
	/* NOTE(review): presumably @ifa is already off in_dev->ifa_list here - confirm with the caller. */
767	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
768		if (ifa->ifa_local == ifa1->ifa_local)
769			ok |= LOCAL_OK;
770		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
771			ok |= BRD_OK;
772		if (brd == ifa1->ifa_broadcast)
773			ok |= BRD1_OK;
774		if (any == ifa1->ifa_broadcast)
775			ok |= BRD0_OK;
776	}
777
	/* Delete each /32 route whose value no remaining address claimed. */
778	if (!(ok&BRD_OK))
779		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
780	if (!(ok&BRD1_OK))
781		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
782	if (!(ok&BRD0_OK))
783		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
784	if (!(ok&LOCAL_OK)) {
785		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
786
787		/* Check, that this local address finally disappeared. */
788		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
789			/* And the last, but not the least thing.
790			   We must flush stray FIB entries.
791
792			   First of all, we scan fib_info list searching
793			   for stray nexthop entries, then ignite fib_flush.
794			*/
795			if (fib_sync_down(ifa->ifa_local, NULL, 0))
796				fib_flush(dev->nd_net);
797		}
798	}
799#undef LOCAL_OK
800#undef BRD_OK
801#undef BRD0_OK
802#undef BRD1_OK
803}
804
805static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
806{
807
808	struct fib_result       res;
809	struct flowi            fl = { .mark = frn->fl_mark,
810				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
811							    .tos = frn->fl_tos,
812							    .scope = frn->fl_scope } } };
813
814#ifdef CONFIG_IP_MULTIPLE_TABLES
815	res.r = NULL;
816#endif
817
818	frn->err = -ENOENT;
819	if (tb) {
820		local_bh_disable();
821
822		frn->tb_id = tb->tb_id;
823		frn->err = tb->tb_lookup(tb, &fl, &res);
824
825		if (!frn->err) {
826			frn->prefixlen = res.prefixlen;
827			frn->nh_sel = res.nh_sel;
828			frn->type = res.type;
829			frn->scope = res.scope;
830			fib_res_put(&res);
831		}
832		local_bh_enable();
833	}
834}
835
836static void nl_fib_input(struct sk_buff *skb)
837{
838	struct net *net;
839	struct fib_result_nl *frn;
840	struct nlmsghdr *nlh;
841	struct fib_table *tb;
842	u32 pid;
843
844	net = skb->sk->sk_net;
845	nlh = nlmsg_hdr(skb);
846	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
847	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
848		return;
849
850	skb = skb_clone(skb, GFP_KERNEL);
851	if (skb == NULL)
852		return;
853	nlh = nlmsg_hdr(skb);
854
855	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
856	tb = fib_get_table(net, frn->tb_id_in);
857
858	nl_fib_lookup(frn, tb);
859
860	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
861	NETLINK_CB(skb).pid = 0;         /* from kernel */
862	NETLINK_CB(skb).dst_group = 0;  /* unicast */
863	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
864}
865
866static int nl_fib_lookup_init(struct net *net)
867{
868	struct sock *sk;
869	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
870				   nl_fib_input, NULL, THIS_MODULE);
871	if (sk == NULL)
872		return -EAFNOSUPPORT;
873	/* Don't hold an extra reference on the namespace */
874	put_net(sk->sk_net);
875	net->ipv4.fibnl = sk;
876	return 0;
877}
878
879static void nl_fib_lookup_exit(struct net *net)
880{
881	/* At the last minute lie and say this is a socket for the
882	 * initial network namespace. So the socket will  be safe to free.
883	 */
884	net->ipv4.fibnl->sk_net = get_net(&init_net);
885	sock_put(net->ipv4.fibnl);
886}
887
888static void fib_disable_ip(struct net_device *dev, int force)
889{
890	if (fib_sync_down(0, dev, force))
891		fib_flush(dev->nd_net);
892	rt_cache_flush(0);
893	arp_ifdown(dev);
894}
895
896static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
897{
898	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
899
900	switch (event) {
901	case NETDEV_UP:
902		fib_add_ifaddr(ifa);
903#ifdef CONFIG_IP_ROUTE_MULTIPATH
904		fib_sync_up(ifa->ifa_dev->dev);
905#endif
906		rt_cache_flush(-1);
907		break;
908	case NETDEV_DOWN:
909		fib_del_ifaddr(ifa);
910		if (ifa->ifa_dev->ifa_list == NULL) {
911			/* Last address was deleted from this interface.
912			   Disable IP.
913			 */
914			fib_disable_ip(ifa->ifa_dev->dev, 1);
915		} else {
916			rt_cache_flush(-1);
917		}
918		break;
919	}
920	return NOTIFY_DONE;
921}
922
923static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
924{
925	struct net_device *dev = ptr;
926	struct in_device *in_dev = __in_dev_get_rtnl(dev);
927
928	if (event == NETDEV_UNREGISTER) {
929		fib_disable_ip(dev, 2);
930		return NOTIFY_DONE;
931	}
932
933	if (!in_dev)
934		return NOTIFY_DONE;
935
936	switch (event) {
937	case NETDEV_UP:
938		for_ifa(in_dev) {
939			fib_add_ifaddr(ifa);
940		} endfor_ifa(in_dev);
941#ifdef CONFIG_IP_ROUTE_MULTIPATH
942		fib_sync_up(dev);
943#endif
944		rt_cache_flush(-1);
945		break;
946	case NETDEV_DOWN:
947		fib_disable_ip(dev, 0);
948		break;
949	case NETDEV_CHANGEMTU:
950	case NETDEV_CHANGE:
951		rt_cache_flush(0);
952		break;
953	}
954	return NOTIFY_DONE;
955}
956
957static struct notifier_block fib_inetaddr_notifier = {
958	.notifier_call =fib_inetaddr_event,
959};
960
961static struct notifier_block fib_netdev_notifier = {
962	.notifier_call =fib_netdev_event,
963};
964
965static int __net_init ip_fib_net_init(struct net *net)
966{
967	unsigned int i;
968
969	net->ipv4.fib_table_hash = kzalloc(
970			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
971	if (net->ipv4.fib_table_hash == NULL)
972		return -ENOMEM;
973
974	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
975		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
976
977	return fib4_rules_init(net);
978}
979
980static void __net_exit ip_fib_net_exit(struct net *net)
981{
982	unsigned int i;
983
984#ifdef CONFIG_IP_MULTIPLE_TABLES
985	fib4_rules_exit(net);
986#endif
987
988	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
989		struct fib_table *tb;
990		struct hlist_head *head;
991		struct hlist_node *node, *tmp;
992
993		head = &net->ipv4.fib_table_hash[i];
994		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
995			hlist_del(node);
996			tb->tb_flush(tb);
997			kfree(tb);
998		}
999	}
1000	kfree(net->ipv4.fib_table_hash);
1001}
1002
1003static int __net_init fib_net_init(struct net *net)
1004{
1005	int error;
1006
1007	error = ip_fib_net_init(net);
1008	if (error < 0)
1009		goto out;
1010	error = nl_fib_lookup_init(net);
1011	if (error < 0)
1012		goto out_nlfl;
1013	error = fib_proc_init(net);
1014	if (error < 0)
1015		goto out_proc;
1016out:
1017	return error;
1018
1019out_proc:
1020	nl_fib_lookup_exit(net);
1021out_nlfl:
1022	ip_fib_net_exit(net);
1023	goto out;
1024}
1025
/*
 * Per-namespace exit: undo fib_net_init() in reverse order - proc entries,
 * then the lookup netlink socket, then the FIB tables.
 */
1026static void __net_exit fib_net_exit(struct net *net)
1027{
1028	fib_proc_exit(net);
1029	nl_fib_lookup_exit(net);
1030	ip_fib_net_exit(net);
1031}
1032
/* Per-namespace init/exit hooks, registered by ip_fib_init(). */
1033static struct pernet_operations fib_net_ops = {
1034	.init = fib_net_init,
1035	.exit = fib_net_exit,
1036};
1037
/*
 * Boot-time setup of the FIB frontend: register the rtnetlink handlers for
 * adding, deleting and dumping IPv4 routes, then the per-namespace
 * operations and the netdevice and inetaddr notifiers.
 */
1038void __init ip_fib_init(void)
1039{
	/* NEWROUTE/DELROUTE get doit handlers; GETROUTE gets only a dump handler. */
1040	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1041	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1042	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1043
1044	register_pernet_subsys(&fib_net_ops);
1045	register_netdevice_notifier(&fib_netdev_notifier);
1046	register_inetaddr_notifier(&fib_inetaddr_notifier);
1047}
1048
/* Symbols from this file made available to modules. */
1049EXPORT_SYMBOL(inet_addr_type);
1050EXPORT_SYMBOL(inet_dev_addr_type);
1051EXPORT_SYMBOL(ip_dev_find);
1052