fib_frontend.c revision dbb50165b512f6c9b7aae10af73ae5b6d811f4d0
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug print helper: passes its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)

/*
 * Netlink socket for NETLINK_FIB_LOOKUP. Created by nl_fib_lookup_init()
 * and used by nl_fib_input() to send lookup replies.
 */
static struct sock *fibnl;
53
54#ifndef CONFIG_IP_MULTIPLE_TABLES
55
/*
 * Without CONFIG_IP_MULTIPLE_TABLES only the two built-in tables exist.
 * Both pointers are set by fib4_rules_init().
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

/* A single bucket is enough: both built-in tables are chained into it. */
#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61
62static int __init fib4_rules_init(void)
63{
64	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
65	if (ip_fib_local_table == NULL)
66		return -ENOMEM;
67
68	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
69	if (ip_fib_main_table == NULL)
70		goto fail;
71
72	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
73	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
74	return 0;
75
76fail:
77	kfree(ip_fib_local_table);
78	ip_fib_local_table = NULL;
79	return -ENOMEM;
80}
81#else
82
/*
 * With CONFIG_IP_MULTIPLE_TABLES tables are hashed by id. The bucket is
 * chosen with "id & (FIB_TABLE_HASHSZ - 1)", so the size must stay a
 * power of two.
 */
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
85
86struct fib_table *fib_new_table(u32 id)
87{
88	struct fib_table *tb;
89	unsigned int h;
90
91	if (id == 0)
92		id = RT_TABLE_MAIN;
93	tb = fib_get_table(id);
94	if (tb)
95		return tb;
96	tb = fib_hash_init(id);
97	if (!tb)
98		return NULL;
99	h = id & (FIB_TABLE_HASHSZ - 1);
100	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
101	return tb;
102}
103
104struct fib_table *fib_get_table(u32 id)
105{
106	struct fib_table *tb;
107	struct hlist_node *node;
108	unsigned int h;
109
110	if (id == 0)
111		id = RT_TABLE_MAIN;
112	h = id & (FIB_TABLE_HASHSZ - 1);
113	rcu_read_lock();
114	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
115		if (tb->tb_id == id) {
116			rcu_read_unlock();
117			return tb;
118		}
119	}
120	rcu_read_unlock();
121	return NULL;
122}
123#endif /* CONFIG_IP_MULTIPLE_TABLES */
124
125static void fib_flush(void)
126{
127	int flushed = 0;
128	struct fib_table *tb;
129	struct hlist_node *node;
130	unsigned int h;
131
132	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
133		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
134			flushed += tb->tb_flush(tb);
135	}
136
137	if (flushed)
138		rt_cache_flush(-1);
139}
140
141/*
142 *	Find the first device with a given source address.
143 */
144
145struct net_device * ip_dev_find(__be32 addr)
146{
147	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
148	struct fib_result res;
149	struct net_device *dev = NULL;
150	struct fib_table *local_table;
151
152#ifdef CONFIG_IP_MULTIPLE_TABLES
153	res.r = NULL;
154#endif
155
156	local_table = fib_get_table(RT_TABLE_LOCAL);
157	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
158		return NULL;
159	if (res.type != RTN_LOCAL)
160		goto out;
161	dev = FIB_RES_DEV(res);
162
163	if (dev)
164		dev_hold(dev);
165out:
166	fib_res_put(&res);
167	return dev;
168}
169
170/*
171 * Find address type as if only "dev" was present in the system. If
172 * on_dev is NULL then all interfaces are taken into consideration.
173 */
174static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
175					    __be32 addr)
176{
177	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
178	struct fib_result	res;
179	unsigned ret = RTN_BROADCAST;
180	struct fib_table *local_table;
181
182	if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
183		return RTN_BROADCAST;
184	if (ipv4_is_multicast(addr))
185		return RTN_MULTICAST;
186
187#ifdef CONFIG_IP_MULTIPLE_TABLES
188	res.r = NULL;
189#endif
190
191	local_table = fib_get_table(RT_TABLE_LOCAL);
192	if (local_table) {
193		ret = RTN_UNICAST;
194		if (!local_table->tb_lookup(local_table, &fl, &res)) {
195			if (!dev || dev == res.fi->fib_dev)
196				ret = res.type;
197			fib_res_put(&res);
198		}
199	}
200	return ret;
201}
202
/* Classify addr (RTN_* type), taking all interfaces into consideration. */
unsigned int inet_addr_type(__be32 addr)
{
	return __inet_dev_addr_type(NULL, addr);
}
207
/* Classify addr (RTN_* type) as if dev were the only interface present. */
unsigned int inet_dev_addr_type(const struct net_device *dev, __be32 addr)
{
       return __inet_dev_addr_type(dev, addr);
}
212
/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check that the source is valid, i.e. not a broadcast or one of
     our local addresses.
   - figure out on which "logical" interface this packet arrived
     and calculate the "specific destination" address.
   - check that the packet arrived from the expected physical interface.

   Returns -EINVAL when the source is rejected. Otherwise returns 0 or 1:
   on the two lookup paths the value is (nexthop scope >= RT_SCOPE_HOST),
   and the last-resort path always returns 0. *spec_dst is written on
   every successful path except when the second lookup finds nothing
   unicast; *itag is written by fib_combine_itag() or reset to 0 on the
   last-resort path.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
	struct in_device *in_dev;
	/* Reverse lookup: route *towards* the packet's source address. */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .iif = oif };
	struct fib_result res;
	int no_addr, rpf;
	int ret;

	no_addr = rpf = 0;
	/* Snapshot the per-device settings under RCU; only the copies are used later. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
	}
	rcu_read_unlock();

	/* in_dev is only compared against NULL here, never dereferenced. */
	if (in_dev == NULL)
		goto e_inval;

	if (fib_lookup(&fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	/* Accept if the reverse route leaves via the arrival device
	   (or, with multipath, if the route has more than one nexthop). */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf)
		goto e_inval;
	/* Retry the lookup constrained to the arrival device. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(&fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* No usable reverse route: reject if rp_filter is on, otherwise
	   pick an address of the device and clear the tag. */
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}
291
292static inline __be32 sk_extract_addr(struct sockaddr *addr)
293{
294	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
295}
296
297static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
298{
299	struct nlattr *nla;
300
301	nla = (struct nlattr *) ((char *) mx + len);
302	nla->nla_type = type;
303	nla->nla_len = nla_attr_size(4);
304	*(u32 *) nla_data(nla) = value;
305
306	return len + nla_total_size(4);
307}
308
/*
 * Translate a legacy ioctl route request (struct rtentry, already copied
 * into kernel memory) into a struct fib_config.
 *
 * cmd is SIOCADDRT or SIOCDELRT; for SIOCDELRT the function returns before
 * gateway validation and metric handling.
 *
 * Returns 0 on success, or -EAFNOSUPPORT, -EINVAL, -EFAULT, -ENODEV or
 * -ENOMEM. On success cfg->fc_mx may point to memory obtained with
 * kzalloc(); the caller is responsible for freeing it. No error return
 * happens after that allocation.
 */
static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* A non-zero ioctl metric maps to priority (metric - 1). */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE is a placeholder; it is resolved further down. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		/* rt_dev is a user pointer: copy at most IFNAMSIZ-1 bytes
		 * and terminate the buffer ourselves. */
		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		/* "name:label" form: cut at the colon to find the device. */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(&init_net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/* Restore the full label and use the address that
			 * carries it as the preferred source. */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for up to three 4-byte attributes (one per flag). */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/* NOTE(review): rt_mtu is not range-checked before the
		 * subtraction; a value below 40 would wrap - confirm. */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		/* NOTE(review): the << 3 presumably converts to the scaled
		 * unit RTAX_RTT expects - confirm. */
		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
435
436/*
437 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
438 */
439
440int ip_rt_ioctl(unsigned int cmd, void __user *arg)
441{
442	struct fib_config cfg;
443	struct rtentry rt;
444	int err;
445
446	switch (cmd) {
447	case SIOCADDRT:		/* Add a route */
448	case SIOCDELRT:		/* Delete a route */
449		if (!capable(CAP_NET_ADMIN))
450			return -EPERM;
451
452		if (copy_from_user(&rt, arg, sizeof(rt)))
453			return -EFAULT;
454
455		rtnl_lock();
456		err = rtentry_to_fib_config(cmd, &rt, &cfg);
457		if (err == 0) {
458			struct fib_table *tb;
459
460			if (cmd == SIOCDELRT) {
461				tb = fib_get_table(cfg.fc_table);
462				if (tb)
463					err = tb->tb_delete(tb, &cfg);
464				else
465					err = -ESRCH;
466			} else {
467				tb = fib_new_table(cfg.fc_table);
468				if (tb)
469					err = tb->tb_insert(tb, &cfg);
470				else
471					err = -ENOBUFS;
472			}
473
474			/* allocated by rtentry_to_fib_config() */
475			kfree(cfg.fc_mx);
476		}
477		rtnl_unlock();
478		return err;
479	}
480	return -EINVAL;
481}
482
483const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
484	[RTA_DST]		= { .type = NLA_U32 },
485	[RTA_SRC]		= { .type = NLA_U32 },
486	[RTA_IIF]		= { .type = NLA_U32 },
487	[RTA_OIF]		= { .type = NLA_U32 },
488	[RTA_GATEWAY]		= { .type = NLA_U32 },
489	[RTA_PRIORITY]		= { .type = NLA_U32 },
490	[RTA_PREFSRC]		= { .type = NLA_U32 },
491	[RTA_METRICS]		= { .type = NLA_NESTED },
492	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
493	[RTA_PROTOINFO]		= { .type = NLA_U32 },
494	[RTA_FLOW]		= { .type = NLA_U32 },
495};
496
497static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
498			     struct fib_config *cfg)
499{
500	struct nlattr *attr;
501	int err, remaining;
502	struct rtmsg *rtm;
503
504	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
505	if (err < 0)
506		goto errout;
507
508	memset(cfg, 0, sizeof(*cfg));
509
510	rtm = nlmsg_data(nlh);
511	cfg->fc_dst_len = rtm->rtm_dst_len;
512	cfg->fc_tos = rtm->rtm_tos;
513	cfg->fc_table = rtm->rtm_table;
514	cfg->fc_protocol = rtm->rtm_protocol;
515	cfg->fc_scope = rtm->rtm_scope;
516	cfg->fc_type = rtm->rtm_type;
517	cfg->fc_flags = rtm->rtm_flags;
518	cfg->fc_nlflags = nlh->nlmsg_flags;
519
520	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
521	cfg->fc_nlinfo.nlh = nlh;
522
523	if (cfg->fc_type > RTN_MAX) {
524		err = -EINVAL;
525		goto errout;
526	}
527
528	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
529		switch (nla_type(attr)) {
530		case RTA_DST:
531			cfg->fc_dst = nla_get_be32(attr);
532			break;
533		case RTA_OIF:
534			cfg->fc_oif = nla_get_u32(attr);
535			break;
536		case RTA_GATEWAY:
537			cfg->fc_gw = nla_get_be32(attr);
538			break;
539		case RTA_PRIORITY:
540			cfg->fc_priority = nla_get_u32(attr);
541			break;
542		case RTA_PREFSRC:
543			cfg->fc_prefsrc = nla_get_be32(attr);
544			break;
545		case RTA_METRICS:
546			cfg->fc_mx = nla_data(attr);
547			cfg->fc_mx_len = nla_len(attr);
548			break;
549		case RTA_MULTIPATH:
550			cfg->fc_mp = nla_data(attr);
551			cfg->fc_mp_len = nla_len(attr);
552			break;
553		case RTA_FLOW:
554			cfg->fc_flow = nla_get_u32(attr);
555			break;
556		case RTA_TABLE:
557			cfg->fc_table = nla_get_u32(attr);
558			break;
559		}
560	}
561
562	return 0;
563errout:
564	return err;
565}
566
567static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
568{
569	struct net *net = skb->sk->sk_net;
570	struct fib_config cfg;
571	struct fib_table *tb;
572	int err;
573
574	if (net != &init_net)
575		return -EINVAL;
576
577	err = rtm_to_fib_config(skb, nlh, &cfg);
578	if (err < 0)
579		goto errout;
580
581	tb = fib_get_table(cfg.fc_table);
582	if (tb == NULL) {
583		err = -ESRCH;
584		goto errout;
585	}
586
587	err = tb->tb_delete(tb, &cfg);
588errout:
589	return err;
590}
591
592static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
593{
594	struct net *net = skb->sk->sk_net;
595	struct fib_config cfg;
596	struct fib_table *tb;
597	int err;
598
599	if (net != &init_net)
600		return -EINVAL;
601
602	err = rtm_to_fib_config(skb, nlh, &cfg);
603	if (err < 0)
604		goto errout;
605
606	tb = fib_new_table(cfg.fc_table);
607	if (tb == NULL) {
608		err = -ENOBUFS;
609		goto errout;
610	}
611
612	err = tb->tb_insert(tb, &cfg);
613errout:
614	return err;
615}
616
/*
 * Netlink dump callback for RTM_GETROUTE.
 *
 * Requests carrying RTM_F_CLONED are handed to ip_rt_dump(). Otherwise
 * every FIB table is dumped through its tb_dump() hook. The dump is
 * resumable: cb->args[0] holds the hash bucket and cb->args[1] the index
 * of the table inside that bucket at which to continue; the remaining
 * cb->args slots are left for tb_dump().
 *
 * Returns skb->len, or 0 for sockets outside init_net.
 */
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = skb->sk->sk_net;
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_node *node;
	int dumped = 0;

	if (net != &init_net)
		return 0;

	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
		return ip_rt_dump(skb, cb);

	/* Resume position saved by the previous invocation. */
	s_h = cb->args[0];
	s_e = cb->args[1];

	/* s_e only applies to the first bucket visited; reset it afterwards. */
	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
			if (e < s_e)
				goto next;
			/* After one table has been dumped in this call, clear
			 * the per-table state before starting the next one. */
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			/* A negative return stops here; (h, e) is saved so
			 * the next call resumes at this same table. */
			if (tb->tb_dump(tb, skb, cb) < 0)
				goto out;
			dumped = 1;
next:
			e++;
		}
	}
out:
	cb->args[1] = e;
	cb->args[0] = h;

	return skb->len;
}
657
658/* Prepare and feed intra-kernel routing request.
659   Really, it should be netlink message, but :-( netlink
660   can be not configured, so that we feed it directly
661   to fib engine. It is legal, because all events occur
662   only when netlink is already locked.
663 */
664
665static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
666{
667	struct fib_table *tb;
668	struct fib_config cfg = {
669		.fc_protocol = RTPROT_KERNEL,
670		.fc_type = type,
671		.fc_dst = dst,
672		.fc_dst_len = dst_len,
673		.fc_prefsrc = ifa->ifa_local,
674		.fc_oif = ifa->ifa_dev->dev->ifindex,
675		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
676	};
677
678	if (type == RTN_UNICAST)
679		tb = fib_new_table(RT_TABLE_MAIN);
680	else
681		tb = fib_new_table(RT_TABLE_LOCAL);
682
683	if (tb == NULL)
684		return;
685
686	cfg.fc_table = tb->tb_id;
687
688	if (type != RTN_LOCAL)
689		cfg.fc_scope = RT_SCOPE_LINK;
690	else
691		cfg.fc_scope = RT_SCOPE_HOST;
692
693	if (cmd == RTM_NEWROUTE)
694		tb->tb_insert(tb, &cfg);
695	else
696		tb->tb_delete(tb, &cfg);
697}
698
699void fib_add_ifaddr(struct in_ifaddr *ifa)
700{
701	struct in_device *in_dev = ifa->ifa_dev;
702	struct net_device *dev = in_dev->dev;
703	struct in_ifaddr *prim = ifa;
704	__be32 mask = ifa->ifa_mask;
705	__be32 addr = ifa->ifa_local;
706	__be32 prefix = ifa->ifa_address&mask;
707
708	if (ifa->ifa_flags&IFA_F_SECONDARY) {
709		prim = inet_ifa_byprefix(in_dev, prefix, mask);
710		if (prim == NULL) {
711			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
712			return;
713		}
714	}
715
716	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
717
718	if (!(dev->flags&IFF_UP))
719		return;
720
721	/* Add broadcast address, if it is explicitly assigned. */
722	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
723		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
724
725	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
726	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
727		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
728			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
729
730		/* Add network specific broadcasts, when it takes a sense */
731		if (ifa->ifa_prefixlen < 31) {
732			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
733			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
734		}
735	}
736}
737
/*
 * Remove the kernel routes that were installed for an interface address.
 *
 * For a primary address the prefix route is deleted right away. The
 * local and broadcast routes are deleted only if no address in the
 * device's address list still uses the same value. If the local address
 * is really gone afterwards, stray FIB entries referring to it are
 * flushed.
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	/* brd: prefix with all host bits set; any: prefix with host bits clear. */
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
	/* Bits in "ok": set when some listed address still uses that value,
	 * meaning the corresponding route must be kept. */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		/* Secondary address: its routes reference the primary one. */
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than addition.
	   We must take care not to delete too much :-)

	   Scan the address list to be sure that the addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check that this local address has finally disappeared. */
		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
			/* And the last, but not the least thing.
			   We must flush stray FIB entries.

			   First of all, we scan the fib_info list searching
			   for stray nexthop entries, then trigger fib_flush.
			*/
			if (fib_sync_down(ifa->ifa_local, NULL, 0))
				fib_flush();
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
806
807static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
808{
809
810	struct fib_result       res;
811	struct flowi            fl = { .mark = frn->fl_mark,
812				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
813							    .tos = frn->fl_tos,
814							    .scope = frn->fl_scope } } };
815
816#ifdef CONFIG_IP_MULTIPLE_TABLES
817	res.r = NULL;
818#endif
819
820	frn->err = -ENOENT;
821	if (tb) {
822		local_bh_disable();
823
824		frn->tb_id = tb->tb_id;
825		frn->err = tb->tb_lookup(tb, &fl, &res);
826
827		if (!frn->err) {
828			frn->prefixlen = res.prefixlen;
829			frn->nh_sel = res.nh_sel;
830			frn->type = res.type;
831			frn->scope = res.scope;
832			fib_res_put(&res);
833		}
834		local_bh_enable();
835	}
836}
837
/*
 * Input handler of the NETLINK_FIB_LOOKUP socket.
 *
 * Messages that are truncated or too short to hold a struct
 * fib_result_nl are silently dropped. Otherwise the skb is cloned, the
 * lookup result is written into the clone's payload, and the clone is
 * sent back to the requesting pid as a unicast message from the kernel.
 */
static void nl_fib_input(struct sk_buff *skb)
{
	struct fib_result_nl *frn;
	struct nlmsghdr *nlh;
	struct fib_table *tb;
	u32 pid;

	nlh = nlmsg_hdr(skb);
	/* The header is dereferenced only after the first length check passes. */
	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
		return;

	/* From here on "skb" and "nlh" refer to the clone, not the input. */
	skb = skb_clone(skb, GFP_KERNEL);
	if (skb == NULL)
		return;
	nlh = nlmsg_hdr(skb);

	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
	tb = fib_get_table(frn->tb_id_in);

	nl_fib_lookup(frn, tb);

	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
	NETLINK_CB(skb).pid = 0;         /* from kernel */
	NETLINK_CB(skb).dst_group = 0;  /* unicast */
	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
}
865
/*
 * Create the NETLINK_FIB_LOOKUP kernel socket in init_net, with
 * nl_fib_input() as its input handler.
 * NOTE(review): the result is stored in fibnl without being checked
 * here - confirm whether a failed creation needs handling.
 */
static void nl_fib_lookup_init(void)
{
	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
				      nl_fib_input, NULL, THIS_MODULE);
}
871
/*
 * Take IP routing down on a device: sync the FIB nexthops for the device
 * (flushing the tables if fib_sync_down() reports a change), flush the
 * routing cache and notify ARP that the interface went down.
 * "force" is passed through to fib_sync_down() unchanged.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	if (fib_sync_down(0, dev, force))
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
879
880static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
881{
882	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
883
884	switch (event) {
885	case NETDEV_UP:
886		fib_add_ifaddr(ifa);
887#ifdef CONFIG_IP_ROUTE_MULTIPATH
888		fib_sync_up(ifa->ifa_dev->dev);
889#endif
890		rt_cache_flush(-1);
891		break;
892	case NETDEV_DOWN:
893		fib_del_ifaddr(ifa);
894		if (ifa->ifa_dev->ifa_list == NULL) {
895			/* Last address was deleted from this interface.
896			   Disable IP.
897			 */
898			fib_disable_ip(ifa->ifa_dev->dev, 1);
899		} else {
900			rt_cache_flush(-1);
901		}
902		break;
903	}
904	return NOTIFY_DONE;
905}
906
907static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
908{
909	struct net_device *dev = ptr;
910	struct in_device *in_dev = __in_dev_get_rtnl(dev);
911
912	if (dev->nd_net != &init_net)
913		return NOTIFY_DONE;
914
915	if (event == NETDEV_UNREGISTER) {
916		fib_disable_ip(dev, 2);
917		return NOTIFY_DONE;
918	}
919
920	if (!in_dev)
921		return NOTIFY_DONE;
922
923	switch (event) {
924	case NETDEV_UP:
925		for_ifa(in_dev) {
926			fib_add_ifaddr(ifa);
927		} endfor_ifa(in_dev);
928#ifdef CONFIG_IP_ROUTE_MULTIPATH
929		fib_sync_up(dev);
930#endif
931		rt_cache_flush(-1);
932		break;
933	case NETDEV_DOWN:
934		fib_disable_ip(dev, 0);
935		break;
936	case NETDEV_CHANGEMTU:
937	case NETDEV_CHANGE:
938		rt_cache_flush(0);
939		break;
940	}
941	return NOTIFY_DONE;
942}
943
/* Notifier for IPv4 address events; registered in ip_fib_init(). */
static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call =fib_inetaddr_event,
};

/* Notifier for network device events; registered in ip_fib_init(). */
static struct notifier_block fib_netdev_notifier = {
	.notifier_call =fib_netdev_event,
};
951
952void __init ip_fib_init(void)
953{
954	unsigned int i;
955
956	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
957		INIT_HLIST_HEAD(&fib_table_hash[i]);
958
959	BUG_ON(fib4_rules_init());
960
961	register_netdevice_notifier(&fib_netdev_notifier);
962	register_inetaddr_notifier(&fib_inetaddr_notifier);
963	nl_fib_lookup_init();
964
965	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
966	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
967	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
968}
969
/* Address classification and device lookup helpers exported for use outside this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
973