fib_frontend.c revision 4e9b82693542003b028c8494e9e3c49615b91ce7
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/mm.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/errno.h>
31#include <linux/in.h>
32#include <linux/inet.h>
33#include <linux/inetdevice.h>
34#include <linux/netdevice.h>
35#include <linux/if_addr.h>
36#include <linux/if_arp.h>
37#include <linux/skbuff.h>
38#include <linux/netlink.h>
39#include <linux/init.h>
40#include <linux/list.h>
41
42#include <net/ip.h>
43#include <net/protocol.h>
44#include <net/route.h>
45#include <net/tcp.h>
46#include <net/sock.h>
47#include <net/icmp.h>
48#include <net/arp.h>
49#include <net/ip_fib.h>
50
51#define FFprint(a...) printk(KERN_DEBUG a)
52
53#ifndef CONFIG_IP_MULTIPLE_TABLES
54
/*
 * Without CONFIG_IP_MULTIPLE_TABLES only two tables exist; ip_fib_init()
 * creates them and stores the pointers here.
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

/* Both tables are linked into the single bucket fib_table_hash[0]. */
#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
60
61#else
62
/*
 * With multiple tables, tables are hashed by table id; the bucket index is
 * computed as (id & (FIB_TABLE_HASHSZ - 1)), so the size must stay a power
 * of two.
 */
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
65
66struct fib_table *fib_new_table(u32 id)
67{
68	struct fib_table *tb;
69	unsigned int h;
70
71	if (id == 0)
72		id = RT_TABLE_MAIN;
73	tb = fib_get_table(id);
74	if (tb)
75		return tb;
76	tb = fib_hash_init(id);
77	if (!tb)
78		return NULL;
79	h = id & (FIB_TABLE_HASHSZ - 1);
80	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
81	return tb;
82}
83
84struct fib_table *fib_get_table(u32 id)
85{
86	struct fib_table *tb;
87	struct hlist_node *node;
88	unsigned int h;
89
90	if (id == 0)
91		id = RT_TABLE_MAIN;
92	h = id & (FIB_TABLE_HASHSZ - 1);
93	rcu_read_lock();
94	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
95		if (tb->tb_id == id) {
96			rcu_read_unlock();
97			return tb;
98		}
99	}
100	rcu_read_unlock();
101	return NULL;
102}
103#endif /* CONFIG_IP_MULTIPLE_TABLES */
104
105static void fib_flush(void)
106{
107	int flushed = 0;
108	struct fib_table *tb;
109	struct hlist_node *node;
110	unsigned int h;
111
112	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
113		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
114			flushed += tb->tb_flush(tb);
115	}
116
117	if (flushed)
118		rt_cache_flush(-1);
119}
120
121/*
122 *	Find the first device with a given source address.
123 */
124
125struct net_device * ip_dev_find(__be32 addr)
126{
127	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
128	struct fib_result res;
129	struct net_device *dev = NULL;
130
131#ifdef CONFIG_IP_MULTIPLE_TABLES
132	res.r = NULL;
133#endif
134
135	if (!ip_fib_local_table ||
136	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
137		return NULL;
138	if (res.type != RTN_LOCAL)
139		goto out;
140	dev = FIB_RES_DEV(res);
141
142	if (dev)
143		dev_hold(dev);
144out:
145	fib_res_put(&res);
146	return dev;
147}
148
149unsigned inet_addr_type(__be32 addr)
150{
151	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
152	struct fib_result	res;
153	unsigned ret = RTN_BROADCAST;
154
155	if (ZERONET(addr) || BADCLASS(addr))
156		return RTN_BROADCAST;
157	if (MULTICAST(addr))
158		return RTN_MULTICAST;
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161	res.r = NULL;
162#endif
163
164	if (ip_fib_local_table) {
165		ret = RTN_UNICAST;
166		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
167						   &fl, &res)) {
168			ret = res.type;
169			fib_res_put(&res);
170		}
171	}
172	return ret;
173}
174
/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check that the source is valid, i.e. not a broadcast or one of
     our local addresses.
   - figure out which "logical" interface this packet arrived on
     and calculate the "specific destination" address.
   - check that the packet arrived from the expected physical interface.

   Returns -EINVAL if the source is rejected.  Otherwise returns 0 or 1:
   1 when the nexthop scope of the matching route is >= RT_SCOPE_HOST,
   0 in all other accepted cases.  *spec_dst and *itag are written on the
   accepting paths.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
	struct in_device *in_dev;
	/* Reverse lookup: the packet's source is used as the lookup
	 * destination and vice versa; oif is passed as the input interface.
	 */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .iif = oif };
	struct fib_result res;
	int no_addr, rpf;
	int ret;

	no_addr = rpf = 0;
	/* Snapshot the per-device settings under RCU; afterwards in_dev is
	 * only compared against NULL, never dereferenced.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
	}
	rcu_read_unlock();

	if (in_dev == NULL)
		goto e_inval;

	/* No route back to the source at all: fall back to last_resort. */
	if (fib_lookup(&fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	/* Accept if the reverse route points out of the arrival device
	 * (or, with multipath, if the route has more than one nexthop).
	 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	/* Reverse route uses another device and rp_filter is on: reject. */
	if (rpf)
		goto e_inval;
	/* Retry the lookup constrained to the arrival device. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(&fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* Reached with no result reference held. */
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	/* Rejecting while still holding the lookup result: release it. */
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}
253
254#ifndef CONFIG_IP_NOSIOCRT
255
256static inline __be32 sk_extract_addr(struct sockaddr *addr)
257{
258	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
259}
260
261static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
262{
263	struct nlattr *nla;
264
265	nla = (struct nlattr *) ((char *) mx + len);
266	nla->nla_type = type;
267	nla->nla_len = nla_attr_size(4);
268	*(u32 *) nla_data(nla) = value;
269
270	return len + nla_total_size(4);
271}
272
/*
 * Translate a legacy ioctl 'struct rtentry' (SIOCADDRT/SIOCDELRT) into a
 * struct fib_config.
 *
 * cmd is the ioctl command; rt is the kernel copy of the user's rtentry
 * (rt->rt_dev is still a user-space pointer and is copied in here); cfg is
 * zeroed and then filled in.
 *
 * Returns 0 on success or a negative errno (-EAFNOSUPPORT, -EINVAL,
 * -EFAULT, -ENODEV, -ENOMEM).  On success cfg->fc_mx may point to a
 * kzalloc()ed metrics buffer which the caller must kfree(); it is only
 * allocated as the final step, so nothing is leaked on the error paths.
 */
static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;	/* host routes (RTF_HOST) keep the full prefix length */
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* The ioctl metric is offset by one relative to the FIB priority;
	 * a metric of 0 leaves the priority at 0.
	 */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	/* Reject routes need neither a device nor a gateway. */
	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE acts as "not decided yet"; see below. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		/* Only IFNAMSIZ-1 bytes were copied; force termination. */
		devname[IFNAMSIZ-1] = 0;
		/* "dev:label" form: look the device up by the part before
		 * the colon, then match the full label against its addresses.
		 */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/* Restore the full label for the comparison. */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	/* Deletion needs no further validation and no metrics. */
	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	/* No gateway-derived scope was chosen: default to link scope. */
	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for at most three 4-byte attributes, one per flag. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
 		if (mx == NULL)
			return -ENOMEM;

		/* The MTU is stored as an advertised MSS, i.e. MTU minus 40. */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
399
400/*
401 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
402 */
403
404int ip_rt_ioctl(unsigned int cmd, void __user *arg)
405{
406	struct fib_config cfg;
407	struct rtentry rt;
408	int err;
409
410	switch (cmd) {
411	case SIOCADDRT:		/* Add a route */
412	case SIOCDELRT:		/* Delete a route */
413		if (!capable(CAP_NET_ADMIN))
414			return -EPERM;
415
416		if (copy_from_user(&rt, arg, sizeof(rt)))
417			return -EFAULT;
418
419		rtnl_lock();
420		err = rtentry_to_fib_config(cmd, &rt, &cfg);
421		if (err == 0) {
422			struct fib_table *tb;
423
424			if (cmd == SIOCDELRT) {
425				tb = fib_get_table(cfg.fc_table);
426				if (tb)
427					err = tb->tb_delete(tb, &cfg);
428				else
429					err = -ESRCH;
430			} else {
431				tb = fib_new_table(cfg.fc_table);
432				if (tb)
433					err = tb->tb_insert(tb, &cfg);
434				else
435					err = -ENOBUFS;
436			}
437
438			/* allocated by rtentry_to_fib_config() */
439			kfree(cfg.fc_mx);
440		}
441		rtnl_unlock();
442		return err;
443	}
444	return -EINVAL;
445}
446
447#else
448
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	/* Built with CONFIG_IP_NOSIOCRT: every routing ioctl is refused. */
	return -EINVAL;
}
453
454#endif
455
456struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
457	[RTA_DST]		= { .type = NLA_U32 },
458	[RTA_SRC]		= { .type = NLA_U32 },
459	[RTA_IIF]		= { .type = NLA_U32 },
460	[RTA_OIF]		= { .type = NLA_U32 },
461	[RTA_GATEWAY]		= { .type = NLA_U32 },
462	[RTA_PRIORITY]		= { .type = NLA_U32 },
463	[RTA_PREFSRC]		= { .type = NLA_U32 },
464	[RTA_METRICS]		= { .type = NLA_NESTED },
465	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
466	[RTA_PROTOINFO]		= { .type = NLA_U32 },
467	[RTA_FLOW]		= { .type = NLA_U32 },
468	[RTA_MP_ALGO]		= { .type = NLA_U32 },
469};
470
471static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
472			     struct fib_config *cfg)
473{
474	struct nlattr *attr;
475	int err, remaining;
476	struct rtmsg *rtm;
477
478	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
479	if (err < 0)
480		goto errout;
481
482	memset(cfg, 0, sizeof(*cfg));
483
484	rtm = nlmsg_data(nlh);
485	cfg->fc_dst_len = rtm->rtm_dst_len;
486	cfg->fc_tos = rtm->rtm_tos;
487	cfg->fc_table = rtm->rtm_table;
488	cfg->fc_protocol = rtm->rtm_protocol;
489	cfg->fc_scope = rtm->rtm_scope;
490	cfg->fc_type = rtm->rtm_type;
491	cfg->fc_flags = rtm->rtm_flags;
492	cfg->fc_nlflags = nlh->nlmsg_flags;
493
494	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
495	cfg->fc_nlinfo.nlh = nlh;
496
497	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
498		switch (attr->nla_type) {
499		case RTA_DST:
500			cfg->fc_dst = nla_get_be32(attr);
501			break;
502		case RTA_OIF:
503			cfg->fc_oif = nla_get_u32(attr);
504			break;
505		case RTA_GATEWAY:
506			cfg->fc_gw = nla_get_be32(attr);
507			break;
508		case RTA_PRIORITY:
509			cfg->fc_priority = nla_get_u32(attr);
510			break;
511		case RTA_PREFSRC:
512			cfg->fc_prefsrc = nla_get_be32(attr);
513			break;
514		case RTA_METRICS:
515			cfg->fc_mx = nla_data(attr);
516			cfg->fc_mx_len = nla_len(attr);
517			break;
518		case RTA_MULTIPATH:
519			cfg->fc_mp = nla_data(attr);
520			cfg->fc_mp_len = nla_len(attr);
521			break;
522		case RTA_FLOW:
523			cfg->fc_flow = nla_get_u32(attr);
524			break;
525		case RTA_MP_ALGO:
526			cfg->fc_mp_alg = nla_get_u32(attr);
527			break;
528		case RTA_TABLE:
529			cfg->fc_table = nla_get_u32(attr);
530			break;
531		}
532	}
533
534	return 0;
535errout:
536	return err;
537}
538
539int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
540{
541	struct fib_config cfg;
542	struct fib_table *tb;
543	int err;
544
545	err = rtm_to_fib_config(skb, nlh, &cfg);
546	if (err < 0)
547		goto errout;
548
549	tb = fib_get_table(cfg.fc_table);
550	if (tb == NULL) {
551		err = -ESRCH;
552		goto errout;
553	}
554
555	err = tb->tb_delete(tb, &cfg);
556errout:
557	return err;
558}
559
560int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
561{
562	struct fib_config cfg;
563	struct fib_table *tb;
564	int err;
565
566	err = rtm_to_fib_config(skb, nlh, &cfg);
567	if (err < 0)
568		goto errout;
569
570	tb = fib_new_table(cfg.fc_table);
571	if (tb == NULL) {
572		err = -ENOBUFS;
573		goto errout;
574	}
575
576	err = tb->tb_insert(tb, &cfg);
577errout:
578	return err;
579}
580
581int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
582{
583	unsigned int h, s_h;
584	unsigned int e = 0, s_e;
585	struct fib_table *tb;
586	struct hlist_node *node;
587	int dumped = 0;
588
589	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
590	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
591		return ip_rt_dump(skb, cb);
592
593	s_h = cb->args[0];
594	s_e = cb->args[1];
595
596	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
597		e = 0;
598		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
599			if (e < s_e)
600				goto next;
601			if (dumped)
602				memset(&cb->args[2], 0, sizeof(cb->args) -
603				                 2 * sizeof(cb->args[0]));
604			if (tb->tb_dump(tb, skb, cb) < 0)
605				goto out;
606			dumped = 1;
607next:
608			e++;
609		}
610	}
611out:
612	cb->args[1] = e;
613	cb->args[0] = h;
614
615	return skb->len;
616}
617
618/* Prepare and feed intra-kernel routing request.
619   Really, it should be netlink message, but :-( netlink
620   can be not configured, so that we feed it directly
621   to fib engine. It is legal, because all events occur
622   only when netlink is already locked.
623 */
624
625static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
626{
627	struct fib_table *tb;
628	struct fib_config cfg = {
629		.fc_protocol = RTPROT_KERNEL,
630		.fc_type = type,
631		.fc_dst = dst,
632		.fc_dst_len = dst_len,
633		.fc_prefsrc = ifa->ifa_local,
634		.fc_oif = ifa->ifa_dev->dev->ifindex,
635		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
636	};
637
638	if (type == RTN_UNICAST)
639		tb = fib_new_table(RT_TABLE_MAIN);
640	else
641		tb = fib_new_table(RT_TABLE_LOCAL);
642
643	if (tb == NULL)
644		return;
645
646	cfg.fc_table = tb->tb_id;
647
648	if (type != RTN_LOCAL)
649		cfg.fc_scope = RT_SCOPE_LINK;
650	else
651		cfg.fc_scope = RT_SCOPE_HOST;
652
653	if (cmd == RTM_NEWROUTE)
654		tb->tb_insert(tb, &cfg);
655	else
656		tb->tb_delete(tb, &cfg);
657}
658
659void fib_add_ifaddr(struct in_ifaddr *ifa)
660{
661	struct in_device *in_dev = ifa->ifa_dev;
662	struct net_device *dev = in_dev->dev;
663	struct in_ifaddr *prim = ifa;
664	__be32 mask = ifa->ifa_mask;
665	__be32 addr = ifa->ifa_local;
666	__be32 prefix = ifa->ifa_address&mask;
667
668	if (ifa->ifa_flags&IFA_F_SECONDARY) {
669		prim = inet_ifa_byprefix(in_dev, prefix, mask);
670		if (prim == NULL) {
671			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
672			return;
673		}
674	}
675
676	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
677
678	if (!(dev->flags&IFF_UP))
679		return;
680
681	/* Add broadcast address, if it is explicitly assigned. */
682	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
683		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
684
685	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
686	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
687		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
688			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
689
690		/* Add network specific broadcasts, when it takes a sense */
691		if (ifa->ifa_prefixlen < 31) {
692			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
693			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
694		}
695	}
696}
697
/*
 * Remove the routes that were installed for an interface address, taking
 * care to keep any route that another address still on the device's list
 * also needs.
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	/* All-ones and all-zeroes host addresses of this address's subnet. */
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
	/* A set bit in 'ok' means some listed address still matches, so the
	 * corresponding route must be kept.
	 */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	/* A primary address owns the prefix route; a secondary only needs
	 * its primary looked up for the deletions below.
	 */
	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than add.
	   We should take care not to delete too much :-)

	   Scan address list to be sure that addresses are really gone.
	 */

	/* NOTE(review): this presumably relies on 'ifa' having already been
	 * unlinked from in_dev->ifa_list, otherwise it would match itself --
	 * confirm against the caller.
	 */
	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check, that this local address finally disappeared. */
		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
			/* And the last, but not the least thing.
			   We must flush stray FIB entries.

			   First of all, we scan fib_info list searching
			   for stray nexthop entries, then ignite fib_flush.
			*/
			if (fib_sync_down(ifa->ifa_local, NULL, 0))
				fib_flush();
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
766
767static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
768{
769
770	struct fib_result       res;
771	struct flowi            fl = { .mark = frn->fl_mark,
772				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
773							    .tos = frn->fl_tos,
774							    .scope = frn->fl_scope } } };
775	if (tb) {
776		local_bh_disable();
777
778		frn->tb_id = tb->tb_id;
779		frn->err = tb->tb_lookup(tb, &fl, &res);
780
781		if (!frn->err) {
782			frn->prefixlen = res.prefixlen;
783			frn->nh_sel = res.nh_sel;
784			frn->type = res.type;
785			frn->scope = res.scope;
786		}
787		local_bh_enable();
788	}
789}
790
791static void nl_fib_input(struct sock *sk, int len)
792{
793	struct sk_buff *skb = NULL;
794        struct nlmsghdr *nlh = NULL;
795	struct fib_result_nl *frn;
796	u32 pid;
797	struct fib_table *tb;
798
799	skb = skb_dequeue(&sk->sk_receive_queue);
800	nlh = (struct nlmsghdr *)skb->data;
801	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
802	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
803		kfree_skb(skb);
804		return;
805	}
806
807	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
808	tb = fib_get_table(frn->tb_id_in);
809
810	nl_fib_lookup(frn, tb);
811
812	pid = nlh->nlmsg_pid;           /*pid of sending process */
813	NETLINK_CB(skb).pid = 0;         /* from kernel */
814	NETLINK_CB(skb).dst_group = 0;  /* unicast */
815	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
816}
817
818static void nl_fib_lookup_init(void)
819{
820      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
821}
822
/*
 * Shut IP routing down on a device: sync its nexthops down (flushing the
 * tables if that reported anything), then flush the routing cache and
 * call arp_ifdown() for the device.  'force' is passed through to
 * fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
830
831static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
832{
833	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
834
835	switch (event) {
836	case NETDEV_UP:
837		fib_add_ifaddr(ifa);
838#ifdef CONFIG_IP_ROUTE_MULTIPATH
839		fib_sync_up(ifa->ifa_dev->dev);
840#endif
841		rt_cache_flush(-1);
842		break;
843	case NETDEV_DOWN:
844		fib_del_ifaddr(ifa);
845		if (ifa->ifa_dev->ifa_list == NULL) {
846			/* Last address was deleted from this interface.
847			   Disable IP.
848			 */
849			fib_disable_ip(ifa->ifa_dev->dev, 1);
850		} else {
851			rt_cache_flush(-1);
852		}
853		break;
854	}
855	return NOTIFY_DONE;
856}
857
858static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
859{
860	struct net_device *dev = ptr;
861	struct in_device *in_dev = __in_dev_get_rtnl(dev);
862
863	if (event == NETDEV_UNREGISTER) {
864		fib_disable_ip(dev, 2);
865		return NOTIFY_DONE;
866	}
867
868	if (!in_dev)
869		return NOTIFY_DONE;
870
871	switch (event) {
872	case NETDEV_UP:
873		for_ifa(in_dev) {
874			fib_add_ifaddr(ifa);
875		} endfor_ifa(in_dev);
876#ifdef CONFIG_IP_ROUTE_MULTIPATH
877		fib_sync_up(dev);
878#endif
879		rt_cache_flush(-1);
880		break;
881	case NETDEV_DOWN:
882		fib_disable_ip(dev, 0);
883		break;
884	case NETDEV_CHANGEMTU:
885	case NETDEV_CHANGE:
886		rt_cache_flush(0);
887		break;
888	}
889	return NOTIFY_DONE;
890}
891
892static struct notifier_block fib_inetaddr_notifier = {
893	.notifier_call =fib_inetaddr_event,
894};
895
896static struct notifier_block fib_netdev_notifier = {
897	.notifier_call =fib_netdev_event,
898};
899
900void __init ip_fib_init(void)
901{
902	unsigned int i;
903
904	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
905		INIT_HLIST_HEAD(&fib_table_hash[i]);
906#ifndef CONFIG_IP_MULTIPLE_TABLES
907	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
908	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
909	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
910	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
911#else
912	fib4_rules_init();
913#endif
914
915	register_netdevice_notifier(&fib_netdev_notifier);
916	register_inetaddr_notifier(&fib_inetaddr_notifier);
917	nl_fib_lookup_init();
918}
919
920EXPORT_SYMBOL(inet_addr_type);
921EXPORT_SYMBOL(ip_dev_find);
922