fib_frontend.c revision e06e7c615877026544ad7f8b309d1a3706410383
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug helper: forwards its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)
51
52#ifndef CONFIG_IP_MULTIPLE_TABLES
53
/*
 * Without CONFIG_IP_MULTIPLE_TABLES only two tables exist; both are created
 * in ip_fib_init() and linked into the single hash bucket below.
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

/* One bucket is enough: it only ever holds the local and main tables. */
#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59
60#else
61
/*
 * Table hash for the multiple-tables configuration.  Buckets are selected
 * with "id & (FIB_TABLE_HASHSZ - 1)", so the size must stay a power of two.
 */
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
64
65struct fib_table *fib_new_table(u32 id)
66{
67	struct fib_table *tb;
68	unsigned int h;
69
70	if (id == 0)
71		id = RT_TABLE_MAIN;
72	tb = fib_get_table(id);
73	if (tb)
74		return tb;
75	tb = fib_hash_init(id);
76	if (!tb)
77		return NULL;
78	h = id & (FIB_TABLE_HASHSZ - 1);
79	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
80	return tb;
81}
82
83struct fib_table *fib_get_table(u32 id)
84{
85	struct fib_table *tb;
86	struct hlist_node *node;
87	unsigned int h;
88
89	if (id == 0)
90		id = RT_TABLE_MAIN;
91	h = id & (FIB_TABLE_HASHSZ - 1);
92	rcu_read_lock();
93	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94		if (tb->tb_id == id) {
95			rcu_read_unlock();
96			return tb;
97		}
98	}
99	rcu_read_unlock();
100	return NULL;
101}
102#endif /* CONFIG_IP_MULTIPLE_TABLES */
103
104static void fib_flush(void)
105{
106	int flushed = 0;
107	struct fib_table *tb;
108	struct hlist_node *node;
109	unsigned int h;
110
111	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113			flushed += tb->tb_flush(tb);
114	}
115
116	if (flushed)
117		rt_cache_flush(-1);
118}
119
120/*
121 *	Find the first device with a given source address.
122 */
123
124struct net_device * ip_dev_find(__be32 addr)
125{
126	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127	struct fib_result res;
128	struct net_device *dev = NULL;
129
130#ifdef CONFIG_IP_MULTIPLE_TABLES
131	res.r = NULL;
132#endif
133
134	if (!ip_fib_local_table ||
135	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
136		return NULL;
137	if (res.type != RTN_LOCAL)
138		goto out;
139	dev = FIB_RES_DEV(res);
140
141	if (dev)
142		dev_hold(dev);
143out:
144	fib_res_put(&res);
145	return dev;
146}
147
148unsigned inet_addr_type(__be32 addr)
149{
150	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151	struct fib_result	res;
152	unsigned ret = RTN_BROADCAST;
153
154	if (ZERONET(addr) || BADCLASS(addr))
155		return RTN_BROADCAST;
156	if (MULTICAST(addr))
157		return RTN_MULTICAST;
158
159#ifdef CONFIG_IP_MULTIPLE_TABLES
160	res.r = NULL;
161#endif
162
163	if (ip_fib_local_table) {
164		ret = RTN_UNICAST;
165		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
166						   &fl, &res)) {
167			ret = res.type;
168			fib_res_put(&res);
169		}
170	}
171	return ret;
172}
173
/*
 * Given (packet source, input interface) and optional (dst, oif, tos):
 *  - (main) check that the source is valid, i.e. not a broadcast or one of
 *    our local addresses.
 *  - figure out on which "logical" interface this packet arrived
 *    and calculate the "specific destination" address.
 *  - check that the packet arrived from the expected physical interface.
 *
 * The check is a reverse lookup: the flow key is built with the packet's
 * source as destination and vice versa.
 *
 * On success *spec_dst is filled in (and *itag on the first-lookup and
 * last-resort paths) and the return value is 1 if the nexthop scope of the
 * reverse route is >= RT_SCOPE_HOST, otherwise 0.  Returns -EINVAL if the
 * device has no in_device, if the reverse route is not RTN_UNICAST, or if
 * reverse-path filtering is enabled and the reverse route does not lead
 * back through 'dev'.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
	struct in_device *in_dev;
	/* Reversed flow: look up a route *to* the packet's source. */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .iif = oif };
	struct fib_result res;
	int no_addr, rpf;
	int ret;

	no_addr = rpf = 0;
	/* Snapshot the per-device settings under RCU. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
	}
	rcu_read_unlock();

	/* After the unlock in_dev is only compared against NULL. */
	if (in_dev == NULL)
		goto e_inval;

	if (fib_lookup(&fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	/*
	 * Accept if the reverse route leaves through the arrival device;
	 * with multipath routing any route with more than one nexthop is
	 * accepted as well.
	 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf)
		goto e_inval;
	/* Retry the lookup, this time constrained to the arrival device. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(&fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* No usable reverse route: only tolerated when rp_filter is off. */
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}
252
253static inline __be32 sk_extract_addr(struct sockaddr *addr)
254{
255	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
256}
257
258static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
259{
260	struct nlattr *nla;
261
262	nla = (struct nlattr *) ((char *) mx + len);
263	nla->nla_type = type;
264	nla->nla_len = nla_attr_size(4);
265	*(u32 *) nla_data(nla) = value;
266
267	return len + nla_total_size(4);
268}
269
/*
 * Translate a legacy ioctl route description (struct rtentry) into the
 * fib_config used by the FIB table operations.
 *
 * @cmd: SIOCADDRT or SIOCDELRT; several steps are skipped for deletion.
 * @rt:  kernel copy of the user's rtentry (rt->rt_dev is still a user
 *       pointer and is copied in here).
 * @cfg: output, fully zeroed first.
 *
 * Returns 0 on success or a negative errno.  On success cfg->fc_mx may point
 * to a kzalloc()ed metrics buffer; the visible caller, ip_rt_ioctl(), frees
 * it with kfree().
 */
static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	/* Host routes keep the default /32; the mask is ignored for them. */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	/* Only additions carry creation flags and a protocol tag. */
	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* A non-zero ioctl metric is stored as priority metric - 1. */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	/* Reject routes need no device, gateway or metrics. */
	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE is a placeholder; it is resolved further down. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		/* Only IFNAMSIZ-1 bytes were copied; terminate explicitly. */
		devname[IFNAMSIZ-1] = 0;
		/*
		 * "dev:label" names an address label on the device: cut the
		 * name at the colon to find the device itself first.
		 */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/* Restore the full label and find the matching address. */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			/* The labelled address becomes the preferred source. */
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		/* A real gateway that is a unicast address gives universe scope. */
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	/* Deletion stops here; the checks and metrics below apply to adds only. */
	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for the at most three u32 attributes added below. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/*
		 * NOTE(review): 40 is presumably the IPv4 + TCP header size,
		 * turning an MTU into an advertised MSS - confirm.  An rt_mtu
		 * below 40 is not rejected here.
		 */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		/* NOTE(review): the << 3 presumably matches the RTAX_RTT unit - confirm. */
		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
396
397/*
398 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
399 */
400
401int ip_rt_ioctl(unsigned int cmd, void __user *arg)
402{
403	struct fib_config cfg;
404	struct rtentry rt;
405	int err;
406
407	switch (cmd) {
408	case SIOCADDRT:		/* Add a route */
409	case SIOCDELRT:		/* Delete a route */
410		if (!capable(CAP_NET_ADMIN))
411			return -EPERM;
412
413		if (copy_from_user(&rt, arg, sizeof(rt)))
414			return -EFAULT;
415
416		rtnl_lock();
417		err = rtentry_to_fib_config(cmd, &rt, &cfg);
418		if (err == 0) {
419			struct fib_table *tb;
420
421			if (cmd == SIOCDELRT) {
422				tb = fib_get_table(cfg.fc_table);
423				if (tb)
424					err = tb->tb_delete(tb, &cfg);
425				else
426					err = -ESRCH;
427			} else {
428				tb = fib_new_table(cfg.fc_table);
429				if (tb)
430					err = tb->tb_insert(tb, &cfg);
431				else
432					err = -ENOBUFS;
433			}
434
435			/* allocated by rtentry_to_fib_config() */
436			kfree(cfg.fc_mx);
437		}
438		rtnl_unlock();
439		return err;
440	}
441	return -EINVAL;
442}
443
444const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
445	[RTA_DST]		= { .type = NLA_U32 },
446	[RTA_SRC]		= { .type = NLA_U32 },
447	[RTA_IIF]		= { .type = NLA_U32 },
448	[RTA_OIF]		= { .type = NLA_U32 },
449	[RTA_GATEWAY]		= { .type = NLA_U32 },
450	[RTA_PRIORITY]		= { .type = NLA_U32 },
451	[RTA_PREFSRC]		= { .type = NLA_U32 },
452	[RTA_METRICS]		= { .type = NLA_NESTED },
453	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
454	[RTA_PROTOINFO]		= { .type = NLA_U32 },
455	[RTA_FLOW]		= { .type = NLA_U32 },
456};
457
458static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
459			     struct fib_config *cfg)
460{
461	struct nlattr *attr;
462	int err, remaining;
463	struct rtmsg *rtm;
464
465	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
466	if (err < 0)
467		goto errout;
468
469	memset(cfg, 0, sizeof(*cfg));
470
471	rtm = nlmsg_data(nlh);
472	cfg->fc_dst_len = rtm->rtm_dst_len;
473	cfg->fc_tos = rtm->rtm_tos;
474	cfg->fc_table = rtm->rtm_table;
475	cfg->fc_protocol = rtm->rtm_protocol;
476	cfg->fc_scope = rtm->rtm_scope;
477	cfg->fc_type = rtm->rtm_type;
478	cfg->fc_flags = rtm->rtm_flags;
479	cfg->fc_nlflags = nlh->nlmsg_flags;
480
481	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
482	cfg->fc_nlinfo.nlh = nlh;
483
484	if (cfg->fc_type > RTN_MAX) {
485		err = -EINVAL;
486		goto errout;
487	}
488
489	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
490		switch (attr->nla_type) {
491		case RTA_DST:
492			cfg->fc_dst = nla_get_be32(attr);
493			break;
494		case RTA_OIF:
495			cfg->fc_oif = nla_get_u32(attr);
496			break;
497		case RTA_GATEWAY:
498			cfg->fc_gw = nla_get_be32(attr);
499			break;
500		case RTA_PRIORITY:
501			cfg->fc_priority = nla_get_u32(attr);
502			break;
503		case RTA_PREFSRC:
504			cfg->fc_prefsrc = nla_get_be32(attr);
505			break;
506		case RTA_METRICS:
507			cfg->fc_mx = nla_data(attr);
508			cfg->fc_mx_len = nla_len(attr);
509			break;
510		case RTA_MULTIPATH:
511			cfg->fc_mp = nla_data(attr);
512			cfg->fc_mp_len = nla_len(attr);
513			break;
514		case RTA_FLOW:
515			cfg->fc_flow = nla_get_u32(attr);
516			break;
517		case RTA_TABLE:
518			cfg->fc_table = nla_get_u32(attr);
519			break;
520		}
521	}
522
523	return 0;
524errout:
525	return err;
526}
527
528static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
529{
530	struct fib_config cfg;
531	struct fib_table *tb;
532	int err;
533
534	err = rtm_to_fib_config(skb, nlh, &cfg);
535	if (err < 0)
536		goto errout;
537
538	tb = fib_get_table(cfg.fc_table);
539	if (tb == NULL) {
540		err = -ESRCH;
541		goto errout;
542	}
543
544	err = tb->tb_delete(tb, &cfg);
545errout:
546	return err;
547}
548
549static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
550{
551	struct fib_config cfg;
552	struct fib_table *tb;
553	int err;
554
555	err = rtm_to_fib_config(skb, nlh, &cfg);
556	if (err < 0)
557		goto errout;
558
559	tb = fib_new_table(cfg.fc_table);
560	if (tb == NULL) {
561		err = -ENOBUFS;
562		goto errout;
563	}
564
565	err = tb->tb_insert(tb, &cfg);
566errout:
567	return err;
568}
569
/*
 * Netlink dump callback for RTM_GETROUTE.
 *
 * Requests with RTM_F_CLONED set in the rtmsg flags are handed to
 * ip_rt_dump() instead.  Otherwise every FIB table is dumped in hash order.
 *
 * The dump is resumable through cb->args:
 *   args[0]  hash bucket to resume from
 *   args[1]  index of the table within that bucket to resume from
 *   args[2+] cleared here before each table after the first one dumped in
 *            this call; presumably per-table state owned by tb_dump() -
 *            TODO confirm.
 *
 * Always returns skb->len.
 */
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_node *node;
	int dumped = 0;

	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
		return ip_rt_dump(skb, cb);

	s_h = cb->args[0];
	s_e = cb->args[1];

	/* s_e only applies to the first bucket visited; it is reset afterwards. */
	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
			/* Skip tables already completed by an earlier call. */
			if (e < s_e)
				goto next;
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			/* A negative return stops the dump; (h, e) mark where to resume. */
			if (tb->tb_dump(tb, skb, cb) < 0)
				goto out;
			dumped = 1;
next:
			e++;
		}
	}
out:
	/* Save the position for the next invocation. */
	cb->args[1] = e;
	cb->args[0] = h;

	return skb->len;
}
606
607/* Prepare and feed intra-kernel routing request.
608   Really, it should be netlink message, but :-( netlink
609   can be not configured, so that we feed it directly
610   to fib engine. It is legal, because all events occur
611   only when netlink is already locked.
612 */
613
614static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
615{
616	struct fib_table *tb;
617	struct fib_config cfg = {
618		.fc_protocol = RTPROT_KERNEL,
619		.fc_type = type,
620		.fc_dst = dst,
621		.fc_dst_len = dst_len,
622		.fc_prefsrc = ifa->ifa_local,
623		.fc_oif = ifa->ifa_dev->dev->ifindex,
624		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
625	};
626
627	if (type == RTN_UNICAST)
628		tb = fib_new_table(RT_TABLE_MAIN);
629	else
630		tb = fib_new_table(RT_TABLE_LOCAL);
631
632	if (tb == NULL)
633		return;
634
635	cfg.fc_table = tb->tb_id;
636
637	if (type != RTN_LOCAL)
638		cfg.fc_scope = RT_SCOPE_LINK;
639	else
640		cfg.fc_scope = RT_SCOPE_HOST;
641
642	if (cmd == RTM_NEWROUTE)
643		tb->tb_insert(tb, &cfg);
644	else
645		tb->tb_delete(tb, &cfg);
646}
647
/*
 * Install the kernel routes implied by an interface address:
 *  - a /32 local route for the address itself (always);
 *  - and, only if the device is up:
 *    - a broadcast route for an explicitly assigned broadcast address;
 *    - for a primary address, the prefix route (RTN_LOCAL on loopback
 *      devices, RTN_UNICAST otherwise) plus the two network broadcast
 *      routes when the prefix is shorter than /31.
 *
 * For a secondary address the routes use the matching primary address as
 * preferred source; if no primary can be found nothing is installed.
 */
void fib_add_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *prim = ifa;
	__be32 mask = ifa->ifa_mask;
	__be32 addr = ifa->ifa_local;
	__be32 prefix = ifa->ifa_address&mask;

	if (ifa->ifa_flags&IFA_F_SECONDARY) {
		prim = inet_ifa_byprefix(in_dev, prefix, mask);
		if (prim == NULL) {
			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);

	/* Everything below is only installed while the device is up. */
	if (!(dev->flags&IFF_UP))
		return;

	/* Add broadcast address, if it is explicitly assigned. */
	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);

	/*
	 * Prefix route: primary addresses only, and not when the "prefix"
	 * is just the /32 address itself.
	 */
	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);

		/* Add network-specific broadcasts, when that makes sense. */
		if (ifa->ifa_prefixlen < 31) {
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
		}
	}
}
686
/*
 * Remove the kernel routes that belonged to an interface address.
 *
 * The prefix route of a primary address is deleted unconditionally.  The
 * local and broadcast routes are deleted only if no address on the device's
 * list still needs them; the *_OK bits record "still in use".
 * NOTE(review): this presumably relies on 'ifa' already being unlinked from
 * in_dev->ifa_list by the caller, otherwise it would match itself - confirm.
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	/* All-ones and all-zeroes host addresses of this address's subnet. */
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
	/* "Still in use by a listed address" bits; see the scan below. */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		/* Secondary routes were installed with the primary as source. */
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than addition.
	   We must take care not to delete too much :-)

	   Scan the address list to be sure that the addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check that this local address has finally disappeared. */
		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
			/* Last but not least:
			   we must flush stray FIB entries.

			   First we scan the fib_info list for stray
			   nexthop entries, then trigger fib_flush.
			*/
			if (fib_sync_down(ifa->ifa_local, NULL, 0))
				fib_flush();
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
755
756static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
757{
758
759	struct fib_result       res;
760	struct flowi            fl = { .mark = frn->fl_mark,
761				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
762							    .tos = frn->fl_tos,
763							    .scope = frn->fl_scope } } };
764
765#ifdef CONFIG_IP_MULTIPLE_TABLES
766	res.r = NULL;
767#endif
768
769	frn->err = -ENOENT;
770	if (tb) {
771		local_bh_disable();
772
773		frn->tb_id = tb->tb_id;
774		frn->err = tb->tb_lookup(tb, &fl, &res);
775
776		if (!frn->err) {
777			frn->prefixlen = res.prefixlen;
778			frn->nh_sel = res.nh_sel;
779			frn->type = res.type;
780			frn->scope = res.scope;
781			fib_res_put(&res);
782		}
783		local_bh_enable();
784	}
785}
786
787static void nl_fib_input(struct sock *sk, int len)
788{
789	struct sk_buff *skb = NULL;
790	struct nlmsghdr *nlh = NULL;
791	struct fib_result_nl *frn;
792	u32 pid;
793	struct fib_table *tb;
794
795	skb = skb_dequeue(&sk->sk_receive_queue);
796	if (skb == NULL)
797		return;
798
799	nlh = nlmsg_hdr(skb);
800	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
801	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
802		kfree_skb(skb);
803		return;
804	}
805
806	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
807	tb = fib_get_table(frn->tb_id_in);
808
809	nl_fib_lookup(frn, tb);
810
811	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
812	NETLINK_CB(skb).pid = 0;         /* from kernel */
813	NETLINK_CB(skb).dst_group = 0;  /* unicast */
814	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
815}
816
817static void nl_fib_lookup_init(void)
818{
819      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
820      			    THIS_MODULE);
821}
822
/*
 *	Tear down IP routing state for a device: sync down its FIB entries
 *	(flushing the tables if anything was marked), flush the routing cache
 *	and take the device's ARP entries down.  'force' is passed through to
 *	fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
830
831static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
832{
833	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
834
835	switch (event) {
836	case NETDEV_UP:
837		fib_add_ifaddr(ifa);
838#ifdef CONFIG_IP_ROUTE_MULTIPATH
839		fib_sync_up(ifa->ifa_dev->dev);
840#endif
841		rt_cache_flush(-1);
842		break;
843	case NETDEV_DOWN:
844		fib_del_ifaddr(ifa);
845		if (ifa->ifa_dev->ifa_list == NULL) {
846			/* Last address was deleted from this interface.
847			   Disable IP.
848			 */
849			fib_disable_ip(ifa->ifa_dev->dev, 1);
850		} else {
851			rt_cache_flush(-1);
852		}
853		break;
854	}
855	return NOTIFY_DONE;
856}
857
858static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
859{
860	struct net_device *dev = ptr;
861	struct in_device *in_dev = __in_dev_get_rtnl(dev);
862
863	if (event == NETDEV_UNREGISTER) {
864		fib_disable_ip(dev, 2);
865		return NOTIFY_DONE;
866	}
867
868	if (!in_dev)
869		return NOTIFY_DONE;
870
871	switch (event) {
872	case NETDEV_UP:
873		for_ifa(in_dev) {
874			fib_add_ifaddr(ifa);
875		} endfor_ifa(in_dev);
876#ifdef CONFIG_IP_ROUTE_MULTIPATH
877		fib_sync_up(dev);
878#endif
879		rt_cache_flush(-1);
880		break;
881	case NETDEV_DOWN:
882		fib_disable_ip(dev, 0);
883		break;
884	case NETDEV_CHANGEMTU:
885	case NETDEV_CHANGE:
886		rt_cache_flush(0);
887		break;
888	}
889	return NOTIFY_DONE;
890}
891
892static struct notifier_block fib_inetaddr_notifier = {
893	.notifier_call =fib_inetaddr_event,
894};
895
896static struct notifier_block fib_netdev_notifier = {
897	.notifier_call =fib_netdev_event,
898};
899
/*
 * Boot-time initialisation of the FIB frontend: set up the table hash,
 * create the initial tables (or hand over to fib4_rules_init() when
 * multiple tables are configured), register the device and address
 * notifiers, create the FIB lookup netlink socket and register the
 * rtnetlink route handlers.
 */
void __init ip_fib_init(void)
{
	unsigned int i;

	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
		INIT_HLIST_HEAD(&fib_table_hash[i]);
#ifndef CONFIG_IP_MULTIPLE_TABLES
	/*
	 * NOTE(review): the results of fib_hash_init() are used without a
	 * NULL check; confirm whether it can fail at this point.
	 */
	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
#else
	fib4_rules_init();
#endif

	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);
	nl_fib_lookup_init();

	/* RTM_GETROUTE is served only through the dump callback. */
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
}
923
/* Address classification and device lookup are exported for use by modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);
926