fib_frontend.c revision 63f3444fb9a54c024d55f1205f8b94e7d2786595
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug helper: forwards its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)
51
52#ifndef CONFIG_IP_MULTIPLE_TABLES
53
54struct fib_table *ip_fib_local_table;
55struct fib_table *ip_fib_main_table;
56
57#define FIB_TABLE_HASHSZ 1
58static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59
60#else
61
62#define FIB_TABLE_HASHSZ 256
63static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
64
65struct fib_table *fib_new_table(u32 id)
66{
67	struct fib_table *tb;
68	unsigned int h;
69
70	if (id == 0)
71		id = RT_TABLE_MAIN;
72	tb = fib_get_table(id);
73	if (tb)
74		return tb;
75	tb = fib_hash_init(id);
76	if (!tb)
77		return NULL;
78	h = id & (FIB_TABLE_HASHSZ - 1);
79	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
80	return tb;
81}
82
83struct fib_table *fib_get_table(u32 id)
84{
85	struct fib_table *tb;
86	struct hlist_node *node;
87	unsigned int h;
88
89	if (id == 0)
90		id = RT_TABLE_MAIN;
91	h = id & (FIB_TABLE_HASHSZ - 1);
92	rcu_read_lock();
93	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94		if (tb->tb_id == id) {
95			rcu_read_unlock();
96			return tb;
97		}
98	}
99	rcu_read_unlock();
100	return NULL;
101}
102#endif /* CONFIG_IP_MULTIPLE_TABLES */
103
104static void fib_flush(void)
105{
106	int flushed = 0;
107	struct fib_table *tb;
108	struct hlist_node *node;
109	unsigned int h;
110
111	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113			flushed += tb->tb_flush(tb);
114	}
115
116	if (flushed)
117		rt_cache_flush(-1);
118}
119
120/*
121 *	Find the first device with a given source address.
122 */
123
124struct net_device * ip_dev_find(__be32 addr)
125{
126	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127	struct fib_result res;
128	struct net_device *dev = NULL;
129
130#ifdef CONFIG_IP_MULTIPLE_TABLES
131	res.r = NULL;
132#endif
133
134	if (!ip_fib_local_table ||
135	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
136		return NULL;
137	if (res.type != RTN_LOCAL)
138		goto out;
139	dev = FIB_RES_DEV(res);
140
141	if (dev)
142		dev_hold(dev);
143out:
144	fib_res_put(&res);
145	return dev;
146}
147
148unsigned inet_addr_type(__be32 addr)
149{
150	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151	struct fib_result	res;
152	unsigned ret = RTN_BROADCAST;
153
154	if (ZERONET(addr) || BADCLASS(addr))
155		return RTN_BROADCAST;
156	if (MULTICAST(addr))
157		return RTN_MULTICAST;
158
159#ifdef CONFIG_IP_MULTIPLE_TABLES
160	res.r = NULL;
161#endif
162
163	if (ip_fib_local_table) {
164		ret = RTN_UNICAST;
165		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
166						   &fl, &res)) {
167			ret = res.type;
168			fib_res_put(&res);
169		}
170	}
171	return ret;
172}
173
174/* Given (packet source, input interface) and optional (dst, oif, tos):
175   - (main) check, that source is valid i.e. not broadcast or our local
176     address.
177   - figure out what "logical" interface this packet arrived
178     and calculate "specific destination" address.
179   - check, that packet arrived from expected physical interface.
180 */
181
182int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
183			struct net_device *dev, __be32 *spec_dst, u32 *itag)
184{
185	struct in_device *in_dev;
186	struct flowi fl = { .nl_u = { .ip4_u =
187				      { .daddr = src,
188					.saddr = dst,
189					.tos = tos } },
190			    .iif = oif };
191	struct fib_result res;
192	int no_addr, rpf;
193	int ret;
194
195	no_addr = rpf = 0;
196	rcu_read_lock();
197	in_dev = __in_dev_get_rcu(dev);
198	if (in_dev) {
199		no_addr = in_dev->ifa_list == NULL;
200		rpf = IN_DEV_RPFILTER(in_dev);
201	}
202	rcu_read_unlock();
203
204	if (in_dev == NULL)
205		goto e_inval;
206
207	if (fib_lookup(&fl, &res))
208		goto last_resort;
209	if (res.type != RTN_UNICAST)
210		goto e_inval_res;
211	*spec_dst = FIB_RES_PREFSRC(res);
212	fib_combine_itag(itag, &res);
213#ifdef CONFIG_IP_ROUTE_MULTIPATH
214	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
215#else
216	if (FIB_RES_DEV(res) == dev)
217#endif
218	{
219		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
220		fib_res_put(&res);
221		return ret;
222	}
223	fib_res_put(&res);
224	if (no_addr)
225		goto last_resort;
226	if (rpf)
227		goto e_inval;
228	fl.oif = dev->ifindex;
229
230	ret = 0;
231	if (fib_lookup(&fl, &res) == 0) {
232		if (res.type == RTN_UNICAST) {
233			*spec_dst = FIB_RES_PREFSRC(res);
234			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
235		}
236		fib_res_put(&res);
237	}
238	return ret;
239
240last_resort:
241	if (rpf)
242		goto e_inval;
243	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
244	*itag = 0;
245	return 0;
246
247e_inval_res:
248	fib_res_put(&res);
249e_inval:
250	return -EINVAL;
251}
252
253#ifndef CONFIG_IP_NOSIOCRT
254
255static inline __be32 sk_extract_addr(struct sockaddr *addr)
256{
257	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
258}
259
260static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
261{
262	struct nlattr *nla;
263
264	nla = (struct nlattr *) ((char *) mx + len);
265	nla->nla_type = type;
266	nla->nla_len = nla_attr_size(4);
267	*(u32 *) nla_data(nla) = value;
268
269	return len + nla_total_size(4);
270}
271
272static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
273				 struct fib_config *cfg)
274{
275	__be32 addr;
276	int plen;
277
278	memset(cfg, 0, sizeof(*cfg));
279
280	if (rt->rt_dst.sa_family != AF_INET)
281		return -EAFNOSUPPORT;
282
283	/*
284	 * Check mask for validity:
285	 * a) it must be contiguous.
286	 * b) destination must have all host bits clear.
287	 * c) if application forgot to set correct family (AF_INET),
288	 *    reject request unless it is absolutely clear i.e.
289	 *    both family and mask are zero.
290	 */
291	plen = 32;
292	addr = sk_extract_addr(&rt->rt_dst);
293	if (!(rt->rt_flags & RTF_HOST)) {
294		__be32 mask = sk_extract_addr(&rt->rt_genmask);
295
296		if (rt->rt_genmask.sa_family != AF_INET) {
297			if (mask || rt->rt_genmask.sa_family)
298				return -EAFNOSUPPORT;
299		}
300
301		if (bad_mask(mask, addr))
302			return -EINVAL;
303
304		plen = inet_mask_len(mask);
305	}
306
307	cfg->fc_dst_len = plen;
308	cfg->fc_dst = addr;
309
310	if (cmd != SIOCDELRT) {
311		cfg->fc_nlflags = NLM_F_CREATE;
312		cfg->fc_protocol = RTPROT_BOOT;
313	}
314
315	if (rt->rt_metric)
316		cfg->fc_priority = rt->rt_metric - 1;
317
318	if (rt->rt_flags & RTF_REJECT) {
319		cfg->fc_scope = RT_SCOPE_HOST;
320		cfg->fc_type = RTN_UNREACHABLE;
321		return 0;
322	}
323
324	cfg->fc_scope = RT_SCOPE_NOWHERE;
325	cfg->fc_type = RTN_UNICAST;
326
327	if (rt->rt_dev) {
328		char *colon;
329		struct net_device *dev;
330		char devname[IFNAMSIZ];
331
332		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
333			return -EFAULT;
334
335		devname[IFNAMSIZ-1] = 0;
336		colon = strchr(devname, ':');
337		if (colon)
338			*colon = 0;
339		dev = __dev_get_by_name(devname);
340		if (!dev)
341			return -ENODEV;
342		cfg->fc_oif = dev->ifindex;
343		if (colon) {
344			struct in_ifaddr *ifa;
345			struct in_device *in_dev = __in_dev_get_rtnl(dev);
346			if (!in_dev)
347				return -ENODEV;
348			*colon = ':';
349			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
350				if (strcmp(ifa->ifa_label, devname) == 0)
351					break;
352			if (ifa == NULL)
353				return -ENODEV;
354			cfg->fc_prefsrc = ifa->ifa_local;
355		}
356	}
357
358	addr = sk_extract_addr(&rt->rt_gateway);
359	if (rt->rt_gateway.sa_family == AF_INET && addr) {
360		cfg->fc_gw = addr;
361		if (rt->rt_flags & RTF_GATEWAY &&
362		    inet_addr_type(addr) == RTN_UNICAST)
363			cfg->fc_scope = RT_SCOPE_UNIVERSE;
364	}
365
366	if (cmd == SIOCDELRT)
367		return 0;
368
369	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
370		return -EINVAL;
371
372	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
373		cfg->fc_scope = RT_SCOPE_LINK;
374
375	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
376		struct nlattr *mx;
377		int len = 0;
378
379		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
380		if (mx == NULL)
381			return -ENOMEM;
382
383		if (rt->rt_flags & RTF_MTU)
384			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
385
386		if (rt->rt_flags & RTF_WINDOW)
387			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
388
389		if (rt->rt_flags & RTF_IRTT)
390			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
391
392		cfg->fc_mx = mx;
393		cfg->fc_mx_len = len;
394	}
395
396	return 0;
397}
398
399/*
400 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
401 */
402
403int ip_rt_ioctl(unsigned int cmd, void __user *arg)
404{
405	struct fib_config cfg;
406	struct rtentry rt;
407	int err;
408
409	switch (cmd) {
410	case SIOCADDRT:		/* Add a route */
411	case SIOCDELRT:		/* Delete a route */
412		if (!capable(CAP_NET_ADMIN))
413			return -EPERM;
414
415		if (copy_from_user(&rt, arg, sizeof(rt)))
416			return -EFAULT;
417
418		rtnl_lock();
419		err = rtentry_to_fib_config(cmd, &rt, &cfg);
420		if (err == 0) {
421			struct fib_table *tb;
422
423			if (cmd == SIOCDELRT) {
424				tb = fib_get_table(cfg.fc_table);
425				if (tb)
426					err = tb->tb_delete(tb, &cfg);
427				else
428					err = -ESRCH;
429			} else {
430				tb = fib_new_table(cfg.fc_table);
431				if (tb)
432					err = tb->tb_insert(tb, &cfg);
433				else
434					err = -ENOBUFS;
435			}
436
437			/* allocated by rtentry_to_fib_config() */
438			kfree(cfg.fc_mx);
439		}
440		rtnl_unlock();
441		return err;
442	}
443	return -EINVAL;
444}
445
446#else
447
448int ip_rt_ioctl(unsigned int cmd, void *arg)
449{
450	return -EINVAL;
451}
452
453#endif
454
455struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
456	[RTA_DST]		= { .type = NLA_U32 },
457	[RTA_SRC]		= { .type = NLA_U32 },
458	[RTA_IIF]		= { .type = NLA_U32 },
459	[RTA_OIF]		= { .type = NLA_U32 },
460	[RTA_GATEWAY]		= { .type = NLA_U32 },
461	[RTA_PRIORITY]		= { .type = NLA_U32 },
462	[RTA_PREFSRC]		= { .type = NLA_U32 },
463	[RTA_METRICS]		= { .type = NLA_NESTED },
464	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
465	[RTA_PROTOINFO]		= { .type = NLA_U32 },
466	[RTA_FLOW]		= { .type = NLA_U32 },
467	[RTA_MP_ALGO]		= { .type = NLA_U32 },
468};
469
470static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
471			     struct fib_config *cfg)
472{
473	struct nlattr *attr;
474	int err, remaining;
475	struct rtmsg *rtm;
476
477	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
478	if (err < 0)
479		goto errout;
480
481	memset(cfg, 0, sizeof(*cfg));
482
483	rtm = nlmsg_data(nlh);
484	cfg->fc_dst_len = rtm->rtm_dst_len;
485	cfg->fc_tos = rtm->rtm_tos;
486	cfg->fc_table = rtm->rtm_table;
487	cfg->fc_protocol = rtm->rtm_protocol;
488	cfg->fc_scope = rtm->rtm_scope;
489	cfg->fc_type = rtm->rtm_type;
490	cfg->fc_flags = rtm->rtm_flags;
491	cfg->fc_nlflags = nlh->nlmsg_flags;
492
493	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
494	cfg->fc_nlinfo.nlh = nlh;
495
496	if (cfg->fc_type > RTN_MAX) {
497		err = -EINVAL;
498		goto errout;
499	}
500
501	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
502		switch (attr->nla_type) {
503		case RTA_DST:
504			cfg->fc_dst = nla_get_be32(attr);
505			break;
506		case RTA_OIF:
507			cfg->fc_oif = nla_get_u32(attr);
508			break;
509		case RTA_GATEWAY:
510			cfg->fc_gw = nla_get_be32(attr);
511			break;
512		case RTA_PRIORITY:
513			cfg->fc_priority = nla_get_u32(attr);
514			break;
515		case RTA_PREFSRC:
516			cfg->fc_prefsrc = nla_get_be32(attr);
517			break;
518		case RTA_METRICS:
519			cfg->fc_mx = nla_data(attr);
520			cfg->fc_mx_len = nla_len(attr);
521			break;
522		case RTA_MULTIPATH:
523			cfg->fc_mp = nla_data(attr);
524			cfg->fc_mp_len = nla_len(attr);
525			break;
526		case RTA_FLOW:
527			cfg->fc_flow = nla_get_u32(attr);
528			break;
529		case RTA_MP_ALGO:
530			cfg->fc_mp_alg = nla_get_u32(attr);
531			break;
532		case RTA_TABLE:
533			cfg->fc_table = nla_get_u32(attr);
534			break;
535		}
536	}
537
538	return 0;
539errout:
540	return err;
541}
542
543static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
544{
545	struct fib_config cfg;
546	struct fib_table *tb;
547	int err;
548
549	err = rtm_to_fib_config(skb, nlh, &cfg);
550	if (err < 0)
551		goto errout;
552
553	tb = fib_get_table(cfg.fc_table);
554	if (tb == NULL) {
555		err = -ESRCH;
556		goto errout;
557	}
558
559	err = tb->tb_delete(tb, &cfg);
560errout:
561	return err;
562}
563
564static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
565{
566	struct fib_config cfg;
567	struct fib_table *tb;
568	int err;
569
570	err = rtm_to_fib_config(skb, nlh, &cfg);
571	if (err < 0)
572		goto errout;
573
574	tb = fib_new_table(cfg.fc_table);
575	if (tb == NULL) {
576		err = -ENOBUFS;
577		goto errout;
578	}
579
580	err = tb->tb_insert(tb, &cfg);
581errout:
582	return err;
583}
584
585static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
586{
587	unsigned int h, s_h;
588	unsigned int e = 0, s_e;
589	struct fib_table *tb;
590	struct hlist_node *node;
591	int dumped = 0;
592
593	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
594	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
595		return ip_rt_dump(skb, cb);
596
597	s_h = cb->args[0];
598	s_e = cb->args[1];
599
600	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
601		e = 0;
602		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
603			if (e < s_e)
604				goto next;
605			if (dumped)
606				memset(&cb->args[2], 0, sizeof(cb->args) -
607						 2 * sizeof(cb->args[0]));
608			if (tb->tb_dump(tb, skb, cb) < 0)
609				goto out;
610			dumped = 1;
611next:
612			e++;
613		}
614	}
615out:
616	cb->args[1] = e;
617	cb->args[0] = h;
618
619	return skb->len;
620}
621
622/* Prepare and feed intra-kernel routing request.
623   Really, it should be netlink message, but :-( netlink
624   can be not configured, so that we feed it directly
625   to fib engine. It is legal, because all events occur
626   only when netlink is already locked.
627 */
628
629static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
630{
631	struct fib_table *tb;
632	struct fib_config cfg = {
633		.fc_protocol = RTPROT_KERNEL,
634		.fc_type = type,
635		.fc_dst = dst,
636		.fc_dst_len = dst_len,
637		.fc_prefsrc = ifa->ifa_local,
638		.fc_oif = ifa->ifa_dev->dev->ifindex,
639		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
640	};
641
642	if (type == RTN_UNICAST)
643		tb = fib_new_table(RT_TABLE_MAIN);
644	else
645		tb = fib_new_table(RT_TABLE_LOCAL);
646
647	if (tb == NULL)
648		return;
649
650	cfg.fc_table = tb->tb_id;
651
652	if (type != RTN_LOCAL)
653		cfg.fc_scope = RT_SCOPE_LINK;
654	else
655		cfg.fc_scope = RT_SCOPE_HOST;
656
657	if (cmd == RTM_NEWROUTE)
658		tb->tb_insert(tb, &cfg);
659	else
660		tb->tb_delete(tb, &cfg);
661}
662
663void fib_add_ifaddr(struct in_ifaddr *ifa)
664{
665	struct in_device *in_dev = ifa->ifa_dev;
666	struct net_device *dev = in_dev->dev;
667	struct in_ifaddr *prim = ifa;
668	__be32 mask = ifa->ifa_mask;
669	__be32 addr = ifa->ifa_local;
670	__be32 prefix = ifa->ifa_address&mask;
671
672	if (ifa->ifa_flags&IFA_F_SECONDARY) {
673		prim = inet_ifa_byprefix(in_dev, prefix, mask);
674		if (prim == NULL) {
675			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
676			return;
677		}
678	}
679
680	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
681
682	if (!(dev->flags&IFF_UP))
683		return;
684
685	/* Add broadcast address, if it is explicitly assigned. */
686	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
687		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
688
689	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
690	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
691		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
692			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
693
694		/* Add network specific broadcasts, when it takes a sense */
695		if (ifa->ifa_prefixlen < 31) {
696			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
697			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
698		}
699	}
700}
701
702static void fib_del_ifaddr(struct in_ifaddr *ifa)
703{
704	struct in_device *in_dev = ifa->ifa_dev;
705	struct net_device *dev = in_dev->dev;
706	struct in_ifaddr *ifa1;
707	struct in_ifaddr *prim = ifa;
708	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
709	__be32 any = ifa->ifa_address&ifa->ifa_mask;
710#define LOCAL_OK	1
711#define BRD_OK		2
712#define BRD0_OK		4
713#define BRD1_OK		8
714	unsigned ok = 0;
715
716	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
717		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
718			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
719	else {
720		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
721		if (prim == NULL) {
722			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
723			return;
724		}
725	}
726
727	/* Deletion is more complicated than add.
728	   We should take care of not to delete too much :-)
729
730	   Scan address list to be sure that addresses are really gone.
731	 */
732
733	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
734		if (ifa->ifa_local == ifa1->ifa_local)
735			ok |= LOCAL_OK;
736		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
737			ok |= BRD_OK;
738		if (brd == ifa1->ifa_broadcast)
739			ok |= BRD1_OK;
740		if (any == ifa1->ifa_broadcast)
741			ok |= BRD0_OK;
742	}
743
744	if (!(ok&BRD_OK))
745		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
746	if (!(ok&BRD1_OK))
747		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
748	if (!(ok&BRD0_OK))
749		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
750	if (!(ok&LOCAL_OK)) {
751		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
752
753		/* Check, that this local address finally disappeared. */
754		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
755			/* And the last, but not the least thing.
756			   We must flush stray FIB entries.
757
758			   First of all, we scan fib_info list searching
759			   for stray nexthop entries, then ignite fib_flush.
760			*/
761			if (fib_sync_down(ifa->ifa_local, NULL, 0))
762				fib_flush();
763		}
764	}
765#undef LOCAL_OK
766#undef BRD_OK
767#undef BRD0_OK
768#undef BRD1_OK
769}
770
771static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
772{
773
774	struct fib_result       res;
775	struct flowi            fl = { .mark = frn->fl_mark,
776				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
777							    .tos = frn->fl_tos,
778							    .scope = frn->fl_scope } } };
779
780	frn->err = -ENOENT;
781	if (tb) {
782		local_bh_disable();
783
784		frn->tb_id = tb->tb_id;
785		frn->err = tb->tb_lookup(tb, &fl, &res);
786
787		if (!frn->err) {
788			frn->prefixlen = res.prefixlen;
789			frn->nh_sel = res.nh_sel;
790			frn->type = res.type;
791			frn->scope = res.scope;
792			fib_res_put(&res);
793		}
794		local_bh_enable();
795	}
796}
797
798static void nl_fib_input(struct sock *sk, int len)
799{
800	struct sk_buff *skb = NULL;
801	struct nlmsghdr *nlh = NULL;
802	struct fib_result_nl *frn;
803	u32 pid;
804	struct fib_table *tb;
805
806	skb = skb_dequeue(&sk->sk_receive_queue);
807	if (skb == NULL)
808		return;
809
810	nlh = nlmsg_hdr(skb);
811	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
812	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
813		kfree_skb(skb);
814		return;
815	}
816
817	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
818	tb = fib_get_table(frn->tb_id_in);
819
820	nl_fib_lookup(frn, tb);
821
822	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
823	NETLINK_CB(skb).pid = 0;         /* from kernel */
824	NETLINK_CB(skb).dst_group = 0;  /* unicast */
825	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
826}
827
828static void nl_fib_lookup_init(void)
829{
830      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
831}
832
/*
 * Stop IP routing through @dev: sync the FIB down for the device (flushing
 * the tables if fib_sync_down() reports anything), flush the routing cache
 * and call arp_ifdown().  @force is passed through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
840
841static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
842{
843	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
844
845	switch (event) {
846	case NETDEV_UP:
847		fib_add_ifaddr(ifa);
848#ifdef CONFIG_IP_ROUTE_MULTIPATH
849		fib_sync_up(ifa->ifa_dev->dev);
850#endif
851		rt_cache_flush(-1);
852		break;
853	case NETDEV_DOWN:
854		fib_del_ifaddr(ifa);
855		if (ifa->ifa_dev->ifa_list == NULL) {
856			/* Last address was deleted from this interface.
857			   Disable IP.
858			 */
859			fib_disable_ip(ifa->ifa_dev->dev, 1);
860		} else {
861			rt_cache_flush(-1);
862		}
863		break;
864	}
865	return NOTIFY_DONE;
866}
867
868static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
869{
870	struct net_device *dev = ptr;
871	struct in_device *in_dev = __in_dev_get_rtnl(dev);
872
873	if (event == NETDEV_UNREGISTER) {
874		fib_disable_ip(dev, 2);
875		return NOTIFY_DONE;
876	}
877
878	if (!in_dev)
879		return NOTIFY_DONE;
880
881	switch (event) {
882	case NETDEV_UP:
883		for_ifa(in_dev) {
884			fib_add_ifaddr(ifa);
885		} endfor_ifa(in_dev);
886#ifdef CONFIG_IP_ROUTE_MULTIPATH
887		fib_sync_up(dev);
888#endif
889		rt_cache_flush(-1);
890		break;
891	case NETDEV_DOWN:
892		fib_disable_ip(dev, 0);
893		break;
894	case NETDEV_CHANGEMTU:
895	case NETDEV_CHANGE:
896		rt_cache_flush(0);
897		break;
898	}
899	return NOTIFY_DONE;
900}
901
902static struct notifier_block fib_inetaddr_notifier = {
903	.notifier_call =fib_inetaddr_event,
904};
905
906static struct notifier_block fib_netdev_notifier = {
907	.notifier_call =fib_netdev_event,
908};
909
910void __init ip_fib_init(void)
911{
912	unsigned int i;
913
914	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
915		INIT_HLIST_HEAD(&fib_table_hash[i]);
916#ifndef CONFIG_IP_MULTIPLE_TABLES
917	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
918	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
919	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
920	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
921#else
922	fib4_rules_init();
923#endif
924
925	register_netdevice_notifier(&fib_netdev_notifier);
926	register_inetaddr_notifier(&fib_inetaddr_notifier);
927	nl_fib_lookup_init();
928
929	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
930	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
931	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
932}
933
934EXPORT_SYMBOL(inet_addr_type);
935EXPORT_SYMBOL(ip_dev_find);
936