fib_frontend.c revision b854272b3c732316676e9128f7b9e6f1e1ff88b0
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug-level printk() shorthand. */
#define FFprint(fmt...) printk(KERN_DEBUG fmt)

/* Kernel netlink socket; assigned by nl_fib_lookup_init(). */
static struct sock *fibnl;
53
54#ifndef CONFIG_IP_MULTIPLE_TABLES
55
56struct fib_table *ip_fib_local_table;
57struct fib_table *ip_fib_main_table;
58
59#define FIB_TABLE_HASHSZ 1
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61
62static void __init fib4_rules_init(void)
63{
64	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
65	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
66	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
67	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
68}
69#else
70
71#define FIB_TABLE_HASHSZ 256
72static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
73
74struct fib_table *fib_new_table(u32 id)
75{
76	struct fib_table *tb;
77	unsigned int h;
78
79	if (id == 0)
80		id = RT_TABLE_MAIN;
81	tb = fib_get_table(id);
82	if (tb)
83		return tb;
84	tb = fib_hash_init(id);
85	if (!tb)
86		return NULL;
87	h = id & (FIB_TABLE_HASHSZ - 1);
88	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
89	return tb;
90}
91
92struct fib_table *fib_get_table(u32 id)
93{
94	struct fib_table *tb;
95	struct hlist_node *node;
96	unsigned int h;
97
98	if (id == 0)
99		id = RT_TABLE_MAIN;
100	h = id & (FIB_TABLE_HASHSZ - 1);
101	rcu_read_lock();
102	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
103		if (tb->tb_id == id) {
104			rcu_read_unlock();
105			return tb;
106		}
107	}
108	rcu_read_unlock();
109	return NULL;
110}
111#endif /* CONFIG_IP_MULTIPLE_TABLES */
112
113static void fib_flush(void)
114{
115	int flushed = 0;
116	struct fib_table *tb;
117	struct hlist_node *node;
118	unsigned int h;
119
120	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
121		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
122			flushed += tb->tb_flush(tb);
123	}
124
125	if (flushed)
126		rt_cache_flush(-1);
127}
128
129/*
130 *	Find the first device with a given source address.
131 */
132
133struct net_device * ip_dev_find(__be32 addr)
134{
135	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
136	struct fib_result res;
137	struct net_device *dev = NULL;
138	struct fib_table *local_table;
139
140#ifdef CONFIG_IP_MULTIPLE_TABLES
141	res.r = NULL;
142#endif
143
144	local_table = fib_get_table(RT_TABLE_LOCAL);
145	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
146		return NULL;
147	if (res.type != RTN_LOCAL)
148		goto out;
149	dev = FIB_RES_DEV(res);
150
151	if (dev)
152		dev_hold(dev);
153out:
154	fib_res_put(&res);
155	return dev;
156}
157
158unsigned inet_addr_type(__be32 addr)
159{
160	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
161	struct fib_result	res;
162	unsigned ret = RTN_BROADCAST;
163	struct fib_table *local_table;
164
165	if (ZERONET(addr) || BADCLASS(addr))
166		return RTN_BROADCAST;
167	if (MULTICAST(addr))
168		return RTN_MULTICAST;
169
170#ifdef CONFIG_IP_MULTIPLE_TABLES
171	res.r = NULL;
172#endif
173
174	local_table = fib_get_table(RT_TABLE_LOCAL);
175	if (local_table) {
176		ret = RTN_UNICAST;
177		if (!local_table->tb_lookup(local_table, &fl, &res)) {
178			ret = res.type;
179			fib_res_put(&res);
180		}
181	}
182	return ret;
183}
184
185/* Given (packet source, input interface) and optional (dst, oif, tos):
186   - (main) check, that source is valid i.e. not broadcast or our local
187     address.
188   - figure out what "logical" interface this packet arrived
189     and calculate "specific destination" address.
190   - check, that packet arrived from expected physical interface.
191 */
192
193int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
194			struct net_device *dev, __be32 *spec_dst, u32 *itag)
195{
196	struct in_device *in_dev;
197	struct flowi fl = { .nl_u = { .ip4_u =
198				      { .daddr = src,
199					.saddr = dst,
200					.tos = tos } },
201			    .iif = oif };
202	struct fib_result res;
203	int no_addr, rpf;
204	int ret;
205
206	no_addr = rpf = 0;
207	rcu_read_lock();
208	in_dev = __in_dev_get_rcu(dev);
209	if (in_dev) {
210		no_addr = in_dev->ifa_list == NULL;
211		rpf = IN_DEV_RPFILTER(in_dev);
212	}
213	rcu_read_unlock();
214
215	if (in_dev == NULL)
216		goto e_inval;
217
218	if (fib_lookup(&fl, &res))
219		goto last_resort;
220	if (res.type != RTN_UNICAST)
221		goto e_inval_res;
222	*spec_dst = FIB_RES_PREFSRC(res);
223	fib_combine_itag(itag, &res);
224#ifdef CONFIG_IP_ROUTE_MULTIPATH
225	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
226#else
227	if (FIB_RES_DEV(res) == dev)
228#endif
229	{
230		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
231		fib_res_put(&res);
232		return ret;
233	}
234	fib_res_put(&res);
235	if (no_addr)
236		goto last_resort;
237	if (rpf)
238		goto e_inval;
239	fl.oif = dev->ifindex;
240
241	ret = 0;
242	if (fib_lookup(&fl, &res) == 0) {
243		if (res.type == RTN_UNICAST) {
244			*spec_dst = FIB_RES_PREFSRC(res);
245			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
246		}
247		fib_res_put(&res);
248	}
249	return ret;
250
251last_resort:
252	if (rpf)
253		goto e_inval;
254	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
255	*itag = 0;
256	return 0;
257
258e_inval_res:
259	fib_res_put(&res);
260e_inval:
261	return -EINVAL;
262}
263
264static inline __be32 sk_extract_addr(struct sockaddr *addr)
265{
266	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
267}
268
269static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
270{
271	struct nlattr *nla;
272
273	nla = (struct nlattr *) ((char *) mx + len);
274	nla->nla_type = type;
275	nla->nla_len = nla_attr_size(4);
276	*(u32 *) nla_data(nla) = value;
277
278	return len + nla_total_size(4);
279}
280
281static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
282				 struct fib_config *cfg)
283{
284	__be32 addr;
285	int plen;
286
287	memset(cfg, 0, sizeof(*cfg));
288
289	if (rt->rt_dst.sa_family != AF_INET)
290		return -EAFNOSUPPORT;
291
292	/*
293	 * Check mask for validity:
294	 * a) it must be contiguous.
295	 * b) destination must have all host bits clear.
296	 * c) if application forgot to set correct family (AF_INET),
297	 *    reject request unless it is absolutely clear i.e.
298	 *    both family and mask are zero.
299	 */
300	plen = 32;
301	addr = sk_extract_addr(&rt->rt_dst);
302	if (!(rt->rt_flags & RTF_HOST)) {
303		__be32 mask = sk_extract_addr(&rt->rt_genmask);
304
305		if (rt->rt_genmask.sa_family != AF_INET) {
306			if (mask || rt->rt_genmask.sa_family)
307				return -EAFNOSUPPORT;
308		}
309
310		if (bad_mask(mask, addr))
311			return -EINVAL;
312
313		plen = inet_mask_len(mask);
314	}
315
316	cfg->fc_dst_len = plen;
317	cfg->fc_dst = addr;
318
319	if (cmd != SIOCDELRT) {
320		cfg->fc_nlflags = NLM_F_CREATE;
321		cfg->fc_protocol = RTPROT_BOOT;
322	}
323
324	if (rt->rt_metric)
325		cfg->fc_priority = rt->rt_metric - 1;
326
327	if (rt->rt_flags & RTF_REJECT) {
328		cfg->fc_scope = RT_SCOPE_HOST;
329		cfg->fc_type = RTN_UNREACHABLE;
330		return 0;
331	}
332
333	cfg->fc_scope = RT_SCOPE_NOWHERE;
334	cfg->fc_type = RTN_UNICAST;
335
336	if (rt->rt_dev) {
337		char *colon;
338		struct net_device *dev;
339		char devname[IFNAMSIZ];
340
341		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
342			return -EFAULT;
343
344		devname[IFNAMSIZ-1] = 0;
345		colon = strchr(devname, ':');
346		if (colon)
347			*colon = 0;
348		dev = __dev_get_by_name(&init_net, devname);
349		if (!dev)
350			return -ENODEV;
351		cfg->fc_oif = dev->ifindex;
352		if (colon) {
353			struct in_ifaddr *ifa;
354			struct in_device *in_dev = __in_dev_get_rtnl(dev);
355			if (!in_dev)
356				return -ENODEV;
357			*colon = ':';
358			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
359				if (strcmp(ifa->ifa_label, devname) == 0)
360					break;
361			if (ifa == NULL)
362				return -ENODEV;
363			cfg->fc_prefsrc = ifa->ifa_local;
364		}
365	}
366
367	addr = sk_extract_addr(&rt->rt_gateway);
368	if (rt->rt_gateway.sa_family == AF_INET && addr) {
369		cfg->fc_gw = addr;
370		if (rt->rt_flags & RTF_GATEWAY &&
371		    inet_addr_type(addr) == RTN_UNICAST)
372			cfg->fc_scope = RT_SCOPE_UNIVERSE;
373	}
374
375	if (cmd == SIOCDELRT)
376		return 0;
377
378	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
379		return -EINVAL;
380
381	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
382		cfg->fc_scope = RT_SCOPE_LINK;
383
384	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
385		struct nlattr *mx;
386		int len = 0;
387
388		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
389		if (mx == NULL)
390			return -ENOMEM;
391
392		if (rt->rt_flags & RTF_MTU)
393			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
394
395		if (rt->rt_flags & RTF_WINDOW)
396			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
397
398		if (rt->rt_flags & RTF_IRTT)
399			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
400
401		cfg->fc_mx = mx;
402		cfg->fc_mx_len = len;
403	}
404
405	return 0;
406}
407
408/*
409 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
410 */
411
412int ip_rt_ioctl(unsigned int cmd, void __user *arg)
413{
414	struct fib_config cfg;
415	struct rtentry rt;
416	int err;
417
418	switch (cmd) {
419	case SIOCADDRT:		/* Add a route */
420	case SIOCDELRT:		/* Delete a route */
421		if (!capable(CAP_NET_ADMIN))
422			return -EPERM;
423
424		if (copy_from_user(&rt, arg, sizeof(rt)))
425			return -EFAULT;
426
427		rtnl_lock();
428		err = rtentry_to_fib_config(cmd, &rt, &cfg);
429		if (err == 0) {
430			struct fib_table *tb;
431
432			if (cmd == SIOCDELRT) {
433				tb = fib_get_table(cfg.fc_table);
434				if (tb)
435					err = tb->tb_delete(tb, &cfg);
436				else
437					err = -ESRCH;
438			} else {
439				tb = fib_new_table(cfg.fc_table);
440				if (tb)
441					err = tb->tb_insert(tb, &cfg);
442				else
443					err = -ENOBUFS;
444			}
445
446			/* allocated by rtentry_to_fib_config() */
447			kfree(cfg.fc_mx);
448		}
449		rtnl_unlock();
450		return err;
451	}
452	return -EINVAL;
453}
454
455const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
456	[RTA_DST]		= { .type = NLA_U32 },
457	[RTA_SRC]		= { .type = NLA_U32 },
458	[RTA_IIF]		= { .type = NLA_U32 },
459	[RTA_OIF]		= { .type = NLA_U32 },
460	[RTA_GATEWAY]		= { .type = NLA_U32 },
461	[RTA_PRIORITY]		= { .type = NLA_U32 },
462	[RTA_PREFSRC]		= { .type = NLA_U32 },
463	[RTA_METRICS]		= { .type = NLA_NESTED },
464	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
465	[RTA_PROTOINFO]		= { .type = NLA_U32 },
466	[RTA_FLOW]		= { .type = NLA_U32 },
467};
468
469static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
470			     struct fib_config *cfg)
471{
472	struct nlattr *attr;
473	int err, remaining;
474	struct rtmsg *rtm;
475
476	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
477	if (err < 0)
478		goto errout;
479
480	memset(cfg, 0, sizeof(*cfg));
481
482	rtm = nlmsg_data(nlh);
483	cfg->fc_dst_len = rtm->rtm_dst_len;
484	cfg->fc_tos = rtm->rtm_tos;
485	cfg->fc_table = rtm->rtm_table;
486	cfg->fc_protocol = rtm->rtm_protocol;
487	cfg->fc_scope = rtm->rtm_scope;
488	cfg->fc_type = rtm->rtm_type;
489	cfg->fc_flags = rtm->rtm_flags;
490	cfg->fc_nlflags = nlh->nlmsg_flags;
491
492	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
493	cfg->fc_nlinfo.nlh = nlh;
494
495	if (cfg->fc_type > RTN_MAX) {
496		err = -EINVAL;
497		goto errout;
498	}
499
500	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
501		switch (nla_type(attr)) {
502		case RTA_DST:
503			cfg->fc_dst = nla_get_be32(attr);
504			break;
505		case RTA_OIF:
506			cfg->fc_oif = nla_get_u32(attr);
507			break;
508		case RTA_GATEWAY:
509			cfg->fc_gw = nla_get_be32(attr);
510			break;
511		case RTA_PRIORITY:
512			cfg->fc_priority = nla_get_u32(attr);
513			break;
514		case RTA_PREFSRC:
515			cfg->fc_prefsrc = nla_get_be32(attr);
516			break;
517		case RTA_METRICS:
518			cfg->fc_mx = nla_data(attr);
519			cfg->fc_mx_len = nla_len(attr);
520			break;
521		case RTA_MULTIPATH:
522			cfg->fc_mp = nla_data(attr);
523			cfg->fc_mp_len = nla_len(attr);
524			break;
525		case RTA_FLOW:
526			cfg->fc_flow = nla_get_u32(attr);
527			break;
528		case RTA_TABLE:
529			cfg->fc_table = nla_get_u32(attr);
530			break;
531		}
532	}
533
534	return 0;
535errout:
536	return err;
537}
538
539static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
540{
541	struct net *net = skb->sk->sk_net;
542	struct fib_config cfg;
543	struct fib_table *tb;
544	int err;
545
546	if (net != &init_net)
547		return -EINVAL;
548
549	err = rtm_to_fib_config(skb, nlh, &cfg);
550	if (err < 0)
551		goto errout;
552
553	tb = fib_get_table(cfg.fc_table);
554	if (tb == NULL) {
555		err = -ESRCH;
556		goto errout;
557	}
558
559	err = tb->tb_delete(tb, &cfg);
560errout:
561	return err;
562}
563
564static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
565{
566	struct net *net = skb->sk->sk_net;
567	struct fib_config cfg;
568	struct fib_table *tb;
569	int err;
570
571	if (net != &init_net)
572		return -EINVAL;
573
574	err = rtm_to_fib_config(skb, nlh, &cfg);
575	if (err < 0)
576		goto errout;
577
578	tb = fib_new_table(cfg.fc_table);
579	if (tb == NULL) {
580		err = -ENOBUFS;
581		goto errout;
582	}
583
584	err = tb->tb_insert(tb, &cfg);
585errout:
586	return err;
587}
588
589static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
590{
591	struct net *net = skb->sk->sk_net;
592	unsigned int h, s_h;
593	unsigned int e = 0, s_e;
594	struct fib_table *tb;
595	struct hlist_node *node;
596	int dumped = 0;
597
598	if (net != &init_net)
599		return 0;
600
601	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
602	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
603		return ip_rt_dump(skb, cb);
604
605	s_h = cb->args[0];
606	s_e = cb->args[1];
607
608	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
609		e = 0;
610		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
611			if (e < s_e)
612				goto next;
613			if (dumped)
614				memset(&cb->args[2], 0, sizeof(cb->args) -
615						 2 * sizeof(cb->args[0]));
616			if (tb->tb_dump(tb, skb, cb) < 0)
617				goto out;
618			dumped = 1;
619next:
620			e++;
621		}
622	}
623out:
624	cb->args[1] = e;
625	cb->args[0] = h;
626
627	return skb->len;
628}
629
630/* Prepare and feed intra-kernel routing request.
631   Really, it should be netlink message, but :-( netlink
632   can be not configured, so that we feed it directly
633   to fib engine. It is legal, because all events occur
634   only when netlink is already locked.
635 */
636
637static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
638{
639	struct fib_table *tb;
640	struct fib_config cfg = {
641		.fc_protocol = RTPROT_KERNEL,
642		.fc_type = type,
643		.fc_dst = dst,
644		.fc_dst_len = dst_len,
645		.fc_prefsrc = ifa->ifa_local,
646		.fc_oif = ifa->ifa_dev->dev->ifindex,
647		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
648	};
649
650	if (type == RTN_UNICAST)
651		tb = fib_new_table(RT_TABLE_MAIN);
652	else
653		tb = fib_new_table(RT_TABLE_LOCAL);
654
655	if (tb == NULL)
656		return;
657
658	cfg.fc_table = tb->tb_id;
659
660	if (type != RTN_LOCAL)
661		cfg.fc_scope = RT_SCOPE_LINK;
662	else
663		cfg.fc_scope = RT_SCOPE_HOST;
664
665	if (cmd == RTM_NEWROUTE)
666		tb->tb_insert(tb, &cfg);
667	else
668		tb->tb_delete(tb, &cfg);
669}
670
671void fib_add_ifaddr(struct in_ifaddr *ifa)
672{
673	struct in_device *in_dev = ifa->ifa_dev;
674	struct net_device *dev = in_dev->dev;
675	struct in_ifaddr *prim = ifa;
676	__be32 mask = ifa->ifa_mask;
677	__be32 addr = ifa->ifa_local;
678	__be32 prefix = ifa->ifa_address&mask;
679
680	if (ifa->ifa_flags&IFA_F_SECONDARY) {
681		prim = inet_ifa_byprefix(in_dev, prefix, mask);
682		if (prim == NULL) {
683			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
684			return;
685		}
686	}
687
688	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
689
690	if (!(dev->flags&IFF_UP))
691		return;
692
693	/* Add broadcast address, if it is explicitly assigned. */
694	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
695		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
696
697	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
698	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
699		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
700			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
701
702		/* Add network specific broadcasts, when it takes a sense */
703		if (ifa->ifa_prefixlen < 31) {
704			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
705			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
706		}
707	}
708}
709
710static void fib_del_ifaddr(struct in_ifaddr *ifa)
711{
712	struct in_device *in_dev = ifa->ifa_dev;
713	struct net_device *dev = in_dev->dev;
714	struct in_ifaddr *ifa1;
715	struct in_ifaddr *prim = ifa;
716	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
717	__be32 any = ifa->ifa_address&ifa->ifa_mask;
718#define LOCAL_OK	1
719#define BRD_OK		2
720#define BRD0_OK		4
721#define BRD1_OK		8
722	unsigned ok = 0;
723
724	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
725		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
726			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
727	else {
728		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
729		if (prim == NULL) {
730			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
731			return;
732		}
733	}
734
735	/* Deletion is more complicated than add.
736	   We should take care of not to delete too much :-)
737
738	   Scan address list to be sure that addresses are really gone.
739	 */
740
741	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
742		if (ifa->ifa_local == ifa1->ifa_local)
743			ok |= LOCAL_OK;
744		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
745			ok |= BRD_OK;
746		if (brd == ifa1->ifa_broadcast)
747			ok |= BRD1_OK;
748		if (any == ifa1->ifa_broadcast)
749			ok |= BRD0_OK;
750	}
751
752	if (!(ok&BRD_OK))
753		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
754	if (!(ok&BRD1_OK))
755		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
756	if (!(ok&BRD0_OK))
757		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
758	if (!(ok&LOCAL_OK)) {
759		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
760
761		/* Check, that this local address finally disappeared. */
762		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
763			/* And the last, but not the least thing.
764			   We must flush stray FIB entries.
765
766			   First of all, we scan fib_info list searching
767			   for stray nexthop entries, then ignite fib_flush.
768			*/
769			if (fib_sync_down(ifa->ifa_local, NULL, 0))
770				fib_flush();
771		}
772	}
773#undef LOCAL_OK
774#undef BRD_OK
775#undef BRD0_OK
776#undef BRD1_OK
777}
778
779static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
780{
781
782	struct fib_result       res;
783	struct flowi            fl = { .mark = frn->fl_mark,
784				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
785							    .tos = frn->fl_tos,
786							    .scope = frn->fl_scope } } };
787
788#ifdef CONFIG_IP_MULTIPLE_TABLES
789	res.r = NULL;
790#endif
791
792	frn->err = -ENOENT;
793	if (tb) {
794		local_bh_disable();
795
796		frn->tb_id = tb->tb_id;
797		frn->err = tb->tb_lookup(tb, &fl, &res);
798
799		if (!frn->err) {
800			frn->prefixlen = res.prefixlen;
801			frn->nh_sel = res.nh_sel;
802			frn->type = res.type;
803			frn->scope = res.scope;
804			fib_res_put(&res);
805		}
806		local_bh_enable();
807	}
808}
809
810static void nl_fib_input(struct sk_buff *skb)
811{
812	struct fib_result_nl *frn;
813	struct nlmsghdr *nlh;
814	struct fib_table *tb;
815	u32 pid;
816
817	nlh = nlmsg_hdr(skb);
818	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
819	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
820		return;
821
822	skb = skb_clone(skb, GFP_KERNEL);
823	if (skb == NULL)
824		return;
825	nlh = nlmsg_hdr(skb);
826
827	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
828	tb = fib_get_table(frn->tb_id_in);
829
830	nl_fib_lookup(frn, tb);
831
832	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
833	NETLINK_CB(skb).pid = 0;         /* from kernel */
834	NETLINK_CB(skb).dst_group = 0;  /* unicast */
835	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
836}
837
838static void nl_fib_lookup_init(void)
839{
840	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
841				      nl_fib_input, NULL, THIS_MODULE);
842}
843
/*
 * Take a device out of IPv4 routing: sync the FIB down for the device
 * (flushing the tables if that reports something), flush the routing
 * cache and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
851
852static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
853{
854	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
855
856	switch (event) {
857	case NETDEV_UP:
858		fib_add_ifaddr(ifa);
859#ifdef CONFIG_IP_ROUTE_MULTIPATH
860		fib_sync_up(ifa->ifa_dev->dev);
861#endif
862		rt_cache_flush(-1);
863		break;
864	case NETDEV_DOWN:
865		fib_del_ifaddr(ifa);
866		if (ifa->ifa_dev->ifa_list == NULL) {
867			/* Last address was deleted from this interface.
868			   Disable IP.
869			 */
870			fib_disable_ip(ifa->ifa_dev->dev, 1);
871		} else {
872			rt_cache_flush(-1);
873		}
874		break;
875	}
876	return NOTIFY_DONE;
877}
878
879static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
880{
881	struct net_device *dev = ptr;
882	struct in_device *in_dev = __in_dev_get_rtnl(dev);
883
884	if (dev->nd_net != &init_net)
885		return NOTIFY_DONE;
886
887	if (event == NETDEV_UNREGISTER) {
888		fib_disable_ip(dev, 2);
889		return NOTIFY_DONE;
890	}
891
892	if (!in_dev)
893		return NOTIFY_DONE;
894
895	switch (event) {
896	case NETDEV_UP:
897		for_ifa(in_dev) {
898			fib_add_ifaddr(ifa);
899		} endfor_ifa(in_dev);
900#ifdef CONFIG_IP_ROUTE_MULTIPATH
901		fib_sync_up(dev);
902#endif
903		rt_cache_flush(-1);
904		break;
905	case NETDEV_DOWN:
906		fib_disable_ip(dev, 0);
907		break;
908	case NETDEV_CHANGEMTU:
909	case NETDEV_CHANGE:
910		rt_cache_flush(0);
911		break;
912	}
913	return NOTIFY_DONE;
914}
915
916static struct notifier_block fib_inetaddr_notifier = {
917	.notifier_call =fib_inetaddr_event,
918};
919
920static struct notifier_block fib_netdev_notifier = {
921	.notifier_call =fib_netdev_event,
922};
923
924void __init ip_fib_init(void)
925{
926	unsigned int i;
927
928	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
929		INIT_HLIST_HEAD(&fib_table_hash[i]);
930
931	fib4_rules_init();
932
933	register_netdevice_notifier(&fib_netdev_notifier);
934	register_inetaddr_notifier(&fib_inetaddr_notifier);
935	nl_fib_lookup_init();
936
937	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
938	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
939	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
940}
941
942EXPORT_SYMBOL(inet_addr_type);
943EXPORT_SYMBOL(ip_dev_find);
944