fib_frontend.c revision c3e9a353d8fc64a82ab11a07e21902e25e1e96d1
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug print helper: passes its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)

/*
 * Kernel-side netlink socket for NETLINK_FIB_LOOKUP; assigned in
 * nl_fib_lookup_init() and used by nl_fib_input() to send replies.
 */
static struct sock *fibnl;
53
54#ifndef CONFIG_IP_MULTIPLE_TABLES
55
56struct fib_table *ip_fib_local_table;
57struct fib_table *ip_fib_main_table;
58
59#define FIB_TABLE_HASHSZ 1
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61
62static void __init fib4_rules_init(void)
63{
64	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
65	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
66	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
67	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
68}
69#else
70
71#define FIB_TABLE_HASHSZ 256
72static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
73
74struct fib_table *fib_new_table(u32 id)
75{
76	struct fib_table *tb;
77	unsigned int h;
78
79	if (id == 0)
80		id = RT_TABLE_MAIN;
81	tb = fib_get_table(id);
82	if (tb)
83		return tb;
84	tb = fib_hash_init(id);
85	if (!tb)
86		return NULL;
87	h = id & (FIB_TABLE_HASHSZ - 1);
88	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
89	return tb;
90}
91
92struct fib_table *fib_get_table(u32 id)
93{
94	struct fib_table *tb;
95	struct hlist_node *node;
96	unsigned int h;
97
98	if (id == 0)
99		id = RT_TABLE_MAIN;
100	h = id & (FIB_TABLE_HASHSZ - 1);
101	rcu_read_lock();
102	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
103		if (tb->tb_id == id) {
104			rcu_read_unlock();
105			return tb;
106		}
107	}
108	rcu_read_unlock();
109	return NULL;
110}
111#endif /* CONFIG_IP_MULTIPLE_TABLES */
112
113static void fib_flush(void)
114{
115	int flushed = 0;
116	struct fib_table *tb;
117	struct hlist_node *node;
118	unsigned int h;
119
120	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
121		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
122			flushed += tb->tb_flush(tb);
123	}
124
125	if (flushed)
126		rt_cache_flush(-1);
127}
128
129/*
130 *	Find the first device with a given source address.
131 */
132
133struct net_device * ip_dev_find(__be32 addr)
134{
135	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
136	struct fib_result res;
137	struct net_device *dev = NULL;
138	struct fib_table *local_table;
139
140#ifdef CONFIG_IP_MULTIPLE_TABLES
141	res.r = NULL;
142#endif
143
144	local_table = fib_get_table(RT_TABLE_LOCAL);
145	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
146		return NULL;
147	if (res.type != RTN_LOCAL)
148		goto out;
149	dev = FIB_RES_DEV(res);
150
151	if (dev)
152		dev_hold(dev);
153out:
154	fib_res_put(&res);
155	return dev;
156}
157
158unsigned inet_addr_type(__be32 addr)
159{
160	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
161	struct fib_result	res;
162	unsigned ret = RTN_BROADCAST;
163	struct fib_table *local_table;
164
165	if (ZERONET(addr) || BADCLASS(addr))
166		return RTN_BROADCAST;
167	if (MULTICAST(addr))
168		return RTN_MULTICAST;
169
170#ifdef CONFIG_IP_MULTIPLE_TABLES
171	res.r = NULL;
172#endif
173
174	local_table = fib_get_table(RT_TABLE_LOCAL);
175	if (local_table) {
176		ret = RTN_UNICAST;
177		if (!local_table->tb_lookup(local_table, &fl, &res)) {
178			ret = res.type;
179			fib_res_put(&res);
180		}
181	}
182	return ret;
183}
184
185/* Given (packet source, input interface) and optional (dst, oif, tos):
186   - (main) check, that source is valid i.e. not broadcast or our local
187     address.
188   - figure out what "logical" interface this packet arrived
189     and calculate "specific destination" address.
190   - check, that packet arrived from expected physical interface.
191 */
192
193int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
194			struct net_device *dev, __be32 *spec_dst, u32 *itag)
195{
196	struct in_device *in_dev;
197	struct flowi fl = { .nl_u = { .ip4_u =
198				      { .daddr = src,
199					.saddr = dst,
200					.tos = tos } },
201			    .iif = oif };
202	struct fib_result res;
203	int no_addr, rpf;
204	int ret;
205
206	no_addr = rpf = 0;
207	rcu_read_lock();
208	in_dev = __in_dev_get_rcu(dev);
209	if (in_dev) {
210		no_addr = in_dev->ifa_list == NULL;
211		rpf = IN_DEV_RPFILTER(in_dev);
212	}
213	rcu_read_unlock();
214
215	if (in_dev == NULL)
216		goto e_inval;
217
218	if (fib_lookup(&fl, &res))
219		goto last_resort;
220	if (res.type != RTN_UNICAST)
221		goto e_inval_res;
222	*spec_dst = FIB_RES_PREFSRC(res);
223	fib_combine_itag(itag, &res);
224#ifdef CONFIG_IP_ROUTE_MULTIPATH
225	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
226#else
227	if (FIB_RES_DEV(res) == dev)
228#endif
229	{
230		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
231		fib_res_put(&res);
232		return ret;
233	}
234	fib_res_put(&res);
235	if (no_addr)
236		goto last_resort;
237	if (rpf)
238		goto e_inval;
239	fl.oif = dev->ifindex;
240
241	ret = 0;
242	if (fib_lookup(&fl, &res) == 0) {
243		if (res.type == RTN_UNICAST) {
244			*spec_dst = FIB_RES_PREFSRC(res);
245			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
246		}
247		fib_res_put(&res);
248	}
249	return ret;
250
251last_resort:
252	if (rpf)
253		goto e_inval;
254	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
255	*itag = 0;
256	return 0;
257
258e_inval_res:
259	fib_res_put(&res);
260e_inval:
261	return -EINVAL;
262}
263
264static inline __be32 sk_extract_addr(struct sockaddr *addr)
265{
266	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
267}
268
269static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
270{
271	struct nlattr *nla;
272
273	nla = (struct nlattr *) ((char *) mx + len);
274	nla->nla_type = type;
275	nla->nla_len = nla_attr_size(4);
276	*(u32 *) nla_data(nla) = value;
277
278	return len + nla_total_size(4);
279}
280
281static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
282				 struct fib_config *cfg)
283{
284	__be32 addr;
285	int plen;
286
287	memset(cfg, 0, sizeof(*cfg));
288
289	if (rt->rt_dst.sa_family != AF_INET)
290		return -EAFNOSUPPORT;
291
292	/*
293	 * Check mask for validity:
294	 * a) it must be contiguous.
295	 * b) destination must have all host bits clear.
296	 * c) if application forgot to set correct family (AF_INET),
297	 *    reject request unless it is absolutely clear i.e.
298	 *    both family and mask are zero.
299	 */
300	plen = 32;
301	addr = sk_extract_addr(&rt->rt_dst);
302	if (!(rt->rt_flags & RTF_HOST)) {
303		__be32 mask = sk_extract_addr(&rt->rt_genmask);
304
305		if (rt->rt_genmask.sa_family != AF_INET) {
306			if (mask || rt->rt_genmask.sa_family)
307				return -EAFNOSUPPORT;
308		}
309
310		if (bad_mask(mask, addr))
311			return -EINVAL;
312
313		plen = inet_mask_len(mask);
314	}
315
316	cfg->fc_dst_len = plen;
317	cfg->fc_dst = addr;
318
319	if (cmd != SIOCDELRT) {
320		cfg->fc_nlflags = NLM_F_CREATE;
321		cfg->fc_protocol = RTPROT_BOOT;
322	}
323
324	if (rt->rt_metric)
325		cfg->fc_priority = rt->rt_metric - 1;
326
327	if (rt->rt_flags & RTF_REJECT) {
328		cfg->fc_scope = RT_SCOPE_HOST;
329		cfg->fc_type = RTN_UNREACHABLE;
330		return 0;
331	}
332
333	cfg->fc_scope = RT_SCOPE_NOWHERE;
334	cfg->fc_type = RTN_UNICAST;
335
336	if (rt->rt_dev) {
337		char *colon;
338		struct net_device *dev;
339		char devname[IFNAMSIZ];
340
341		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
342			return -EFAULT;
343
344		devname[IFNAMSIZ-1] = 0;
345		colon = strchr(devname, ':');
346		if (colon)
347			*colon = 0;
348		dev = __dev_get_by_name(&init_net, devname);
349		if (!dev)
350			return -ENODEV;
351		cfg->fc_oif = dev->ifindex;
352		if (colon) {
353			struct in_ifaddr *ifa;
354			struct in_device *in_dev = __in_dev_get_rtnl(dev);
355			if (!in_dev)
356				return -ENODEV;
357			*colon = ':';
358			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
359				if (strcmp(ifa->ifa_label, devname) == 0)
360					break;
361			if (ifa == NULL)
362				return -ENODEV;
363			cfg->fc_prefsrc = ifa->ifa_local;
364		}
365	}
366
367	addr = sk_extract_addr(&rt->rt_gateway);
368	if (rt->rt_gateway.sa_family == AF_INET && addr) {
369		cfg->fc_gw = addr;
370		if (rt->rt_flags & RTF_GATEWAY &&
371		    inet_addr_type(addr) == RTN_UNICAST)
372			cfg->fc_scope = RT_SCOPE_UNIVERSE;
373	}
374
375	if (cmd == SIOCDELRT)
376		return 0;
377
378	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
379		return -EINVAL;
380
381	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
382		cfg->fc_scope = RT_SCOPE_LINK;
383
384	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
385		struct nlattr *mx;
386		int len = 0;
387
388		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
389		if (mx == NULL)
390			return -ENOMEM;
391
392		if (rt->rt_flags & RTF_MTU)
393			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
394
395		if (rt->rt_flags & RTF_WINDOW)
396			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
397
398		if (rt->rt_flags & RTF_IRTT)
399			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
400
401		cfg->fc_mx = mx;
402		cfg->fc_mx_len = len;
403	}
404
405	return 0;
406}
407
408/*
409 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
410 */
411
412int ip_rt_ioctl(unsigned int cmd, void __user *arg)
413{
414	struct fib_config cfg;
415	struct rtentry rt;
416	int err;
417
418	switch (cmd) {
419	case SIOCADDRT:		/* Add a route */
420	case SIOCDELRT:		/* Delete a route */
421		if (!capable(CAP_NET_ADMIN))
422			return -EPERM;
423
424		if (copy_from_user(&rt, arg, sizeof(rt)))
425			return -EFAULT;
426
427		rtnl_lock();
428		err = rtentry_to_fib_config(cmd, &rt, &cfg);
429		if (err == 0) {
430			struct fib_table *tb;
431
432			if (cmd == SIOCDELRT) {
433				tb = fib_get_table(cfg.fc_table);
434				if (tb)
435					err = tb->tb_delete(tb, &cfg);
436				else
437					err = -ESRCH;
438			} else {
439				tb = fib_new_table(cfg.fc_table);
440				if (tb)
441					err = tb->tb_insert(tb, &cfg);
442				else
443					err = -ENOBUFS;
444			}
445
446			/* allocated by rtentry_to_fib_config() */
447			kfree(cfg.fc_mx);
448		}
449		rtnl_unlock();
450		return err;
451	}
452	return -EINVAL;
453}
454
455const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
456	[RTA_DST]		= { .type = NLA_U32 },
457	[RTA_SRC]		= { .type = NLA_U32 },
458	[RTA_IIF]		= { .type = NLA_U32 },
459	[RTA_OIF]		= { .type = NLA_U32 },
460	[RTA_GATEWAY]		= { .type = NLA_U32 },
461	[RTA_PRIORITY]		= { .type = NLA_U32 },
462	[RTA_PREFSRC]		= { .type = NLA_U32 },
463	[RTA_METRICS]		= { .type = NLA_NESTED },
464	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
465	[RTA_PROTOINFO]		= { .type = NLA_U32 },
466	[RTA_FLOW]		= { .type = NLA_U32 },
467};
468
469static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
470			     struct fib_config *cfg)
471{
472	struct nlattr *attr;
473	int err, remaining;
474	struct rtmsg *rtm;
475
476	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
477	if (err < 0)
478		goto errout;
479
480	memset(cfg, 0, sizeof(*cfg));
481
482	rtm = nlmsg_data(nlh);
483	cfg->fc_dst_len = rtm->rtm_dst_len;
484	cfg->fc_tos = rtm->rtm_tos;
485	cfg->fc_table = rtm->rtm_table;
486	cfg->fc_protocol = rtm->rtm_protocol;
487	cfg->fc_scope = rtm->rtm_scope;
488	cfg->fc_type = rtm->rtm_type;
489	cfg->fc_flags = rtm->rtm_flags;
490	cfg->fc_nlflags = nlh->nlmsg_flags;
491
492	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
493	cfg->fc_nlinfo.nlh = nlh;
494
495	if (cfg->fc_type > RTN_MAX) {
496		err = -EINVAL;
497		goto errout;
498	}
499
500	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
501		switch (nla_type(attr)) {
502		case RTA_DST:
503			cfg->fc_dst = nla_get_be32(attr);
504			break;
505		case RTA_OIF:
506			cfg->fc_oif = nla_get_u32(attr);
507			break;
508		case RTA_GATEWAY:
509			cfg->fc_gw = nla_get_be32(attr);
510			break;
511		case RTA_PRIORITY:
512			cfg->fc_priority = nla_get_u32(attr);
513			break;
514		case RTA_PREFSRC:
515			cfg->fc_prefsrc = nla_get_be32(attr);
516			break;
517		case RTA_METRICS:
518			cfg->fc_mx = nla_data(attr);
519			cfg->fc_mx_len = nla_len(attr);
520			break;
521		case RTA_MULTIPATH:
522			cfg->fc_mp = nla_data(attr);
523			cfg->fc_mp_len = nla_len(attr);
524			break;
525		case RTA_FLOW:
526			cfg->fc_flow = nla_get_u32(attr);
527			break;
528		case RTA_TABLE:
529			cfg->fc_table = nla_get_u32(attr);
530			break;
531		}
532	}
533
534	return 0;
535errout:
536	return err;
537}
538
539static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
540{
541	struct fib_config cfg;
542	struct fib_table *tb;
543	int err;
544
545	err = rtm_to_fib_config(skb, nlh, &cfg);
546	if (err < 0)
547		goto errout;
548
549	tb = fib_get_table(cfg.fc_table);
550	if (tb == NULL) {
551		err = -ESRCH;
552		goto errout;
553	}
554
555	err = tb->tb_delete(tb, &cfg);
556errout:
557	return err;
558}
559
560static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
561{
562	struct fib_config cfg;
563	struct fib_table *tb;
564	int err;
565
566	err = rtm_to_fib_config(skb, nlh, &cfg);
567	if (err < 0)
568		goto errout;
569
570	tb = fib_new_table(cfg.fc_table);
571	if (tb == NULL) {
572		err = -ENOBUFS;
573		goto errout;
574	}
575
576	err = tb->tb_insert(tb, &cfg);
577errout:
578	return err;
579}
580
581static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
582{
583	unsigned int h, s_h;
584	unsigned int e = 0, s_e;
585	struct fib_table *tb;
586	struct hlist_node *node;
587	int dumped = 0;
588
589	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
590	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
591		return ip_rt_dump(skb, cb);
592
593	s_h = cb->args[0];
594	s_e = cb->args[1];
595
596	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
597		e = 0;
598		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
599			if (e < s_e)
600				goto next;
601			if (dumped)
602				memset(&cb->args[2], 0, sizeof(cb->args) -
603						 2 * sizeof(cb->args[0]));
604			if (tb->tb_dump(tb, skb, cb) < 0)
605				goto out;
606			dumped = 1;
607next:
608			e++;
609		}
610	}
611out:
612	cb->args[1] = e;
613	cb->args[0] = h;
614
615	return skb->len;
616}
617
618/* Prepare and feed intra-kernel routing request.
619   Really, it should be netlink message, but :-( netlink
620   can be not configured, so that we feed it directly
621   to fib engine. It is legal, because all events occur
622   only when netlink is already locked.
623 */
624
625static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
626{
627	struct fib_table *tb;
628	struct fib_config cfg = {
629		.fc_protocol = RTPROT_KERNEL,
630		.fc_type = type,
631		.fc_dst = dst,
632		.fc_dst_len = dst_len,
633		.fc_prefsrc = ifa->ifa_local,
634		.fc_oif = ifa->ifa_dev->dev->ifindex,
635		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
636	};
637
638	if (type == RTN_UNICAST)
639		tb = fib_new_table(RT_TABLE_MAIN);
640	else
641		tb = fib_new_table(RT_TABLE_LOCAL);
642
643	if (tb == NULL)
644		return;
645
646	cfg.fc_table = tb->tb_id;
647
648	if (type != RTN_LOCAL)
649		cfg.fc_scope = RT_SCOPE_LINK;
650	else
651		cfg.fc_scope = RT_SCOPE_HOST;
652
653	if (cmd == RTM_NEWROUTE)
654		tb->tb_insert(tb, &cfg);
655	else
656		tb->tb_delete(tb, &cfg);
657}
658
659void fib_add_ifaddr(struct in_ifaddr *ifa)
660{
661	struct in_device *in_dev = ifa->ifa_dev;
662	struct net_device *dev = in_dev->dev;
663	struct in_ifaddr *prim = ifa;
664	__be32 mask = ifa->ifa_mask;
665	__be32 addr = ifa->ifa_local;
666	__be32 prefix = ifa->ifa_address&mask;
667
668	if (ifa->ifa_flags&IFA_F_SECONDARY) {
669		prim = inet_ifa_byprefix(in_dev, prefix, mask);
670		if (prim == NULL) {
671			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
672			return;
673		}
674	}
675
676	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
677
678	if (!(dev->flags&IFF_UP))
679		return;
680
681	/* Add broadcast address, if it is explicitly assigned. */
682	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
683		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
684
685	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
686	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
687		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
688			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
689
690		/* Add network specific broadcasts, when it takes a sense */
691		if (ifa->ifa_prefixlen < 31) {
692			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
693			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
694		}
695	}
696}
697
698static void fib_del_ifaddr(struct in_ifaddr *ifa)
699{
700	struct in_device *in_dev = ifa->ifa_dev;
701	struct net_device *dev = in_dev->dev;
702	struct in_ifaddr *ifa1;
703	struct in_ifaddr *prim = ifa;
704	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
705	__be32 any = ifa->ifa_address&ifa->ifa_mask;
706#define LOCAL_OK	1
707#define BRD_OK		2
708#define BRD0_OK		4
709#define BRD1_OK		8
710	unsigned ok = 0;
711
712	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
713		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
714			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
715	else {
716		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
717		if (prim == NULL) {
718			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
719			return;
720		}
721	}
722
723	/* Deletion is more complicated than add.
724	   We should take care of not to delete too much :-)
725
726	   Scan address list to be sure that addresses are really gone.
727	 */
728
729	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
730		if (ifa->ifa_local == ifa1->ifa_local)
731			ok |= LOCAL_OK;
732		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
733			ok |= BRD_OK;
734		if (brd == ifa1->ifa_broadcast)
735			ok |= BRD1_OK;
736		if (any == ifa1->ifa_broadcast)
737			ok |= BRD0_OK;
738	}
739
740	if (!(ok&BRD_OK))
741		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
742	if (!(ok&BRD1_OK))
743		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
744	if (!(ok&BRD0_OK))
745		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
746	if (!(ok&LOCAL_OK)) {
747		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
748
749		/* Check, that this local address finally disappeared. */
750		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
751			/* And the last, but not the least thing.
752			   We must flush stray FIB entries.
753
754			   First of all, we scan fib_info list searching
755			   for stray nexthop entries, then ignite fib_flush.
756			*/
757			if (fib_sync_down(ifa->ifa_local, NULL, 0))
758				fib_flush();
759		}
760	}
761#undef LOCAL_OK
762#undef BRD_OK
763#undef BRD0_OK
764#undef BRD1_OK
765}
766
767static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
768{
769
770	struct fib_result       res;
771	struct flowi            fl = { .mark = frn->fl_mark,
772				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
773							    .tos = frn->fl_tos,
774							    .scope = frn->fl_scope } } };
775
776#ifdef CONFIG_IP_MULTIPLE_TABLES
777	res.r = NULL;
778#endif
779
780	frn->err = -ENOENT;
781	if (tb) {
782		local_bh_disable();
783
784		frn->tb_id = tb->tb_id;
785		frn->err = tb->tb_lookup(tb, &fl, &res);
786
787		if (!frn->err) {
788			frn->prefixlen = res.prefixlen;
789			frn->nh_sel = res.nh_sel;
790			frn->type = res.type;
791			frn->scope = res.scope;
792			fib_res_put(&res);
793		}
794		local_bh_enable();
795	}
796}
797
798static void nl_fib_input(struct sk_buff *skb)
799{
800	struct fib_result_nl *frn;
801	struct nlmsghdr *nlh;
802	struct fib_table *tb;
803	u32 pid;
804
805	nlh = nlmsg_hdr(skb);
806	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
807	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
808		kfree_skb(skb);
809		return;
810	}
811
812	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
813	tb = fib_get_table(frn->tb_id_in);
814
815	nl_fib_lookup(frn, tb);
816
817	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
818	NETLINK_CB(skb).pid = 0;         /* from kernel */
819	NETLINK_CB(skb).dst_group = 0;  /* unicast */
820	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
821}
822
823static void nl_fib_lookup_init(void)
824{
825	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
826				      nl_fib_input, NULL, THIS_MODULE);
827}
828
/*
 * Tear down IPv4 routing state for a device: sync down its nexthops
 * (flushing the tables if that reports anything), flush the routing cache
 * and take the device's ARP entries down.
 *
 * @force is passed through unchanged to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
836
837static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
838{
839	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
840
841	switch (event) {
842	case NETDEV_UP:
843		fib_add_ifaddr(ifa);
844#ifdef CONFIG_IP_ROUTE_MULTIPATH
845		fib_sync_up(ifa->ifa_dev->dev);
846#endif
847		rt_cache_flush(-1);
848		break;
849	case NETDEV_DOWN:
850		fib_del_ifaddr(ifa);
851		if (ifa->ifa_dev->ifa_list == NULL) {
852			/* Last address was deleted from this interface.
853			   Disable IP.
854			 */
855			fib_disable_ip(ifa->ifa_dev->dev, 1);
856		} else {
857			rt_cache_flush(-1);
858		}
859		break;
860	}
861	return NOTIFY_DONE;
862}
863
864static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
865{
866	struct net_device *dev = ptr;
867	struct in_device *in_dev = __in_dev_get_rtnl(dev);
868
869	if (dev->nd_net != &init_net)
870		return NOTIFY_DONE;
871
872	if (event == NETDEV_UNREGISTER) {
873		fib_disable_ip(dev, 2);
874		return NOTIFY_DONE;
875	}
876
877	if (!in_dev)
878		return NOTIFY_DONE;
879
880	switch (event) {
881	case NETDEV_UP:
882		for_ifa(in_dev) {
883			fib_add_ifaddr(ifa);
884		} endfor_ifa(in_dev);
885#ifdef CONFIG_IP_ROUTE_MULTIPATH
886		fib_sync_up(dev);
887#endif
888		rt_cache_flush(-1);
889		break;
890	case NETDEV_DOWN:
891		fib_disable_ip(dev, 0);
892		break;
893	case NETDEV_CHANGEMTU:
894	case NETDEV_CHANGE:
895		rt_cache_flush(0);
896		break;
897	}
898	return NOTIFY_DONE;
899}
900
901static struct notifier_block fib_inetaddr_notifier = {
902	.notifier_call =fib_inetaddr_event,
903};
904
905static struct notifier_block fib_netdev_notifier = {
906	.notifier_call =fib_netdev_event,
907};
908
909void __init ip_fib_init(void)
910{
911	unsigned int i;
912
913	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
914		INIT_HLIST_HEAD(&fib_table_hash[i]);
915
916	fib4_rules_init();
917
918	register_netdevice_notifier(&fib_netdev_notifier);
919	register_inetaddr_notifier(&fib_inetaddr_notifier);
920	nl_fib_lookup_init();
921
922	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
923	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
924	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
925}
926
/* Address classification and device lookup are exported to modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);
929