fib_frontend.c revision d9c9df8c9368f4102324e8c3923edae83974602b
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/mm.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/errno.h>
31#include <linux/in.h>
32#include <linux/inet.h>
33#include <linux/inetdevice.h>
34#include <linux/netdevice.h>
35#include <linux/if_addr.h>
36#include <linux/if_arp.h>
37#include <linux/skbuff.h>
38#include <linux/netlink.h>
39#include <linux/init.h>
40#include <linux/list.h>
41
42#include <net/ip.h>
43#include <net/protocol.h>
44#include <net/route.h>
45#include <net/tcp.h>
46#include <net/sock.h>
47#include <net/icmp.h>
48#include <net/arp.h>
49#include <net/ip_fib.h>
50
51#define FFprint(a...) printk(KERN_DEBUG a)
52
53#ifndef CONFIG_IP_MULTIPLE_TABLES
54
55struct fib_table *ip_fib_local_table;
56struct fib_table *ip_fib_main_table;
57
58#define FIB_TABLE_HASHSZ 1
59static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
60
61#else
62
63#define FIB_TABLE_HASHSZ 256
64static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
65
66struct fib_table *fib_new_table(u32 id)
67{
68	struct fib_table *tb;
69	unsigned int h;
70
71	if (id == 0)
72		id = RT_TABLE_MAIN;
73	tb = fib_get_table(id);
74	if (tb)
75		return tb;
76	tb = fib_hash_init(id);
77	if (!tb)
78		return NULL;
79	h = id & (FIB_TABLE_HASHSZ - 1);
80	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
81	return tb;
82}
83
84struct fib_table *fib_get_table(u32 id)
85{
86	struct fib_table *tb;
87	struct hlist_node *node;
88	unsigned int h;
89
90	if (id == 0)
91		id = RT_TABLE_MAIN;
92	h = id & (FIB_TABLE_HASHSZ - 1);
93	rcu_read_lock();
94	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
95		if (tb->tb_id == id) {
96			rcu_read_unlock();
97			return tb;
98		}
99	}
100	rcu_read_unlock();
101	return NULL;
102}
103#endif /* CONFIG_IP_MULTIPLE_TABLES */
104
105static void fib_flush(void)
106{
107	int flushed = 0;
108	struct fib_table *tb;
109	struct hlist_node *node;
110	unsigned int h;
111
112	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
113		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
114			flushed += tb->tb_flush(tb);
115	}
116
117	if (flushed)
118		rt_cache_flush(-1);
119}
120
121/*
122 *	Find the first device with a given source address.
123 */
124
125struct net_device * ip_dev_find(u32 addr)
126{
127	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
128	struct fib_result res;
129	struct net_device *dev = NULL;
130
131#ifdef CONFIG_IP_MULTIPLE_TABLES
132	res.r = NULL;
133#endif
134
135	if (!ip_fib_local_table ||
136	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
137		return NULL;
138	if (res.type != RTN_LOCAL)
139		goto out;
140	dev = FIB_RES_DEV(res);
141
142	if (dev)
143		dev_hold(dev);
144out:
145	fib_res_put(&res);
146	return dev;
147}
148
149unsigned inet_addr_type(u32 addr)
150{
151	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
152	struct fib_result	res;
153	unsigned ret = RTN_BROADCAST;
154
155	if (ZERONET(addr) || BADCLASS(addr))
156		return RTN_BROADCAST;
157	if (MULTICAST(addr))
158		return RTN_MULTICAST;
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161	res.r = NULL;
162#endif
163
164	if (ip_fib_local_table) {
165		ret = RTN_UNICAST;
166		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
167						   &fl, &res)) {
168			ret = res.type;
169			fib_res_put(&res);
170		}
171	}
172	return ret;
173}
174
175/* Given (packet source, input interface) and optional (dst, oif, tos):
176   - (main) check, that source is valid i.e. not broadcast or our local
177     address.
178   - figure out what "logical" interface this packet arrived
179     and calculate "specific destination" address.
180   - check, that packet arrived from expected physical interface.
181 */
182
183int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
184			struct net_device *dev, __be32 *spec_dst, u32 *itag)
185{
186	struct in_device *in_dev;
187	struct flowi fl = { .nl_u = { .ip4_u =
188				      { .daddr = src,
189					.saddr = dst,
190					.tos = tos } },
191			    .iif = oif };
192	struct fib_result res;
193	int no_addr, rpf;
194	int ret;
195
196	no_addr = rpf = 0;
197	rcu_read_lock();
198	in_dev = __in_dev_get_rcu(dev);
199	if (in_dev) {
200		no_addr = in_dev->ifa_list == NULL;
201		rpf = IN_DEV_RPFILTER(in_dev);
202	}
203	rcu_read_unlock();
204
205	if (in_dev == NULL)
206		goto e_inval;
207
208	if (fib_lookup(&fl, &res))
209		goto last_resort;
210	if (res.type != RTN_UNICAST)
211		goto e_inval_res;
212	*spec_dst = FIB_RES_PREFSRC(res);
213	fib_combine_itag(itag, &res);
214#ifdef CONFIG_IP_ROUTE_MULTIPATH
215	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
216#else
217	if (FIB_RES_DEV(res) == dev)
218#endif
219	{
220		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
221		fib_res_put(&res);
222		return ret;
223	}
224	fib_res_put(&res);
225	if (no_addr)
226		goto last_resort;
227	if (rpf)
228		goto e_inval;
229	fl.oif = dev->ifindex;
230
231	ret = 0;
232	if (fib_lookup(&fl, &res) == 0) {
233		if (res.type == RTN_UNICAST) {
234			*spec_dst = FIB_RES_PREFSRC(res);
235			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
236		}
237		fib_res_put(&res);
238	}
239	return ret;
240
241last_resort:
242	if (rpf)
243		goto e_inval;
244	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
245	*itag = 0;
246	return 0;
247
248e_inval_res:
249	fib_res_put(&res);
250e_inval:
251	return -EINVAL;
252}
253
254#ifndef CONFIG_IP_NOSIOCRT
255
256static inline u32 sk_extract_addr(struct sockaddr *addr)
257{
258	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
259}
260
261static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
262{
263	struct nlattr *nla;
264
265	nla = (struct nlattr *) ((char *) mx + len);
266	nla->nla_type = type;
267	nla->nla_len = nla_attr_size(4);
268	*(u32 *) nla_data(nla) = value;
269
270	return len + nla_total_size(4);
271}
272
273static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
274				 struct fib_config *cfg)
275{
276	u32 addr;
277	int plen;
278
279	memset(cfg, 0, sizeof(*cfg));
280
281	if (rt->rt_dst.sa_family != AF_INET)
282		return -EAFNOSUPPORT;
283
284	/*
285	 * Check mask for validity:
286	 * a) it must be contiguous.
287	 * b) destination must have all host bits clear.
288	 * c) if application forgot to set correct family (AF_INET),
289	 *    reject request unless it is absolutely clear i.e.
290	 *    both family and mask are zero.
291	 */
292	plen = 32;
293	addr = sk_extract_addr(&rt->rt_dst);
294	if (!(rt->rt_flags & RTF_HOST)) {
295		u32 mask = sk_extract_addr(&rt->rt_genmask);
296
297		if (rt->rt_genmask.sa_family != AF_INET) {
298			if (mask || rt->rt_genmask.sa_family)
299				return -EAFNOSUPPORT;
300		}
301
302		if (bad_mask(mask, addr))
303			return -EINVAL;
304
305		plen = inet_mask_len(mask);
306	}
307
308	cfg->fc_dst_len = plen;
309	cfg->fc_dst = addr;
310
311	if (cmd != SIOCDELRT) {
312		cfg->fc_nlflags = NLM_F_CREATE;
313		cfg->fc_protocol = RTPROT_BOOT;
314	}
315
316	if (rt->rt_metric)
317		cfg->fc_priority = rt->rt_metric - 1;
318
319	if (rt->rt_flags & RTF_REJECT) {
320		cfg->fc_scope = RT_SCOPE_HOST;
321		cfg->fc_type = RTN_UNREACHABLE;
322		return 0;
323	}
324
325	cfg->fc_scope = RT_SCOPE_NOWHERE;
326	cfg->fc_type = RTN_UNICAST;
327
328	if (rt->rt_dev) {
329		char *colon;
330		struct net_device *dev;
331		char devname[IFNAMSIZ];
332
333		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
334			return -EFAULT;
335
336		devname[IFNAMSIZ-1] = 0;
337		colon = strchr(devname, ':');
338		if (colon)
339			*colon = 0;
340		dev = __dev_get_by_name(devname);
341		if (!dev)
342			return -ENODEV;
343		cfg->fc_oif = dev->ifindex;
344		if (colon) {
345			struct in_ifaddr *ifa;
346			struct in_device *in_dev = __in_dev_get_rtnl(dev);
347			if (!in_dev)
348				return -ENODEV;
349			*colon = ':';
350			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
351				if (strcmp(ifa->ifa_label, devname) == 0)
352					break;
353			if (ifa == NULL)
354				return -ENODEV;
355			cfg->fc_prefsrc = ifa->ifa_local;
356		}
357	}
358
359	addr = sk_extract_addr(&rt->rt_gateway);
360	if (rt->rt_gateway.sa_family == AF_INET && addr) {
361		cfg->fc_gw = addr;
362		if (rt->rt_flags & RTF_GATEWAY &&
363		    inet_addr_type(addr) == RTN_UNICAST)
364			cfg->fc_scope = RT_SCOPE_UNIVERSE;
365	}
366
367	if (cmd == SIOCDELRT)
368		return 0;
369
370	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
371		return -EINVAL;
372
373	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
374		cfg->fc_scope = RT_SCOPE_LINK;
375
376	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
377		struct nlattr *mx;
378		int len = 0;
379
380		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
381 		if (mx == NULL)
382			return -ENOMEM;
383
384		if (rt->rt_flags & RTF_MTU)
385			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
386
387		if (rt->rt_flags & RTF_WINDOW)
388			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
389
390		if (rt->rt_flags & RTF_IRTT)
391			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
392
393		cfg->fc_mx = mx;
394		cfg->fc_mx_len = len;
395	}
396
397	return 0;
398}
399
400/*
401 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
402 */
403
404int ip_rt_ioctl(unsigned int cmd, void __user *arg)
405{
406	struct fib_config cfg;
407	struct rtentry rt;
408	int err;
409
410	switch (cmd) {
411	case SIOCADDRT:		/* Add a route */
412	case SIOCDELRT:		/* Delete a route */
413		if (!capable(CAP_NET_ADMIN))
414			return -EPERM;
415
416		if (copy_from_user(&rt, arg, sizeof(rt)))
417			return -EFAULT;
418
419		rtnl_lock();
420		err = rtentry_to_fib_config(cmd, &rt, &cfg);
421		if (err == 0) {
422			struct fib_table *tb;
423
424			if (cmd == SIOCDELRT) {
425				tb = fib_get_table(cfg.fc_table);
426				if (tb)
427					err = tb->tb_delete(tb, &cfg);
428				else
429					err = -ESRCH;
430			} else {
431				tb = fib_new_table(cfg.fc_table);
432				if (tb)
433					err = tb->tb_insert(tb, &cfg);
434				else
435					err = -ENOBUFS;
436			}
437
438			/* allocated by rtentry_to_fib_config() */
439			kfree(cfg.fc_mx);
440		}
441		rtnl_unlock();
442		return err;
443	}
444	return -EINVAL;
445}
446
447#else
448
449int ip_rt_ioctl(unsigned int cmd, void *arg)
450{
451	return -EINVAL;
452}
453
454#endif
455
456struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
457	[RTA_DST]		= { .type = NLA_U32 },
458	[RTA_SRC]		= { .type = NLA_U32 },
459	[RTA_IIF]		= { .type = NLA_U32 },
460	[RTA_OIF]		= { .type = NLA_U32 },
461	[RTA_GATEWAY]		= { .type = NLA_U32 },
462	[RTA_PRIORITY]		= { .type = NLA_U32 },
463	[RTA_PREFSRC]		= { .type = NLA_U32 },
464	[RTA_METRICS]		= { .type = NLA_NESTED },
465	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
466	[RTA_PROTOINFO]		= { .type = NLA_U32 },
467	[RTA_FLOW]		= { .type = NLA_U32 },
468	[RTA_MP_ALGO]		= { .type = NLA_U32 },
469};
470
471static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
472			     struct fib_config *cfg)
473{
474	struct nlattr *attr;
475	int err, remaining;
476	struct rtmsg *rtm;
477
478	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
479	if (err < 0)
480		goto errout;
481
482	memset(cfg, 0, sizeof(*cfg));
483
484	rtm = nlmsg_data(nlh);
485	cfg->fc_family = rtm->rtm_family;
486	cfg->fc_dst_len = rtm->rtm_dst_len;
487	cfg->fc_src_len = rtm->rtm_src_len;
488	cfg->fc_tos = rtm->rtm_tos;
489	cfg->fc_table = rtm->rtm_table;
490	cfg->fc_protocol = rtm->rtm_protocol;
491	cfg->fc_scope = rtm->rtm_scope;
492	cfg->fc_type = rtm->rtm_type;
493	cfg->fc_flags = rtm->rtm_flags;
494	cfg->fc_nlflags = nlh->nlmsg_flags;
495
496	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
497	cfg->fc_nlinfo.nlh = nlh;
498
499	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
500		switch (attr->nla_type) {
501		case RTA_DST:
502			cfg->fc_dst = nla_get_u32(attr);
503			break;
504		case RTA_SRC:
505			cfg->fc_src = nla_get_u32(attr);
506			break;
507		case RTA_OIF:
508			cfg->fc_oif = nla_get_u32(attr);
509			break;
510		case RTA_GATEWAY:
511			cfg->fc_gw = nla_get_u32(attr);
512			break;
513		case RTA_PRIORITY:
514			cfg->fc_priority = nla_get_u32(attr);
515			break;
516		case RTA_PREFSRC:
517			cfg->fc_prefsrc = nla_get_u32(attr);
518			break;
519		case RTA_METRICS:
520			cfg->fc_mx = nla_data(attr);
521			cfg->fc_mx_len = nla_len(attr);
522			break;
523		case RTA_MULTIPATH:
524			cfg->fc_mp = nla_data(attr);
525			cfg->fc_mp_len = nla_len(attr);
526			break;
527		case RTA_FLOW:
528			cfg->fc_flow = nla_get_u32(attr);
529			break;
530		case RTA_MP_ALGO:
531			cfg->fc_mp_alg = nla_get_u32(attr);
532			break;
533		case RTA_TABLE:
534			cfg->fc_table = nla_get_u32(attr);
535			break;
536		}
537	}
538
539	return 0;
540errout:
541	return err;
542}
543
544int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
545{
546	struct fib_config cfg;
547	struct fib_table *tb;
548	int err;
549
550	err = rtm_to_fib_config(skb, nlh, &cfg);
551	if (err < 0)
552		goto errout;
553
554	tb = fib_get_table(cfg.fc_table);
555	if (tb == NULL) {
556		err = -ESRCH;
557		goto errout;
558	}
559
560	err = tb->tb_delete(tb, &cfg);
561errout:
562	return err;
563}
564
565int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
566{
567	struct fib_config cfg;
568	struct fib_table *tb;
569	int err;
570
571	err = rtm_to_fib_config(skb, nlh, &cfg);
572	if (err < 0)
573		goto errout;
574
575	tb = fib_new_table(cfg.fc_table);
576	if (tb == NULL) {
577		err = -ENOBUFS;
578		goto errout;
579	}
580
581	err = tb->tb_insert(tb, &cfg);
582errout:
583	return err;
584}
585
586int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
587{
588	unsigned int h, s_h;
589	unsigned int e = 0, s_e;
590	struct fib_table *tb;
591	struct hlist_node *node;
592	int dumped = 0;
593
594	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
595	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
596		return ip_rt_dump(skb, cb);
597
598	s_h = cb->args[0];
599	s_e = cb->args[1];
600
601	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
602		e = 0;
603		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
604			if (e < s_e)
605				goto next;
606			if (dumped)
607				memset(&cb->args[2], 0, sizeof(cb->args) -
608				                 2 * sizeof(cb->args[0]));
609			if (tb->tb_dump(tb, skb, cb) < 0)
610				goto out;
611			dumped = 1;
612next:
613			e++;
614		}
615	}
616out:
617	cb->args[1] = e;
618	cb->args[0] = h;
619
620	return skb->len;
621}
622
623/* Prepare and feed intra-kernel routing request.
624   Really, it should be netlink message, but :-( netlink
625   can be not configured, so that we feed it directly
626   to fib engine. It is legal, because all events occur
627   only when netlink is already locked.
628 */
629
630static void fib_magic(int cmd, int type, u32 dst, int dst_len,
631		      struct in_ifaddr *ifa)
632{
633	struct fib_table *tb;
634	struct fib_config cfg = {
635		.fc_protocol = RTPROT_KERNEL,
636		.fc_type = type,
637		.fc_dst = dst,
638		.fc_dst_len = dst_len,
639		.fc_prefsrc = ifa->ifa_local,
640		.fc_oif = ifa->ifa_dev->dev->ifindex,
641		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
642	};
643
644	if (type == RTN_UNICAST)
645		tb = fib_new_table(RT_TABLE_MAIN);
646	else
647		tb = fib_new_table(RT_TABLE_LOCAL);
648
649	if (tb == NULL)
650		return;
651
652	cfg.fc_table = tb->tb_id;
653
654	if (type != RTN_LOCAL)
655		cfg.fc_scope = RT_SCOPE_LINK;
656	else
657		cfg.fc_scope = RT_SCOPE_HOST;
658
659	if (cmd == RTM_NEWROUTE)
660		tb->tb_insert(tb, &cfg);
661	else
662		tb->tb_delete(tb, &cfg);
663}
664
665void fib_add_ifaddr(struct in_ifaddr *ifa)
666{
667	struct in_device *in_dev = ifa->ifa_dev;
668	struct net_device *dev = in_dev->dev;
669	struct in_ifaddr *prim = ifa;
670	u32 mask = ifa->ifa_mask;
671	u32 addr = ifa->ifa_local;
672	u32 prefix = ifa->ifa_address&mask;
673
674	if (ifa->ifa_flags&IFA_F_SECONDARY) {
675		prim = inet_ifa_byprefix(in_dev, prefix, mask);
676		if (prim == NULL) {
677			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
678			return;
679		}
680	}
681
682	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
683
684	if (!(dev->flags&IFF_UP))
685		return;
686
687	/* Add broadcast address, if it is explicitly assigned. */
688	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
689		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
690
691	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
692	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
693		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
694			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
695
696		/* Add network specific broadcasts, when it takes a sense */
697		if (ifa->ifa_prefixlen < 31) {
698			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
699			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
700		}
701	}
702}
703
704static void fib_del_ifaddr(struct in_ifaddr *ifa)
705{
706	struct in_device *in_dev = ifa->ifa_dev;
707	struct net_device *dev = in_dev->dev;
708	struct in_ifaddr *ifa1;
709	struct in_ifaddr *prim = ifa;
710	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
711	u32 any = ifa->ifa_address&ifa->ifa_mask;
712#define LOCAL_OK	1
713#define BRD_OK		2
714#define BRD0_OK		4
715#define BRD1_OK		8
716	unsigned ok = 0;
717
718	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
719		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
720			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
721	else {
722		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
723		if (prim == NULL) {
724			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
725			return;
726		}
727	}
728
729	/* Deletion is more complicated than add.
730	   We should take care of not to delete too much :-)
731
732	   Scan address list to be sure that addresses are really gone.
733	 */
734
735	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
736		if (ifa->ifa_local == ifa1->ifa_local)
737			ok |= LOCAL_OK;
738		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
739			ok |= BRD_OK;
740		if (brd == ifa1->ifa_broadcast)
741			ok |= BRD1_OK;
742		if (any == ifa1->ifa_broadcast)
743			ok |= BRD0_OK;
744	}
745
746	if (!(ok&BRD_OK))
747		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
748	if (!(ok&BRD1_OK))
749		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
750	if (!(ok&BRD0_OK))
751		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
752	if (!(ok&LOCAL_OK)) {
753		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
754
755		/* Check, that this local address finally disappeared. */
756		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
757			/* And the last, but not the least thing.
758			   We must flush stray FIB entries.
759
760			   First of all, we scan fib_info list searching
761			   for stray nexthop entries, then ignite fib_flush.
762			*/
763			if (fib_sync_down(ifa->ifa_local, NULL, 0))
764				fib_flush();
765		}
766	}
767#undef LOCAL_OK
768#undef BRD_OK
769#undef BRD0_OK
770#undef BRD1_OK
771}
772
773static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
774{
775
776	struct fib_result       res;
777	struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
778							    .fwmark = frn->fl_fwmark,
779							    .tos = frn->fl_tos,
780							    .scope = frn->fl_scope } } };
781	if (tb) {
782		local_bh_disable();
783
784		frn->tb_id = tb->tb_id;
785		frn->err = tb->tb_lookup(tb, &fl, &res);
786
787		if (!frn->err) {
788			frn->prefixlen = res.prefixlen;
789			frn->nh_sel = res.nh_sel;
790			frn->type = res.type;
791			frn->scope = res.scope;
792		}
793		local_bh_enable();
794	}
795}
796
797static void nl_fib_input(struct sock *sk, int len)
798{
799	struct sk_buff *skb = NULL;
800        struct nlmsghdr *nlh = NULL;
801	struct fib_result_nl *frn;
802	u32 pid;
803	struct fib_table *tb;
804
805	skb = skb_dequeue(&sk->sk_receive_queue);
806	nlh = (struct nlmsghdr *)skb->data;
807	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
808	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
809		kfree_skb(skb);
810		return;
811	}
812
813	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
814	tb = fib_get_table(frn->tb_id_in);
815
816	nl_fib_lookup(frn, tb);
817
818	pid = nlh->nlmsg_pid;           /*pid of sending process */
819	NETLINK_CB(skb).pid = 0;         /* from kernel */
820	NETLINK_CB(skb).dst_pid = pid;
821	NETLINK_CB(skb).dst_group = 0;  /* unicast */
822	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
823}
824
825static void nl_fib_lookup_init(void)
826{
827      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
828}
829
/*
 * Take a device out of IP service: sync the FIB down for the device
 * (flushing the tables if fib_sync_down() reports a non-zero result),
 * flush the routing cache and call arp_ifdown() for the device.
 * force is passed through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
837
838static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
839{
840	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
841
842	switch (event) {
843	case NETDEV_UP:
844		fib_add_ifaddr(ifa);
845#ifdef CONFIG_IP_ROUTE_MULTIPATH
846		fib_sync_up(ifa->ifa_dev->dev);
847#endif
848		rt_cache_flush(-1);
849		break;
850	case NETDEV_DOWN:
851		fib_del_ifaddr(ifa);
852		if (ifa->ifa_dev->ifa_list == NULL) {
853			/* Last address was deleted from this interface.
854			   Disable IP.
855			 */
856			fib_disable_ip(ifa->ifa_dev->dev, 1);
857		} else {
858			rt_cache_flush(-1);
859		}
860		break;
861	}
862	return NOTIFY_DONE;
863}
864
865static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
866{
867	struct net_device *dev = ptr;
868	struct in_device *in_dev = __in_dev_get_rtnl(dev);
869
870	if (event == NETDEV_UNREGISTER) {
871		fib_disable_ip(dev, 2);
872		return NOTIFY_DONE;
873	}
874
875	if (!in_dev)
876		return NOTIFY_DONE;
877
878	switch (event) {
879	case NETDEV_UP:
880		for_ifa(in_dev) {
881			fib_add_ifaddr(ifa);
882		} endfor_ifa(in_dev);
883#ifdef CONFIG_IP_ROUTE_MULTIPATH
884		fib_sync_up(dev);
885#endif
886		rt_cache_flush(-1);
887		break;
888	case NETDEV_DOWN:
889		fib_disable_ip(dev, 0);
890		break;
891	case NETDEV_CHANGEMTU:
892	case NETDEV_CHANGE:
893		rt_cache_flush(0);
894		break;
895	}
896	return NOTIFY_DONE;
897}
898
899static struct notifier_block fib_inetaddr_notifier = {
900	.notifier_call =fib_inetaddr_event,
901};
902
903static struct notifier_block fib_netdev_notifier = {
904	.notifier_call =fib_netdev_event,
905};
906
907void __init ip_fib_init(void)
908{
909	unsigned int i;
910
911	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
912		INIT_HLIST_HEAD(&fib_table_hash[i]);
913#ifndef CONFIG_IP_MULTIPLE_TABLES
914	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
915	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
916	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
917	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
918#else
919	fib4_rules_init();
920#endif
921
922	register_netdevice_notifier(&fib_netdev_notifier);
923	register_inetaddr_notifier(&fib_inetaddr_notifier);
924	nl_fib_lookup_init();
925}
926
927EXPORT_SYMBOL(inet_addr_type);
928EXPORT_SYMBOL(ip_dev_find);
929