fib_frontend.c revision 0553811612a6178365f3b062c30234913b218a96
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug-level printk() wrapper: forwards its arguments after KERN_DEBUG. */
#define FFprint(args...) printk(KERN_DEBUG args)

/*
 * Kernel netlink socket for NETLINK_FIB_LOOKUP; assigned in
 * nl_fib_lookup_init() and used by nl_fib_input() to send replies.
 */
static struct sock *fibnl;
53
54#ifndef CONFIG_IP_MULTIPLE_TABLES
55
56struct fib_table *ip_fib_local_table;
57struct fib_table *ip_fib_main_table;
58
59#define FIB_TABLE_HASHSZ 1
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61
62static void __init fib4_rules_init(void)
63{
64	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
65	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
66	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
67	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
68}
69#else
70
71#define FIB_TABLE_HASHSZ 256
72static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
73
74struct fib_table *fib_new_table(u32 id)
75{
76	struct fib_table *tb;
77	unsigned int h;
78
79	if (id == 0)
80		id = RT_TABLE_MAIN;
81	tb = fib_get_table(id);
82	if (tb)
83		return tb;
84	tb = fib_hash_init(id);
85	if (!tb)
86		return NULL;
87	h = id & (FIB_TABLE_HASHSZ - 1);
88	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
89	return tb;
90}
91
92struct fib_table *fib_get_table(u32 id)
93{
94	struct fib_table *tb;
95	struct hlist_node *node;
96	unsigned int h;
97
98	if (id == 0)
99		id = RT_TABLE_MAIN;
100	h = id & (FIB_TABLE_HASHSZ - 1);
101	rcu_read_lock();
102	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
103		if (tb->tb_id == id) {
104			rcu_read_unlock();
105			return tb;
106		}
107	}
108	rcu_read_unlock();
109	return NULL;
110}
111#endif /* CONFIG_IP_MULTIPLE_TABLES */
112
113static void fib_flush(void)
114{
115	int flushed = 0;
116	struct fib_table *tb;
117	struct hlist_node *node;
118	unsigned int h;
119
120	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
121		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
122			flushed += tb->tb_flush(tb);
123	}
124
125	if (flushed)
126		rt_cache_flush(-1);
127}
128
129/*
130 *	Find the first device with a given source address.
131 */
132
133struct net_device * ip_dev_find(__be32 addr)
134{
135	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
136	struct fib_result res;
137	struct net_device *dev = NULL;
138	struct fib_table *local_table;
139
140#ifdef CONFIG_IP_MULTIPLE_TABLES
141	res.r = NULL;
142#endif
143
144	local_table = fib_get_table(RT_TABLE_LOCAL);
145	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
146		return NULL;
147	if (res.type != RTN_LOCAL)
148		goto out;
149	dev = FIB_RES_DEV(res);
150
151	if (dev)
152		dev_hold(dev);
153out:
154	fib_res_put(&res);
155	return dev;
156}
157
158/*
159 * Find address type as if only "dev" was present in the system. If
160 * on_dev is NULL then all interfaces are taken into consideration.
161 */
162static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
163					    __be32 addr)
164{
165	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
166	struct fib_result	res;
167	unsigned ret = RTN_BROADCAST;
168	struct fib_table *local_table;
169
170	if (ZERONET(addr) || BADCLASS(addr))
171		return RTN_BROADCAST;
172	if (MULTICAST(addr))
173		return RTN_MULTICAST;
174
175#ifdef CONFIG_IP_MULTIPLE_TABLES
176	res.r = NULL;
177#endif
178
179	local_table = fib_get_table(RT_TABLE_LOCAL);
180	if (local_table) {
181		ret = RTN_UNICAST;
182		if (!local_table->tb_lookup(local_table, &fl, &res)) {
183			if (!dev || dev == res.fi->fib_dev)
184				ret = res.type;
185			fib_res_put(&res);
186		}
187	}
188	return ret;
189}
190
191unsigned int inet_addr_type(__be32 addr)
192{
193	return __inet_dev_addr_type(NULL, addr);
194}
195
196unsigned int inet_dev_addr_type(const struct net_device *dev, __be32 addr)
197{
198       return __inet_dev_addr_type(dev, addr);
199}
200
201/* Given (packet source, input interface) and optional (dst, oif, tos):
202   - (main) check, that source is valid i.e. not broadcast or our local
203     address.
204   - figure out what "logical" interface this packet arrived
205     and calculate "specific destination" address.
206   - check, that packet arrived from expected physical interface.
207 */
208
209int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
210			struct net_device *dev, __be32 *spec_dst, u32 *itag)
211{
212	struct in_device *in_dev;
213	struct flowi fl = { .nl_u = { .ip4_u =
214				      { .daddr = src,
215					.saddr = dst,
216					.tos = tos } },
217			    .iif = oif };
218	struct fib_result res;
219	int no_addr, rpf;
220	int ret;
221
222	no_addr = rpf = 0;
223	rcu_read_lock();
224	in_dev = __in_dev_get_rcu(dev);
225	if (in_dev) {
226		no_addr = in_dev->ifa_list == NULL;
227		rpf = IN_DEV_RPFILTER(in_dev);
228	}
229	rcu_read_unlock();
230
231	if (in_dev == NULL)
232		goto e_inval;
233
234	if (fib_lookup(&fl, &res))
235		goto last_resort;
236	if (res.type != RTN_UNICAST)
237		goto e_inval_res;
238	*spec_dst = FIB_RES_PREFSRC(res);
239	fib_combine_itag(itag, &res);
240#ifdef CONFIG_IP_ROUTE_MULTIPATH
241	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
242#else
243	if (FIB_RES_DEV(res) == dev)
244#endif
245	{
246		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
247		fib_res_put(&res);
248		return ret;
249	}
250	fib_res_put(&res);
251	if (no_addr)
252		goto last_resort;
253	if (rpf)
254		goto e_inval;
255	fl.oif = dev->ifindex;
256
257	ret = 0;
258	if (fib_lookup(&fl, &res) == 0) {
259		if (res.type == RTN_UNICAST) {
260			*spec_dst = FIB_RES_PREFSRC(res);
261			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
262		}
263		fib_res_put(&res);
264	}
265	return ret;
266
267last_resort:
268	if (rpf)
269		goto e_inval;
270	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
271	*itag = 0;
272	return 0;
273
274e_inval_res:
275	fib_res_put(&res);
276e_inval:
277	return -EINVAL;
278}
279
280static inline __be32 sk_extract_addr(struct sockaddr *addr)
281{
282	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
283}
284
285static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
286{
287	struct nlattr *nla;
288
289	nla = (struct nlattr *) ((char *) mx + len);
290	nla->nla_type = type;
291	nla->nla_len = nla_attr_size(4);
292	*(u32 *) nla_data(nla) = value;
293
294	return len + nla_total_size(4);
295}
296
297static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
298				 struct fib_config *cfg)
299{
300	__be32 addr;
301	int plen;
302
303	memset(cfg, 0, sizeof(*cfg));
304
305	if (rt->rt_dst.sa_family != AF_INET)
306		return -EAFNOSUPPORT;
307
308	/*
309	 * Check mask for validity:
310	 * a) it must be contiguous.
311	 * b) destination must have all host bits clear.
312	 * c) if application forgot to set correct family (AF_INET),
313	 *    reject request unless it is absolutely clear i.e.
314	 *    both family and mask are zero.
315	 */
316	plen = 32;
317	addr = sk_extract_addr(&rt->rt_dst);
318	if (!(rt->rt_flags & RTF_HOST)) {
319		__be32 mask = sk_extract_addr(&rt->rt_genmask);
320
321		if (rt->rt_genmask.sa_family != AF_INET) {
322			if (mask || rt->rt_genmask.sa_family)
323				return -EAFNOSUPPORT;
324		}
325
326		if (bad_mask(mask, addr))
327			return -EINVAL;
328
329		plen = inet_mask_len(mask);
330	}
331
332	cfg->fc_dst_len = plen;
333	cfg->fc_dst = addr;
334
335	if (cmd != SIOCDELRT) {
336		cfg->fc_nlflags = NLM_F_CREATE;
337		cfg->fc_protocol = RTPROT_BOOT;
338	}
339
340	if (rt->rt_metric)
341		cfg->fc_priority = rt->rt_metric - 1;
342
343	if (rt->rt_flags & RTF_REJECT) {
344		cfg->fc_scope = RT_SCOPE_HOST;
345		cfg->fc_type = RTN_UNREACHABLE;
346		return 0;
347	}
348
349	cfg->fc_scope = RT_SCOPE_NOWHERE;
350	cfg->fc_type = RTN_UNICAST;
351
352	if (rt->rt_dev) {
353		char *colon;
354		struct net_device *dev;
355		char devname[IFNAMSIZ];
356
357		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
358			return -EFAULT;
359
360		devname[IFNAMSIZ-1] = 0;
361		colon = strchr(devname, ':');
362		if (colon)
363			*colon = 0;
364		dev = __dev_get_by_name(&init_net, devname);
365		if (!dev)
366			return -ENODEV;
367		cfg->fc_oif = dev->ifindex;
368		if (colon) {
369			struct in_ifaddr *ifa;
370			struct in_device *in_dev = __in_dev_get_rtnl(dev);
371			if (!in_dev)
372				return -ENODEV;
373			*colon = ':';
374			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
375				if (strcmp(ifa->ifa_label, devname) == 0)
376					break;
377			if (ifa == NULL)
378				return -ENODEV;
379			cfg->fc_prefsrc = ifa->ifa_local;
380		}
381	}
382
383	addr = sk_extract_addr(&rt->rt_gateway);
384	if (rt->rt_gateway.sa_family == AF_INET && addr) {
385		cfg->fc_gw = addr;
386		if (rt->rt_flags & RTF_GATEWAY &&
387		    inet_addr_type(addr) == RTN_UNICAST)
388			cfg->fc_scope = RT_SCOPE_UNIVERSE;
389	}
390
391	if (cmd == SIOCDELRT)
392		return 0;
393
394	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
395		return -EINVAL;
396
397	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
398		cfg->fc_scope = RT_SCOPE_LINK;
399
400	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
401		struct nlattr *mx;
402		int len = 0;
403
404		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
405		if (mx == NULL)
406			return -ENOMEM;
407
408		if (rt->rt_flags & RTF_MTU)
409			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
410
411		if (rt->rt_flags & RTF_WINDOW)
412			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
413
414		if (rt->rt_flags & RTF_IRTT)
415			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
416
417		cfg->fc_mx = mx;
418		cfg->fc_mx_len = len;
419	}
420
421	return 0;
422}
423
424/*
425 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
426 */
427
428int ip_rt_ioctl(unsigned int cmd, void __user *arg)
429{
430	struct fib_config cfg;
431	struct rtentry rt;
432	int err;
433
434	switch (cmd) {
435	case SIOCADDRT:		/* Add a route */
436	case SIOCDELRT:		/* Delete a route */
437		if (!capable(CAP_NET_ADMIN))
438			return -EPERM;
439
440		if (copy_from_user(&rt, arg, sizeof(rt)))
441			return -EFAULT;
442
443		rtnl_lock();
444		err = rtentry_to_fib_config(cmd, &rt, &cfg);
445		if (err == 0) {
446			struct fib_table *tb;
447
448			if (cmd == SIOCDELRT) {
449				tb = fib_get_table(cfg.fc_table);
450				if (tb)
451					err = tb->tb_delete(tb, &cfg);
452				else
453					err = -ESRCH;
454			} else {
455				tb = fib_new_table(cfg.fc_table);
456				if (tb)
457					err = tb->tb_insert(tb, &cfg);
458				else
459					err = -ENOBUFS;
460			}
461
462			/* allocated by rtentry_to_fib_config() */
463			kfree(cfg.fc_mx);
464		}
465		rtnl_unlock();
466		return err;
467	}
468	return -EINVAL;
469}
470
471const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
472	[RTA_DST]		= { .type = NLA_U32 },
473	[RTA_SRC]		= { .type = NLA_U32 },
474	[RTA_IIF]		= { .type = NLA_U32 },
475	[RTA_OIF]		= { .type = NLA_U32 },
476	[RTA_GATEWAY]		= { .type = NLA_U32 },
477	[RTA_PRIORITY]		= { .type = NLA_U32 },
478	[RTA_PREFSRC]		= { .type = NLA_U32 },
479	[RTA_METRICS]		= { .type = NLA_NESTED },
480	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
481	[RTA_PROTOINFO]		= { .type = NLA_U32 },
482	[RTA_FLOW]		= { .type = NLA_U32 },
483};
484
485static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
486			     struct fib_config *cfg)
487{
488	struct nlattr *attr;
489	int err, remaining;
490	struct rtmsg *rtm;
491
492	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
493	if (err < 0)
494		goto errout;
495
496	memset(cfg, 0, sizeof(*cfg));
497
498	rtm = nlmsg_data(nlh);
499	cfg->fc_dst_len = rtm->rtm_dst_len;
500	cfg->fc_tos = rtm->rtm_tos;
501	cfg->fc_table = rtm->rtm_table;
502	cfg->fc_protocol = rtm->rtm_protocol;
503	cfg->fc_scope = rtm->rtm_scope;
504	cfg->fc_type = rtm->rtm_type;
505	cfg->fc_flags = rtm->rtm_flags;
506	cfg->fc_nlflags = nlh->nlmsg_flags;
507
508	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
509	cfg->fc_nlinfo.nlh = nlh;
510
511	if (cfg->fc_type > RTN_MAX) {
512		err = -EINVAL;
513		goto errout;
514	}
515
516	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
517		switch (nla_type(attr)) {
518		case RTA_DST:
519			cfg->fc_dst = nla_get_be32(attr);
520			break;
521		case RTA_OIF:
522			cfg->fc_oif = nla_get_u32(attr);
523			break;
524		case RTA_GATEWAY:
525			cfg->fc_gw = nla_get_be32(attr);
526			break;
527		case RTA_PRIORITY:
528			cfg->fc_priority = nla_get_u32(attr);
529			break;
530		case RTA_PREFSRC:
531			cfg->fc_prefsrc = nla_get_be32(attr);
532			break;
533		case RTA_METRICS:
534			cfg->fc_mx = nla_data(attr);
535			cfg->fc_mx_len = nla_len(attr);
536			break;
537		case RTA_MULTIPATH:
538			cfg->fc_mp = nla_data(attr);
539			cfg->fc_mp_len = nla_len(attr);
540			break;
541		case RTA_FLOW:
542			cfg->fc_flow = nla_get_u32(attr);
543			break;
544		case RTA_TABLE:
545			cfg->fc_table = nla_get_u32(attr);
546			break;
547		}
548	}
549
550	return 0;
551errout:
552	return err;
553}
554
555static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
556{
557	struct net *net = skb->sk->sk_net;
558	struct fib_config cfg;
559	struct fib_table *tb;
560	int err;
561
562	if (net != &init_net)
563		return -EINVAL;
564
565	err = rtm_to_fib_config(skb, nlh, &cfg);
566	if (err < 0)
567		goto errout;
568
569	tb = fib_get_table(cfg.fc_table);
570	if (tb == NULL) {
571		err = -ESRCH;
572		goto errout;
573	}
574
575	err = tb->tb_delete(tb, &cfg);
576errout:
577	return err;
578}
579
580static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
581{
582	struct net *net = skb->sk->sk_net;
583	struct fib_config cfg;
584	struct fib_table *tb;
585	int err;
586
587	if (net != &init_net)
588		return -EINVAL;
589
590	err = rtm_to_fib_config(skb, nlh, &cfg);
591	if (err < 0)
592		goto errout;
593
594	tb = fib_new_table(cfg.fc_table);
595	if (tb == NULL) {
596		err = -ENOBUFS;
597		goto errout;
598	}
599
600	err = tb->tb_insert(tb, &cfg);
601errout:
602	return err;
603}
604
605static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
606{
607	struct net *net = skb->sk->sk_net;
608	unsigned int h, s_h;
609	unsigned int e = 0, s_e;
610	struct fib_table *tb;
611	struct hlist_node *node;
612	int dumped = 0;
613
614	if (net != &init_net)
615		return 0;
616
617	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
618	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
619		return ip_rt_dump(skb, cb);
620
621	s_h = cb->args[0];
622	s_e = cb->args[1];
623
624	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
625		e = 0;
626		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
627			if (e < s_e)
628				goto next;
629			if (dumped)
630				memset(&cb->args[2], 0, sizeof(cb->args) -
631						 2 * sizeof(cb->args[0]));
632			if (tb->tb_dump(tb, skb, cb) < 0)
633				goto out;
634			dumped = 1;
635next:
636			e++;
637		}
638	}
639out:
640	cb->args[1] = e;
641	cb->args[0] = h;
642
643	return skb->len;
644}
645
646/* Prepare and feed intra-kernel routing request.
647   Really, it should be netlink message, but :-( netlink
648   can be not configured, so that we feed it directly
649   to fib engine. It is legal, because all events occur
650   only when netlink is already locked.
651 */
652
653static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
654{
655	struct fib_table *tb;
656	struct fib_config cfg = {
657		.fc_protocol = RTPROT_KERNEL,
658		.fc_type = type,
659		.fc_dst = dst,
660		.fc_dst_len = dst_len,
661		.fc_prefsrc = ifa->ifa_local,
662		.fc_oif = ifa->ifa_dev->dev->ifindex,
663		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
664	};
665
666	if (type == RTN_UNICAST)
667		tb = fib_new_table(RT_TABLE_MAIN);
668	else
669		tb = fib_new_table(RT_TABLE_LOCAL);
670
671	if (tb == NULL)
672		return;
673
674	cfg.fc_table = tb->tb_id;
675
676	if (type != RTN_LOCAL)
677		cfg.fc_scope = RT_SCOPE_LINK;
678	else
679		cfg.fc_scope = RT_SCOPE_HOST;
680
681	if (cmd == RTM_NEWROUTE)
682		tb->tb_insert(tb, &cfg);
683	else
684		tb->tb_delete(tb, &cfg);
685}
686
687void fib_add_ifaddr(struct in_ifaddr *ifa)
688{
689	struct in_device *in_dev = ifa->ifa_dev;
690	struct net_device *dev = in_dev->dev;
691	struct in_ifaddr *prim = ifa;
692	__be32 mask = ifa->ifa_mask;
693	__be32 addr = ifa->ifa_local;
694	__be32 prefix = ifa->ifa_address&mask;
695
696	if (ifa->ifa_flags&IFA_F_SECONDARY) {
697		prim = inet_ifa_byprefix(in_dev, prefix, mask);
698		if (prim == NULL) {
699			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
700			return;
701		}
702	}
703
704	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
705
706	if (!(dev->flags&IFF_UP))
707		return;
708
709	/* Add broadcast address, if it is explicitly assigned. */
710	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
711		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
712
713	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
714	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
715		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
716			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
717
718		/* Add network specific broadcasts, when it takes a sense */
719		if (ifa->ifa_prefixlen < 31) {
720			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
721			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
722		}
723	}
724}
725
726static void fib_del_ifaddr(struct in_ifaddr *ifa)
727{
728	struct in_device *in_dev = ifa->ifa_dev;
729	struct net_device *dev = in_dev->dev;
730	struct in_ifaddr *ifa1;
731	struct in_ifaddr *prim = ifa;
732	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
733	__be32 any = ifa->ifa_address&ifa->ifa_mask;
734#define LOCAL_OK	1
735#define BRD_OK		2
736#define BRD0_OK		4
737#define BRD1_OK		8
738	unsigned ok = 0;
739
740	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
741		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
742			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
743	else {
744		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
745		if (prim == NULL) {
746			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
747			return;
748		}
749	}
750
751	/* Deletion is more complicated than add.
752	   We should take care of not to delete too much :-)
753
754	   Scan address list to be sure that addresses are really gone.
755	 */
756
757	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
758		if (ifa->ifa_local == ifa1->ifa_local)
759			ok |= LOCAL_OK;
760		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
761			ok |= BRD_OK;
762		if (brd == ifa1->ifa_broadcast)
763			ok |= BRD1_OK;
764		if (any == ifa1->ifa_broadcast)
765			ok |= BRD0_OK;
766	}
767
768	if (!(ok&BRD_OK))
769		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
770	if (!(ok&BRD1_OK))
771		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
772	if (!(ok&BRD0_OK))
773		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
774	if (!(ok&LOCAL_OK)) {
775		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
776
777		/* Check, that this local address finally disappeared. */
778		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
779			/* And the last, but not the least thing.
780			   We must flush stray FIB entries.
781
782			   First of all, we scan fib_info list searching
783			   for stray nexthop entries, then ignite fib_flush.
784			*/
785			if (fib_sync_down(ifa->ifa_local, NULL, 0))
786				fib_flush();
787		}
788	}
789#undef LOCAL_OK
790#undef BRD_OK
791#undef BRD0_OK
792#undef BRD1_OK
793}
794
795static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
796{
797
798	struct fib_result       res;
799	struct flowi            fl = { .mark = frn->fl_mark,
800				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
801							    .tos = frn->fl_tos,
802							    .scope = frn->fl_scope } } };
803
804#ifdef CONFIG_IP_MULTIPLE_TABLES
805	res.r = NULL;
806#endif
807
808	frn->err = -ENOENT;
809	if (tb) {
810		local_bh_disable();
811
812		frn->tb_id = tb->tb_id;
813		frn->err = tb->tb_lookup(tb, &fl, &res);
814
815		if (!frn->err) {
816			frn->prefixlen = res.prefixlen;
817			frn->nh_sel = res.nh_sel;
818			frn->type = res.type;
819			frn->scope = res.scope;
820			fib_res_put(&res);
821		}
822		local_bh_enable();
823	}
824}
825
826static void nl_fib_input(struct sk_buff *skb)
827{
828	struct fib_result_nl *frn;
829	struct nlmsghdr *nlh;
830	struct fib_table *tb;
831	u32 pid;
832
833	nlh = nlmsg_hdr(skb);
834	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
835	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
836		return;
837
838	skb = skb_clone(skb, GFP_KERNEL);
839	if (skb == NULL)
840		return;
841	nlh = nlmsg_hdr(skb);
842
843	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
844	tb = fib_get_table(frn->tb_id_in);
845
846	nl_fib_lookup(frn, tb);
847
848	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
849	NETLINK_CB(skb).pid = 0;         /* from kernel */
850	NETLINK_CB(skb).dst_group = 0;  /* unicast */
851	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
852}
853
854static void nl_fib_lookup_init(void)
855{
856	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
857				      nl_fib_input, NULL, THIS_MODULE);
858}
859
/*
 * Take down IPv4 routing state for a device: sync the FIB (flushing the
 * tables when fib_sync_down() reports a non-zero result), flush the
 * routing cache and call arp_ifdown().  force is handed through to
 * fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
867
868static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
869{
870	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
871
872	switch (event) {
873	case NETDEV_UP:
874		fib_add_ifaddr(ifa);
875#ifdef CONFIG_IP_ROUTE_MULTIPATH
876		fib_sync_up(ifa->ifa_dev->dev);
877#endif
878		rt_cache_flush(-1);
879		break;
880	case NETDEV_DOWN:
881		fib_del_ifaddr(ifa);
882		if (ifa->ifa_dev->ifa_list == NULL) {
883			/* Last address was deleted from this interface.
884			   Disable IP.
885			 */
886			fib_disable_ip(ifa->ifa_dev->dev, 1);
887		} else {
888			rt_cache_flush(-1);
889		}
890		break;
891	}
892	return NOTIFY_DONE;
893}
894
895static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
896{
897	struct net_device *dev = ptr;
898	struct in_device *in_dev = __in_dev_get_rtnl(dev);
899
900	if (dev->nd_net != &init_net)
901		return NOTIFY_DONE;
902
903	if (event == NETDEV_UNREGISTER) {
904		fib_disable_ip(dev, 2);
905		return NOTIFY_DONE;
906	}
907
908	if (!in_dev)
909		return NOTIFY_DONE;
910
911	switch (event) {
912	case NETDEV_UP:
913		for_ifa(in_dev) {
914			fib_add_ifaddr(ifa);
915		} endfor_ifa(in_dev);
916#ifdef CONFIG_IP_ROUTE_MULTIPATH
917		fib_sync_up(dev);
918#endif
919		rt_cache_flush(-1);
920		break;
921	case NETDEV_DOWN:
922		fib_disable_ip(dev, 0);
923		break;
924	case NETDEV_CHANGEMTU:
925	case NETDEV_CHANGE:
926		rt_cache_flush(0);
927		break;
928	}
929	return NOTIFY_DONE;
930}
931
932static struct notifier_block fib_inetaddr_notifier = {
933	.notifier_call =fib_inetaddr_event,
934};
935
936static struct notifier_block fib_netdev_notifier = {
937	.notifier_call =fib_netdev_event,
938};
939
940void __init ip_fib_init(void)
941{
942	unsigned int i;
943
944	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
945		INIT_HLIST_HEAD(&fib_table_hash[i]);
946
947	fib4_rules_init();
948
949	register_netdevice_notifier(&fib_netdev_notifier);
950	register_inetaddr_notifier(&fib_inetaddr_notifier);
951	nl_fib_lookup_init();
952
953	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
954	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
955	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
956}
957
/* Symbols exported from this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
961