fib_frontend.c revision a1e8733e557bb390e13aa00ef044a6022c8d0bb2
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <asm/uaccess.h>
21#include <asm/system.h>
22#include <linux/bitops.h>
23#include <linux/capability.h>
24#include <linux/types.h>
25#include <linux/kernel.h>
26#include <linux/sched.h>
27#include <linux/mm.h>
28#include <linux/string.h>
29#include <linux/socket.h>
30#include <linux/sockios.h>
31#include <linux/errno.h>
32#include <linux/in.h>
33#include <linux/inet.h>
34#include <linux/inetdevice.h>
35#include <linux/netdevice.h>
36#include <linux/if_arp.h>
37#include <linux/skbuff.h>
38#include <linux/netlink.h>
39#include <linux/init.h>
40
41#include <net/ip.h>
42#include <net/protocol.h>
43#include <net/route.h>
44#include <net/tcp.h>
45#include <net/sock.h>
46#include <net/icmp.h>
47#include <net/arp.h>
48#include <net/ip_fib.h>
49
50#define FFprint(a...) printk(KERN_DEBUG a)
51
52#ifndef CONFIG_IP_MULTIPLE_TABLES
53
/* Lowest table id walked by inet_dump_fib() in this configuration. */
#define RT_TABLE_MIN RT_TABLE_MAIN

/*
 * Built without CONFIG_IP_MULTIPLE_TABLES: the two table pointers below
 * are assigned by ip_fib_init().
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;
58
59#else
60
61#define RT_TABLE_MIN 1
62
63struct fib_table *fib_tables[RT_TABLE_MAX+1];
64
65struct fib_table *__fib_new_table(int id)
66{
67	struct fib_table *tb;
68
69	tb = fib_hash_init(id);
70	if (!tb)
71		return NULL;
72	fib_tables[id] = tb;
73	return tb;
74}
75
76
77#endif /* CONFIG_IP_MULTIPLE_TABLES */
78
79
80static void fib_flush(void)
81{
82	int flushed = 0;
83#ifdef CONFIG_IP_MULTIPLE_TABLES
84	struct fib_table *tb;
85	int id;
86
87	for (id = RT_TABLE_MAX; id>0; id--) {
88		if ((tb = fib_get_table(id))==NULL)
89			continue;
90		flushed += tb->tb_flush(tb);
91	}
92#else /* CONFIG_IP_MULTIPLE_TABLES */
93	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
94	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
95#endif /* CONFIG_IP_MULTIPLE_TABLES */
96
97	if (flushed)
98		rt_cache_flush(-1);
99}
100
101/*
102 *	Find the first device with a given source address.
103 */
104
105struct net_device * ip_dev_find(u32 addr)
106{
107	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
108	struct fib_result res;
109	struct net_device *dev = NULL;
110
111#ifdef CONFIG_IP_MULTIPLE_TABLES
112	res.r = NULL;
113#endif
114
115	if (!ip_fib_local_table ||
116	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
117		return NULL;
118	if (res.type != RTN_LOCAL)
119		goto out;
120	dev = FIB_RES_DEV(res);
121
122	if (dev)
123		dev_hold(dev);
124out:
125	fib_res_put(&res);
126	return dev;
127}
128
129unsigned inet_addr_type(u32 addr)
130{
131	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
132	struct fib_result	res;
133	unsigned ret = RTN_BROADCAST;
134
135	if (ZERONET(addr) || BADCLASS(addr))
136		return RTN_BROADCAST;
137	if (MULTICAST(addr))
138		return RTN_MULTICAST;
139
140#ifdef CONFIG_IP_MULTIPLE_TABLES
141	res.r = NULL;
142#endif
143
144	if (ip_fib_local_table) {
145		ret = RTN_UNICAST;
146		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
147						   &fl, &res)) {
148			ret = res.type;
149			fib_res_put(&res);
150		}
151	}
152	return ret;
153}
154
155/* Given (packet source, input interface) and optional (dst, oif, tos):
156   - (main) check, that source is valid i.e. not broadcast or our local
157     address.
158   - figure out what "logical" interface this packet arrived
159     and calculate "specific destination" address.
160   - check, that packet arrived from expected physical interface.
161 */
162
163int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
164			struct net_device *dev, u32 *spec_dst, u32 *itag)
165{
166	struct in_device *in_dev;
167	struct flowi fl = { .nl_u = { .ip4_u =
168				      { .daddr = src,
169					.saddr = dst,
170					.tos = tos } },
171			    .iif = oif };
172	struct fib_result res;
173	int no_addr, rpf;
174	int ret;
175
176	no_addr = rpf = 0;
177	rcu_read_lock();
178	in_dev = __in_dev_get_rcu(dev);
179	if (in_dev) {
180		no_addr = in_dev->ifa_list == NULL;
181		rpf = IN_DEV_RPFILTER(in_dev);
182	}
183	rcu_read_unlock();
184
185	if (in_dev == NULL)
186		goto e_inval;
187
188	if (fib_lookup(&fl, &res))
189		goto last_resort;
190	if (res.type != RTN_UNICAST)
191		goto e_inval_res;
192	*spec_dst = FIB_RES_PREFSRC(res);
193	fib_combine_itag(itag, &res);
194#ifdef CONFIG_IP_ROUTE_MULTIPATH
195	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
196#else
197	if (FIB_RES_DEV(res) == dev)
198#endif
199	{
200		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
201		fib_res_put(&res);
202		return ret;
203	}
204	fib_res_put(&res);
205	if (no_addr)
206		goto last_resort;
207	if (rpf)
208		goto e_inval;
209	fl.oif = dev->ifindex;
210
211	ret = 0;
212	if (fib_lookup(&fl, &res) == 0) {
213		if (res.type == RTN_UNICAST) {
214			*spec_dst = FIB_RES_PREFSRC(res);
215			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
216		}
217		fib_res_put(&res);
218	}
219	return ret;
220
221last_resort:
222	if (rpf)
223		goto e_inval;
224	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
225	*itag = 0;
226	return 0;
227
228e_inval_res:
229	fib_res_put(&res);
230e_inval:
231	return -EINVAL;
232}
233
234#ifndef CONFIG_IP_NOSIOCRT
235
236/*
237 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
238 */
239
240int ip_rt_ioctl(unsigned int cmd, void __user *arg)
241{
242	int err;
243	struct kern_rta rta;
244	struct rtentry  r;
245	struct {
246		struct nlmsghdr nlh;
247		struct rtmsg	rtm;
248	} req;
249
250	switch (cmd) {
251	case SIOCADDRT:		/* Add a route */
252	case SIOCDELRT:		/* Delete a route */
253		if (!capable(CAP_NET_ADMIN))
254			return -EPERM;
255		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
256			return -EFAULT;
257		rtnl_lock();
258		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
259		if (err == 0) {
260			if (cmd == SIOCDELRT) {
261				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
262				err = -ESRCH;
263				if (tb)
264					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
265			} else {
266				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
267				err = -ENOBUFS;
268				if (tb)
269					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
270			}
271			kfree(rta.rta_mx);
272		}
273		rtnl_unlock();
274		return err;
275	}
276	return -EINVAL;
277}
278
279#else
280
281int ip_rt_ioctl(unsigned int cmd, void *arg)
282{
283	return -EINVAL;
284}
285
286#endif
287
288static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
289{
290	int i;
291
292	for (i=1; i<=RTA_MAX; i++, rta++) {
293		struct rtattr *attr = *rta;
294		if (attr) {
295			if (RTA_PAYLOAD(attr) < 4)
296				return -EINVAL;
297			if (i != RTA_MULTIPATH && i != RTA_METRICS)
298				*rta = (struct rtattr*)RTA_DATA(attr);
299		}
300	}
301	return 0;
302}
303
304int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
305{
306	struct fib_table * tb;
307	struct rtattr **rta = arg;
308	struct rtmsg *r = NLMSG_DATA(nlh);
309
310	if (inet_check_attr(r, rta))
311		return -EINVAL;
312
313	tb = fib_get_table(r->rtm_table);
314	if (tb)
315		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
316	return -ESRCH;
317}
318
319int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
320{
321	struct fib_table * tb;
322	struct rtattr **rta = arg;
323	struct rtmsg *r = NLMSG_DATA(nlh);
324
325	if (inet_check_attr(r, rta))
326		return -EINVAL;
327
328	tb = fib_new_table(r->rtm_table);
329	if (tb)
330		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
331	return -ENOBUFS;
332}
333
334int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
335{
336	int t;
337	int s_t;
338	struct fib_table *tb;
339
340	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
341	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
342		return ip_rt_dump(skb, cb);
343
344	s_t = cb->args[0];
345	if (s_t == 0)
346		s_t = cb->args[0] = RT_TABLE_MIN;
347
348	for (t=s_t; t<=RT_TABLE_MAX; t++) {
349		if (t < s_t) continue;
350		if (t > s_t)
351			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
352		if ((tb = fib_get_table(t))==NULL)
353			continue;
354		if (tb->tb_dump(tb, skb, cb) < 0)
355			break;
356	}
357
358	cb->args[0] = t;
359
360	return skb->len;
361}
362
363/* Prepare and feed intra-kernel routing request.
364   Really, it should be netlink message, but :-( netlink
365   can be not configured, so that we feed it directly
366   to fib engine. It is legal, because all events occur
367   only when netlink is already locked.
368 */
369
370static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
371{
372	struct fib_table * tb;
373	struct {
374		struct nlmsghdr	nlh;
375		struct rtmsg	rtm;
376	} req;
377	struct kern_rta rta;
378
379	memset(&req.rtm, 0, sizeof(req.rtm));
380	memset(&rta, 0, sizeof(rta));
381
382	if (type == RTN_UNICAST)
383		tb = fib_new_table(RT_TABLE_MAIN);
384	else
385		tb = fib_new_table(RT_TABLE_LOCAL);
386
387	if (tb == NULL)
388		return;
389
390	req.nlh.nlmsg_len = sizeof(req);
391	req.nlh.nlmsg_type = cmd;
392	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
393	req.nlh.nlmsg_pid = 0;
394	req.nlh.nlmsg_seq = 0;
395
396	req.rtm.rtm_dst_len = dst_len;
397	req.rtm.rtm_table = tb->tb_id;
398	req.rtm.rtm_protocol = RTPROT_KERNEL;
399	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
400	req.rtm.rtm_type = type;
401
402	rta.rta_dst = &dst;
403	rta.rta_prefsrc = &ifa->ifa_local;
404	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
405
406	if (cmd == RTM_NEWROUTE)
407		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
408	else
409		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
410}
411
412void fib_add_ifaddr(struct in_ifaddr *ifa)
413{
414	struct in_device *in_dev = ifa->ifa_dev;
415	struct net_device *dev = in_dev->dev;
416	struct in_ifaddr *prim = ifa;
417	u32 mask = ifa->ifa_mask;
418	u32 addr = ifa->ifa_local;
419	u32 prefix = ifa->ifa_address&mask;
420
421	if (ifa->ifa_flags&IFA_F_SECONDARY) {
422		prim = inet_ifa_byprefix(in_dev, prefix, mask);
423		if (prim == NULL) {
424			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
425			return;
426		}
427	}
428
429	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
430
431	if (!(dev->flags&IFF_UP))
432		return;
433
434	/* Add broadcast address, if it is explicitly assigned. */
435	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
436		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
437
438	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
439	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
440		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
441			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
442
443		/* Add network specific broadcasts, when it takes a sense */
444		if (ifa->ifa_prefixlen < 31) {
445			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
446			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
447		}
448	}
449}
450
451static void fib_del_ifaddr(struct in_ifaddr *ifa)
452{
453	struct in_device *in_dev = ifa->ifa_dev;
454	struct net_device *dev = in_dev->dev;
455	struct in_ifaddr *ifa1;
456	struct in_ifaddr *prim = ifa;
457	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
458	u32 any = ifa->ifa_address&ifa->ifa_mask;
459#define LOCAL_OK	1
460#define BRD_OK		2
461#define BRD0_OK		4
462#define BRD1_OK		8
463	unsigned ok = 0;
464
465	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
466		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
467			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
468	else {
469		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
470		if (prim == NULL) {
471			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
472			return;
473		}
474	}
475
476	/* Deletion is more complicated than add.
477	   We should take care of not to delete too much :-)
478
479	   Scan address list to be sure that addresses are really gone.
480	 */
481
482	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
483		if (ifa->ifa_local == ifa1->ifa_local)
484			ok |= LOCAL_OK;
485		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
486			ok |= BRD_OK;
487		if (brd == ifa1->ifa_broadcast)
488			ok |= BRD1_OK;
489		if (any == ifa1->ifa_broadcast)
490			ok |= BRD0_OK;
491	}
492
493	if (!(ok&BRD_OK))
494		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
495	if (!(ok&BRD1_OK))
496		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
497	if (!(ok&BRD0_OK))
498		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
499	if (!(ok&LOCAL_OK)) {
500		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
501
502		/* Check, that this local address finally disappeared. */
503		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
504			/* And the last, but not the least thing.
505			   We must flush stray FIB entries.
506
507			   First of all, we scan fib_info list searching
508			   for stray nexthop entries, then ignite fib_flush.
509			*/
510			if (fib_sync_down(ifa->ifa_local, NULL, 0))
511				fib_flush();
512		}
513	}
514#undef LOCAL_OK
515#undef BRD_OK
516#undef BRD0_OK
517#undef BRD1_OK
518}
519
520static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
521{
522
523	struct fib_result       res;
524	struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
525							    .fwmark = frn->fl_fwmark,
526							    .tos = frn->fl_tos,
527							    .scope = frn->fl_scope } } };
528	if (tb) {
529		local_bh_disable();
530
531		frn->tb_id = tb->tb_id;
532		frn->err = tb->tb_lookup(tb, &fl, &res);
533
534		if (!frn->err) {
535			frn->prefixlen = res.prefixlen;
536			frn->nh_sel = res.nh_sel;
537			frn->type = res.type;
538			frn->scope = res.scope;
539		}
540		local_bh_enable();
541	}
542}
543
544static void nl_fib_input(struct sock *sk, int len)
545{
546	struct sk_buff *skb = NULL;
547        struct nlmsghdr *nlh = NULL;
548	struct fib_result_nl *frn;
549	u32 pid;
550	struct fib_table *tb;
551
552	skb = skb_dequeue(&sk->sk_receive_queue);
553	nlh = (struct nlmsghdr *)skb->data;
554	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
555	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
556		kfree_skb(skb);
557		return;
558	}
559
560	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
561	tb = fib_get_table(frn->tb_id_in);
562
563	nl_fib_lookup(frn, tb);
564
565	pid = nlh->nlmsg_pid;           /*pid of sending process */
566	NETLINK_CB(skb).pid = 0;         /* from kernel */
567	NETLINK_CB(skb).dst_pid = pid;
568	NETLINK_CB(skb).dst_group = 0;  /* unicast */
569	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
570}
571
572static void nl_fib_lookup_init(void)
573{
574      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
575}
576
/*
 * Take @dev out of IPv4 routing: run fib_sync_down() for the device
 * (passing @force through), flush the tables if it returns non-zero,
 * then flush the routing cache and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
584
585static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
586{
587	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
588
589	switch (event) {
590	case NETDEV_UP:
591		fib_add_ifaddr(ifa);
592#ifdef CONFIG_IP_ROUTE_MULTIPATH
593		fib_sync_up(ifa->ifa_dev->dev);
594#endif
595		rt_cache_flush(-1);
596		break;
597	case NETDEV_DOWN:
598		fib_del_ifaddr(ifa);
599		if (ifa->ifa_dev->ifa_list == NULL) {
600			/* Last address was deleted from this interface.
601			   Disable IP.
602			 */
603			fib_disable_ip(ifa->ifa_dev->dev, 1);
604		} else {
605			rt_cache_flush(-1);
606		}
607		break;
608	}
609	return NOTIFY_DONE;
610}
611
612static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
613{
614	struct net_device *dev = ptr;
615	struct in_device *in_dev = __in_dev_get_rtnl(dev);
616
617	if (event == NETDEV_UNREGISTER) {
618		fib_disable_ip(dev, 2);
619		return NOTIFY_DONE;
620	}
621
622	if (!in_dev)
623		return NOTIFY_DONE;
624
625	switch (event) {
626	case NETDEV_UP:
627		for_ifa(in_dev) {
628			fib_add_ifaddr(ifa);
629		} endfor_ifa(in_dev);
630#ifdef CONFIG_IP_ROUTE_MULTIPATH
631		fib_sync_up(dev);
632#endif
633		rt_cache_flush(-1);
634		break;
635	case NETDEV_DOWN:
636		fib_disable_ip(dev, 0);
637		break;
638	case NETDEV_CHANGEMTU:
639	case NETDEV_CHANGE:
640		rt_cache_flush(0);
641		break;
642	}
643	return NOTIFY_DONE;
644}
645
646static struct notifier_block fib_inetaddr_notifier = {
647	.notifier_call =fib_inetaddr_event,
648};
649
650static struct notifier_block fib_netdev_notifier = {
651	.notifier_call =fib_netdev_event,
652};
653
654void __init ip_fib_init(void)
655{
656#ifndef CONFIG_IP_MULTIPLE_TABLES
657	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
658	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
659#else
660	fib_rules_init();
661#endif
662
663	register_netdevice_notifier(&fib_netdev_notifier);
664	register_inetaddr_notifier(&fib_inetaddr_notifier);
665	nl_fib_lookup_init();
666}
667
668EXPORT_SYMBOL(inet_addr_type);
669EXPORT_SYMBOL(ip_dev_find);
670