fib_frontend.c revision 9e762a4a89b302cb3b26a1f9bb33eff459eaeca9
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/mm.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/errno.h>
31#include <linux/in.h>
32#include <linux/inet.h>
33#include <linux/inetdevice.h>
34#include <linux/netdevice.h>
35#include <linux/if_addr.h>
36#include <linux/if_arp.h>
37#include <linux/skbuff.h>
38#include <linux/netlink.h>
39#include <linux/init.h>
40
41#include <net/ip.h>
42#include <net/protocol.h>
43#include <net/route.h>
44#include <net/tcp.h>
45#include <net/sock.h>
46#include <net/icmp.h>
47#include <net/arp.h>
48#include <net/ip_fib.h>
49
/* Debug print helper: forwards its arguments to printk() at KERN_DEBUG. */
#define FFprint(a...) printk(KERN_DEBUG a)

#ifndef CONFIG_IP_MULTIPLE_TABLES

/* Without multiple tables, table dumps start at the main table. */
#define RT_TABLE_MIN RT_TABLE_MAIN

struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

#else

#define RT_TABLE_MIN 1

/* One slot per table id, indexed 0..RT_TABLE_MAX. */
struct fib_table *fib_tables[RT_TABLE_MAX+1];

/*
 *	Create the table with the given id and publish it in fib_tables[].
 *
 *	Returns the new table, or NULL when id does not fit in fib_tables[]
 *	or when fib_hash_init() fails.
 */
struct fib_table *__fib_new_table(u32 id)
{
	struct fib_table *tb;

	/*
	 * id is a full u32 while fib_tables[] only has RT_TABLE_MAX+1
	 * entries; refuse ids that would be stored past the end of the array.
	 */
	if (id > RT_TABLE_MAX)
		return NULL;

	tb = fib_hash_init(id);
	if (!tb)
		return NULL;
	fib_tables[id] = tb;
	return tb;
}


#endif /* CONFIG_IP_MULTIPLE_TABLES */
78
79
80static void fib_flush(void)
81{
82	int flushed = 0;
83#ifdef CONFIG_IP_MULTIPLE_TABLES
84	struct fib_table *tb;
85	u32 id;
86
87	for (id = RT_TABLE_MAX; id>0; id--) {
88		if ((tb = fib_get_table(id))==NULL)
89			continue;
90		flushed += tb->tb_flush(tb);
91	}
92#else /* CONFIG_IP_MULTIPLE_TABLES */
93	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
94	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
95#endif /* CONFIG_IP_MULTIPLE_TABLES */
96
97	if (flushed)
98		rt_cache_flush(-1);
99}
100
101/*
102 *	Find the first device with a given source address.
103 */
104
105struct net_device * ip_dev_find(u32 addr)
106{
107	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
108	struct fib_result res;
109	struct net_device *dev = NULL;
110
111#ifdef CONFIG_IP_MULTIPLE_TABLES
112	res.r = NULL;
113#endif
114
115	if (!ip_fib_local_table ||
116	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
117		return NULL;
118	if (res.type != RTN_LOCAL)
119		goto out;
120	dev = FIB_RES_DEV(res);
121
122	if (dev)
123		dev_hold(dev);
124out:
125	fib_res_put(&res);
126	return dev;
127}
128
129unsigned inet_addr_type(u32 addr)
130{
131	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
132	struct fib_result	res;
133	unsigned ret = RTN_BROADCAST;
134
135	if (ZERONET(addr) || BADCLASS(addr))
136		return RTN_BROADCAST;
137	if (MULTICAST(addr))
138		return RTN_MULTICAST;
139
140#ifdef CONFIG_IP_MULTIPLE_TABLES
141	res.r = NULL;
142#endif
143
144	if (ip_fib_local_table) {
145		ret = RTN_UNICAST;
146		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
147						   &fl, &res)) {
148			ret = res.type;
149			fib_res_put(&res);
150		}
151	}
152	return ret;
153}
154
155/* Given (packet source, input interface) and optional (dst, oif, tos):
156   - (main) check, that source is valid i.e. not broadcast or our local
157     address.
158   - figure out what "logical" interface this packet arrived
159     and calculate "specific destination" address.
160   - check, that packet arrived from expected physical interface.
161 */
162
163int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
164			struct net_device *dev, u32 *spec_dst, u32 *itag)
165{
166	struct in_device *in_dev;
167	struct flowi fl = { .nl_u = { .ip4_u =
168				      { .daddr = src,
169					.saddr = dst,
170					.tos = tos } },
171			    .iif = oif };
172	struct fib_result res;
173	int no_addr, rpf;
174	int ret;
175
176	no_addr = rpf = 0;
177	rcu_read_lock();
178	in_dev = __in_dev_get_rcu(dev);
179	if (in_dev) {
180		no_addr = in_dev->ifa_list == NULL;
181		rpf = IN_DEV_RPFILTER(in_dev);
182	}
183	rcu_read_unlock();
184
185	if (in_dev == NULL)
186		goto e_inval;
187
188	if (fib_lookup(&fl, &res))
189		goto last_resort;
190	if (res.type != RTN_UNICAST)
191		goto e_inval_res;
192	*spec_dst = FIB_RES_PREFSRC(res);
193	fib_combine_itag(itag, &res);
194#ifdef CONFIG_IP_ROUTE_MULTIPATH
195	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
196#else
197	if (FIB_RES_DEV(res) == dev)
198#endif
199	{
200		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
201		fib_res_put(&res);
202		return ret;
203	}
204	fib_res_put(&res);
205	if (no_addr)
206		goto last_resort;
207	if (rpf)
208		goto e_inval;
209	fl.oif = dev->ifindex;
210
211	ret = 0;
212	if (fib_lookup(&fl, &res) == 0) {
213		if (res.type == RTN_UNICAST) {
214			*spec_dst = FIB_RES_PREFSRC(res);
215			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
216		}
217		fib_res_put(&res);
218	}
219	return ret;
220
221last_resort:
222	if (rpf)
223		goto e_inval;
224	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
225	*itag = 0;
226	return 0;
227
228e_inval_res:
229	fib_res_put(&res);
230e_inval:
231	return -EINVAL;
232}
233
234#ifndef CONFIG_IP_NOSIOCRT
235
236/*
237 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
238 */
239
240int ip_rt_ioctl(unsigned int cmd, void __user *arg)
241{
242	int err;
243	struct kern_rta rta;
244	struct rtentry  r;
245	struct {
246		struct nlmsghdr nlh;
247		struct rtmsg	rtm;
248	} req;
249
250	switch (cmd) {
251	case SIOCADDRT:		/* Add a route */
252	case SIOCDELRT:		/* Delete a route */
253		if (!capable(CAP_NET_ADMIN))
254			return -EPERM;
255		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
256			return -EFAULT;
257		rtnl_lock();
258		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
259		if (err == 0) {
260			if (cmd == SIOCDELRT) {
261				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
262				err = -ESRCH;
263				if (tb)
264					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
265			} else {
266				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
267				err = -ENOBUFS;
268				if (tb)
269					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
270			}
271			kfree(rta.rta_mx);
272		}
273		rtnl_unlock();
274		return err;
275	}
276	return -EINVAL;
277}
278
279#else
280
281int ip_rt_ioctl(unsigned int cmd, void *arg)
282{
283	return -EINVAL;
284}
285
286#endif
287
288static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
289{
290	int i;
291
292	for (i=1; i<=RTA_MAX; i++, rta++) {
293		struct rtattr *attr = *rta;
294		if (attr) {
295			if (RTA_PAYLOAD(attr) < 4)
296				return -EINVAL;
297			if (i != RTA_MULTIPATH && i != RTA_METRICS &&
298			    i != RTA_TABLE)
299				*rta = (struct rtattr*)RTA_DATA(attr);
300		}
301	}
302	return 0;
303}
304
305int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
306{
307	struct fib_table * tb;
308	struct rtattr **rta = arg;
309	struct rtmsg *r = NLMSG_DATA(nlh);
310
311	if (inet_check_attr(r, rta))
312		return -EINVAL;
313
314	tb = fib_get_table(rtm_get_table(rta, r->rtm_table));
315	if (tb)
316		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
317	return -ESRCH;
318}
319
320int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
321{
322	struct fib_table * tb;
323	struct rtattr **rta = arg;
324	struct rtmsg *r = NLMSG_DATA(nlh);
325
326	if (inet_check_attr(r, rta))
327		return -EINVAL;
328
329	tb = fib_new_table(rtm_get_table(rta, r->rtm_table));
330	if (tb)
331		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
332	return -ENOBUFS;
333}
334
335int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
336{
337	u32 t;
338	u32 s_t;
339	struct fib_table *tb;
340
341	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
342	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
343		return ip_rt_dump(skb, cb);
344
345	s_t = cb->args[0];
346	if (s_t == 0)
347		s_t = cb->args[0] = RT_TABLE_MIN;
348
349	for (t=s_t; t<=RT_TABLE_MAX; t++) {
350		if (t < s_t) continue;
351		if (t > s_t)
352			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
353		if ((tb = fib_get_table(t))==NULL)
354			continue;
355		if (tb->tb_dump(tb, skb, cb) < 0)
356			break;
357	}
358
359	cb->args[0] = t;
360
361	return skb->len;
362}
363
364/* Prepare and feed intra-kernel routing request.
365   Really, it should be netlink message, but :-( netlink
366   can be not configured, so that we feed it directly
367   to fib engine. It is legal, because all events occur
368   only when netlink is already locked.
369 */
370
371static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
372{
373	struct fib_table * tb;
374	struct {
375		struct nlmsghdr	nlh;
376		struct rtmsg	rtm;
377	} req;
378	struct kern_rta rta;
379
380	memset(&req.rtm, 0, sizeof(req.rtm));
381	memset(&rta, 0, sizeof(rta));
382
383	if (type == RTN_UNICAST)
384		tb = fib_new_table(RT_TABLE_MAIN);
385	else
386		tb = fib_new_table(RT_TABLE_LOCAL);
387
388	if (tb == NULL)
389		return;
390
391	req.nlh.nlmsg_len = sizeof(req);
392	req.nlh.nlmsg_type = cmd;
393	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
394	req.nlh.nlmsg_pid = 0;
395	req.nlh.nlmsg_seq = 0;
396
397	req.rtm.rtm_dst_len = dst_len;
398	req.rtm.rtm_table = tb->tb_id;
399	req.rtm.rtm_protocol = RTPROT_KERNEL;
400	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
401	req.rtm.rtm_type = type;
402
403	rta.rta_dst = &dst;
404	rta.rta_prefsrc = &ifa->ifa_local;
405	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
406
407	if (cmd == RTM_NEWROUTE)
408		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
409	else
410		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
411}
412
413void fib_add_ifaddr(struct in_ifaddr *ifa)
414{
415	struct in_device *in_dev = ifa->ifa_dev;
416	struct net_device *dev = in_dev->dev;
417	struct in_ifaddr *prim = ifa;
418	u32 mask = ifa->ifa_mask;
419	u32 addr = ifa->ifa_local;
420	u32 prefix = ifa->ifa_address&mask;
421
422	if (ifa->ifa_flags&IFA_F_SECONDARY) {
423		prim = inet_ifa_byprefix(in_dev, prefix, mask);
424		if (prim == NULL) {
425			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
426			return;
427		}
428	}
429
430	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
431
432	if (!(dev->flags&IFF_UP))
433		return;
434
435	/* Add broadcast address, if it is explicitly assigned. */
436	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
437		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
438
439	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
440	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
441		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
442			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
443
444		/* Add network specific broadcasts, when it takes a sense */
445		if (ifa->ifa_prefixlen < 31) {
446			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
447			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
448		}
449	}
450}
451
452static void fib_del_ifaddr(struct in_ifaddr *ifa)
453{
454	struct in_device *in_dev = ifa->ifa_dev;
455	struct net_device *dev = in_dev->dev;
456	struct in_ifaddr *ifa1;
457	struct in_ifaddr *prim = ifa;
458	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
459	u32 any = ifa->ifa_address&ifa->ifa_mask;
460#define LOCAL_OK	1
461#define BRD_OK		2
462#define BRD0_OK		4
463#define BRD1_OK		8
464	unsigned ok = 0;
465
466	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
467		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
468			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
469	else {
470		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
471		if (prim == NULL) {
472			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
473			return;
474		}
475	}
476
477	/* Deletion is more complicated than add.
478	   We should take care of not to delete too much :-)
479
480	   Scan address list to be sure that addresses are really gone.
481	 */
482
483	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
484		if (ifa->ifa_local == ifa1->ifa_local)
485			ok |= LOCAL_OK;
486		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
487			ok |= BRD_OK;
488		if (brd == ifa1->ifa_broadcast)
489			ok |= BRD1_OK;
490		if (any == ifa1->ifa_broadcast)
491			ok |= BRD0_OK;
492	}
493
494	if (!(ok&BRD_OK))
495		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
496	if (!(ok&BRD1_OK))
497		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
498	if (!(ok&BRD0_OK))
499		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
500	if (!(ok&LOCAL_OK)) {
501		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
502
503		/* Check, that this local address finally disappeared. */
504		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
505			/* And the last, but not the least thing.
506			   We must flush stray FIB entries.
507
508			   First of all, we scan fib_info list searching
509			   for stray nexthop entries, then ignite fib_flush.
510			*/
511			if (fib_sync_down(ifa->ifa_local, NULL, 0))
512				fib_flush();
513		}
514	}
515#undef LOCAL_OK
516#undef BRD_OK
517#undef BRD0_OK
518#undef BRD1_OK
519}
520
521static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
522{
523
524	struct fib_result       res;
525	struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
526							    .fwmark = frn->fl_fwmark,
527							    .tos = frn->fl_tos,
528							    .scope = frn->fl_scope } } };
529	if (tb) {
530		local_bh_disable();
531
532		frn->tb_id = tb->tb_id;
533		frn->err = tb->tb_lookup(tb, &fl, &res);
534
535		if (!frn->err) {
536			frn->prefixlen = res.prefixlen;
537			frn->nh_sel = res.nh_sel;
538			frn->type = res.type;
539			frn->scope = res.scope;
540		}
541		local_bh_enable();
542	}
543}
544
545static void nl_fib_input(struct sock *sk, int len)
546{
547	struct sk_buff *skb = NULL;
548        struct nlmsghdr *nlh = NULL;
549	struct fib_result_nl *frn;
550	u32 pid;
551	struct fib_table *tb;
552
553	skb = skb_dequeue(&sk->sk_receive_queue);
554	nlh = (struct nlmsghdr *)skb->data;
555	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
556	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
557		kfree_skb(skb);
558		return;
559	}
560
561	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
562	tb = fib_get_table(frn->tb_id_in);
563
564	nl_fib_lookup(frn, tb);
565
566	pid = nlh->nlmsg_pid;           /*pid of sending process */
567	NETLINK_CB(skb).pid = 0;         /* from kernel */
568	NETLINK_CB(skb).dst_pid = pid;
569	NETLINK_CB(skb).dst_group = 0;  /* unicast */
570	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
571}
572
573static void nl_fib_lookup_init(void)
574{
575      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
576}
577
/*
 *	Take a device out of IPv4 routing: sync its FIB entries down (flushing
 *	the tables if fib_sync_down() returns non-zero), flush the routing
 *	cache and call arp_ifdown() for the device.  'force' is passed through
 *	to fib_sync_down() unchanged.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
585
586static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
587{
588	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
589
590	switch (event) {
591	case NETDEV_UP:
592		fib_add_ifaddr(ifa);
593#ifdef CONFIG_IP_ROUTE_MULTIPATH
594		fib_sync_up(ifa->ifa_dev->dev);
595#endif
596		rt_cache_flush(-1);
597		break;
598	case NETDEV_DOWN:
599		fib_del_ifaddr(ifa);
600		if (ifa->ifa_dev->ifa_list == NULL) {
601			/* Last address was deleted from this interface.
602			   Disable IP.
603			 */
604			fib_disable_ip(ifa->ifa_dev->dev, 1);
605		} else {
606			rt_cache_flush(-1);
607		}
608		break;
609	}
610	return NOTIFY_DONE;
611}
612
613static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
614{
615	struct net_device *dev = ptr;
616	struct in_device *in_dev = __in_dev_get_rtnl(dev);
617
618	if (event == NETDEV_UNREGISTER) {
619		fib_disable_ip(dev, 2);
620		return NOTIFY_DONE;
621	}
622
623	if (!in_dev)
624		return NOTIFY_DONE;
625
626	switch (event) {
627	case NETDEV_UP:
628		for_ifa(in_dev) {
629			fib_add_ifaddr(ifa);
630		} endfor_ifa(in_dev);
631#ifdef CONFIG_IP_ROUTE_MULTIPATH
632		fib_sync_up(dev);
633#endif
634		rt_cache_flush(-1);
635		break;
636	case NETDEV_DOWN:
637		fib_disable_ip(dev, 0);
638		break;
639	case NETDEV_CHANGEMTU:
640	case NETDEV_CHANGE:
641		rt_cache_flush(0);
642		break;
643	}
644	return NOTIFY_DONE;
645}
646
647static struct notifier_block fib_inetaddr_notifier = {
648	.notifier_call =fib_inetaddr_event,
649};
650
651static struct notifier_block fib_netdev_notifier = {
652	.notifier_call =fib_netdev_event,
653};
654
655void __init ip_fib_init(void)
656{
657#ifndef CONFIG_IP_MULTIPLE_TABLES
658	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
659	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
660#else
661	fib4_rules_init();
662#endif
663
664	register_netdevice_notifier(&fib_netdev_notifier);
665	register_inetaddr_notifier(&fib_inetaddr_notifier);
666	nl_fib_lookup_init();
667}
668
669EXPORT_SYMBOL(inet_addr_type);
670EXPORT_SYMBOL(ip_dev_find);
671