fib_frontend.c revision 6ab3d5624e172c553004ecc862bfeac16d9d68b7
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/mm.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/errno.h>
31#include <linux/in.h>
32#include <linux/inet.h>
33#include <linux/inetdevice.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/netlink.h>
38#include <linux/init.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48
49#define FFprint(a...) printk(KERN_DEBUG a)
50
51#ifndef CONFIG_IP_MULTIPLE_TABLES
52
53#define RT_TABLE_MIN RT_TABLE_MAIN
54
55struct fib_table *ip_fib_local_table;
56struct fib_table *ip_fib_main_table;
57
58#else
59
60#define RT_TABLE_MIN 1
61
62struct fib_table *fib_tables[RT_TABLE_MAX+1];
63
64struct fib_table *__fib_new_table(int id)
65{
66	struct fib_table *tb;
67
68	tb = fib_hash_init(id);
69	if (!tb)
70		return NULL;
71	fib_tables[id] = tb;
72	return tb;
73}
74
75
76#endif /* CONFIG_IP_MULTIPLE_TABLES */
77
78
79static void fib_flush(void)
80{
81	int flushed = 0;
82#ifdef CONFIG_IP_MULTIPLE_TABLES
83	struct fib_table *tb;
84	int id;
85
86	for (id = RT_TABLE_MAX; id>0; id--) {
87		if ((tb = fib_get_table(id))==NULL)
88			continue;
89		flushed += tb->tb_flush(tb);
90	}
91#else /* CONFIG_IP_MULTIPLE_TABLES */
92	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
93	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
94#endif /* CONFIG_IP_MULTIPLE_TABLES */
95
96	if (flushed)
97		rt_cache_flush(-1);
98}
99
100/*
101 *	Find the first device with a given source address.
102 */
103
104struct net_device * ip_dev_find(u32 addr)
105{
106	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
107	struct fib_result res;
108	struct net_device *dev = NULL;
109
110#ifdef CONFIG_IP_MULTIPLE_TABLES
111	res.r = NULL;
112#endif
113
114	if (!ip_fib_local_table ||
115	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
116		return NULL;
117	if (res.type != RTN_LOCAL)
118		goto out;
119	dev = FIB_RES_DEV(res);
120
121	if (dev)
122		dev_hold(dev);
123out:
124	fib_res_put(&res);
125	return dev;
126}
127
128unsigned inet_addr_type(u32 addr)
129{
130	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
131	struct fib_result	res;
132	unsigned ret = RTN_BROADCAST;
133
134	if (ZERONET(addr) || BADCLASS(addr))
135		return RTN_BROADCAST;
136	if (MULTICAST(addr))
137		return RTN_MULTICAST;
138
139#ifdef CONFIG_IP_MULTIPLE_TABLES
140	res.r = NULL;
141#endif
142
143	if (ip_fib_local_table) {
144		ret = RTN_UNICAST;
145		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
146						   &fl, &res)) {
147			ret = res.type;
148			fib_res_put(&res);
149		}
150	}
151	return ret;
152}
153
154/* Given (packet source, input interface) and optional (dst, oif, tos):
155   - (main) check, that source is valid i.e. not broadcast or our local
156     address.
157   - figure out what "logical" interface this packet arrived
158     and calculate "specific destination" address.
159   - check, that packet arrived from expected physical interface.
160 */
161
162int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
163			struct net_device *dev, u32 *spec_dst, u32 *itag)
164{
165	struct in_device *in_dev;
166	struct flowi fl = { .nl_u = { .ip4_u =
167				      { .daddr = src,
168					.saddr = dst,
169					.tos = tos } },
170			    .iif = oif };
171	struct fib_result res;
172	int no_addr, rpf;
173	int ret;
174
175	no_addr = rpf = 0;
176	rcu_read_lock();
177	in_dev = __in_dev_get_rcu(dev);
178	if (in_dev) {
179		no_addr = in_dev->ifa_list == NULL;
180		rpf = IN_DEV_RPFILTER(in_dev);
181	}
182	rcu_read_unlock();
183
184	if (in_dev == NULL)
185		goto e_inval;
186
187	if (fib_lookup(&fl, &res))
188		goto last_resort;
189	if (res.type != RTN_UNICAST)
190		goto e_inval_res;
191	*spec_dst = FIB_RES_PREFSRC(res);
192	fib_combine_itag(itag, &res);
193#ifdef CONFIG_IP_ROUTE_MULTIPATH
194	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
195#else
196	if (FIB_RES_DEV(res) == dev)
197#endif
198	{
199		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
200		fib_res_put(&res);
201		return ret;
202	}
203	fib_res_put(&res);
204	if (no_addr)
205		goto last_resort;
206	if (rpf)
207		goto e_inval;
208	fl.oif = dev->ifindex;
209
210	ret = 0;
211	if (fib_lookup(&fl, &res) == 0) {
212		if (res.type == RTN_UNICAST) {
213			*spec_dst = FIB_RES_PREFSRC(res);
214			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
215		}
216		fib_res_put(&res);
217	}
218	return ret;
219
220last_resort:
221	if (rpf)
222		goto e_inval;
223	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
224	*itag = 0;
225	return 0;
226
227e_inval_res:
228	fib_res_put(&res);
229e_inval:
230	return -EINVAL;
231}
232
233#ifndef CONFIG_IP_NOSIOCRT
234
235/*
236 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
237 */
238
239int ip_rt_ioctl(unsigned int cmd, void __user *arg)
240{
241	int err;
242	struct kern_rta rta;
243	struct rtentry  r;
244	struct {
245		struct nlmsghdr nlh;
246		struct rtmsg	rtm;
247	} req;
248
249	switch (cmd) {
250	case SIOCADDRT:		/* Add a route */
251	case SIOCDELRT:		/* Delete a route */
252		if (!capable(CAP_NET_ADMIN))
253			return -EPERM;
254		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
255			return -EFAULT;
256		rtnl_lock();
257		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
258		if (err == 0) {
259			if (cmd == SIOCDELRT) {
260				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
261				err = -ESRCH;
262				if (tb)
263					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
264			} else {
265				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
266				err = -ENOBUFS;
267				if (tb)
268					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
269			}
270			kfree(rta.rta_mx);
271		}
272		rtnl_unlock();
273		return err;
274	}
275	return -EINVAL;
276}
277
278#else
279
/*
 * Variant built when CONFIG_IP_NOSIOCRT is defined: routing ioctls are
 * not supported, so every command is rejected with -EINVAL.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	int err = -EINVAL;

	return err;
}
284
285#endif
286
287static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
288{
289	int i;
290
291	for (i=1; i<=RTA_MAX; i++, rta++) {
292		struct rtattr *attr = *rta;
293		if (attr) {
294			if (RTA_PAYLOAD(attr) < 4)
295				return -EINVAL;
296			if (i != RTA_MULTIPATH && i != RTA_METRICS)
297				*rta = (struct rtattr*)RTA_DATA(attr);
298		}
299	}
300	return 0;
301}
302
303int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
304{
305	struct fib_table * tb;
306	struct rtattr **rta = arg;
307	struct rtmsg *r = NLMSG_DATA(nlh);
308
309	if (inet_check_attr(r, rta))
310		return -EINVAL;
311
312	tb = fib_get_table(r->rtm_table);
313	if (tb)
314		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
315	return -ESRCH;
316}
317
318int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
319{
320	struct fib_table * tb;
321	struct rtattr **rta = arg;
322	struct rtmsg *r = NLMSG_DATA(nlh);
323
324	if (inet_check_attr(r, rta))
325		return -EINVAL;
326
327	tb = fib_new_table(r->rtm_table);
328	if (tb)
329		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
330	return -ENOBUFS;
331}
332
333int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
334{
335	int t;
336	int s_t;
337	struct fib_table *tb;
338
339	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
340	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
341		return ip_rt_dump(skb, cb);
342
343	s_t = cb->args[0];
344	if (s_t == 0)
345		s_t = cb->args[0] = RT_TABLE_MIN;
346
347	for (t=s_t; t<=RT_TABLE_MAX; t++) {
348		if (t < s_t) continue;
349		if (t > s_t)
350			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
351		if ((tb = fib_get_table(t))==NULL)
352			continue;
353		if (tb->tb_dump(tb, skb, cb) < 0)
354			break;
355	}
356
357	cb->args[0] = t;
358
359	return skb->len;
360}
361
362/* Prepare and feed intra-kernel routing request.
363   Really, it should be netlink message, but :-( netlink
364   can be not configured, so that we feed it directly
365   to fib engine. It is legal, because all events occur
366   only when netlink is already locked.
367 */
368
369static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
370{
371	struct fib_table * tb;
372	struct {
373		struct nlmsghdr	nlh;
374		struct rtmsg	rtm;
375	} req;
376	struct kern_rta rta;
377
378	memset(&req.rtm, 0, sizeof(req.rtm));
379	memset(&rta, 0, sizeof(rta));
380
381	if (type == RTN_UNICAST)
382		tb = fib_new_table(RT_TABLE_MAIN);
383	else
384		tb = fib_new_table(RT_TABLE_LOCAL);
385
386	if (tb == NULL)
387		return;
388
389	req.nlh.nlmsg_len = sizeof(req);
390	req.nlh.nlmsg_type = cmd;
391	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
392	req.nlh.nlmsg_pid = 0;
393	req.nlh.nlmsg_seq = 0;
394
395	req.rtm.rtm_dst_len = dst_len;
396	req.rtm.rtm_table = tb->tb_id;
397	req.rtm.rtm_protocol = RTPROT_KERNEL;
398	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
399	req.rtm.rtm_type = type;
400
401	rta.rta_dst = &dst;
402	rta.rta_prefsrc = &ifa->ifa_local;
403	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
404
405	if (cmd == RTM_NEWROUTE)
406		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
407	else
408		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
409}
410
411void fib_add_ifaddr(struct in_ifaddr *ifa)
412{
413	struct in_device *in_dev = ifa->ifa_dev;
414	struct net_device *dev = in_dev->dev;
415	struct in_ifaddr *prim = ifa;
416	u32 mask = ifa->ifa_mask;
417	u32 addr = ifa->ifa_local;
418	u32 prefix = ifa->ifa_address&mask;
419
420	if (ifa->ifa_flags&IFA_F_SECONDARY) {
421		prim = inet_ifa_byprefix(in_dev, prefix, mask);
422		if (prim == NULL) {
423			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
424			return;
425		}
426	}
427
428	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
429
430	if (!(dev->flags&IFF_UP))
431		return;
432
433	/* Add broadcast address, if it is explicitly assigned. */
434	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
435		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
436
437	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
438	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
439		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
440			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
441
442		/* Add network specific broadcasts, when it takes a sense */
443		if (ifa->ifa_prefixlen < 31) {
444			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
445			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
446		}
447	}
448}
449
450static void fib_del_ifaddr(struct in_ifaddr *ifa)
451{
452	struct in_device *in_dev = ifa->ifa_dev;
453	struct net_device *dev = in_dev->dev;
454	struct in_ifaddr *ifa1;
455	struct in_ifaddr *prim = ifa;
456	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
457	u32 any = ifa->ifa_address&ifa->ifa_mask;
458#define LOCAL_OK	1
459#define BRD_OK		2
460#define BRD0_OK		4
461#define BRD1_OK		8
462	unsigned ok = 0;
463
464	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
465		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
466			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
467	else {
468		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
469		if (prim == NULL) {
470			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
471			return;
472		}
473	}
474
475	/* Deletion is more complicated than add.
476	   We should take care of not to delete too much :-)
477
478	   Scan address list to be sure that addresses are really gone.
479	 */
480
481	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
482		if (ifa->ifa_local == ifa1->ifa_local)
483			ok |= LOCAL_OK;
484		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
485			ok |= BRD_OK;
486		if (brd == ifa1->ifa_broadcast)
487			ok |= BRD1_OK;
488		if (any == ifa1->ifa_broadcast)
489			ok |= BRD0_OK;
490	}
491
492	if (!(ok&BRD_OK))
493		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
494	if (!(ok&BRD1_OK))
495		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
496	if (!(ok&BRD0_OK))
497		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
498	if (!(ok&LOCAL_OK)) {
499		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
500
501		/* Check, that this local address finally disappeared. */
502		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
503			/* And the last, but not the least thing.
504			   We must flush stray FIB entries.
505
506			   First of all, we scan fib_info list searching
507			   for stray nexthop entries, then ignite fib_flush.
508			*/
509			if (fib_sync_down(ifa->ifa_local, NULL, 0))
510				fib_flush();
511		}
512	}
513#undef LOCAL_OK
514#undef BRD_OK
515#undef BRD0_OK
516#undef BRD1_OK
517}
518
519static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
520{
521
522	struct fib_result       res;
523	struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
524							    .fwmark = frn->fl_fwmark,
525							    .tos = frn->fl_tos,
526							    .scope = frn->fl_scope } } };
527	if (tb) {
528		local_bh_disable();
529
530		frn->tb_id = tb->tb_id;
531		frn->err = tb->tb_lookup(tb, &fl, &res);
532
533		if (!frn->err) {
534			frn->prefixlen = res.prefixlen;
535			frn->nh_sel = res.nh_sel;
536			frn->type = res.type;
537			frn->scope = res.scope;
538		}
539		local_bh_enable();
540	}
541}
542
543static void nl_fib_input(struct sock *sk, int len)
544{
545	struct sk_buff *skb = NULL;
546        struct nlmsghdr *nlh = NULL;
547	struct fib_result_nl *frn;
548	u32 pid;
549	struct fib_table *tb;
550
551	skb = skb_dequeue(&sk->sk_receive_queue);
552	nlh = (struct nlmsghdr *)skb->data;
553	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
554	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
555		kfree_skb(skb);
556		return;
557	}
558
559	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
560	tb = fib_get_table(frn->tb_id_in);
561
562	nl_fib_lookup(frn, tb);
563
564	pid = nlh->nlmsg_pid;           /*pid of sending process */
565	NETLINK_CB(skb).pid = 0;         /* from kernel */
566	NETLINK_CB(skb).dst_pid = pid;
567	NETLINK_CB(skb).dst_group = 0;  /* unicast */
568	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
569}
570
571static void nl_fib_lookup_init(void)
572{
573      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
574}
575
/*
 * Take @dev out of IPv4 routing: run fib_sync_down() for the device
 * (passing @force through), flush the FIB tables if it returns non-zero,
 * then flush the routing cache and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
583
584static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
585{
586	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
587
588	switch (event) {
589	case NETDEV_UP:
590		fib_add_ifaddr(ifa);
591#ifdef CONFIG_IP_ROUTE_MULTIPATH
592		fib_sync_up(ifa->ifa_dev->dev);
593#endif
594		rt_cache_flush(-1);
595		break;
596	case NETDEV_DOWN:
597		fib_del_ifaddr(ifa);
598		if (ifa->ifa_dev->ifa_list == NULL) {
599			/* Last address was deleted from this interface.
600			   Disable IP.
601			 */
602			fib_disable_ip(ifa->ifa_dev->dev, 1);
603		} else {
604			rt_cache_flush(-1);
605		}
606		break;
607	}
608	return NOTIFY_DONE;
609}
610
611static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
612{
613	struct net_device *dev = ptr;
614	struct in_device *in_dev = __in_dev_get_rtnl(dev);
615
616	if (event == NETDEV_UNREGISTER) {
617		fib_disable_ip(dev, 2);
618		return NOTIFY_DONE;
619	}
620
621	if (!in_dev)
622		return NOTIFY_DONE;
623
624	switch (event) {
625	case NETDEV_UP:
626		for_ifa(in_dev) {
627			fib_add_ifaddr(ifa);
628		} endfor_ifa(in_dev);
629#ifdef CONFIG_IP_ROUTE_MULTIPATH
630		fib_sync_up(dev);
631#endif
632		rt_cache_flush(-1);
633		break;
634	case NETDEV_DOWN:
635		fib_disable_ip(dev, 0);
636		break;
637	case NETDEV_CHANGEMTU:
638	case NETDEV_CHANGE:
639		rt_cache_flush(0);
640		break;
641	}
642	return NOTIFY_DONE;
643}
644
645static struct notifier_block fib_inetaddr_notifier = {
646	.notifier_call =fib_inetaddr_event,
647};
648
649static struct notifier_block fib_netdev_notifier = {
650	.notifier_call =fib_netdev_event,
651};
652
653void __init ip_fib_init(void)
654{
655#ifndef CONFIG_IP_MULTIPLE_TABLES
656	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
657	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
658#else
659	fib_rules_init();
660#endif
661
662	register_netdevice_notifier(&fib_netdev_notifier);
663	register_inetaddr_notifier(&fib_inetaddr_notifier);
664	nl_fib_lookup_init();
665}
666
667EXPORT_SYMBOL(inet_addr_type);
668EXPORT_SYMBOL(ip_dev_find);
669