fib_frontend.c revision db080529798b497eb5a37b92a25e966be5a7dd5d
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <asm/uaccess.h>
21#include <asm/system.h>
22#include <linux/bitops.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/mm.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/errno.h>
31#include <linux/in.h>
32#include <linux/inet.h>
33#include <linux/netdevice.h>
34#include <linux/if_arp.h>
35#include <linux/skbuff.h>
36#include <linux/netlink.h>
37#include <linux/init.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/icmp.h>
45#include <net/arp.h>
46#include <net/ip_fib.h>
47
48#define FFprint(a...) printk(KERN_DEBUG a)
49
#ifdef CONFIG_IP_MULTIPLE_TABLES

/* Lowest table id visited by the table walkers in this file. */
#define RT_TABLE_MIN 1

/* Table pointers, indexed directly by table id. */
struct fib_table *fib_tables[RT_TABLE_MAX+1];

/*
 * Create table @id with fib_hash_init() and store it in fib_tables[id].
 *
 * Returns the new table, or NULL when fib_hash_init() fails; in that
 * case fib_tables[] is left untouched.
 */
struct fib_table *__fib_new_table(int id)
{
	struct fib_table *table = fib_hash_init(id);

	if (table != NULL)
		fib_tables[id] = table;
	return table;
}

#else /* !CONFIG_IP_MULTIPLE_TABLES */

#define RT_TABLE_MIN RT_TABLE_MAIN

/* Only two tables exist in this configuration. */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

#endif /* CONFIG_IP_MULTIPLE_TABLES */
76
77
78static void fib_flush(void)
79{
80	int flushed = 0;
81#ifdef CONFIG_IP_MULTIPLE_TABLES
82	struct fib_table *tb;
83	int id;
84
85	for (id = RT_TABLE_MAX; id>0; id--) {
86		if ((tb = fib_get_table(id))==NULL)
87			continue;
88		flushed += tb->tb_flush(tb);
89	}
90#else /* CONFIG_IP_MULTIPLE_TABLES */
91	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
92	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
93#endif /* CONFIG_IP_MULTIPLE_TABLES */
94
95	if (flushed)
96		rt_cache_flush(-1);
97}
98
99/*
100 *	Find the first device with a given source address.
101 */
102
103struct net_device * ip_dev_find(u32 addr)
104{
105	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
106	struct fib_result res;
107	struct net_device *dev = NULL;
108
109#ifdef CONFIG_IP_MULTIPLE_TABLES
110	res.r = NULL;
111#endif
112
113	if (!ip_fib_local_table ||
114	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
115		return NULL;
116	if (res.type != RTN_LOCAL)
117		goto out;
118	dev = FIB_RES_DEV(res);
119
120	if (dev)
121		dev_hold(dev);
122out:
123	fib_res_put(&res);
124	return dev;
125}
126
127unsigned inet_addr_type(u32 addr)
128{
129	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
130	struct fib_result	res;
131	unsigned ret = RTN_BROADCAST;
132
133	if (ZERONET(addr) || BADCLASS(addr))
134		return RTN_BROADCAST;
135	if (MULTICAST(addr))
136		return RTN_MULTICAST;
137
138#ifdef CONFIG_IP_MULTIPLE_TABLES
139	res.r = NULL;
140#endif
141
142	if (ip_fib_local_table) {
143		ret = RTN_UNICAST;
144		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
145						   &fl, &res)) {
146			ret = res.type;
147			fib_res_put(&res);
148		}
149	}
150	return ret;
151}
152
153/* Given (packet source, input interface) and optional (dst, oif, tos):
154   - (main) check, that source is valid i.e. not broadcast or our local
155     address.
156   - figure out what "logical" interface this packet arrived
157     and calculate "specific destination" address.
158   - check, that packet arrived from expected physical interface.
159 */
160
161int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
162			struct net_device *dev, u32 *spec_dst, u32 *itag)
163{
164	struct in_device *in_dev;
165	struct flowi fl = { .nl_u = { .ip4_u =
166				      { .daddr = src,
167					.saddr = dst,
168					.tos = tos } },
169			    .iif = oif };
170	struct fib_result res;
171	int no_addr, rpf;
172	int ret;
173
174	no_addr = rpf = 0;
175	rcu_read_lock();
176	in_dev = __in_dev_get(dev);
177	if (in_dev) {
178		no_addr = in_dev->ifa_list == NULL;
179		rpf = IN_DEV_RPFILTER(in_dev);
180	}
181	rcu_read_unlock();
182
183	if (in_dev == NULL)
184		goto e_inval;
185
186	if (fib_lookup(&fl, &res))
187		goto last_resort;
188	if (res.type != RTN_UNICAST)
189		goto e_inval_res;
190	*spec_dst = FIB_RES_PREFSRC(res);
191	fib_combine_itag(itag, &res);
192#ifdef CONFIG_IP_ROUTE_MULTIPATH
193	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
194#else
195	if (FIB_RES_DEV(res) == dev)
196#endif
197	{
198		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
199		fib_res_put(&res);
200		return ret;
201	}
202	fib_res_put(&res);
203	if (no_addr)
204		goto last_resort;
205	if (rpf)
206		goto e_inval;
207	fl.oif = dev->ifindex;
208
209	ret = 0;
210	if (fib_lookup(&fl, &res) == 0) {
211		if (res.type == RTN_UNICAST) {
212			*spec_dst = FIB_RES_PREFSRC(res);
213			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
214		}
215		fib_res_put(&res);
216	}
217	return ret;
218
219last_resort:
220	if (rpf)
221		goto e_inval;
222	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
223	*itag = 0;
224	return 0;
225
226e_inval_res:
227	fib_res_put(&res);
228e_inval:
229	return -EINVAL;
230}
231
232#ifndef CONFIG_IP_NOSIOCRT
233
234/*
235 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
236 */
237
238int ip_rt_ioctl(unsigned int cmd, void __user *arg)
239{
240	int err;
241	struct kern_rta rta;
242	struct rtentry  r;
243	struct {
244		struct nlmsghdr nlh;
245		struct rtmsg	rtm;
246	} req;
247
248	switch (cmd) {
249	case SIOCADDRT:		/* Add a route */
250	case SIOCDELRT:		/* Delete a route */
251		if (!capable(CAP_NET_ADMIN))
252			return -EPERM;
253		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
254			return -EFAULT;
255		rtnl_lock();
256		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
257		if (err == 0) {
258			if (cmd == SIOCDELRT) {
259				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
260				err = -ESRCH;
261				if (tb)
262					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
263			} else {
264				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
265				err = -ENOBUFS;
266				if (tb)
267					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
268			}
269			if (rta.rta_mx)
270				kfree(rta.rta_mx);
271		}
272		rtnl_unlock();
273		return err;
274	}
275	return -EINVAL;
276}
277
278#else
279
/*
 * Stub used when the routing ioctls are compiled out
 * (CONFIG_IP_NOSIOCRT): every command is rejected with -EINVAL and
 * neither argument is examined.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	return -EINVAL;
}
284
285#endif
286
287static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
288{
289	int i;
290
291	for (i=1; i<=RTA_MAX; i++) {
292		struct rtattr *attr = rta[i-1];
293		if (attr) {
294			if (RTA_PAYLOAD(attr) < 4)
295				return -EINVAL;
296			if (i != RTA_MULTIPATH && i != RTA_METRICS)
297				rta[i-1] = (struct rtattr*)RTA_DATA(attr);
298		}
299	}
300	return 0;
301}
302
303int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
304{
305	struct fib_table * tb;
306	struct rtattr **rta = arg;
307	struct rtmsg *r = NLMSG_DATA(nlh);
308
309	if (inet_check_attr(r, rta))
310		return -EINVAL;
311
312	tb = fib_get_table(r->rtm_table);
313	if (tb)
314		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
315	return -ESRCH;
316}
317
318int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
319{
320	struct fib_table * tb;
321	struct rtattr **rta = arg;
322	struct rtmsg *r = NLMSG_DATA(nlh);
323
324	if (inet_check_attr(r, rta))
325		return -EINVAL;
326
327	tb = fib_new_table(r->rtm_table);
328	if (tb)
329		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
330	return -ENOBUFS;
331}
332
333int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
334{
335	int t;
336	int s_t;
337	struct fib_table *tb;
338
339	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
340	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
341		return ip_rt_dump(skb, cb);
342
343	s_t = cb->args[0];
344	if (s_t == 0)
345		s_t = cb->args[0] = RT_TABLE_MIN;
346
347	for (t=s_t; t<=RT_TABLE_MAX; t++) {
348		if (t < s_t) continue;
349		if (t > s_t)
350			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
351		if ((tb = fib_get_table(t))==NULL)
352			continue;
353		if (tb->tb_dump(tb, skb, cb) < 0)
354			break;
355	}
356
357	cb->args[0] = t;
358
359	return skb->len;
360}
361
362/* Prepare and feed intra-kernel routing request.
363   Really, it should be netlink message, but :-( netlink
364   can be not configured, so that we feed it directly
365   to fib engine. It is legal, because all events occur
366   only when netlink is already locked.
367 */
368
369static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
370{
371	struct fib_table * tb;
372	struct {
373		struct nlmsghdr	nlh;
374		struct rtmsg	rtm;
375	} req;
376	struct kern_rta rta;
377
378	memset(&req.rtm, 0, sizeof(req.rtm));
379	memset(&rta, 0, sizeof(rta));
380
381	if (type == RTN_UNICAST)
382		tb = fib_new_table(RT_TABLE_MAIN);
383	else
384		tb = fib_new_table(RT_TABLE_LOCAL);
385
386	if (tb == NULL)
387		return;
388
389	req.nlh.nlmsg_len = sizeof(req);
390	req.nlh.nlmsg_type = cmd;
391	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
392	req.nlh.nlmsg_pid = 0;
393	req.nlh.nlmsg_seq = 0;
394
395	req.rtm.rtm_dst_len = dst_len;
396	req.rtm.rtm_table = tb->tb_id;
397	req.rtm.rtm_protocol = RTPROT_KERNEL;
398	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
399	req.rtm.rtm_type = type;
400
401	rta.rta_dst = &dst;
402	rta.rta_prefsrc = &ifa->ifa_local;
403	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
404
405	if (cmd == RTM_NEWROUTE)
406		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
407	else
408		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
409}
410
411static void fib_add_ifaddr(struct in_ifaddr *ifa)
412{
413	struct in_device *in_dev = ifa->ifa_dev;
414	struct net_device *dev = in_dev->dev;
415	struct in_ifaddr *prim = ifa;
416	u32 mask = ifa->ifa_mask;
417	u32 addr = ifa->ifa_local;
418	u32 prefix = ifa->ifa_address&mask;
419
420	if (ifa->ifa_flags&IFA_F_SECONDARY) {
421		prim = inet_ifa_byprefix(in_dev, prefix, mask);
422		if (prim == NULL) {
423			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
424			return;
425		}
426	}
427
428	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
429
430	if (!(dev->flags&IFF_UP))
431		return;
432
433	/* Add broadcast address, if it is explicitly assigned. */
434	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
435		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
436
437	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
438	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
439		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
440			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
441
442		/* Add network specific broadcasts, when it takes a sense */
443		if (ifa->ifa_prefixlen < 31) {
444			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
445			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
446		}
447	}
448}
449
450static void fib_del_ifaddr(struct in_ifaddr *ifa)
451{
452	struct in_device *in_dev = ifa->ifa_dev;
453	struct net_device *dev = in_dev->dev;
454	struct in_ifaddr *ifa1;
455	struct in_ifaddr *prim = ifa;
456	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
457	u32 any = ifa->ifa_address&ifa->ifa_mask;
458#define LOCAL_OK	1
459#define BRD_OK		2
460#define BRD0_OK		4
461#define BRD1_OK		8
462	unsigned ok = 0;
463
464	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
465		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
466			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
467	else {
468		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
469		if (prim == NULL) {
470			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
471			return;
472		}
473	}
474
475	/* Deletion is more complicated than add.
476	   We should take care of not to delete too much :-)
477
478	   Scan address list to be sure that addresses are really gone.
479	 */
480
481	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
482		if (ifa->ifa_local == ifa1->ifa_local)
483			ok |= LOCAL_OK;
484		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
485			ok |= BRD_OK;
486		if (brd == ifa1->ifa_broadcast)
487			ok |= BRD1_OK;
488		if (any == ifa1->ifa_broadcast)
489			ok |= BRD0_OK;
490	}
491
492	if (!(ok&BRD_OK))
493		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
494	if (!(ok&BRD1_OK))
495		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
496	if (!(ok&BRD0_OK))
497		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
498	if (!(ok&LOCAL_OK)) {
499		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
500
501		/* Check, that this local address finally disappeared. */
502		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
503			/* And the last, but not the least thing.
504			   We must flush stray FIB entries.
505
506			   First of all, we scan fib_info list searching
507			   for stray nexthop entries, then ignite fib_flush.
508			*/
509			if (fib_sync_down(ifa->ifa_local, NULL, 0))
510				fib_flush();
511		}
512	}
513#undef LOCAL_OK
514#undef BRD_OK
515#undef BRD0_OK
516#undef BRD1_OK
517}
518
519static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
520{
521
522	struct fib_result       res;
523	struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
524							    .fwmark = frn->fl_fwmark,
525							    .tos = frn->fl_tos,
526							    .scope = frn->fl_scope } } };
527	if (tb) {
528		local_bh_disable();
529
530		frn->tb_id = tb->tb_id;
531		frn->err = tb->tb_lookup(tb, &fl, &res);
532
533		if (!frn->err) {
534			frn->prefixlen = res.prefixlen;
535			frn->nh_sel = res.nh_sel;
536			frn->type = res.type;
537			frn->scope = res.scope;
538		}
539		local_bh_enable();
540	}
541}
542
543static void nl_fib_input(struct sock *sk, int len)
544{
545	struct sk_buff *skb = NULL;
546        struct nlmsghdr *nlh = NULL;
547	struct fib_result_nl *frn;
548	int err;
549	u32 pid;
550	struct fib_table *tb;
551
552	skb = skb_recv_datagram(sk, 0, 0, &err);
553	nlh = (struct nlmsghdr *)skb->data;
554
555	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
556	tb = fib_get_table(frn->tb_id_in);
557
558	nl_fib_lookup(frn, tb);
559
560	pid = nlh->nlmsg_pid;           /*pid of sending process */
561	NETLINK_CB(skb).pid = 0;         /* from kernel */
562	NETLINK_CB(skb).dst_pid = pid;
563	NETLINK_CB(skb).dst_groups = 0;  /* unicast */
564	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
565}
566
567static void nl_fib_lookup_init(void)
568{
569      netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE);
570}
571
/*
 * Tear down IP routing state for @dev: sync down FIB entries using the
 * device (@force is passed through to fib_sync_down()), flush the
 * tables if that reported anything, then flush the routing cache and
 * call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale != 0)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
579
580static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
581{
582	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
583
584	switch (event) {
585	case NETDEV_UP:
586		fib_add_ifaddr(ifa);
587#ifdef CONFIG_IP_ROUTE_MULTIPATH
588		fib_sync_up(ifa->ifa_dev->dev);
589#endif
590		rt_cache_flush(-1);
591		break;
592	case NETDEV_DOWN:
593		fib_del_ifaddr(ifa);
594		if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
595			/* Last address was deleted from this interface.
596			   Disable IP.
597			 */
598			fib_disable_ip(ifa->ifa_dev->dev, 1);
599		} else {
600			rt_cache_flush(-1);
601		}
602		break;
603	}
604	return NOTIFY_DONE;
605}
606
607static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
608{
609	struct net_device *dev = ptr;
610	struct in_device *in_dev = __in_dev_get(dev);
611
612	if (event == NETDEV_UNREGISTER) {
613		fib_disable_ip(dev, 2);
614		return NOTIFY_DONE;
615	}
616
617	if (!in_dev)
618		return NOTIFY_DONE;
619
620	switch (event) {
621	case NETDEV_UP:
622		for_ifa(in_dev) {
623			fib_add_ifaddr(ifa);
624		} endfor_ifa(in_dev);
625#ifdef CONFIG_IP_ROUTE_MULTIPATH
626		fib_sync_up(dev);
627#endif
628		rt_cache_flush(-1);
629		break;
630	case NETDEV_DOWN:
631		fib_disable_ip(dev, 0);
632		break;
633	case NETDEV_CHANGEMTU:
634	case NETDEV_CHANGE:
635		rt_cache_flush(0);
636		break;
637	}
638	return NOTIFY_DONE;
639}
640
641static struct notifier_block fib_inetaddr_notifier = {
642	.notifier_call =fib_inetaddr_event,
643};
644
645static struct notifier_block fib_netdev_notifier = {
646	.notifier_call =fib_netdev_event,
647};
648
649void __init ip_fib_init(void)
650{
651#ifndef CONFIG_IP_MULTIPLE_TABLES
652	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
653	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
654#else
655	fib_rules_init();
656#endif
657
658	register_netdevice_notifier(&fib_netdev_notifier);
659	register_inetaddr_notifier(&fib_inetaddr_notifier);
660	nl_fib_lookup_init();
661}
662
663EXPORT_SYMBOL(inet_addr_type);
664EXPORT_SYMBOL(ip_rt_ioctl);
665