fib_frontend.c revision ea86575eaf99a9262a969309d934318028dbfacb
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <asm/uaccess.h>
21#include <asm/system.h>
22#include <linux/bitops.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/mm.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/errno.h>
31#include <linux/in.h>
32#include <linux/inet.h>
33#include <linux/netdevice.h>
34#include <linux/if_arp.h>
35#include <linux/skbuff.h>
36#include <linux/netlink.h>
37#include <linux/init.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/icmp.h>
45#include <net/arp.h>
46#include <net/ip_fib.h>
47
48#define FFprint(a...) printk(KERN_DEBUG a)
49
50#ifndef CONFIG_IP_MULTIPLE_TABLES
51
52#define RT_TABLE_MIN RT_TABLE_MAIN
53
54struct fib_table *ip_fib_local_table;
55struct fib_table *ip_fib_main_table;
56
57#else
58
59#define RT_TABLE_MIN 1
60
61struct fib_table *fib_tables[RT_TABLE_MAX+1];
62
63struct fib_table *__fib_new_table(int id)
64{
65	struct fib_table *tb;
66
67	tb = fib_hash_init(id);
68	if (!tb)
69		return NULL;
70	fib_tables[id] = tb;
71	return tb;
72}
73
74
75#endif /* CONFIG_IP_MULTIPLE_TABLES */
76
77
78static void fib_flush(void)
79{
80	int flushed = 0;
81#ifdef CONFIG_IP_MULTIPLE_TABLES
82	struct fib_table *tb;
83	int id;
84
85	for (id = RT_TABLE_MAX; id>0; id--) {
86		if ((tb = fib_get_table(id))==NULL)
87			continue;
88		flushed += tb->tb_flush(tb);
89	}
90#else /* CONFIG_IP_MULTIPLE_TABLES */
91	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
92	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
93#endif /* CONFIG_IP_MULTIPLE_TABLES */
94
95	if (flushed)
96		rt_cache_flush(-1);
97}
98
99/*
100 *	Find the first device with a given source address.
101 */
102
103struct net_device * ip_dev_find(u32 addr)
104{
105	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
106	struct fib_result res;
107	struct net_device *dev = NULL;
108
109#ifdef CONFIG_IP_MULTIPLE_TABLES
110	res.r = NULL;
111#endif
112
113	if (!ip_fib_local_table ||
114	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
115		return NULL;
116	if (res.type != RTN_LOCAL)
117		goto out;
118	dev = FIB_RES_DEV(res);
119
120	if (dev)
121		dev_hold(dev);
122out:
123	fib_res_put(&res);
124	return dev;
125}
126
127unsigned inet_addr_type(u32 addr)
128{
129	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
130	struct fib_result	res;
131	unsigned ret = RTN_BROADCAST;
132
133	if (ZERONET(addr) || BADCLASS(addr))
134		return RTN_BROADCAST;
135	if (MULTICAST(addr))
136		return RTN_MULTICAST;
137
138#ifdef CONFIG_IP_MULTIPLE_TABLES
139	res.r = NULL;
140#endif
141
142	if (ip_fib_local_table) {
143		ret = RTN_UNICAST;
144		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
145						   &fl, &res)) {
146			ret = res.type;
147			fib_res_put(&res);
148		}
149	}
150	return ret;
151}
152
/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check that the source is valid, i.e. not a broadcast or one of
     our local addresses.
   - figure out which "logical" interface this packet arrived on
     and calculate the "specific destination" address.
   - check that the packet arrived from the expected physical interface.
 */
160
161int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
162			struct net_device *dev, u32 *spec_dst, u32 *itag)
163{
164	struct in_device *in_dev;
165	struct flowi fl = { .nl_u = { .ip4_u =
166				      { .daddr = src,
167					.saddr = dst,
168					.tos = tos } },
169			    .iif = oif };
170	struct fib_result res;
171	int no_addr, rpf;
172	int ret;
173
174	no_addr = rpf = 0;
175	rcu_read_lock();
176	in_dev = __in_dev_get_rcu(dev);
177	if (in_dev) {
178		no_addr = in_dev->ifa_list == NULL;
179		rpf = IN_DEV_RPFILTER(in_dev);
180	}
181	rcu_read_unlock();
182
183	if (in_dev == NULL)
184		goto e_inval;
185
186	if (fib_lookup(&fl, &res))
187		goto last_resort;
188	if (res.type != RTN_UNICAST)
189		goto e_inval_res;
190	*spec_dst = FIB_RES_PREFSRC(res);
191	fib_combine_itag(itag, &res);
192#ifdef CONFIG_IP_ROUTE_MULTIPATH
193	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
194#else
195	if (FIB_RES_DEV(res) == dev)
196#endif
197	{
198		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
199		fib_res_put(&res);
200		return ret;
201	}
202	fib_res_put(&res);
203	if (no_addr)
204		goto last_resort;
205	if (rpf)
206		goto e_inval;
207	fl.oif = dev->ifindex;
208
209	ret = 0;
210	if (fib_lookup(&fl, &res) == 0) {
211		if (res.type == RTN_UNICAST) {
212			*spec_dst = FIB_RES_PREFSRC(res);
213			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
214		}
215		fib_res_put(&res);
216	}
217	return ret;
218
219last_resort:
220	if (rpf)
221		goto e_inval;
222	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
223	*itag = 0;
224	return 0;
225
226e_inval_res:
227	fib_res_put(&res);
228e_inval:
229	return -EINVAL;
230}
231
232#ifndef CONFIG_IP_NOSIOCRT
233
234/*
235 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
236 */
237
238int ip_rt_ioctl(unsigned int cmd, void __user *arg)
239{
240	int err;
241	struct kern_rta rta;
242	struct rtentry  r;
243	struct {
244		struct nlmsghdr nlh;
245		struct rtmsg	rtm;
246	} req;
247
248	switch (cmd) {
249	case SIOCADDRT:		/* Add a route */
250	case SIOCDELRT:		/* Delete a route */
251		if (!capable(CAP_NET_ADMIN))
252			return -EPERM;
253		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
254			return -EFAULT;
255		rtnl_lock();
256		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
257		if (err == 0) {
258			if (cmd == SIOCDELRT) {
259				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
260				err = -ESRCH;
261				if (tb)
262					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
263			} else {
264				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
265				err = -ENOBUFS;
266				if (tb)
267					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
268			}
269			kfree(rta.rta_mx);
270		}
271		rtnl_unlock();
272		return err;
273	}
274	return -EINVAL;
275}
276
277#else
278
/*
 * Variant built when CONFIG_IP_NOSIOCRT is defined: no routing ioctl is
 * handled, so every command is rejected with -EINVAL.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	return -EINVAL;
}
283
284#endif
285
286static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
287{
288	int i;
289
290	for (i=1; i<=RTA_MAX; i++) {
291		struct rtattr *attr = rta[i-1];
292		if (attr) {
293			if (RTA_PAYLOAD(attr) < 4)
294				return -EINVAL;
295			if (i != RTA_MULTIPATH && i != RTA_METRICS)
296				rta[i-1] = (struct rtattr*)RTA_DATA(attr);
297		}
298	}
299	return 0;
300}
301
302int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
303{
304	struct fib_table * tb;
305	struct rtattr **rta = arg;
306	struct rtmsg *r = NLMSG_DATA(nlh);
307
308	if (inet_check_attr(r, rta))
309		return -EINVAL;
310
311	tb = fib_get_table(r->rtm_table);
312	if (tb)
313		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
314	return -ESRCH;
315}
316
317int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
318{
319	struct fib_table * tb;
320	struct rtattr **rta = arg;
321	struct rtmsg *r = NLMSG_DATA(nlh);
322
323	if (inet_check_attr(r, rta))
324		return -EINVAL;
325
326	tb = fib_new_table(r->rtm_table);
327	if (tb)
328		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
329	return -ENOBUFS;
330}
331
332int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
333{
334	int t;
335	int s_t;
336	struct fib_table *tb;
337
338	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
339	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
340		return ip_rt_dump(skb, cb);
341
342	s_t = cb->args[0];
343	if (s_t == 0)
344		s_t = cb->args[0] = RT_TABLE_MIN;
345
346	for (t=s_t; t<=RT_TABLE_MAX; t++) {
347		if (t < s_t) continue;
348		if (t > s_t)
349			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
350		if ((tb = fib_get_table(t))==NULL)
351			continue;
352		if (tb->tb_dump(tb, skb, cb) < 0)
353			break;
354	}
355
356	cb->args[0] = t;
357
358	return skb->len;
359}
360
/* Prepare an intra-kernel routing request and feed it to the FIB engine.
   Really, it should be a netlink message, but :-( netlink
   may not be configured, so we feed it directly
   to the fib engine. This is legal because all such events occur
   only when netlink is already locked.
 */
367
368static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
369{
370	struct fib_table * tb;
371	struct {
372		struct nlmsghdr	nlh;
373		struct rtmsg	rtm;
374	} req;
375	struct kern_rta rta;
376
377	memset(&req.rtm, 0, sizeof(req.rtm));
378	memset(&rta, 0, sizeof(rta));
379
380	if (type == RTN_UNICAST)
381		tb = fib_new_table(RT_TABLE_MAIN);
382	else
383		tb = fib_new_table(RT_TABLE_LOCAL);
384
385	if (tb == NULL)
386		return;
387
388	req.nlh.nlmsg_len = sizeof(req);
389	req.nlh.nlmsg_type = cmd;
390	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
391	req.nlh.nlmsg_pid = 0;
392	req.nlh.nlmsg_seq = 0;
393
394	req.rtm.rtm_dst_len = dst_len;
395	req.rtm.rtm_table = tb->tb_id;
396	req.rtm.rtm_protocol = RTPROT_KERNEL;
397	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
398	req.rtm.rtm_type = type;
399
400	rta.rta_dst = &dst;
401	rta.rta_prefsrc = &ifa->ifa_local;
402	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
403
404	if (cmd == RTM_NEWROUTE)
405		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
406	else
407		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
408}
409
410void fib_add_ifaddr(struct in_ifaddr *ifa)
411{
412	struct in_device *in_dev = ifa->ifa_dev;
413	struct net_device *dev = in_dev->dev;
414	struct in_ifaddr *prim = ifa;
415	u32 mask = ifa->ifa_mask;
416	u32 addr = ifa->ifa_local;
417	u32 prefix = ifa->ifa_address&mask;
418
419	if (ifa->ifa_flags&IFA_F_SECONDARY) {
420		prim = inet_ifa_byprefix(in_dev, prefix, mask);
421		if (prim == NULL) {
422			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
423			return;
424		}
425	}
426
427	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
428
429	if (!(dev->flags&IFF_UP))
430		return;
431
432	/* Add broadcast address, if it is explicitly assigned. */
433	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
434		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
435
436	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
437	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
438		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
439			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
440
441		/* Add network specific broadcasts, when it takes a sense */
442		if (ifa->ifa_prefixlen < 31) {
443			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
444			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
445		}
446	}
447}
448
449static void fib_del_ifaddr(struct in_ifaddr *ifa)
450{
451	struct in_device *in_dev = ifa->ifa_dev;
452	struct net_device *dev = in_dev->dev;
453	struct in_ifaddr *ifa1;
454	struct in_ifaddr *prim = ifa;
455	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
456	u32 any = ifa->ifa_address&ifa->ifa_mask;
457#define LOCAL_OK	1
458#define BRD_OK		2
459#define BRD0_OK		4
460#define BRD1_OK		8
461	unsigned ok = 0;
462
463	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
464		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
465			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
466	else {
467		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
468		if (prim == NULL) {
469			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
470			return;
471		}
472	}
473
474	/* Deletion is more complicated than add.
475	   We should take care of not to delete too much :-)
476
477	   Scan address list to be sure that addresses are really gone.
478	 */
479
480	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
481		if (ifa->ifa_local == ifa1->ifa_local)
482			ok |= LOCAL_OK;
483		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
484			ok |= BRD_OK;
485		if (brd == ifa1->ifa_broadcast)
486			ok |= BRD1_OK;
487		if (any == ifa1->ifa_broadcast)
488			ok |= BRD0_OK;
489	}
490
491	if (!(ok&BRD_OK))
492		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
493	if (!(ok&BRD1_OK))
494		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
495	if (!(ok&BRD0_OK))
496		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
497	if (!(ok&LOCAL_OK)) {
498		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
499
500		/* Check, that this local address finally disappeared. */
501		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
502			/* And the last, but not the least thing.
503			   We must flush stray FIB entries.
504
505			   First of all, we scan fib_info list searching
506			   for stray nexthop entries, then ignite fib_flush.
507			*/
508			if (fib_sync_down(ifa->ifa_local, NULL, 0))
509				fib_flush();
510		}
511	}
512#undef LOCAL_OK
513#undef BRD_OK
514#undef BRD0_OK
515#undef BRD1_OK
516}
517
518static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
519{
520
521	struct fib_result       res;
522	struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
523							    .fwmark = frn->fl_fwmark,
524							    .tos = frn->fl_tos,
525							    .scope = frn->fl_scope } } };
526	if (tb) {
527		local_bh_disable();
528
529		frn->tb_id = tb->tb_id;
530		frn->err = tb->tb_lookup(tb, &fl, &res);
531
532		if (!frn->err) {
533			frn->prefixlen = res.prefixlen;
534			frn->nh_sel = res.nh_sel;
535			frn->type = res.type;
536			frn->scope = res.scope;
537		}
538		local_bh_enable();
539	}
540}
541
542static void nl_fib_input(struct sock *sk, int len)
543{
544	struct sk_buff *skb = NULL;
545        struct nlmsghdr *nlh = NULL;
546	struct fib_result_nl *frn;
547	u32 pid;
548	struct fib_table *tb;
549
550	skb = skb_dequeue(&sk->sk_receive_queue);
551	nlh = (struct nlmsghdr *)skb->data;
552	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
553	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
554		kfree_skb(skb);
555		return;
556	}
557
558	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
559	tb = fib_get_table(frn->tb_id_in);
560
561	nl_fib_lookup(frn, tb);
562
563	pid = nlh->nlmsg_pid;           /*pid of sending process */
564	NETLINK_CB(skb).pid = 0;         /* from kernel */
565	NETLINK_CB(skb).dst_pid = pid;
566	NETLINK_CB(skb).dst_group = 0;  /* unicast */
567	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
568}
569
570static void nl_fib_lookup_init(void)
571{
572      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
573}
574
/*
 * Take IP routing state down for @dev: run fib_sync_down() for the device
 * (passing @force through), flush the FIB if that returned non-zero, then
 * flush the routing cache and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
582
583static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
584{
585	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
586
587	switch (event) {
588	case NETDEV_UP:
589		fib_add_ifaddr(ifa);
590#ifdef CONFIG_IP_ROUTE_MULTIPATH
591		fib_sync_up(ifa->ifa_dev->dev);
592#endif
593		rt_cache_flush(-1);
594		break;
595	case NETDEV_DOWN:
596		fib_del_ifaddr(ifa);
597		if (ifa->ifa_dev->ifa_list == NULL) {
598			/* Last address was deleted from this interface.
599			   Disable IP.
600			 */
601			fib_disable_ip(ifa->ifa_dev->dev, 1);
602		} else {
603			rt_cache_flush(-1);
604		}
605		break;
606	}
607	return NOTIFY_DONE;
608}
609
610static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
611{
612	struct net_device *dev = ptr;
613	struct in_device *in_dev = __in_dev_get_rtnl(dev);
614
615	if (event == NETDEV_UNREGISTER) {
616		fib_disable_ip(dev, 2);
617		return NOTIFY_DONE;
618	}
619
620	if (!in_dev)
621		return NOTIFY_DONE;
622
623	switch (event) {
624	case NETDEV_UP:
625		for_ifa(in_dev) {
626			fib_add_ifaddr(ifa);
627		} endfor_ifa(in_dev);
628#ifdef CONFIG_IP_ROUTE_MULTIPATH
629		fib_sync_up(dev);
630#endif
631		rt_cache_flush(-1);
632		break;
633	case NETDEV_DOWN:
634		fib_disable_ip(dev, 0);
635		break;
636	case NETDEV_CHANGEMTU:
637	case NETDEV_CHANGE:
638		rt_cache_flush(0);
639		break;
640	}
641	return NOTIFY_DONE;
642}
643
644static struct notifier_block fib_inetaddr_notifier = {
645	.notifier_call =fib_inetaddr_event,
646};
647
648static struct notifier_block fib_netdev_notifier = {
649	.notifier_call =fib_netdev_event,
650};
651
652void __init ip_fib_init(void)
653{
654#ifndef CONFIG_IP_MULTIPLE_TABLES
655	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
656	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
657#else
658	fib_rules_init();
659#endif
660
661	register_netdevice_notifier(&fib_netdev_notifier);
662	register_inetaddr_notifier(&fib_inetaddr_notifier);
663	nl_fib_lookup_init();
664}
665
666EXPORT_SYMBOL(inet_addr_type);
667EXPORT_SYMBOL(ip_rt_ioctl);
668