fib_frontend.c revision 6bd48fcf73019219495f7599028296c65b749bb4
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
50#define FFprint(a...) printk(KERN_DEBUG a)
51
52#ifndef CONFIG_IP_MULTIPLE_TABLES
53
54static int __net_init fib4_rules_init(struct net *net)
55{
56	struct fib_table *local_table, *main_table;
57
58	local_table = fib_hash_init(RT_TABLE_LOCAL);
59	if (local_table == NULL)
60		return -ENOMEM;
61
62	main_table  = fib_hash_init(RT_TABLE_MAIN);
63	if (main_table == NULL)
64		goto fail;
65
66	hlist_add_head_rcu(&local_table->tb_hlist,
67				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
68	hlist_add_head_rcu(&main_table->tb_hlist,
69				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
70	return 0;
71
72fail:
73	kfree(local_table);
74	return -ENOMEM;
75}
76#else
77
78struct fib_table *fib_new_table(struct net *net, u32 id)
79{
80	struct fib_table *tb;
81	unsigned int h;
82
83	if (id == 0)
84		id = RT_TABLE_MAIN;
85	tb = fib_get_table(net, id);
86	if (tb)
87		return tb;
88	tb = fib_hash_init(id);
89	if (!tb)
90		return NULL;
91	h = id & (FIB_TABLE_HASHSZ - 1);
92	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
93	return tb;
94}
95
96struct fib_table *fib_get_table(struct net *net, u32 id)
97{
98	struct fib_table *tb;
99	struct hlist_node *node;
100	struct hlist_head *head;
101	unsigned int h;
102
103	if (id == 0)
104		id = RT_TABLE_MAIN;
105	h = id & (FIB_TABLE_HASHSZ - 1);
106
107	rcu_read_lock();
108	head = &net->ipv4.fib_table_hash[h];
109	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
110		if (tb->tb_id == id) {
111			rcu_read_unlock();
112			return tb;
113		}
114	}
115	rcu_read_unlock();
116	return NULL;
117}
118#endif /* CONFIG_IP_MULTIPLE_TABLES */
119
120static void fib_flush(struct net *net)
121{
122	int flushed = 0;
123	struct fib_table *tb;
124	struct hlist_node *node;
125	struct hlist_head *head;
126	unsigned int h;
127
128	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
129		head = &net->ipv4.fib_table_hash[h];
130		hlist_for_each_entry(tb, node, head, tb_hlist)
131			flushed += tb->tb_flush(tb);
132	}
133
134	if (flushed)
135		rt_cache_flush(-1);
136}
137
138/*
139 *	Find the first device with a given source address.
140 */
141
142struct net_device * ip_dev_find(__be32 addr)
143{
144	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
145	struct fib_result res;
146	struct net_device *dev = NULL;
147	struct fib_table *local_table;
148
149#ifdef CONFIG_IP_MULTIPLE_TABLES
150	res.r = NULL;
151#endif
152
153	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
154	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
155		return NULL;
156	if (res.type != RTN_LOCAL)
157		goto out;
158	dev = FIB_RES_DEV(res);
159
160	if (dev)
161		dev_hold(dev);
162out:
163	fib_res_put(&res);
164	return dev;
165}
166
167/*
168 * Find address type as if only "dev" was present in the system. If
169 * on_dev is NULL then all interfaces are taken into consideration.
170 */
171static inline unsigned __inet_dev_addr_type(struct net *net,
172					    const struct net_device *dev,
173					    __be32 addr)
174{
175	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
176	struct fib_result	res;
177	unsigned ret = RTN_BROADCAST;
178	struct fib_table *local_table;
179
180	if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
181		return RTN_BROADCAST;
182	if (ipv4_is_multicast(addr))
183		return RTN_MULTICAST;
184
185#ifdef CONFIG_IP_MULTIPLE_TABLES
186	res.r = NULL;
187#endif
188
189	local_table = fib_get_table(net, RT_TABLE_LOCAL);
190	if (local_table) {
191		ret = RTN_UNICAST;
192		if (!local_table->tb_lookup(local_table, &fl, &res)) {
193			if (!dev || dev == res.fi->fib_dev)
194				ret = res.type;
195			fib_res_put(&res);
196		}
197	}
198	return ret;
199}
200
201unsigned int inet_addr_type(struct net *net, __be32 addr)
202{
203	return __inet_dev_addr_type(net, NULL, addr);
204}
205
206unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
207				__be32 addr)
208{
209       return __inet_dev_addr_type(net, dev, addr);
210}
211
212/* Given (packet source, input interface) and optional (dst, oif, tos):
213   - (main) check, that source is valid i.e. not broadcast or our local
214     address.
215   - figure out what "logical" interface this packet arrived
216     and calculate "specific destination" address.
217   - check, that packet arrived from expected physical interface.
218 */
219
220int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
221			struct net_device *dev, __be32 *spec_dst, u32 *itag)
222{
223	struct in_device *in_dev;
224	struct flowi fl = { .nl_u = { .ip4_u =
225				      { .daddr = src,
226					.saddr = dst,
227					.tos = tos } },
228			    .iif = oif };
229	struct fib_result res;
230	int no_addr, rpf;
231	int ret;
232
233	no_addr = rpf = 0;
234	rcu_read_lock();
235	in_dev = __in_dev_get_rcu(dev);
236	if (in_dev) {
237		no_addr = in_dev->ifa_list == NULL;
238		rpf = IN_DEV_RPFILTER(in_dev);
239	}
240	rcu_read_unlock();
241
242	if (in_dev == NULL)
243		goto e_inval;
244
245	if (fib_lookup(&fl, &res))
246		goto last_resort;
247	if (res.type != RTN_UNICAST)
248		goto e_inval_res;
249	*spec_dst = FIB_RES_PREFSRC(res);
250	fib_combine_itag(itag, &res);
251#ifdef CONFIG_IP_ROUTE_MULTIPATH
252	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
253#else
254	if (FIB_RES_DEV(res) == dev)
255#endif
256	{
257		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
258		fib_res_put(&res);
259		return ret;
260	}
261	fib_res_put(&res);
262	if (no_addr)
263		goto last_resort;
264	if (rpf)
265		goto e_inval;
266	fl.oif = dev->ifindex;
267
268	ret = 0;
269	if (fib_lookup(&fl, &res) == 0) {
270		if (res.type == RTN_UNICAST) {
271			*spec_dst = FIB_RES_PREFSRC(res);
272			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
273		}
274		fib_res_put(&res);
275	}
276	return ret;
277
278last_resort:
279	if (rpf)
280		goto e_inval;
281	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
282	*itag = 0;
283	return 0;
284
285e_inval_res:
286	fib_res_put(&res);
287e_inval:
288	return -EINVAL;
289}
290
291static inline __be32 sk_extract_addr(struct sockaddr *addr)
292{
293	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
294}
295
296static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
297{
298	struct nlattr *nla;
299
300	nla = (struct nlattr *) ((char *) mx + len);
301	nla->nla_type = type;
302	nla->nla_len = nla_attr_size(4);
303	*(u32 *) nla_data(nla) = value;
304
305	return len + nla_total_size(4);
306}
307
308static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
309				 struct fib_config *cfg)
310{
311	__be32 addr;
312	int plen;
313
314	memset(cfg, 0, sizeof(*cfg));
315	cfg->fc_nlinfo.nl_net = &init_net;
316
317	if (rt->rt_dst.sa_family != AF_INET)
318		return -EAFNOSUPPORT;
319
320	/*
321	 * Check mask for validity:
322	 * a) it must be contiguous.
323	 * b) destination must have all host bits clear.
324	 * c) if application forgot to set correct family (AF_INET),
325	 *    reject request unless it is absolutely clear i.e.
326	 *    both family and mask are zero.
327	 */
328	plen = 32;
329	addr = sk_extract_addr(&rt->rt_dst);
330	if (!(rt->rt_flags & RTF_HOST)) {
331		__be32 mask = sk_extract_addr(&rt->rt_genmask);
332
333		if (rt->rt_genmask.sa_family != AF_INET) {
334			if (mask || rt->rt_genmask.sa_family)
335				return -EAFNOSUPPORT;
336		}
337
338		if (bad_mask(mask, addr))
339			return -EINVAL;
340
341		plen = inet_mask_len(mask);
342	}
343
344	cfg->fc_dst_len = plen;
345	cfg->fc_dst = addr;
346
347	if (cmd != SIOCDELRT) {
348		cfg->fc_nlflags = NLM_F_CREATE;
349		cfg->fc_protocol = RTPROT_BOOT;
350	}
351
352	if (rt->rt_metric)
353		cfg->fc_priority = rt->rt_metric - 1;
354
355	if (rt->rt_flags & RTF_REJECT) {
356		cfg->fc_scope = RT_SCOPE_HOST;
357		cfg->fc_type = RTN_UNREACHABLE;
358		return 0;
359	}
360
361	cfg->fc_scope = RT_SCOPE_NOWHERE;
362	cfg->fc_type = RTN_UNICAST;
363
364	if (rt->rt_dev) {
365		char *colon;
366		struct net_device *dev;
367		char devname[IFNAMSIZ];
368
369		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
370			return -EFAULT;
371
372		devname[IFNAMSIZ-1] = 0;
373		colon = strchr(devname, ':');
374		if (colon)
375			*colon = 0;
376		dev = __dev_get_by_name(&init_net, devname);
377		if (!dev)
378			return -ENODEV;
379		cfg->fc_oif = dev->ifindex;
380		if (colon) {
381			struct in_ifaddr *ifa;
382			struct in_device *in_dev = __in_dev_get_rtnl(dev);
383			if (!in_dev)
384				return -ENODEV;
385			*colon = ':';
386			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
387				if (strcmp(ifa->ifa_label, devname) == 0)
388					break;
389			if (ifa == NULL)
390				return -ENODEV;
391			cfg->fc_prefsrc = ifa->ifa_local;
392		}
393	}
394
395	addr = sk_extract_addr(&rt->rt_gateway);
396	if (rt->rt_gateway.sa_family == AF_INET && addr) {
397		cfg->fc_gw = addr;
398		if (rt->rt_flags & RTF_GATEWAY &&
399		    inet_addr_type(&init_net, addr) == RTN_UNICAST)
400			cfg->fc_scope = RT_SCOPE_UNIVERSE;
401	}
402
403	if (cmd == SIOCDELRT)
404		return 0;
405
406	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
407		return -EINVAL;
408
409	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
410		cfg->fc_scope = RT_SCOPE_LINK;
411
412	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
413		struct nlattr *mx;
414		int len = 0;
415
416		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
417		if (mx == NULL)
418			return -ENOMEM;
419
420		if (rt->rt_flags & RTF_MTU)
421			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
422
423		if (rt->rt_flags & RTF_WINDOW)
424			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
425
426		if (rt->rt_flags & RTF_IRTT)
427			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
428
429		cfg->fc_mx = mx;
430		cfg->fc_mx_len = len;
431	}
432
433	return 0;
434}
435
436/*
437 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
438 */
439
440int ip_rt_ioctl(unsigned int cmd, void __user *arg)
441{
442	struct fib_config cfg;
443	struct rtentry rt;
444	int err;
445
446	switch (cmd) {
447	case SIOCADDRT:		/* Add a route */
448	case SIOCDELRT:		/* Delete a route */
449		if (!capable(CAP_NET_ADMIN))
450			return -EPERM;
451
452		if (copy_from_user(&rt, arg, sizeof(rt)))
453			return -EFAULT;
454
455		rtnl_lock();
456		err = rtentry_to_fib_config(cmd, &rt, &cfg);
457		if (err == 0) {
458			struct fib_table *tb;
459
460			if (cmd == SIOCDELRT) {
461				tb = fib_get_table(&init_net, cfg.fc_table);
462				if (tb)
463					err = tb->tb_delete(tb, &cfg);
464				else
465					err = -ESRCH;
466			} else {
467				tb = fib_new_table(&init_net, cfg.fc_table);
468				if (tb)
469					err = tb->tb_insert(tb, &cfg);
470				else
471					err = -ENOBUFS;
472			}
473
474			/* allocated by rtentry_to_fib_config() */
475			kfree(cfg.fc_mx);
476		}
477		rtnl_unlock();
478		return err;
479	}
480	return -EINVAL;
481}
482
483const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
484	[RTA_DST]		= { .type = NLA_U32 },
485	[RTA_SRC]		= { .type = NLA_U32 },
486	[RTA_IIF]		= { .type = NLA_U32 },
487	[RTA_OIF]		= { .type = NLA_U32 },
488	[RTA_GATEWAY]		= { .type = NLA_U32 },
489	[RTA_PRIORITY]		= { .type = NLA_U32 },
490	[RTA_PREFSRC]		= { .type = NLA_U32 },
491	[RTA_METRICS]		= { .type = NLA_NESTED },
492	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
493	[RTA_PROTOINFO]		= { .type = NLA_U32 },
494	[RTA_FLOW]		= { .type = NLA_U32 },
495};
496
497static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
498			     struct fib_config *cfg)
499{
500	struct nlattr *attr;
501	int err, remaining;
502	struct rtmsg *rtm;
503
504	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
505	if (err < 0)
506		goto errout;
507
508	memset(cfg, 0, sizeof(*cfg));
509
510	rtm = nlmsg_data(nlh);
511	cfg->fc_dst_len = rtm->rtm_dst_len;
512	cfg->fc_tos = rtm->rtm_tos;
513	cfg->fc_table = rtm->rtm_table;
514	cfg->fc_protocol = rtm->rtm_protocol;
515	cfg->fc_scope = rtm->rtm_scope;
516	cfg->fc_type = rtm->rtm_type;
517	cfg->fc_flags = rtm->rtm_flags;
518	cfg->fc_nlflags = nlh->nlmsg_flags;
519
520	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
521	cfg->fc_nlinfo.nlh = nlh;
522	cfg->fc_nlinfo.nl_net = &init_net;
523
524	if (cfg->fc_type > RTN_MAX) {
525		err = -EINVAL;
526		goto errout;
527	}
528
529	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
530		switch (nla_type(attr)) {
531		case RTA_DST:
532			cfg->fc_dst = nla_get_be32(attr);
533			break;
534		case RTA_OIF:
535			cfg->fc_oif = nla_get_u32(attr);
536			break;
537		case RTA_GATEWAY:
538			cfg->fc_gw = nla_get_be32(attr);
539			break;
540		case RTA_PRIORITY:
541			cfg->fc_priority = nla_get_u32(attr);
542			break;
543		case RTA_PREFSRC:
544			cfg->fc_prefsrc = nla_get_be32(attr);
545			break;
546		case RTA_METRICS:
547			cfg->fc_mx = nla_data(attr);
548			cfg->fc_mx_len = nla_len(attr);
549			break;
550		case RTA_MULTIPATH:
551			cfg->fc_mp = nla_data(attr);
552			cfg->fc_mp_len = nla_len(attr);
553			break;
554		case RTA_FLOW:
555			cfg->fc_flow = nla_get_u32(attr);
556			break;
557		case RTA_TABLE:
558			cfg->fc_table = nla_get_u32(attr);
559			break;
560		}
561	}
562
563	return 0;
564errout:
565	return err;
566}
567
568static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
569{
570	struct net *net = skb->sk->sk_net;
571	struct fib_config cfg;
572	struct fib_table *tb;
573	int err;
574
575	if (net != &init_net)
576		return -EINVAL;
577
578	err = rtm_to_fib_config(skb, nlh, &cfg);
579	if (err < 0)
580		goto errout;
581
582	tb = fib_get_table(net, cfg.fc_table);
583	if (tb == NULL) {
584		err = -ESRCH;
585		goto errout;
586	}
587
588	err = tb->tb_delete(tb, &cfg);
589errout:
590	return err;
591}
592
593static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
594{
595	struct net *net = skb->sk->sk_net;
596	struct fib_config cfg;
597	struct fib_table *tb;
598	int err;
599
600	if (net != &init_net)
601		return -EINVAL;
602
603	err = rtm_to_fib_config(skb, nlh, &cfg);
604	if (err < 0)
605		goto errout;
606
607	tb = fib_new_table(&init_net, cfg.fc_table);
608	if (tb == NULL) {
609		err = -ENOBUFS;
610		goto errout;
611	}
612
613	err = tb->tb_insert(tb, &cfg);
614errout:
615	return err;
616}
617
618static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
619{
620	struct net *net = skb->sk->sk_net;
621	unsigned int h, s_h;
622	unsigned int e = 0, s_e;
623	struct fib_table *tb;
624	struct hlist_node *node;
625	struct hlist_head *head;
626	int dumped = 0;
627
628	if (net != &init_net)
629		return 0;
630
631	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
632	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
633		return ip_rt_dump(skb, cb);
634
635	s_h = cb->args[0];
636	s_e = cb->args[1];
637
638	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
639		e = 0;
640		head = &net->ipv4.fib_table_hash[h];
641		hlist_for_each_entry(tb, node, head, tb_hlist) {
642			if (e < s_e)
643				goto next;
644			if (dumped)
645				memset(&cb->args[2], 0, sizeof(cb->args) -
646						 2 * sizeof(cb->args[0]));
647			if (tb->tb_dump(tb, skb, cb) < 0)
648				goto out;
649			dumped = 1;
650next:
651			e++;
652		}
653	}
654out:
655	cb->args[1] = e;
656	cb->args[0] = h;
657
658	return skb->len;
659}
660
661/* Prepare and feed intra-kernel routing request.
662   Really, it should be netlink message, but :-( netlink
663   can be not configured, so that we feed it directly
664   to fib engine. It is legal, because all events occur
665   only when netlink is already locked.
666 */
667
668static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
669{
670	struct fib_table *tb;
671	struct fib_config cfg = {
672		.fc_protocol = RTPROT_KERNEL,
673		.fc_type = type,
674		.fc_dst = dst,
675		.fc_dst_len = dst_len,
676		.fc_prefsrc = ifa->ifa_local,
677		.fc_oif = ifa->ifa_dev->dev->ifindex,
678		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
679		.fc_nlinfo = {
680			.nl_net = &init_net,
681		},
682	};
683
684	if (type == RTN_UNICAST)
685		tb = fib_new_table(&init_net, RT_TABLE_MAIN);
686	else
687		tb = fib_new_table(&init_net, RT_TABLE_LOCAL);
688
689	if (tb == NULL)
690		return;
691
692	cfg.fc_table = tb->tb_id;
693
694	if (type != RTN_LOCAL)
695		cfg.fc_scope = RT_SCOPE_LINK;
696	else
697		cfg.fc_scope = RT_SCOPE_HOST;
698
699	if (cmd == RTM_NEWROUTE)
700		tb->tb_insert(tb, &cfg);
701	else
702		tb->tb_delete(tb, &cfg);
703}
704
705void fib_add_ifaddr(struct in_ifaddr *ifa)
706{
707	struct in_device *in_dev = ifa->ifa_dev;
708	struct net_device *dev = in_dev->dev;
709	struct in_ifaddr *prim = ifa;
710	__be32 mask = ifa->ifa_mask;
711	__be32 addr = ifa->ifa_local;
712	__be32 prefix = ifa->ifa_address&mask;
713
714	if (ifa->ifa_flags&IFA_F_SECONDARY) {
715		prim = inet_ifa_byprefix(in_dev, prefix, mask);
716		if (prim == NULL) {
717			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
718			return;
719		}
720	}
721
722	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
723
724	if (!(dev->flags&IFF_UP))
725		return;
726
727	/* Add broadcast address, if it is explicitly assigned. */
728	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
729		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
730
731	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
732	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
733		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
734			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
735
736		/* Add network specific broadcasts, when it takes a sense */
737		if (ifa->ifa_prefixlen < 31) {
738			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
739			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
740		}
741	}
742}
743
744static void fib_del_ifaddr(struct in_ifaddr *ifa)
745{
746	struct in_device *in_dev = ifa->ifa_dev;
747	struct net_device *dev = in_dev->dev;
748	struct in_ifaddr *ifa1;
749	struct in_ifaddr *prim = ifa;
750	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
751	__be32 any = ifa->ifa_address&ifa->ifa_mask;
752#define LOCAL_OK	1
753#define BRD_OK		2
754#define BRD0_OK		4
755#define BRD1_OK		8
756	unsigned ok = 0;
757
758	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
759		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
760			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
761	else {
762		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
763		if (prim == NULL) {
764			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
765			return;
766		}
767	}
768
769	/* Deletion is more complicated than add.
770	   We should take care of not to delete too much :-)
771
772	   Scan address list to be sure that addresses are really gone.
773	 */
774
775	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
776		if (ifa->ifa_local == ifa1->ifa_local)
777			ok |= LOCAL_OK;
778		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
779			ok |= BRD_OK;
780		if (brd == ifa1->ifa_broadcast)
781			ok |= BRD1_OK;
782		if (any == ifa1->ifa_broadcast)
783			ok |= BRD0_OK;
784	}
785
786	if (!(ok&BRD_OK))
787		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
788	if (!(ok&BRD1_OK))
789		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
790	if (!(ok&BRD0_OK))
791		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
792	if (!(ok&LOCAL_OK)) {
793		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
794
795		/* Check, that this local address finally disappeared. */
796		if (inet_addr_type(&init_net, ifa->ifa_local) != RTN_LOCAL) {
797			/* And the last, but not the least thing.
798			   We must flush stray FIB entries.
799
800			   First of all, we scan fib_info list searching
801			   for stray nexthop entries, then ignite fib_flush.
802			*/
803			if (fib_sync_down(ifa->ifa_local, NULL, 0))
804				fib_flush(&init_net);
805		}
806	}
807#undef LOCAL_OK
808#undef BRD_OK
809#undef BRD0_OK
810#undef BRD1_OK
811}
812
813static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
814{
815
816	struct fib_result       res;
817	struct flowi            fl = { .mark = frn->fl_mark,
818				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
819							    .tos = frn->fl_tos,
820							    .scope = frn->fl_scope } } };
821
822#ifdef CONFIG_IP_MULTIPLE_TABLES
823	res.r = NULL;
824#endif
825
826	frn->err = -ENOENT;
827	if (tb) {
828		local_bh_disable();
829
830		frn->tb_id = tb->tb_id;
831		frn->err = tb->tb_lookup(tb, &fl, &res);
832
833		if (!frn->err) {
834			frn->prefixlen = res.prefixlen;
835			frn->nh_sel = res.nh_sel;
836			frn->type = res.type;
837			frn->scope = res.scope;
838			fib_res_put(&res);
839		}
840		local_bh_enable();
841	}
842}
843
844static void nl_fib_input(struct sk_buff *skb)
845{
846	struct net *net;
847	struct fib_result_nl *frn;
848	struct nlmsghdr *nlh;
849	struct fib_table *tb;
850	u32 pid;
851
852	net = skb->sk->sk_net;
853	nlh = nlmsg_hdr(skb);
854	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
855	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
856		return;
857
858	skb = skb_clone(skb, GFP_KERNEL);
859	if (skb == NULL)
860		return;
861	nlh = nlmsg_hdr(skb);
862
863	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
864	tb = fib_get_table(net, frn->tb_id_in);
865
866	nl_fib_lookup(frn, tb);
867
868	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
869	NETLINK_CB(skb).pid = 0;         /* from kernel */
870	NETLINK_CB(skb).dst_group = 0;  /* unicast */
871	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
872}
873
874static int nl_fib_lookup_init(struct net *net)
875{
876	struct sock *sk;
877	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
878				   nl_fib_input, NULL, THIS_MODULE);
879	if (sk == NULL)
880		return -EAFNOSUPPORT;
881	/* Don't hold an extra reference on the namespace */
882	put_net(sk->sk_net);
883	net->ipv4.fibnl = sk;
884	return 0;
885}
886
887static void nl_fib_lookup_exit(struct net *net)
888{
889	/* At the last minute lie and say this is a socket for the
890	 * initial network namespace. So the socket will  be safe to free.
891	 */
892	net->ipv4.fibnl->sk_net = get_net(&init_net);
893	sock_put(net->ipv4.fibnl);
894}
895
896static void fib_disable_ip(struct net_device *dev, int force)
897{
898	if (fib_sync_down(0, dev, force))
899		fib_flush(&init_net);
900	rt_cache_flush(0);
901	arp_ifdown(dev);
902}
903
904static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
905{
906	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
907
908	switch (event) {
909	case NETDEV_UP:
910		fib_add_ifaddr(ifa);
911#ifdef CONFIG_IP_ROUTE_MULTIPATH
912		fib_sync_up(ifa->ifa_dev->dev);
913#endif
914		rt_cache_flush(-1);
915		break;
916	case NETDEV_DOWN:
917		fib_del_ifaddr(ifa);
918		if (ifa->ifa_dev->ifa_list == NULL) {
919			/* Last address was deleted from this interface.
920			   Disable IP.
921			 */
922			fib_disable_ip(ifa->ifa_dev->dev, 1);
923		} else {
924			rt_cache_flush(-1);
925		}
926		break;
927	}
928	return NOTIFY_DONE;
929}
930
931static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
932{
933	struct net_device *dev = ptr;
934	struct in_device *in_dev = __in_dev_get_rtnl(dev);
935
936	if (dev->nd_net != &init_net)
937		return NOTIFY_DONE;
938
939	if (event == NETDEV_UNREGISTER) {
940		fib_disable_ip(dev, 2);
941		return NOTIFY_DONE;
942	}
943
944	if (!in_dev)
945		return NOTIFY_DONE;
946
947	switch (event) {
948	case NETDEV_UP:
949		for_ifa(in_dev) {
950			fib_add_ifaddr(ifa);
951		} endfor_ifa(in_dev);
952#ifdef CONFIG_IP_ROUTE_MULTIPATH
953		fib_sync_up(dev);
954#endif
955		rt_cache_flush(-1);
956		break;
957	case NETDEV_DOWN:
958		fib_disable_ip(dev, 0);
959		break;
960	case NETDEV_CHANGEMTU:
961	case NETDEV_CHANGE:
962		rt_cache_flush(0);
963		break;
964	}
965	return NOTIFY_DONE;
966}
967
968static struct notifier_block fib_inetaddr_notifier = {
969	.notifier_call =fib_inetaddr_event,
970};
971
972static struct notifier_block fib_netdev_notifier = {
973	.notifier_call =fib_netdev_event,
974};
975
976static int __net_init ip_fib_net_init(struct net *net)
977{
978	unsigned int i;
979
980	net->ipv4.fib_table_hash = kzalloc(
981			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
982	if (net->ipv4.fib_table_hash == NULL)
983		return -ENOMEM;
984
985	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
986		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
987
988	return fib4_rules_init(net);
989}
990
991static void __net_exit ip_fib_net_exit(struct net *net)
992{
993	unsigned int i;
994
995#ifdef CONFIG_IP_MULTIPLE_TABLES
996	fib4_rules_exit(net);
997#endif
998
999	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1000		struct fib_table *tb;
1001		struct hlist_head *head;
1002		struct hlist_node *node, *tmp;
1003
1004		head = &net->ipv4.fib_table_hash[i];
1005		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1006			hlist_del(node);
1007			tb->tb_flush(tb);
1008			kfree(tb);
1009		}
1010	}
1011	kfree(net->ipv4.fib_table_hash);
1012}
1013
1014static int __net_init fib_net_init(struct net *net)
1015{
1016	int error;
1017
1018	error = 0;
1019	if (net != &init_net)
1020		goto out;
1021
1022	error = ip_fib_net_init(net);
1023	if (error < 0)
1024		goto out;
1025	error = nl_fib_lookup_init(net);
1026	if (error < 0)
1027		goto out_nlfl;
1028	error = fib_proc_init(net);
1029	if (error < 0)
1030		goto out_proc;
1031out:
1032	return error;
1033
1034out_proc:
1035	nl_fib_lookup_exit(net);
1036out_nlfl:
1037	ip_fib_net_exit(net);
1038	goto out;
1039}
1040
1041static void __net_exit fib_net_exit(struct net *net)
1042{
1043	fib_proc_exit(net);
1044	nl_fib_lookup_exit(net);
1045	ip_fib_net_exit(net);
1046}
1047
1048static struct pernet_operations fib_net_ops = {
1049	.init = fib_net_init,
1050	.exit = fib_net_exit,
1051};
1052
1053void __init ip_fib_init(void)
1054{
1055	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1056	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1057	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1058
1059	register_pernet_subsys(&fib_net_ops);
1060	register_netdevice_notifier(&fib_netdev_notifier);
1061	register_inetaddr_notifier(&fib_inetaddr_notifier);
1062}
1063
1064EXPORT_SYMBOL(inet_addr_type);
1065EXPORT_SYMBOL(inet_dev_addr_type);
1066EXPORT_SYMBOL(ip_dev_find);
1067