fib_frontend.c revision 226b0b4a51d1cc09928e569b121ca0abe2839169
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug helper: forwards its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)
51
52#ifndef CONFIG_IP_MULTIPLE_TABLES
53
54static int __net_init fib4_rules_init(struct net *net)
55{
56	struct fib_table *local_table, *main_table;
57
58	local_table = fib_hash_init(RT_TABLE_LOCAL);
59	if (local_table == NULL)
60		return -ENOMEM;
61
62	main_table  = fib_hash_init(RT_TABLE_MAIN);
63	if (main_table == NULL)
64		goto fail;
65
66	hlist_add_head_rcu(&local_table->tb_hlist,
67				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
68	hlist_add_head_rcu(&main_table->tb_hlist,
69				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
70	return 0;
71
72fail:
73	kfree(local_table);
74	return -ENOMEM;
75}
76#else
77
78struct fib_table *fib_new_table(struct net *net, u32 id)
79{
80	struct fib_table *tb;
81	unsigned int h;
82
83	if (id == 0)
84		id = RT_TABLE_MAIN;
85	tb = fib_get_table(net, id);
86	if (tb)
87		return tb;
88	tb = fib_hash_init(id);
89	if (!tb)
90		return NULL;
91	h = id & (FIB_TABLE_HASHSZ - 1);
92	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
93	return tb;
94}
95
96struct fib_table *fib_get_table(struct net *net, u32 id)
97{
98	struct fib_table *tb;
99	struct hlist_node *node;
100	struct hlist_head *head;
101	unsigned int h;
102
103	if (id == 0)
104		id = RT_TABLE_MAIN;
105	h = id & (FIB_TABLE_HASHSZ - 1);
106
107	rcu_read_lock();
108	head = &net->ipv4.fib_table_hash[h];
109	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
110		if (tb->tb_id == id) {
111			rcu_read_unlock();
112			return tb;
113		}
114	}
115	rcu_read_unlock();
116	return NULL;
117}
118#endif /* CONFIG_IP_MULTIPLE_TABLES */
119
120static void fib_flush(struct net *net)
121{
122	int flushed = 0;
123	struct fib_table *tb;
124	struct hlist_node *node;
125	struct hlist_head *head;
126	unsigned int h;
127
128	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
129		head = &net->ipv4.fib_table_hash[h];
130		hlist_for_each_entry(tb, node, head, tb_hlist)
131			flushed += tb->tb_flush(tb);
132	}
133
134	if (flushed)
135		rt_cache_flush(-1);
136}
137
138/*
139 *	Find the first device with a given source address.
140 */
141
142struct net_device * ip_dev_find(__be32 addr)
143{
144	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
145	struct fib_result res;
146	struct net_device *dev = NULL;
147	struct fib_table *local_table;
148
149#ifdef CONFIG_IP_MULTIPLE_TABLES
150	res.r = NULL;
151#endif
152
153	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
154	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
155		return NULL;
156	if (res.type != RTN_LOCAL)
157		goto out;
158	dev = FIB_RES_DEV(res);
159
160	if (dev)
161		dev_hold(dev);
162out:
163	fib_res_put(&res);
164	return dev;
165}
166
167/*
168 * Find address type as if only "dev" was present in the system. If
169 * on_dev is NULL then all interfaces are taken into consideration.
170 */
171static inline unsigned __inet_dev_addr_type(struct net *net,
172					    const struct net_device *dev,
173					    __be32 addr)
174{
175	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
176	struct fib_result	res;
177	unsigned ret = RTN_BROADCAST;
178	struct fib_table *local_table;
179
180	if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
181		return RTN_BROADCAST;
182	if (ipv4_is_multicast(addr))
183		return RTN_MULTICAST;
184
185#ifdef CONFIG_IP_MULTIPLE_TABLES
186	res.r = NULL;
187#endif
188
189	local_table = fib_get_table(net, RT_TABLE_LOCAL);
190	if (local_table) {
191		ret = RTN_UNICAST;
192		if (!local_table->tb_lookup(local_table, &fl, &res)) {
193			if (!dev || dev == res.fi->fib_dev)
194				ret = res.type;
195			fib_res_put(&res);
196		}
197	}
198	return ret;
199}
200
201unsigned int inet_addr_type(struct net *net, __be32 addr)
202{
203	return __inet_dev_addr_type(net, NULL, addr);
204}
205
206unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
207				__be32 addr)
208{
209       return __inet_dev_addr_type(net, dev, addr);
210}
211
212/* Given (packet source, input interface) and optional (dst, oif, tos):
213   - (main) check, that source is valid i.e. not broadcast or our local
214     address.
215   - figure out what "logical" interface this packet arrived
216     and calculate "specific destination" address.
217   - check, that packet arrived from expected physical interface.
218 */
219
220int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
221			struct net_device *dev, __be32 *spec_dst, u32 *itag)
222{
223	struct in_device *in_dev;
224	struct flowi fl = { .nl_u = { .ip4_u =
225				      { .daddr = src,
226					.saddr = dst,
227					.tos = tos } },
228			    .iif = oif };
229	struct fib_result res;
230	int no_addr, rpf;
231	int ret;
232
233	no_addr = rpf = 0;
234	rcu_read_lock();
235	in_dev = __in_dev_get_rcu(dev);
236	if (in_dev) {
237		no_addr = in_dev->ifa_list == NULL;
238		rpf = IN_DEV_RPFILTER(in_dev);
239	}
240	rcu_read_unlock();
241
242	if (in_dev == NULL)
243		goto e_inval;
244
245	if (fib_lookup(&fl, &res))
246		goto last_resort;
247	if (res.type != RTN_UNICAST)
248		goto e_inval_res;
249	*spec_dst = FIB_RES_PREFSRC(res);
250	fib_combine_itag(itag, &res);
251#ifdef CONFIG_IP_ROUTE_MULTIPATH
252	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
253#else
254	if (FIB_RES_DEV(res) == dev)
255#endif
256	{
257		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
258		fib_res_put(&res);
259		return ret;
260	}
261	fib_res_put(&res);
262	if (no_addr)
263		goto last_resort;
264	if (rpf)
265		goto e_inval;
266	fl.oif = dev->ifindex;
267
268	ret = 0;
269	if (fib_lookup(&fl, &res) == 0) {
270		if (res.type == RTN_UNICAST) {
271			*spec_dst = FIB_RES_PREFSRC(res);
272			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
273		}
274		fib_res_put(&res);
275	}
276	return ret;
277
278last_resort:
279	if (rpf)
280		goto e_inval;
281	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
282	*itag = 0;
283	return 0;
284
285e_inval_res:
286	fib_res_put(&res);
287e_inval:
288	return -EINVAL;
289}
290
291static inline __be32 sk_extract_addr(struct sockaddr *addr)
292{
293	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
294}
295
296static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
297{
298	struct nlattr *nla;
299
300	nla = (struct nlattr *) ((char *) mx + len);
301	nla->nla_type = type;
302	nla->nla_len = nla_attr_size(4);
303	*(u32 *) nla_data(nla) = value;
304
305	return len + nla_total_size(4);
306}
307
308static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
309				 struct fib_config *cfg)
310{
311	__be32 addr;
312	int plen;
313
314	memset(cfg, 0, sizeof(*cfg));
315	cfg->fc_nlinfo.nl_net = net;
316
317	if (rt->rt_dst.sa_family != AF_INET)
318		return -EAFNOSUPPORT;
319
320	/*
321	 * Check mask for validity:
322	 * a) it must be contiguous.
323	 * b) destination must have all host bits clear.
324	 * c) if application forgot to set correct family (AF_INET),
325	 *    reject request unless it is absolutely clear i.e.
326	 *    both family and mask are zero.
327	 */
328	plen = 32;
329	addr = sk_extract_addr(&rt->rt_dst);
330	if (!(rt->rt_flags & RTF_HOST)) {
331		__be32 mask = sk_extract_addr(&rt->rt_genmask);
332
333		if (rt->rt_genmask.sa_family != AF_INET) {
334			if (mask || rt->rt_genmask.sa_family)
335				return -EAFNOSUPPORT;
336		}
337
338		if (bad_mask(mask, addr))
339			return -EINVAL;
340
341		plen = inet_mask_len(mask);
342	}
343
344	cfg->fc_dst_len = plen;
345	cfg->fc_dst = addr;
346
347	if (cmd != SIOCDELRT) {
348		cfg->fc_nlflags = NLM_F_CREATE;
349		cfg->fc_protocol = RTPROT_BOOT;
350	}
351
352	if (rt->rt_metric)
353		cfg->fc_priority = rt->rt_metric - 1;
354
355	if (rt->rt_flags & RTF_REJECT) {
356		cfg->fc_scope = RT_SCOPE_HOST;
357		cfg->fc_type = RTN_UNREACHABLE;
358		return 0;
359	}
360
361	cfg->fc_scope = RT_SCOPE_NOWHERE;
362	cfg->fc_type = RTN_UNICAST;
363
364	if (rt->rt_dev) {
365		char *colon;
366		struct net_device *dev;
367		char devname[IFNAMSIZ];
368
369		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
370			return -EFAULT;
371
372		devname[IFNAMSIZ-1] = 0;
373		colon = strchr(devname, ':');
374		if (colon)
375			*colon = 0;
376		dev = __dev_get_by_name(net, devname);
377		if (!dev)
378			return -ENODEV;
379		cfg->fc_oif = dev->ifindex;
380		if (colon) {
381			struct in_ifaddr *ifa;
382			struct in_device *in_dev = __in_dev_get_rtnl(dev);
383			if (!in_dev)
384				return -ENODEV;
385			*colon = ':';
386			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
387				if (strcmp(ifa->ifa_label, devname) == 0)
388					break;
389			if (ifa == NULL)
390				return -ENODEV;
391			cfg->fc_prefsrc = ifa->ifa_local;
392		}
393	}
394
395	addr = sk_extract_addr(&rt->rt_gateway);
396	if (rt->rt_gateway.sa_family == AF_INET && addr) {
397		cfg->fc_gw = addr;
398		if (rt->rt_flags & RTF_GATEWAY &&
399		    inet_addr_type(net, addr) == RTN_UNICAST)
400			cfg->fc_scope = RT_SCOPE_UNIVERSE;
401	}
402
403	if (cmd == SIOCDELRT)
404		return 0;
405
406	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
407		return -EINVAL;
408
409	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
410		cfg->fc_scope = RT_SCOPE_LINK;
411
412	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
413		struct nlattr *mx;
414		int len = 0;
415
416		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
417		if (mx == NULL)
418			return -ENOMEM;
419
420		if (rt->rt_flags & RTF_MTU)
421			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
422
423		if (rt->rt_flags & RTF_WINDOW)
424			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
425
426		if (rt->rt_flags & RTF_IRTT)
427			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
428
429		cfg->fc_mx = mx;
430		cfg->fc_mx_len = len;
431	}
432
433	return 0;
434}
435
436/*
437 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
438 */
439
440int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
441{
442	struct fib_config cfg;
443	struct rtentry rt;
444	int err;
445
446	switch (cmd) {
447	case SIOCADDRT:		/* Add a route */
448	case SIOCDELRT:		/* Delete a route */
449		if (!capable(CAP_NET_ADMIN))
450			return -EPERM;
451
452		if (copy_from_user(&rt, arg, sizeof(rt)))
453			return -EFAULT;
454
455		rtnl_lock();
456		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
457		if (err == 0) {
458			struct fib_table *tb;
459
460			if (cmd == SIOCDELRT) {
461				tb = fib_get_table(net, cfg.fc_table);
462				if (tb)
463					err = tb->tb_delete(tb, &cfg);
464				else
465					err = -ESRCH;
466			} else {
467				tb = fib_new_table(net, cfg.fc_table);
468				if (tb)
469					err = tb->tb_insert(tb, &cfg);
470				else
471					err = -ENOBUFS;
472			}
473
474			/* allocated by rtentry_to_fib_config() */
475			kfree(cfg.fc_mx);
476		}
477		rtnl_unlock();
478		return err;
479	}
480	return -EINVAL;
481}
482
483const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
484	[RTA_DST]		= { .type = NLA_U32 },
485	[RTA_SRC]		= { .type = NLA_U32 },
486	[RTA_IIF]		= { .type = NLA_U32 },
487	[RTA_OIF]		= { .type = NLA_U32 },
488	[RTA_GATEWAY]		= { .type = NLA_U32 },
489	[RTA_PRIORITY]		= { .type = NLA_U32 },
490	[RTA_PREFSRC]		= { .type = NLA_U32 },
491	[RTA_METRICS]		= { .type = NLA_NESTED },
492	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
493	[RTA_PROTOINFO]		= { .type = NLA_U32 },
494	[RTA_FLOW]		= { .type = NLA_U32 },
495};
496
497static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
498			    struct nlmsghdr *nlh, struct fib_config *cfg)
499{
500	struct nlattr *attr;
501	int err, remaining;
502	struct rtmsg *rtm;
503
504	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
505	if (err < 0)
506		goto errout;
507
508	memset(cfg, 0, sizeof(*cfg));
509
510	rtm = nlmsg_data(nlh);
511	cfg->fc_dst_len = rtm->rtm_dst_len;
512	cfg->fc_tos = rtm->rtm_tos;
513	cfg->fc_table = rtm->rtm_table;
514	cfg->fc_protocol = rtm->rtm_protocol;
515	cfg->fc_scope = rtm->rtm_scope;
516	cfg->fc_type = rtm->rtm_type;
517	cfg->fc_flags = rtm->rtm_flags;
518	cfg->fc_nlflags = nlh->nlmsg_flags;
519
520	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
521	cfg->fc_nlinfo.nlh = nlh;
522	cfg->fc_nlinfo.nl_net = net;
523
524	if (cfg->fc_type > RTN_MAX) {
525		err = -EINVAL;
526		goto errout;
527	}
528
529	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
530		switch (nla_type(attr)) {
531		case RTA_DST:
532			cfg->fc_dst = nla_get_be32(attr);
533			break;
534		case RTA_OIF:
535			cfg->fc_oif = nla_get_u32(attr);
536			break;
537		case RTA_GATEWAY:
538			cfg->fc_gw = nla_get_be32(attr);
539			break;
540		case RTA_PRIORITY:
541			cfg->fc_priority = nla_get_u32(attr);
542			break;
543		case RTA_PREFSRC:
544			cfg->fc_prefsrc = nla_get_be32(attr);
545			break;
546		case RTA_METRICS:
547			cfg->fc_mx = nla_data(attr);
548			cfg->fc_mx_len = nla_len(attr);
549			break;
550		case RTA_MULTIPATH:
551			cfg->fc_mp = nla_data(attr);
552			cfg->fc_mp_len = nla_len(attr);
553			break;
554		case RTA_FLOW:
555			cfg->fc_flow = nla_get_u32(attr);
556			break;
557		case RTA_TABLE:
558			cfg->fc_table = nla_get_u32(attr);
559			break;
560		}
561	}
562
563	return 0;
564errout:
565	return err;
566}
567
568static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
569{
570	struct net *net = skb->sk->sk_net;
571	struct fib_config cfg;
572	struct fib_table *tb;
573	int err;
574
575	if (net != &init_net)
576		return -EINVAL;
577
578	err = rtm_to_fib_config(net, skb, nlh, &cfg);
579	if (err < 0)
580		goto errout;
581
582	tb = fib_get_table(net, cfg.fc_table);
583	if (tb == NULL) {
584		err = -ESRCH;
585		goto errout;
586	}
587
588	err = tb->tb_delete(tb, &cfg);
589errout:
590	return err;
591}
592
593static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
594{
595	struct net *net = skb->sk->sk_net;
596	struct fib_config cfg;
597	struct fib_table *tb;
598	int err;
599
600	if (net != &init_net)
601		return -EINVAL;
602
603	err = rtm_to_fib_config(net, skb, nlh, &cfg);
604	if (err < 0)
605		goto errout;
606
607	tb = fib_new_table(net, cfg.fc_table);
608	if (tb == NULL) {
609		err = -ENOBUFS;
610		goto errout;
611	}
612
613	err = tb->tb_insert(tb, &cfg);
614errout:
615	return err;
616}
617
618static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
619{
620	struct net *net = skb->sk->sk_net;
621	unsigned int h, s_h;
622	unsigned int e = 0, s_e;
623	struct fib_table *tb;
624	struct hlist_node *node;
625	struct hlist_head *head;
626	int dumped = 0;
627
628	if (net != &init_net)
629		return 0;
630
631	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
632	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
633		return ip_rt_dump(skb, cb);
634
635	s_h = cb->args[0];
636	s_e = cb->args[1];
637
638	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
639		e = 0;
640		head = &net->ipv4.fib_table_hash[h];
641		hlist_for_each_entry(tb, node, head, tb_hlist) {
642			if (e < s_e)
643				goto next;
644			if (dumped)
645				memset(&cb->args[2], 0, sizeof(cb->args) -
646						 2 * sizeof(cb->args[0]));
647			if (tb->tb_dump(tb, skb, cb) < 0)
648				goto out;
649			dumped = 1;
650next:
651			e++;
652		}
653	}
654out:
655	cb->args[1] = e;
656	cb->args[0] = h;
657
658	return skb->len;
659}
660
661/* Prepare and feed intra-kernel routing request.
662   Really, it should be netlink message, but :-( netlink
663   can be not configured, so that we feed it directly
664   to fib engine. It is legal, because all events occur
665   only when netlink is already locked.
666 */
667
668static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
669{
670	struct net *net = ifa->ifa_dev->dev->nd_net;
671	struct fib_table *tb;
672	struct fib_config cfg = {
673		.fc_protocol = RTPROT_KERNEL,
674		.fc_type = type,
675		.fc_dst = dst,
676		.fc_dst_len = dst_len,
677		.fc_prefsrc = ifa->ifa_local,
678		.fc_oif = ifa->ifa_dev->dev->ifindex,
679		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
680		.fc_nlinfo = {
681			.nl_net = net,
682		},
683	};
684
685	if (type == RTN_UNICAST)
686		tb = fib_new_table(net, RT_TABLE_MAIN);
687	else
688		tb = fib_new_table(net, RT_TABLE_LOCAL);
689
690	if (tb == NULL)
691		return;
692
693	cfg.fc_table = tb->tb_id;
694
695	if (type != RTN_LOCAL)
696		cfg.fc_scope = RT_SCOPE_LINK;
697	else
698		cfg.fc_scope = RT_SCOPE_HOST;
699
700	if (cmd == RTM_NEWROUTE)
701		tb->tb_insert(tb, &cfg);
702	else
703		tb->tb_delete(tb, &cfg);
704}
705
706void fib_add_ifaddr(struct in_ifaddr *ifa)
707{
708	struct in_device *in_dev = ifa->ifa_dev;
709	struct net_device *dev = in_dev->dev;
710	struct in_ifaddr *prim = ifa;
711	__be32 mask = ifa->ifa_mask;
712	__be32 addr = ifa->ifa_local;
713	__be32 prefix = ifa->ifa_address&mask;
714
715	if (ifa->ifa_flags&IFA_F_SECONDARY) {
716		prim = inet_ifa_byprefix(in_dev, prefix, mask);
717		if (prim == NULL) {
718			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
719			return;
720		}
721	}
722
723	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
724
725	if (!(dev->flags&IFF_UP))
726		return;
727
728	/* Add broadcast address, if it is explicitly assigned. */
729	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
730		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
731
732	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
733	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
734		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
735			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
736
737		/* Add network specific broadcasts, when it takes a sense */
738		if (ifa->ifa_prefixlen < 31) {
739			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
740			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
741		}
742	}
743}
744
745static void fib_del_ifaddr(struct in_ifaddr *ifa)
746{
747	struct in_device *in_dev = ifa->ifa_dev;
748	struct net_device *dev = in_dev->dev;
749	struct in_ifaddr *ifa1;
750	struct in_ifaddr *prim = ifa;
751	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
752	__be32 any = ifa->ifa_address&ifa->ifa_mask;
753#define LOCAL_OK	1
754#define BRD_OK		2
755#define BRD0_OK		4
756#define BRD1_OK		8
757	unsigned ok = 0;
758
759	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
760		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
761			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
762	else {
763		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
764		if (prim == NULL) {
765			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
766			return;
767		}
768	}
769
770	/* Deletion is more complicated than add.
771	   We should take care of not to delete too much :-)
772
773	   Scan address list to be sure that addresses are really gone.
774	 */
775
776	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
777		if (ifa->ifa_local == ifa1->ifa_local)
778			ok |= LOCAL_OK;
779		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
780			ok |= BRD_OK;
781		if (brd == ifa1->ifa_broadcast)
782			ok |= BRD1_OK;
783		if (any == ifa1->ifa_broadcast)
784			ok |= BRD0_OK;
785	}
786
787	if (!(ok&BRD_OK))
788		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
789	if (!(ok&BRD1_OK))
790		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
791	if (!(ok&BRD0_OK))
792		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
793	if (!(ok&LOCAL_OK)) {
794		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
795
796		/* Check, that this local address finally disappeared. */
797		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
798			/* And the last, but not the least thing.
799			   We must flush stray FIB entries.
800
801			   First of all, we scan fib_info list searching
802			   for stray nexthop entries, then ignite fib_flush.
803			*/
804			if (fib_sync_down(ifa->ifa_local, NULL, 0))
805				fib_flush(dev->nd_net);
806		}
807	}
808#undef LOCAL_OK
809#undef BRD_OK
810#undef BRD0_OK
811#undef BRD1_OK
812}
813
814static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
815{
816
817	struct fib_result       res;
818	struct flowi            fl = { .mark = frn->fl_mark,
819				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
820							    .tos = frn->fl_tos,
821							    .scope = frn->fl_scope } } };
822
823#ifdef CONFIG_IP_MULTIPLE_TABLES
824	res.r = NULL;
825#endif
826
827	frn->err = -ENOENT;
828	if (tb) {
829		local_bh_disable();
830
831		frn->tb_id = tb->tb_id;
832		frn->err = tb->tb_lookup(tb, &fl, &res);
833
834		if (!frn->err) {
835			frn->prefixlen = res.prefixlen;
836			frn->nh_sel = res.nh_sel;
837			frn->type = res.type;
838			frn->scope = res.scope;
839			fib_res_put(&res);
840		}
841		local_bh_enable();
842	}
843}
844
845static void nl_fib_input(struct sk_buff *skb)
846{
847	struct net *net;
848	struct fib_result_nl *frn;
849	struct nlmsghdr *nlh;
850	struct fib_table *tb;
851	u32 pid;
852
853	net = skb->sk->sk_net;
854	nlh = nlmsg_hdr(skb);
855	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
856	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
857		return;
858
859	skb = skb_clone(skb, GFP_KERNEL);
860	if (skb == NULL)
861		return;
862	nlh = nlmsg_hdr(skb);
863
864	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
865	tb = fib_get_table(net, frn->tb_id_in);
866
867	nl_fib_lookup(frn, tb);
868
869	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
870	NETLINK_CB(skb).pid = 0;         /* from kernel */
871	NETLINK_CB(skb).dst_group = 0;  /* unicast */
872	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
873}
874
875static int nl_fib_lookup_init(struct net *net)
876{
877	struct sock *sk;
878	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
879				   nl_fib_input, NULL, THIS_MODULE);
880	if (sk == NULL)
881		return -EAFNOSUPPORT;
882	/* Don't hold an extra reference on the namespace */
883	put_net(sk->sk_net);
884	net->ipv4.fibnl = sk;
885	return 0;
886}
887
888static void nl_fib_lookup_exit(struct net *net)
889{
890	/* At the last minute lie and say this is a socket for the
891	 * initial network namespace. So the socket will  be safe to free.
892	 */
893	net->ipv4.fibnl->sk_net = get_net(&init_net);
894	sock_put(net->ipv4.fibnl);
895}
896
897static void fib_disable_ip(struct net_device *dev, int force)
898{
899	if (fib_sync_down(0, dev, force))
900		fib_flush(dev->nd_net);
901	rt_cache_flush(0);
902	arp_ifdown(dev);
903}
904
905static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
906{
907	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
908
909	switch (event) {
910	case NETDEV_UP:
911		fib_add_ifaddr(ifa);
912#ifdef CONFIG_IP_ROUTE_MULTIPATH
913		fib_sync_up(ifa->ifa_dev->dev);
914#endif
915		rt_cache_flush(-1);
916		break;
917	case NETDEV_DOWN:
918		fib_del_ifaddr(ifa);
919		if (ifa->ifa_dev->ifa_list == NULL) {
920			/* Last address was deleted from this interface.
921			   Disable IP.
922			 */
923			fib_disable_ip(ifa->ifa_dev->dev, 1);
924		} else {
925			rt_cache_flush(-1);
926		}
927		break;
928	}
929	return NOTIFY_DONE;
930}
931
932static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
933{
934	struct net_device *dev = ptr;
935	struct in_device *in_dev = __in_dev_get_rtnl(dev);
936
937	if (dev->nd_net != &init_net)
938		return NOTIFY_DONE;
939
940	if (event == NETDEV_UNREGISTER) {
941		fib_disable_ip(dev, 2);
942		return NOTIFY_DONE;
943	}
944
945	if (!in_dev)
946		return NOTIFY_DONE;
947
948	switch (event) {
949	case NETDEV_UP:
950		for_ifa(in_dev) {
951			fib_add_ifaddr(ifa);
952		} endfor_ifa(in_dev);
953#ifdef CONFIG_IP_ROUTE_MULTIPATH
954		fib_sync_up(dev);
955#endif
956		rt_cache_flush(-1);
957		break;
958	case NETDEV_DOWN:
959		fib_disable_ip(dev, 0);
960		break;
961	case NETDEV_CHANGEMTU:
962	case NETDEV_CHANGE:
963		rt_cache_flush(0);
964		break;
965	}
966	return NOTIFY_DONE;
967}
968
969static struct notifier_block fib_inetaddr_notifier = {
970	.notifier_call =fib_inetaddr_event,
971};
972
973static struct notifier_block fib_netdev_notifier = {
974	.notifier_call =fib_netdev_event,
975};
976
977static int __net_init ip_fib_net_init(struct net *net)
978{
979	unsigned int i;
980
981	net->ipv4.fib_table_hash = kzalloc(
982			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
983	if (net->ipv4.fib_table_hash == NULL)
984		return -ENOMEM;
985
986	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
987		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
988
989	return fib4_rules_init(net);
990}
991
992static void __net_exit ip_fib_net_exit(struct net *net)
993{
994	unsigned int i;
995
996#ifdef CONFIG_IP_MULTIPLE_TABLES
997	fib4_rules_exit(net);
998#endif
999
1000	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1001		struct fib_table *tb;
1002		struct hlist_head *head;
1003		struct hlist_node *node, *tmp;
1004
1005		head = &net->ipv4.fib_table_hash[i];
1006		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1007			hlist_del(node);
1008			tb->tb_flush(tb);
1009			kfree(tb);
1010		}
1011	}
1012	kfree(net->ipv4.fib_table_hash);
1013}
1014
1015static int __net_init fib_net_init(struct net *net)
1016{
1017	int error;
1018
1019	error = 0;
1020	if (net != &init_net)
1021		goto out;
1022
1023	error = ip_fib_net_init(net);
1024	if (error < 0)
1025		goto out;
1026	error = nl_fib_lookup_init(net);
1027	if (error < 0)
1028		goto out_nlfl;
1029	error = fib_proc_init(net);
1030	if (error < 0)
1031		goto out_proc;
1032out:
1033	return error;
1034
1035out_proc:
1036	nl_fib_lookup_exit(net);
1037out_nlfl:
1038	ip_fib_net_exit(net);
1039	goto out;
1040}
1041
1042static void __net_exit fib_net_exit(struct net *net)
1043{
1044	fib_proc_exit(net);
1045	nl_fib_lookup_exit(net);
1046	ip_fib_net_exit(net);
1047}
1048
1049static struct pernet_operations fib_net_ops = {
1050	.init = fib_net_init,
1051	.exit = fib_net_exit,
1052};
1053
/*
 * Boot-time initialisation of the FIB frontend: register the rtnetlink
 * handlers for adding, deleting and dumping IPv4 routes, then the
 * per-namespace operations and the netdevice and inetaddr notifiers.
 *
 * NOTE(review): the return values of the register_*() calls are not
 * checked here.
 */
void __init ip_fib_init(void)
{
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

	register_pernet_subsys(&fib_net_ops);
	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);
}
1064
/* Address classification and device lookup helpers exported by this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1068