fib_frontend.c revision 8153a10c08f1312af563bb92532002e46d3f504a
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 *		This program is free software; you can redistribute it and/or
11 *		modify it under the terms of the GNU General Public License
12 *		as published by the Free Software Foundation; either version
13 *		2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
20#include <linux/capability.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
30#include <linux/inetdevice.h>
31#include <linux/netdevice.h>
32#include <linux/if_addr.h>
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
35#include <linux/init.h>
36#include <linux/list.h>
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/arp.h>
44#include <net/ip_fib.h>
45#include <net/rtnetlink.h>
46
47#ifndef CONFIG_IP_MULTIPLE_TABLES
48
49static int __net_init fib4_rules_init(struct net *net)
50{
51	struct fib_table *local_table, *main_table;
52
53	local_table = fib_hash_table(RT_TABLE_LOCAL);
54	if (local_table == NULL)
55		return -ENOMEM;
56
57	main_table  = fib_hash_table(RT_TABLE_MAIN);
58	if (main_table == NULL)
59		goto fail;
60
61	hlist_add_head_rcu(&local_table->tb_hlist,
62				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
63	hlist_add_head_rcu(&main_table->tb_hlist,
64				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
65	return 0;
66
67fail:
68	kfree(local_table);
69	return -ENOMEM;
70}
71#else
72
73struct fib_table *fib_new_table(struct net *net, u32 id)
74{
75	struct fib_table *tb;
76	unsigned int h;
77
78	if (id == 0)
79		id = RT_TABLE_MAIN;
80	tb = fib_get_table(net, id);
81	if (tb)
82		return tb;
83
84	tb = fib_hash_table(id);
85	if (!tb)
86		return NULL;
87	h = id & (FIB_TABLE_HASHSZ - 1);
88	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
89	return tb;
90}
91
92struct fib_table *fib_get_table(struct net *net, u32 id)
93{
94	struct fib_table *tb;
95	struct hlist_node *node;
96	struct hlist_head *head;
97	unsigned int h;
98
99	if (id == 0)
100		id = RT_TABLE_MAIN;
101	h = id & (FIB_TABLE_HASHSZ - 1);
102
103	rcu_read_lock();
104	head = &net->ipv4.fib_table_hash[h];
105	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
106		if (tb->tb_id == id) {
107			rcu_read_unlock();
108			return tb;
109		}
110	}
111	rcu_read_unlock();
112	return NULL;
113}
114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
116void fib_select_default(struct net *net,
117			const struct flowi *flp, struct fib_result *res)
118{
119	struct fib_table *tb;
120	int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123		return;
124	table = res->r->table;
125#endif
126	tb = fib_get_table(net, table);
127	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128		fib_table_select_default(tb, flp, res);
129}
130
131static void fib_flush(struct net *net)
132{
133	int flushed = 0;
134	struct fib_table *tb;
135	struct hlist_node *node;
136	struct hlist_head *head;
137	unsigned int h;
138
139	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
140		head = &net->ipv4.fib_table_hash[h];
141		hlist_for_each_entry(tb, node, head, tb_hlist)
142			flushed += fib_table_flush(tb);
143	}
144
145	if (flushed)
146		rt_cache_flush(net, -1);
147}
148
149/*
150 *	Find the first device with a given source address.
151 */
152
153struct net_device * ip_dev_find(struct net *net, __be32 addr)
154{
155	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156	struct fib_result res;
157	struct net_device *dev = NULL;
158	struct fib_table *local_table;
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161	res.r = NULL;
162#endif
163
164	local_table = fib_get_table(net, RT_TABLE_LOCAL);
165	if (!local_table || fib_table_lookup(local_table, &fl, &res))
166		return NULL;
167	if (res.type != RTN_LOCAL)
168		goto out;
169	dev = FIB_RES_DEV(res);
170
171	if (dev)
172		dev_hold(dev);
173out:
174	fib_res_put(&res);
175	return dev;
176}
177
178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
182static inline unsigned __inet_dev_addr_type(struct net *net,
183					    const struct net_device *dev,
184					    __be32 addr)
185{
186	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187	struct fib_result	res;
188	unsigned ret = RTN_BROADCAST;
189	struct fib_table *local_table;
190
191	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
192		return RTN_BROADCAST;
193	if (ipv4_is_multicast(addr))
194		return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197	res.r = NULL;
198#endif
199
200	local_table = fib_get_table(net, RT_TABLE_LOCAL);
201	if (local_table) {
202		ret = RTN_UNICAST;
203		if (!fib_table_lookup(local_table, &fl, &res)) {
204			if (!dev || dev == res.fi->fib_dev)
205				ret = res.type;
206			fib_res_put(&res);
207		}
208	}
209	return ret;
210}
211
212unsigned int inet_addr_type(struct net *net, __be32 addr)
213{
214	return __inet_dev_addr_type(net, NULL, addr);
215}
216
217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218				__be32 addr)
219{
220       return __inet_dev_addr_type(net, dev, addr);
221}
222
/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check, that source is valid i.e. not broadcast or our local
     address.
   - figure out what "logical" interface this packet arrived
     and calculate "specific destination" address.
   - check, that packet arrived from expected physical interface.

   Returns -EINVAL when the source is rejected.  Otherwise returns 0 or 1,
   where 1 means the next hop found for the source has scope
   >= RT_SCOPE_HOST.  On every non-error return *spec_dst has been filled
   in, and *itag has either been updated by fib_combine_itag() or cleared
   on the last-resort path.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst,
			u32 *itag, u32 mark)
{
	struct in_device *in_dev;
	/* Reverse lookup key: the packet's source is the lookup destination. */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .mark = mark,
			    .iif = oif };

	struct fib_result res;
	int no_addr, rpf, accept_local;
	int ret;
	struct net *net;

	no_addr = rpf = accept_local = 0;
	/* Snapshot the per-device settings while under RCU protection. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
	}
	rcu_read_unlock();

	/* in_dev is only compared against NULL here, never dereferenced. */
	if (in_dev == NULL)
		goto e_inval;

	net = dev_net(dev);
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	/* Only unicast routes qualify, or local ones when accept_local is set. */
	if (res.type != RTN_UNICAST) {
		if (res.type != RTN_LOCAL || !accept_local)
			goto e_inval_res;
	}
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	/* Route back to the source leaves via the arrival device (or is multipath). */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	/* Reverse route points elsewhere. */
	if (no_addr)
		goto last_resort;
	/* rpf == 1 rejects here; other non-zero values fall through to the retry. */
	if (rpf == 1)
		goto e_inval;
	/* Retry the lookup, this time constrained to the arrival device. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* No usable route: any non-zero rpf setting rejects the packet. */
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}
309
310static inline __be32 sk_extract_addr(struct sockaddr *addr)
311{
312	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
313}
314
315static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
316{
317	struct nlattr *nla;
318
319	nla = (struct nlattr *) ((char *) mx + len);
320	nla->nla_type = type;
321	nla->nla_len = nla_attr_size(4);
322	*(u32 *) nla_data(nla) = value;
323
324	return len + nla_total_size(4);
325}
326
/*
 * Translate an ioctl-style struct rtentry (SIOCADDRT/SIOCDELRT) into a
 * struct fib_config.
 *
 * Returns 0 on success or a negative errno (-EAFNOSUPPORT, -EINVAL,
 * -EFAULT, -ENODEV, -ENOMEM).  On success cfg->fc_mx may point to a
 * kzalloc()ed metrics buffer that the caller has to kfree(); on every
 * error return no such buffer has been allocated.
 *
 * NOTE(review): uses __dev_get_by_name() and __in_dev_get_rtnl(), so the
 * caller is presumably required to hold the RTNL lock (ip_rt_ioctl()
 * does) - confirm for any new caller.
 */
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	/* Host routes keep the /32 default; otherwise derive it from the mask. */
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	/* Additions create the route and are tagged RTPROT_BOOT. */
	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* A non-zero ioctl metric is one higher than the stored priority. */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	/* Reject routes need neither device nor gateway; finish early. */
	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE is a placeholder, resolved further below. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		/* rt_dev is a user-space pointer to the device name. */
		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		/* "dev:label" form: look up the device by the part before ':'. */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/* Restore the full label and find the address carrying it. */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		/* A unicast gateway with RTF_GATEWAY makes the route universe scope. */
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(net, addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	/* Deletion stops here: the checks and metrics below apply to adds only. */
	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for at most the three 4-byte attributes written below. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/* The ioctl MTU is stored as RTAX_ADVMSS, reduced by 40. */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		/* The ioctl initial RTT is stored as RTAX_RTT, shifted left by 3. */
		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
454
455/*
456 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
457 */
458
459int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
460{
461	struct fib_config cfg;
462	struct rtentry rt;
463	int err;
464
465	switch (cmd) {
466	case SIOCADDRT:		/* Add a route */
467	case SIOCDELRT:		/* Delete a route */
468		if (!capable(CAP_NET_ADMIN))
469			return -EPERM;
470
471		if (copy_from_user(&rt, arg, sizeof(rt)))
472			return -EFAULT;
473
474		rtnl_lock();
475		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
476		if (err == 0) {
477			struct fib_table *tb;
478
479			if (cmd == SIOCDELRT) {
480				tb = fib_get_table(net, cfg.fc_table);
481				if (tb)
482					err = fib_table_delete(tb, &cfg);
483				else
484					err = -ESRCH;
485			} else {
486				tb = fib_new_table(net, cfg.fc_table);
487				if (tb)
488					err = fib_table_insert(tb, &cfg);
489				else
490					err = -ENOBUFS;
491			}
492
493			/* allocated by rtentry_to_fib_config() */
494			kfree(cfg.fc_mx);
495		}
496		rtnl_unlock();
497		return err;
498	}
499	return -EINVAL;
500}
501
502const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
503	[RTA_DST]		= { .type = NLA_U32 },
504	[RTA_SRC]		= { .type = NLA_U32 },
505	[RTA_IIF]		= { .type = NLA_U32 },
506	[RTA_OIF]		= { .type = NLA_U32 },
507	[RTA_GATEWAY]		= { .type = NLA_U32 },
508	[RTA_PRIORITY]		= { .type = NLA_U32 },
509	[RTA_PREFSRC]		= { .type = NLA_U32 },
510	[RTA_METRICS]		= { .type = NLA_NESTED },
511	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
512	[RTA_FLOW]		= { .type = NLA_U32 },
513};
514
515static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
516			    struct nlmsghdr *nlh, struct fib_config *cfg)
517{
518	struct nlattr *attr;
519	int err, remaining;
520	struct rtmsg *rtm;
521
522	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
523	if (err < 0)
524		goto errout;
525
526	memset(cfg, 0, sizeof(*cfg));
527
528	rtm = nlmsg_data(nlh);
529	cfg->fc_dst_len = rtm->rtm_dst_len;
530	cfg->fc_tos = rtm->rtm_tos;
531	cfg->fc_table = rtm->rtm_table;
532	cfg->fc_protocol = rtm->rtm_protocol;
533	cfg->fc_scope = rtm->rtm_scope;
534	cfg->fc_type = rtm->rtm_type;
535	cfg->fc_flags = rtm->rtm_flags;
536	cfg->fc_nlflags = nlh->nlmsg_flags;
537
538	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
539	cfg->fc_nlinfo.nlh = nlh;
540	cfg->fc_nlinfo.nl_net = net;
541
542	if (cfg->fc_type > RTN_MAX) {
543		err = -EINVAL;
544		goto errout;
545	}
546
547	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
548		switch (nla_type(attr)) {
549		case RTA_DST:
550			cfg->fc_dst = nla_get_be32(attr);
551			break;
552		case RTA_OIF:
553			cfg->fc_oif = nla_get_u32(attr);
554			break;
555		case RTA_GATEWAY:
556			cfg->fc_gw = nla_get_be32(attr);
557			break;
558		case RTA_PRIORITY:
559			cfg->fc_priority = nla_get_u32(attr);
560			break;
561		case RTA_PREFSRC:
562			cfg->fc_prefsrc = nla_get_be32(attr);
563			break;
564		case RTA_METRICS:
565			cfg->fc_mx = nla_data(attr);
566			cfg->fc_mx_len = nla_len(attr);
567			break;
568		case RTA_MULTIPATH:
569			cfg->fc_mp = nla_data(attr);
570			cfg->fc_mp_len = nla_len(attr);
571			break;
572		case RTA_FLOW:
573			cfg->fc_flow = nla_get_u32(attr);
574			break;
575		case RTA_TABLE:
576			cfg->fc_table = nla_get_u32(attr);
577			break;
578		}
579	}
580
581	return 0;
582errout:
583	return err;
584}
585
586static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
587{
588	struct net *net = sock_net(skb->sk);
589	struct fib_config cfg;
590	struct fib_table *tb;
591	int err;
592
593	err = rtm_to_fib_config(net, skb, nlh, &cfg);
594	if (err < 0)
595		goto errout;
596
597	tb = fib_get_table(net, cfg.fc_table);
598	if (tb == NULL) {
599		err = -ESRCH;
600		goto errout;
601	}
602
603	err = fib_table_delete(tb, &cfg);
604errout:
605	return err;
606}
607
608static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
609{
610	struct net *net = sock_net(skb->sk);
611	struct fib_config cfg;
612	struct fib_table *tb;
613	int err;
614
615	err = rtm_to_fib_config(net, skb, nlh, &cfg);
616	if (err < 0)
617		goto errout;
618
619	tb = fib_new_table(net, cfg.fc_table);
620	if (tb == NULL) {
621		err = -ENOBUFS;
622		goto errout;
623	}
624
625	err = fib_table_insert(tb, &cfg);
626errout:
627	return err;
628}
629
/*
 * Netlink dump callback for RTM_GETROUTE.
 *
 * If the request carries a full rtmsg with RTM_F_CLONED set, the dump is
 * delegated to ip_rt_dump().  Otherwise every table in the namespace is
 * dumped through fib_table_dump().
 *
 * Resume state kept in cb->args between calls:
 *   args[0] - hash bucket to continue from
 *   args[1] - index of the table within that bucket to continue from
 *   args[2..] - cleared here before moving on to a further table in the
 *               same call (presumably per-table state owned by
 *               fib_table_dump() - confirm there)
 *
 * Always returns skb->len.
 */
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	int dumped = 0;

	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
		return ip_rt_dump(skb, cb);

	s_h = cb->args[0];
	s_e = cb->args[1];

	/* s_e only applies to the first bucket visited; reset it afterwards. */
	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry(tb, node, head, tb_hlist) {
			/* Skip tables already completed by an earlier call. */
			if (e < s_e)
				goto next;
			/* Starting a further table: drop the previous table's state. */
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			/* A negative result stops here; (h, e) is saved for resuming. */
			if (fib_table_dump(tb, skb, cb) < 0)
				goto out;
			dumped = 1;
next:
			e++;
		}
	}
out:
	cb->args[1] = e;
	cb->args[0] = h;

	return skb->len;
}
669
670/* Prepare and feed intra-kernel routing request.
671   Really, it should be netlink message, but :-( netlink
672   can be not configured, so that we feed it directly
673   to fib engine. It is legal, because all events occur
674   only when netlink is already locked.
675 */
676
677static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
678{
679	struct net *net = dev_net(ifa->ifa_dev->dev);
680	struct fib_table *tb;
681	struct fib_config cfg = {
682		.fc_protocol = RTPROT_KERNEL,
683		.fc_type = type,
684		.fc_dst = dst,
685		.fc_dst_len = dst_len,
686		.fc_prefsrc = ifa->ifa_local,
687		.fc_oif = ifa->ifa_dev->dev->ifindex,
688		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
689		.fc_nlinfo = {
690			.nl_net = net,
691		},
692	};
693
694	if (type == RTN_UNICAST)
695		tb = fib_new_table(net, RT_TABLE_MAIN);
696	else
697		tb = fib_new_table(net, RT_TABLE_LOCAL);
698
699	if (tb == NULL)
700		return;
701
702	cfg.fc_table = tb->tb_id;
703
704	if (type != RTN_LOCAL)
705		cfg.fc_scope = RT_SCOPE_LINK;
706	else
707		cfg.fc_scope = RT_SCOPE_HOST;
708
709	if (cmd == RTM_NEWROUTE)
710		fib_table_insert(tb, &cfg);
711	else
712		fib_table_delete(tb, &cfg);
713}
714
715void fib_add_ifaddr(struct in_ifaddr *ifa)
716{
717	struct in_device *in_dev = ifa->ifa_dev;
718	struct net_device *dev = in_dev->dev;
719	struct in_ifaddr *prim = ifa;
720	__be32 mask = ifa->ifa_mask;
721	__be32 addr = ifa->ifa_local;
722	__be32 prefix = ifa->ifa_address&mask;
723
724	if (ifa->ifa_flags&IFA_F_SECONDARY) {
725		prim = inet_ifa_byprefix(in_dev, prefix, mask);
726		if (prim == NULL) {
727			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
728			return;
729		}
730	}
731
732	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
733
734	if (!(dev->flags&IFF_UP))
735		return;
736
737	/* Add broadcast address, if it is explicitly assigned. */
738	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
739		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
740
741	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
742	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
743		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
744			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
745
746		/* Add network specific broadcasts, when it takes a sense */
747		if (ifa->ifa_prefixlen < 31) {
748			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
749			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
750		}
751	}
752}
753
/*
 * Remove the kernel routes that were installed for an interface address.
 *
 * For a primary address the prefix route is deleted first.  The
 * per-address /32 routes (local, explicit broadcast, network broadcasts)
 * are deleted only if no address remaining on the device's list still
 * needs them.
 *
 * NOTE(review): the scan below only makes sense if ifa has already been
 * unlinked from in_dev->ifa_list by the caller, otherwise ifa would
 * match itself and nothing would ever be deleted - confirm at call site.
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	/* Network broadcast addresses derived from address and mask. */
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
/* Bits set in "ok" mean: another address still uses this value, keep it. */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		/* Secondary address: routes were attributed to its primary. */
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than add.
	   We should take care not to delete too much :-)

	   Scan the address list to be sure that the addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check, that this local address finally disappeared. */
		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
			/* And the last, but not the least thing.
			   We must flush stray FIB entries.

			   First of all, we scan fib_info list searching
			   for stray nexthop entries, then ignite fib_flush.
			*/
			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
				fib_flush(dev_net(dev));
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
822
823static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
824{
825
826	struct fib_result       res;
827	struct flowi            fl = { .mark = frn->fl_mark,
828				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
829							    .tos = frn->fl_tos,
830							    .scope = frn->fl_scope } } };
831
832#ifdef CONFIG_IP_MULTIPLE_TABLES
833	res.r = NULL;
834#endif
835
836	frn->err = -ENOENT;
837	if (tb) {
838		local_bh_disable();
839
840		frn->tb_id = tb->tb_id;
841		frn->err = fib_table_lookup(tb, &fl, &res);
842
843		if (!frn->err) {
844			frn->prefixlen = res.prefixlen;
845			frn->nh_sel = res.nh_sel;
846			frn->type = res.type;
847			frn->scope = res.scope;
848			fib_res_put(&res);
849		}
850		local_bh_enable();
851	}
852}
853
854static void nl_fib_input(struct sk_buff *skb)
855{
856	struct net *net;
857	struct fib_result_nl *frn;
858	struct nlmsghdr *nlh;
859	struct fib_table *tb;
860	u32 pid;
861
862	net = sock_net(skb->sk);
863	nlh = nlmsg_hdr(skb);
864	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
865	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
866		return;
867
868	skb = skb_clone(skb, GFP_KERNEL);
869	if (skb == NULL)
870		return;
871	nlh = nlmsg_hdr(skb);
872
873	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
874	tb = fib_get_table(net, frn->tb_id_in);
875
876	nl_fib_lookup(frn, tb);
877
878	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
879	NETLINK_CB(skb).pid = 0;         /* from kernel */
880	NETLINK_CB(skb).dst_group = 0;  /* unicast */
881	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
882}
883
884static int nl_fib_lookup_init(struct net *net)
885{
886	struct sock *sk;
887	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
888				   nl_fib_input, NULL, THIS_MODULE);
889	if (sk == NULL)
890		return -EAFNOSUPPORT;
891	net->ipv4.fibnl = sk;
892	return 0;
893}
894
895static void nl_fib_lookup_exit(struct net *net)
896{
897	netlink_kernel_release(net->ipv4.fibnl);
898	net->ipv4.fibnl = NULL;
899}
900
/*
 * Take IPv4 routing down on a device: sync the FIB state for the device
 * (flushing the tables if that reports changes), flush the routing cache
 * with the given delay, and notify ARP via arp_ifdown().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
908
909static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
910{
911	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
912	struct net_device *dev = ifa->ifa_dev->dev;
913
914	switch (event) {
915	case NETDEV_UP:
916		fib_add_ifaddr(ifa);
917#ifdef CONFIG_IP_ROUTE_MULTIPATH
918		fib_sync_up(dev);
919#endif
920		rt_cache_flush(dev_net(dev), -1);
921		break;
922	case NETDEV_DOWN:
923		fib_del_ifaddr(ifa);
924		if (ifa->ifa_dev->ifa_list == NULL) {
925			/* Last address was deleted from this interface.
926			   Disable IP.
927			 */
928			fib_disable_ip(dev, 1, 0);
929		} else {
930			rt_cache_flush(dev_net(dev), -1);
931		}
932		break;
933	}
934	return NOTIFY_DONE;
935}
936
937static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
938{
939	struct net_device *dev = ptr;
940	struct in_device *in_dev = __in_dev_get_rtnl(dev);
941
942	if (event == NETDEV_UNREGISTER) {
943		fib_disable_ip(dev, 2, -1);
944		return NOTIFY_DONE;
945	}
946
947	if (!in_dev)
948		return NOTIFY_DONE;
949
950	switch (event) {
951	case NETDEV_UP:
952		for_ifa(in_dev) {
953			fib_add_ifaddr(ifa);
954		} endfor_ifa(in_dev);
955#ifdef CONFIG_IP_ROUTE_MULTIPATH
956		fib_sync_up(dev);
957#endif
958		rt_cache_flush(dev_net(dev), -1);
959		break;
960	case NETDEV_DOWN:
961		fib_disable_ip(dev, 0, 0);
962		break;
963	case NETDEV_CHANGEMTU:
964	case NETDEV_CHANGE:
965		rt_cache_flush(dev_net(dev), 0);
966		break;
967	case NETDEV_UNREGISTER_BATCH:
968		rt_cache_flush_batch();
969		break;
970	}
971	return NOTIFY_DONE;
972}
973
/* Notifier for address events; registered in ip_fib_init(). */
static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call = fib_inetaddr_event,
};
977
/* Notifier for network device events; registered in ip_fib_init(). */
static struct notifier_block fib_netdev_notifier = {
	.notifier_call = fib_netdev_event,
};
981
982static int __net_init ip_fib_net_init(struct net *net)
983{
984	int err;
985	unsigned int i;
986
987	net->ipv4.fib_table_hash = kzalloc(
988			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
989	if (net->ipv4.fib_table_hash == NULL)
990		return -ENOMEM;
991
992	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
993		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
994
995	err = fib4_rules_init(net);
996	if (err < 0)
997		goto fail;
998	return 0;
999
1000fail:
1001	kfree(net->ipv4.fib_table_hash);
1002	return err;
1003}
1004
1005static void __net_exit ip_fib_net_exit(struct net *net)
1006{
1007	unsigned int i;
1008
1009#ifdef CONFIG_IP_MULTIPLE_TABLES
1010	fib4_rules_exit(net);
1011#endif
1012
1013	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1014		struct fib_table *tb;
1015		struct hlist_head *head;
1016		struct hlist_node *node, *tmp;
1017
1018		head = &net->ipv4.fib_table_hash[i];
1019		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1020			hlist_del(node);
1021			fib_table_flush(tb);
1022			kfree(tb);
1023		}
1024	}
1025	kfree(net->ipv4.fib_table_hash);
1026}
1027
1028static int __net_init fib_net_init(struct net *net)
1029{
1030	int error;
1031
1032	error = ip_fib_net_init(net);
1033	if (error < 0)
1034		goto out;
1035	error = nl_fib_lookup_init(net);
1036	if (error < 0)
1037		goto out_nlfl;
1038	error = fib_proc_init(net);
1039	if (error < 0)
1040		goto out_proc;
1041out:
1042	return error;
1043
1044out_proc:
1045	nl_fib_lookup_exit(net);
1046out_nlfl:
1047	ip_fib_net_exit(net);
1048	goto out;
1049}
1050
/*
 * Per-namespace teardown; undoes fib_net_init() in reverse order:
 * proc entries, lookup netlink socket, then the FIB tables.
 */
static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}
1057
/* Per-namespace init/exit hooks; registered in ip_fib_init(). */
static struct pernet_operations fib_net_ops = {
	.init = fib_net_init,
	.exit = fib_net_exit,
};
1062
/*
 * Boot-time initialisation of the FIB frontend: register the rtnetlink
 * route handlers, the per-namespace operations and the device/address
 * notifiers, then initialise the hash-based FIB backend.
 *
 * NOTE(review): the return values of the register_*() calls are not
 * checked here.
 */
void __init ip_fib_init(void)
{
	/* NEWROUTE/DELROUTE have doit handlers; GETROUTE only a dump handler. */
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

	register_pernet_subsys(&fib_net_ops);
	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	fib_hash_init();
}
1075
/* Address classification and device lookup helpers exported to modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1079