fib_frontend.c revision b5f7e7554753e2cc3ef3bef0271fdb32027df2ba
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 *		This program is free software; you can redistribute it and/or
11 *		modify it under the terms of the GNU General Public License
12 *		as published by the Free Software Foundation; either version
13 *		2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
20#include <linux/capability.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
30#include <linux/inetdevice.h>
31#include <linux/netdevice.h>
32#include <linux/if_addr.h>
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
35#include <linux/init.h>
36#include <linux/list.h>
37#include <linux/slab.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/arp.h>
45#include <net/ip_fib.h>
46#include <net/rtnetlink.h>
47
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
50static int __net_init fib4_rules_init(struct net *net)
51{
52	struct fib_table *local_table, *main_table;
53
54	local_table = fib_hash_table(RT_TABLE_LOCAL);
55	if (local_table == NULL)
56		return -ENOMEM;
57
58	main_table  = fib_hash_table(RT_TABLE_MAIN);
59	if (main_table == NULL)
60		goto fail;
61
62	hlist_add_head_rcu(&local_table->tb_hlist,
63				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
64	hlist_add_head_rcu(&main_table->tb_hlist,
65				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
66	return 0;
67
68fail:
69	kfree(local_table);
70	return -ENOMEM;
71}
72#else
73
74struct fib_table *fib_new_table(struct net *net, u32 id)
75{
76	struct fib_table *tb;
77	unsigned int h;
78
79	if (id == 0)
80		id = RT_TABLE_MAIN;
81	tb = fib_get_table(net, id);
82	if (tb)
83		return tb;
84
85	tb = fib_hash_table(id);
86	if (!tb)
87		return NULL;
88	h = id & (FIB_TABLE_HASHSZ - 1);
89	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
90	return tb;
91}
92
93struct fib_table *fib_get_table(struct net *net, u32 id)
94{
95	struct fib_table *tb;
96	struct hlist_node *node;
97	struct hlist_head *head;
98	unsigned int h;
99
100	if (id == 0)
101		id = RT_TABLE_MAIN;
102	h = id & (FIB_TABLE_HASHSZ - 1);
103
104	rcu_read_lock();
105	head = &net->ipv4.fib_table_hash[h];
106	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
107		if (tb->tb_id == id) {
108			rcu_read_unlock();
109			return tb;
110		}
111	}
112	rcu_read_unlock();
113	return NULL;
114}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
117void fib_select_default(struct net *net,
118			const struct flowi *flp, struct fib_result *res)
119{
120	struct fib_table *tb;
121	int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124		return;
125	table = res->r->table;
126#endif
127	tb = fib_get_table(net, table);
128	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129		fib_table_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net)
133{
134	int flushed = 0;
135	struct fib_table *tb;
136	struct hlist_node *node;
137	struct hlist_head *head;
138	unsigned int h;
139
140	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
141		head = &net->ipv4.fib_table_hash[h];
142		hlist_for_each_entry(tb, node, head, tb_hlist)
143			flushed += fib_table_flush(tb);
144	}
145
146	if (flushed)
147		rt_cache_flush(net, -1);
148}
149
150/*
151 *	Find the first device with a given source address.
152 */
153
154struct net_device * ip_dev_find(struct net *net, __be32 addr)
155{
156	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157	struct fib_result res;
158	struct net_device *dev = NULL;
159	struct fib_table *local_table;
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162	res.r = NULL;
163#endif
164
165	local_table = fib_get_table(net, RT_TABLE_LOCAL);
166	if (!local_table || fib_table_lookup(local_table, &fl, &res))
167		return NULL;
168	if (res.type != RTN_LOCAL)
169		goto out;
170	dev = FIB_RES_DEV(res);
171
172	if (dev)
173		dev_hold(dev);
174out:
175	fib_res_put(&res);
176	return dev;
177}
178
179/*
180 * Find address type as if only "dev" was present in the system. If
181 * on_dev is NULL then all interfaces are taken into consideration.
182 */
183static inline unsigned __inet_dev_addr_type(struct net *net,
184					    const struct net_device *dev,
185					    __be32 addr)
186{
187	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188	struct fib_result	res;
189	unsigned ret = RTN_BROADCAST;
190	struct fib_table *local_table;
191
192	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
193		return RTN_BROADCAST;
194	if (ipv4_is_multicast(addr))
195		return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198	res.r = NULL;
199#endif
200
201	local_table = fib_get_table(net, RT_TABLE_LOCAL);
202	if (local_table) {
203		ret = RTN_UNICAST;
204		if (!fib_table_lookup(local_table, &fl, &res)) {
205			if (!dev || dev == res.fi->fib_dev)
206				ret = res.type;
207			fib_res_put(&res);
208		}
209	}
210	return ret;
211}
212
213unsigned int inet_addr_type(struct net *net, __be32 addr)
214{
215	return __inet_dev_addr_type(net, NULL, addr);
216}
217
218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219				__be32 addr)
220{
221       return __inet_dev_addr_type(net, dev, addr);
222}
223
224/* Given (packet source, input interface) and optional (dst, oif, tos):
225   - (main) check, that source is valid i.e. not broadcast or our local
226     address.
227   - figure out what "logical" interface this packet arrived
228     and calculate "specific destination" address.
229   - check, that packet arrived from expected physical interface.
230 */
231
232int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
233			struct net_device *dev, __be32 *spec_dst,
234			u32 *itag, u32 mark)
235{
236	struct in_device *in_dev;
237	struct flowi fl = { .nl_u = { .ip4_u =
238				      { .daddr = src,
239					.saddr = dst,
240					.tos = tos } },
241			    .mark = mark,
242			    .iif = oif };
243
244	struct fib_result res;
245	int no_addr, rpf, accept_local;
246	int ret;
247	struct net *net;
248
249	no_addr = rpf = accept_local = 0;
250	rcu_read_lock();
251	in_dev = __in_dev_get_rcu(dev);
252	if (in_dev) {
253		no_addr = in_dev->ifa_list == NULL;
254		rpf = IN_DEV_RPFILTER(in_dev);
255		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
256		if (mark && !IN_DEV_SRC_VMARK(in_dev))
257			fl.mark = 0;
258	}
259	rcu_read_unlock();
260
261	if (in_dev == NULL)
262		goto e_inval;
263
264	net = dev_net(dev);
265	if (fib_lookup(net, &fl, &res))
266		goto last_resort;
267	if (res.type != RTN_UNICAST) {
268		if (res.type != RTN_LOCAL || !accept_local)
269			goto e_inval_res;
270	}
271	*spec_dst = FIB_RES_PREFSRC(res);
272	fib_combine_itag(itag, &res);
273#ifdef CONFIG_IP_ROUTE_MULTIPATH
274	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
275#else
276	if (FIB_RES_DEV(res) == dev)
277#endif
278	{
279		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
280		fib_res_put(&res);
281		return ret;
282	}
283	fib_res_put(&res);
284	if (no_addr)
285		goto last_resort;
286	if (rpf == 1)
287		goto e_rpf;
288	fl.oif = dev->ifindex;
289
290	ret = 0;
291	if (fib_lookup(net, &fl, &res) == 0) {
292		if (res.type == RTN_UNICAST) {
293			*spec_dst = FIB_RES_PREFSRC(res);
294			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
295		}
296		fib_res_put(&res);
297	}
298	return ret;
299
300last_resort:
301	if (rpf)
302		goto e_rpf;
303	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304	*itag = 0;
305	return 0;
306
307e_inval_res:
308	fib_res_put(&res);
309e_inval:
310	return -EINVAL;
311e_rpf:
312	return -EXDEV;
313}
314
315static inline __be32 sk_extract_addr(struct sockaddr *addr)
316{
317	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
318}
319
320static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
321{
322	struct nlattr *nla;
323
324	nla = (struct nlattr *) ((char *) mx + len);
325	nla->nla_type = type;
326	nla->nla_len = nla_attr_size(4);
327	*(u32 *) nla_data(nla) = value;
328
329	return len + nla_total_size(4);
330}
331
332static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
333				 struct fib_config *cfg)
334{
335	__be32 addr;
336	int plen;
337
338	memset(cfg, 0, sizeof(*cfg));
339	cfg->fc_nlinfo.nl_net = net;
340
341	if (rt->rt_dst.sa_family != AF_INET)
342		return -EAFNOSUPPORT;
343
344	/*
345	 * Check mask for validity:
346	 * a) it must be contiguous.
347	 * b) destination must have all host bits clear.
348	 * c) if application forgot to set correct family (AF_INET),
349	 *    reject request unless it is absolutely clear i.e.
350	 *    both family and mask are zero.
351	 */
352	plen = 32;
353	addr = sk_extract_addr(&rt->rt_dst);
354	if (!(rt->rt_flags & RTF_HOST)) {
355		__be32 mask = sk_extract_addr(&rt->rt_genmask);
356
357		if (rt->rt_genmask.sa_family != AF_INET) {
358			if (mask || rt->rt_genmask.sa_family)
359				return -EAFNOSUPPORT;
360		}
361
362		if (bad_mask(mask, addr))
363			return -EINVAL;
364
365		plen = inet_mask_len(mask);
366	}
367
368	cfg->fc_dst_len = plen;
369	cfg->fc_dst = addr;
370
371	if (cmd != SIOCDELRT) {
372		cfg->fc_nlflags = NLM_F_CREATE;
373		cfg->fc_protocol = RTPROT_BOOT;
374	}
375
376	if (rt->rt_metric)
377		cfg->fc_priority = rt->rt_metric - 1;
378
379	if (rt->rt_flags & RTF_REJECT) {
380		cfg->fc_scope = RT_SCOPE_HOST;
381		cfg->fc_type = RTN_UNREACHABLE;
382		return 0;
383	}
384
385	cfg->fc_scope = RT_SCOPE_NOWHERE;
386	cfg->fc_type = RTN_UNICAST;
387
388	if (rt->rt_dev) {
389		char *colon;
390		struct net_device *dev;
391		char devname[IFNAMSIZ];
392
393		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
394			return -EFAULT;
395
396		devname[IFNAMSIZ-1] = 0;
397		colon = strchr(devname, ':');
398		if (colon)
399			*colon = 0;
400		dev = __dev_get_by_name(net, devname);
401		if (!dev)
402			return -ENODEV;
403		cfg->fc_oif = dev->ifindex;
404		if (colon) {
405			struct in_ifaddr *ifa;
406			struct in_device *in_dev = __in_dev_get_rtnl(dev);
407			if (!in_dev)
408				return -ENODEV;
409			*colon = ':';
410			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
411				if (strcmp(ifa->ifa_label, devname) == 0)
412					break;
413			if (ifa == NULL)
414				return -ENODEV;
415			cfg->fc_prefsrc = ifa->ifa_local;
416		}
417	}
418
419	addr = sk_extract_addr(&rt->rt_gateway);
420	if (rt->rt_gateway.sa_family == AF_INET && addr) {
421		cfg->fc_gw = addr;
422		if (rt->rt_flags & RTF_GATEWAY &&
423		    inet_addr_type(net, addr) == RTN_UNICAST)
424			cfg->fc_scope = RT_SCOPE_UNIVERSE;
425	}
426
427	if (cmd == SIOCDELRT)
428		return 0;
429
430	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
431		return -EINVAL;
432
433	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
434		cfg->fc_scope = RT_SCOPE_LINK;
435
436	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
437		struct nlattr *mx;
438		int len = 0;
439
440		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
441		if (mx == NULL)
442			return -ENOMEM;
443
444		if (rt->rt_flags & RTF_MTU)
445			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
446
447		if (rt->rt_flags & RTF_WINDOW)
448			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
449
450		if (rt->rt_flags & RTF_IRTT)
451			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
452
453		cfg->fc_mx = mx;
454		cfg->fc_mx_len = len;
455	}
456
457	return 0;
458}
459
460/*
461 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
462 */
463
464int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
465{
466	struct fib_config cfg;
467	struct rtentry rt;
468	int err;
469
470	switch (cmd) {
471	case SIOCADDRT:		/* Add a route */
472	case SIOCDELRT:		/* Delete a route */
473		if (!capable(CAP_NET_ADMIN))
474			return -EPERM;
475
476		if (copy_from_user(&rt, arg, sizeof(rt)))
477			return -EFAULT;
478
479		rtnl_lock();
480		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
481		if (err == 0) {
482			struct fib_table *tb;
483
484			if (cmd == SIOCDELRT) {
485				tb = fib_get_table(net, cfg.fc_table);
486				if (tb)
487					err = fib_table_delete(tb, &cfg);
488				else
489					err = -ESRCH;
490			} else {
491				tb = fib_new_table(net, cfg.fc_table);
492				if (tb)
493					err = fib_table_insert(tb, &cfg);
494				else
495					err = -ENOBUFS;
496			}
497
498			/* allocated by rtentry_to_fib_config() */
499			kfree(cfg.fc_mx);
500		}
501		rtnl_unlock();
502		return err;
503	}
504	return -EINVAL;
505}
506
507const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
508	[RTA_DST]		= { .type = NLA_U32 },
509	[RTA_SRC]		= { .type = NLA_U32 },
510	[RTA_IIF]		= { .type = NLA_U32 },
511	[RTA_OIF]		= { .type = NLA_U32 },
512	[RTA_GATEWAY]		= { .type = NLA_U32 },
513	[RTA_PRIORITY]		= { .type = NLA_U32 },
514	[RTA_PREFSRC]		= { .type = NLA_U32 },
515	[RTA_METRICS]		= { .type = NLA_NESTED },
516	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
517	[RTA_FLOW]		= { .type = NLA_U32 },
518};
519
520static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
521			    struct nlmsghdr *nlh, struct fib_config *cfg)
522{
523	struct nlattr *attr;
524	int err, remaining;
525	struct rtmsg *rtm;
526
527	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
528	if (err < 0)
529		goto errout;
530
531	memset(cfg, 0, sizeof(*cfg));
532
533	rtm = nlmsg_data(nlh);
534	cfg->fc_dst_len = rtm->rtm_dst_len;
535	cfg->fc_tos = rtm->rtm_tos;
536	cfg->fc_table = rtm->rtm_table;
537	cfg->fc_protocol = rtm->rtm_protocol;
538	cfg->fc_scope = rtm->rtm_scope;
539	cfg->fc_type = rtm->rtm_type;
540	cfg->fc_flags = rtm->rtm_flags;
541	cfg->fc_nlflags = nlh->nlmsg_flags;
542
543	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
544	cfg->fc_nlinfo.nlh = nlh;
545	cfg->fc_nlinfo.nl_net = net;
546
547	if (cfg->fc_type > RTN_MAX) {
548		err = -EINVAL;
549		goto errout;
550	}
551
552	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
553		switch (nla_type(attr)) {
554		case RTA_DST:
555			cfg->fc_dst = nla_get_be32(attr);
556			break;
557		case RTA_OIF:
558			cfg->fc_oif = nla_get_u32(attr);
559			break;
560		case RTA_GATEWAY:
561			cfg->fc_gw = nla_get_be32(attr);
562			break;
563		case RTA_PRIORITY:
564			cfg->fc_priority = nla_get_u32(attr);
565			break;
566		case RTA_PREFSRC:
567			cfg->fc_prefsrc = nla_get_be32(attr);
568			break;
569		case RTA_METRICS:
570			cfg->fc_mx = nla_data(attr);
571			cfg->fc_mx_len = nla_len(attr);
572			break;
573		case RTA_MULTIPATH:
574			cfg->fc_mp = nla_data(attr);
575			cfg->fc_mp_len = nla_len(attr);
576			break;
577		case RTA_FLOW:
578			cfg->fc_flow = nla_get_u32(attr);
579			break;
580		case RTA_TABLE:
581			cfg->fc_table = nla_get_u32(attr);
582			break;
583		}
584	}
585
586	return 0;
587errout:
588	return err;
589}
590
591static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
592{
593	struct net *net = sock_net(skb->sk);
594	struct fib_config cfg;
595	struct fib_table *tb;
596	int err;
597
598	err = rtm_to_fib_config(net, skb, nlh, &cfg);
599	if (err < 0)
600		goto errout;
601
602	tb = fib_get_table(net, cfg.fc_table);
603	if (tb == NULL) {
604		err = -ESRCH;
605		goto errout;
606	}
607
608	err = fib_table_delete(tb, &cfg);
609errout:
610	return err;
611}
612
613static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
614{
615	struct net *net = sock_net(skb->sk);
616	struct fib_config cfg;
617	struct fib_table *tb;
618	int err;
619
620	err = rtm_to_fib_config(net, skb, nlh, &cfg);
621	if (err < 0)
622		goto errout;
623
624	tb = fib_new_table(net, cfg.fc_table);
625	if (tb == NULL) {
626		err = -ENOBUFS;
627		goto errout;
628	}
629
630	err = fib_table_insert(tb, &cfg);
631errout:
632	return err;
633}
634
635static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
636{
637	struct net *net = sock_net(skb->sk);
638	unsigned int h, s_h;
639	unsigned int e = 0, s_e;
640	struct fib_table *tb;
641	struct hlist_node *node;
642	struct hlist_head *head;
643	int dumped = 0;
644
645	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
646	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
647		return ip_rt_dump(skb, cb);
648
649	s_h = cb->args[0];
650	s_e = cb->args[1];
651
652	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
653		e = 0;
654		head = &net->ipv4.fib_table_hash[h];
655		hlist_for_each_entry(tb, node, head, tb_hlist) {
656			if (e < s_e)
657				goto next;
658			if (dumped)
659				memset(&cb->args[2], 0, sizeof(cb->args) -
660						 2 * sizeof(cb->args[0]));
661			if (fib_table_dump(tb, skb, cb) < 0)
662				goto out;
663			dumped = 1;
664next:
665			e++;
666		}
667	}
668out:
669	cb->args[1] = e;
670	cb->args[0] = h;
671
672	return skb->len;
673}
674
675/* Prepare and feed intra-kernel routing request.
676   Really, it should be netlink message, but :-( netlink
677   can be not configured, so that we feed it directly
678   to fib engine. It is legal, because all events occur
679   only when netlink is already locked.
680 */
681
682static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
683{
684	struct net *net = dev_net(ifa->ifa_dev->dev);
685	struct fib_table *tb;
686	struct fib_config cfg = {
687		.fc_protocol = RTPROT_KERNEL,
688		.fc_type = type,
689		.fc_dst = dst,
690		.fc_dst_len = dst_len,
691		.fc_prefsrc = ifa->ifa_local,
692		.fc_oif = ifa->ifa_dev->dev->ifindex,
693		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
694		.fc_nlinfo = {
695			.nl_net = net,
696		},
697	};
698
699	if (type == RTN_UNICAST)
700		tb = fib_new_table(net, RT_TABLE_MAIN);
701	else
702		tb = fib_new_table(net, RT_TABLE_LOCAL);
703
704	if (tb == NULL)
705		return;
706
707	cfg.fc_table = tb->tb_id;
708
709	if (type != RTN_LOCAL)
710		cfg.fc_scope = RT_SCOPE_LINK;
711	else
712		cfg.fc_scope = RT_SCOPE_HOST;
713
714	if (cmd == RTM_NEWROUTE)
715		fib_table_insert(tb, &cfg);
716	else
717		fib_table_delete(tb, &cfg);
718}
719
720void fib_add_ifaddr(struct in_ifaddr *ifa)
721{
722	struct in_device *in_dev = ifa->ifa_dev;
723	struct net_device *dev = in_dev->dev;
724	struct in_ifaddr *prim = ifa;
725	__be32 mask = ifa->ifa_mask;
726	__be32 addr = ifa->ifa_local;
727	__be32 prefix = ifa->ifa_address&mask;
728
729	if (ifa->ifa_flags&IFA_F_SECONDARY) {
730		prim = inet_ifa_byprefix(in_dev, prefix, mask);
731		if (prim == NULL) {
732			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
733			return;
734		}
735	}
736
737	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
738
739	if (!(dev->flags&IFF_UP))
740		return;
741
742	/* Add broadcast address, if it is explicitly assigned. */
743	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
744		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
745
746	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
747	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
748		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
749			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
750
751		/* Add network specific broadcasts, when it takes a sense */
752		if (ifa->ifa_prefixlen < 31) {
753			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
754			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
755		}
756	}
757}
758
759static void fib_del_ifaddr(struct in_ifaddr *ifa)
760{
761	struct in_device *in_dev = ifa->ifa_dev;
762	struct net_device *dev = in_dev->dev;
763	struct in_ifaddr *ifa1;
764	struct in_ifaddr *prim = ifa;
765	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
766	__be32 any = ifa->ifa_address&ifa->ifa_mask;
767#define LOCAL_OK	1
768#define BRD_OK		2
769#define BRD0_OK		4
770#define BRD1_OK		8
771	unsigned ok = 0;
772
773	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
774		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
775			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
776	else {
777		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
778		if (prim == NULL) {
779			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
780			return;
781		}
782	}
783
784	/* Deletion is more complicated than add.
785	   We should take care of not to delete too much :-)
786
787	   Scan address list to be sure that addresses are really gone.
788	 */
789
790	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
791		if (ifa->ifa_local == ifa1->ifa_local)
792			ok |= LOCAL_OK;
793		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
794			ok |= BRD_OK;
795		if (brd == ifa1->ifa_broadcast)
796			ok |= BRD1_OK;
797		if (any == ifa1->ifa_broadcast)
798			ok |= BRD0_OK;
799	}
800
801	if (!(ok&BRD_OK))
802		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
803	if (!(ok&BRD1_OK))
804		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
805	if (!(ok&BRD0_OK))
806		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
807	if (!(ok&LOCAL_OK)) {
808		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
809
810		/* Check, that this local address finally disappeared. */
811		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
812			/* And the last, but not the least thing.
813			   We must flush stray FIB entries.
814
815			   First of all, we scan fib_info list searching
816			   for stray nexthop entries, then ignite fib_flush.
817			*/
818			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
819				fib_flush(dev_net(dev));
820		}
821	}
822#undef LOCAL_OK
823#undef BRD_OK
824#undef BRD0_OK
825#undef BRD1_OK
826}
827
828static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
829{
830
831	struct fib_result       res;
832	struct flowi            fl = { .mark = frn->fl_mark,
833				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
834							    .tos = frn->fl_tos,
835							    .scope = frn->fl_scope } } };
836
837#ifdef CONFIG_IP_MULTIPLE_TABLES
838	res.r = NULL;
839#endif
840
841	frn->err = -ENOENT;
842	if (tb) {
843		local_bh_disable();
844
845		frn->tb_id = tb->tb_id;
846		frn->err = fib_table_lookup(tb, &fl, &res);
847
848		if (!frn->err) {
849			frn->prefixlen = res.prefixlen;
850			frn->nh_sel = res.nh_sel;
851			frn->type = res.type;
852			frn->scope = res.scope;
853			fib_res_put(&res);
854		}
855		local_bh_enable();
856	}
857}
858
859static void nl_fib_input(struct sk_buff *skb)
860{
861	struct net *net;
862	struct fib_result_nl *frn;
863	struct nlmsghdr *nlh;
864	struct fib_table *tb;
865	u32 pid;
866
867	net = sock_net(skb->sk);
868	nlh = nlmsg_hdr(skb);
869	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
870	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
871		return;
872
873	skb = skb_clone(skb, GFP_KERNEL);
874	if (skb == NULL)
875		return;
876	nlh = nlmsg_hdr(skb);
877
878	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
879	tb = fib_get_table(net, frn->tb_id_in);
880
881	nl_fib_lookup(frn, tb);
882
883	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
884	NETLINK_CB(skb).pid = 0;         /* from kernel */
885	NETLINK_CB(skb).dst_group = 0;  /* unicast */
886	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
887}
888
889static int __net_init nl_fib_lookup_init(struct net *net)
890{
891	struct sock *sk;
892	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
893				   nl_fib_input, NULL, THIS_MODULE);
894	if (sk == NULL)
895		return -EAFNOSUPPORT;
896	net->ipv4.fibnl = sk;
897	return 0;
898}
899
900static void nl_fib_lookup_exit(struct net *net)
901{
902	netlink_kernel_release(net->ipv4.fibnl);
903	net->ipv4.fibnl = NULL;
904}
905
/*
 * Stop routing through "dev": run fib_sync_down_dev() and flush the FIB
 * tables if it returns non-zero, then flush the routing cache with the
 * given delay and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
913
914static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
915{
916	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
917	struct net_device *dev = ifa->ifa_dev->dev;
918
919	switch (event) {
920	case NETDEV_UP:
921		fib_add_ifaddr(ifa);
922#ifdef CONFIG_IP_ROUTE_MULTIPATH
923		fib_sync_up(dev);
924#endif
925		rt_cache_flush(dev_net(dev), -1);
926		break;
927	case NETDEV_DOWN:
928		fib_del_ifaddr(ifa);
929		if (ifa->ifa_dev->ifa_list == NULL) {
930			/* Last address was deleted from this interface.
931			   Disable IP.
932			 */
933			fib_disable_ip(dev, 1, 0);
934		} else {
935			rt_cache_flush(dev_net(dev), -1);
936		}
937		break;
938	}
939	return NOTIFY_DONE;
940}
941
942static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
943{
944	struct net_device *dev = ptr;
945	struct in_device *in_dev = __in_dev_get_rtnl(dev);
946
947	if (event == NETDEV_UNREGISTER) {
948		fib_disable_ip(dev, 2, -1);
949		return NOTIFY_DONE;
950	}
951
952	if (!in_dev)
953		return NOTIFY_DONE;
954
955	switch (event) {
956	case NETDEV_UP:
957		for_ifa(in_dev) {
958			fib_add_ifaddr(ifa);
959		} endfor_ifa(in_dev);
960#ifdef CONFIG_IP_ROUTE_MULTIPATH
961		fib_sync_up(dev);
962#endif
963		rt_cache_flush(dev_net(dev), -1);
964		break;
965	case NETDEV_DOWN:
966		fib_disable_ip(dev, 0, 0);
967		break;
968	case NETDEV_CHANGEMTU:
969	case NETDEV_CHANGE:
970		rt_cache_flush(dev_net(dev), 0);
971		break;
972	case NETDEV_UNREGISTER_BATCH:
973		rt_cache_flush_batch();
974		break;
975	}
976	return NOTIFY_DONE;
977}
978
979static struct notifier_block fib_inetaddr_notifier = {
980	.notifier_call = fib_inetaddr_event,
981};
982
983static struct notifier_block fib_netdev_notifier = {
984	.notifier_call = fib_netdev_event,
985};
986
987static int __net_init ip_fib_net_init(struct net *net)
988{
989	int err;
990	unsigned int i;
991
992	net->ipv4.fib_table_hash = kzalloc(
993			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
994	if (net->ipv4.fib_table_hash == NULL)
995		return -ENOMEM;
996
997	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
998		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
999
1000	err = fib4_rules_init(net);
1001	if (err < 0)
1002		goto fail;
1003	return 0;
1004
1005fail:
1006	kfree(net->ipv4.fib_table_hash);
1007	return err;
1008}
1009
1010static void ip_fib_net_exit(struct net *net)
1011{
1012	unsigned int i;
1013
1014#ifdef CONFIG_IP_MULTIPLE_TABLES
1015	fib4_rules_exit(net);
1016#endif
1017
1018	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1019		struct fib_table *tb;
1020		struct hlist_head *head;
1021		struct hlist_node *node, *tmp;
1022
1023		head = &net->ipv4.fib_table_hash[i];
1024		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1025			hlist_del(node);
1026			fib_table_flush(tb);
1027			kfree(tb);
1028		}
1029	}
1030	kfree(net->ipv4.fib_table_hash);
1031}
1032
1033static int __net_init fib_net_init(struct net *net)
1034{
1035	int error;
1036
1037	error = ip_fib_net_init(net);
1038	if (error < 0)
1039		goto out;
1040	error = nl_fib_lookup_init(net);
1041	if (error < 0)
1042		goto out_nlfl;
1043	error = fib_proc_init(net);
1044	if (error < 0)
1045		goto out_proc;
1046out:
1047	return error;
1048
1049out_proc:
1050	nl_fib_lookup_exit(net);
1051out_nlfl:
1052	ip_fib_net_exit(net);
1053	goto out;
1054}
1055
1056static void __net_exit fib_net_exit(struct net *net)
1057{
1058	fib_proc_exit(net);
1059	nl_fib_lookup_exit(net);
1060	ip_fib_net_exit(net);
1061}
1062
1063static struct pernet_operations fib_net_ops = {
1064	.init = fib_net_init,
1065	.exit = fib_net_exit,
1066};
1067
1068void __init ip_fib_init(void)
1069{
1070	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1071	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1072	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1073
1074	register_pernet_subsys(&fib_net_ops);
1075	register_netdevice_notifier(&fib_netdev_notifier);
1076	register_inetaddr_notifier(&fib_inetaddr_notifier);
1077
1078	fib_hash_init();
1079}
1080
1081EXPORT_SYMBOL(inet_addr_type);
1082EXPORT_SYMBOL(inet_dev_addr_type);
1083EXPORT_SYMBOL(ip_dev_find);
1084