fib_frontend.c revision 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 *		This program is free software; you can redistribute it and/or
11 *		modify it under the terms of the GNU General Public License
12 *		as published by the Free Software Foundation; either version
13 *		2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
20#include <linux/capability.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
30#include <linux/inetdevice.h>
31#include <linux/netdevice.h>
32#include <linux/if_addr.h>
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
35#include <linux/init.h>
36#include <linux/list.h>
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/arp.h>
44#include <net/ip_fib.h>
45#include <net/rtnetlink.h>
46
47#ifndef CONFIG_IP_MULTIPLE_TABLES
48
49static int __net_init fib4_rules_init(struct net *net)
50{
51	struct fib_table *local_table, *main_table;
52
53	local_table = fib_hash_table(RT_TABLE_LOCAL);
54	if (local_table == NULL)
55		return -ENOMEM;
56
57	main_table  = fib_hash_table(RT_TABLE_MAIN);
58	if (main_table == NULL)
59		goto fail;
60
61	hlist_add_head_rcu(&local_table->tb_hlist,
62				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
63	hlist_add_head_rcu(&main_table->tb_hlist,
64				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
65	return 0;
66
67fail:
68	kfree(local_table);
69	return -ENOMEM;
70}
71#else
72
73struct fib_table *fib_new_table(struct net *net, u32 id)
74{
75	struct fib_table *tb;
76	unsigned int h;
77
78	if (id == 0)
79		id = RT_TABLE_MAIN;
80	tb = fib_get_table(net, id);
81	if (tb)
82		return tb;
83
84	tb = fib_hash_table(id);
85	if (!tb)
86		return NULL;
87	h = id & (FIB_TABLE_HASHSZ - 1);
88	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
89	return tb;
90}
91
92struct fib_table *fib_get_table(struct net *net, u32 id)
93{
94	struct fib_table *tb;
95	struct hlist_node *node;
96	struct hlist_head *head;
97	unsigned int h;
98
99	if (id == 0)
100		id = RT_TABLE_MAIN;
101	h = id & (FIB_TABLE_HASHSZ - 1);
102
103	rcu_read_lock();
104	head = &net->ipv4.fib_table_hash[h];
105	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
106		if (tb->tb_id == id) {
107			rcu_read_unlock();
108			return tb;
109		}
110	}
111	rcu_read_unlock();
112	return NULL;
113}
114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
116void fib_select_default(struct net *net,
117			const struct flowi *flp, struct fib_result *res)
118{
119	struct fib_table *tb;
120	int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123		return;
124	table = res->r->table;
125#endif
126	tb = fib_get_table(net, table);
127	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128		fib_table_select_default(tb, flp, res);
129}
130
131static void fib_flush(struct net *net)
132{
133	int flushed = 0;
134	struct fib_table *tb;
135	struct hlist_node *node;
136	struct hlist_head *head;
137	unsigned int h;
138
139	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
140		head = &net->ipv4.fib_table_hash[h];
141		hlist_for_each_entry(tb, node, head, tb_hlist)
142			flushed += fib_table_flush(tb);
143	}
144
145	if (flushed)
146		rt_cache_flush(net, -1);
147}
148
149/*
150 *	Find the first device with a given source address.
151 */
152
153struct net_device * ip_dev_find(struct net *net, __be32 addr)
154{
155	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156	struct fib_result res;
157	struct net_device *dev = NULL;
158	struct fib_table *local_table;
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161	res.r = NULL;
162#endif
163
164	local_table = fib_get_table(net, RT_TABLE_LOCAL);
165	if (!local_table || fib_table_lookup(local_table, &fl, &res))
166		return NULL;
167	if (res.type != RTN_LOCAL)
168		goto out;
169	dev = FIB_RES_DEV(res);
170
171	if (dev)
172		dev_hold(dev);
173out:
174	fib_res_put(&res);
175	return dev;
176}
177
178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
182static inline unsigned __inet_dev_addr_type(struct net *net,
183					    const struct net_device *dev,
184					    __be32 addr)
185{
186	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187	struct fib_result	res;
188	unsigned ret = RTN_BROADCAST;
189	struct fib_table *local_table;
190
191	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
192		return RTN_BROADCAST;
193	if (ipv4_is_multicast(addr))
194		return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197	res.r = NULL;
198#endif
199
200	local_table = fib_get_table(net, RT_TABLE_LOCAL);
201	if (local_table) {
202		ret = RTN_UNICAST;
203		if (!fib_table_lookup(local_table, &fl, &res)) {
204			if (!dev || dev == res.fi->fib_dev)
205				ret = res.type;
206			fib_res_put(&res);
207		}
208	}
209	return ret;
210}
211
212unsigned int inet_addr_type(struct net *net, __be32 addr)
213{
214	return __inet_dev_addr_type(net, NULL, addr);
215}
216
217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218				__be32 addr)
219{
220       return __inet_dev_addr_type(net, dev, addr);
221}
222
223/* Given (packet source, input interface) and optional (dst, oif, tos):
224   - (main) check, that source is valid i.e. not broadcast or our local
225     address.
226   - figure out what "logical" interface this packet arrived
227     and calculate "specific destination" address.
228   - check, that packet arrived from expected physical interface.
229 */
230
231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
232			struct net_device *dev, __be32 *spec_dst,
233			u32 *itag, u32 mark)
234{
235	struct in_device *in_dev;
236	struct flowi fl = { .nl_u = { .ip4_u =
237				      { .daddr = src,
238					.saddr = dst,
239					.tos = tos } },
240			    .mark = mark,
241			    .iif = oif };
242
243	struct fib_result res;
244	int no_addr, rpf, accept_local;
245	int ret;
246	struct net *net;
247
248	no_addr = rpf = accept_local = 0;
249	rcu_read_lock();
250	in_dev = __in_dev_get_rcu(dev);
251	if (in_dev) {
252		no_addr = in_dev->ifa_list == NULL;
253		rpf = IN_DEV_RPFILTER(in_dev);
254		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
255		if (mark && !IN_DEV_SRC_VMARK(in_dev))
256			fl.mark = 0;
257	}
258	rcu_read_unlock();
259
260	if (in_dev == NULL)
261		goto e_inval;
262
263	net = dev_net(dev);
264	if (fib_lookup(net, &fl, &res))
265		goto last_resort;
266	if (res.type != RTN_UNICAST) {
267		if (res.type != RTN_LOCAL || !accept_local)
268			goto e_inval_res;
269	}
270	*spec_dst = FIB_RES_PREFSRC(res);
271	fib_combine_itag(itag, &res);
272#ifdef CONFIG_IP_ROUTE_MULTIPATH
273	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
274#else
275	if (FIB_RES_DEV(res) == dev)
276#endif
277	{
278		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
279		fib_res_put(&res);
280		return ret;
281	}
282	fib_res_put(&res);
283	if (no_addr)
284		goto last_resort;
285	if (rpf == 1)
286		goto e_inval;
287	fl.oif = dev->ifindex;
288
289	ret = 0;
290	if (fib_lookup(net, &fl, &res) == 0) {
291		if (res.type == RTN_UNICAST) {
292			*spec_dst = FIB_RES_PREFSRC(res);
293			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294		}
295		fib_res_put(&res);
296	}
297	return ret;
298
299last_resort:
300	if (rpf)
301		goto e_inval;
302	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
303	*itag = 0;
304	return 0;
305
306e_inval_res:
307	fib_res_put(&res);
308e_inval:
309	return -EINVAL;
310}
311
312static inline __be32 sk_extract_addr(struct sockaddr *addr)
313{
314	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
315}
316
317static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
318{
319	struct nlattr *nla;
320
321	nla = (struct nlattr *) ((char *) mx + len);
322	nla->nla_type = type;
323	nla->nla_len = nla_attr_size(4);
324	*(u32 *) nla_data(nla) = value;
325
326	return len + nla_total_size(4);
327}
328
329static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
330				 struct fib_config *cfg)
331{
332	__be32 addr;
333	int plen;
334
335	memset(cfg, 0, sizeof(*cfg));
336	cfg->fc_nlinfo.nl_net = net;
337
338	if (rt->rt_dst.sa_family != AF_INET)
339		return -EAFNOSUPPORT;
340
341	/*
342	 * Check mask for validity:
343	 * a) it must be contiguous.
344	 * b) destination must have all host bits clear.
345	 * c) if application forgot to set correct family (AF_INET),
346	 *    reject request unless it is absolutely clear i.e.
347	 *    both family and mask are zero.
348	 */
349	plen = 32;
350	addr = sk_extract_addr(&rt->rt_dst);
351	if (!(rt->rt_flags & RTF_HOST)) {
352		__be32 mask = sk_extract_addr(&rt->rt_genmask);
353
354		if (rt->rt_genmask.sa_family != AF_INET) {
355			if (mask || rt->rt_genmask.sa_family)
356				return -EAFNOSUPPORT;
357		}
358
359		if (bad_mask(mask, addr))
360			return -EINVAL;
361
362		plen = inet_mask_len(mask);
363	}
364
365	cfg->fc_dst_len = plen;
366	cfg->fc_dst = addr;
367
368	if (cmd != SIOCDELRT) {
369		cfg->fc_nlflags = NLM_F_CREATE;
370		cfg->fc_protocol = RTPROT_BOOT;
371	}
372
373	if (rt->rt_metric)
374		cfg->fc_priority = rt->rt_metric - 1;
375
376	if (rt->rt_flags & RTF_REJECT) {
377		cfg->fc_scope = RT_SCOPE_HOST;
378		cfg->fc_type = RTN_UNREACHABLE;
379		return 0;
380	}
381
382	cfg->fc_scope = RT_SCOPE_NOWHERE;
383	cfg->fc_type = RTN_UNICAST;
384
385	if (rt->rt_dev) {
386		char *colon;
387		struct net_device *dev;
388		char devname[IFNAMSIZ];
389
390		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
391			return -EFAULT;
392
393		devname[IFNAMSIZ-1] = 0;
394		colon = strchr(devname, ':');
395		if (colon)
396			*colon = 0;
397		dev = __dev_get_by_name(net, devname);
398		if (!dev)
399			return -ENODEV;
400		cfg->fc_oif = dev->ifindex;
401		if (colon) {
402			struct in_ifaddr *ifa;
403			struct in_device *in_dev = __in_dev_get_rtnl(dev);
404			if (!in_dev)
405				return -ENODEV;
406			*colon = ':';
407			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
408				if (strcmp(ifa->ifa_label, devname) == 0)
409					break;
410			if (ifa == NULL)
411				return -ENODEV;
412			cfg->fc_prefsrc = ifa->ifa_local;
413		}
414	}
415
416	addr = sk_extract_addr(&rt->rt_gateway);
417	if (rt->rt_gateway.sa_family == AF_INET && addr) {
418		cfg->fc_gw = addr;
419		if (rt->rt_flags & RTF_GATEWAY &&
420		    inet_addr_type(net, addr) == RTN_UNICAST)
421			cfg->fc_scope = RT_SCOPE_UNIVERSE;
422	}
423
424	if (cmd == SIOCDELRT)
425		return 0;
426
427	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
428		return -EINVAL;
429
430	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
431		cfg->fc_scope = RT_SCOPE_LINK;
432
433	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
434		struct nlattr *mx;
435		int len = 0;
436
437		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
438		if (mx == NULL)
439			return -ENOMEM;
440
441		if (rt->rt_flags & RTF_MTU)
442			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
443
444		if (rt->rt_flags & RTF_WINDOW)
445			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
446
447		if (rt->rt_flags & RTF_IRTT)
448			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
449
450		cfg->fc_mx = mx;
451		cfg->fc_mx_len = len;
452	}
453
454	return 0;
455}
456
457/*
458 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
459 */
460
461int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
462{
463	struct fib_config cfg;
464	struct rtentry rt;
465	int err;
466
467	switch (cmd) {
468	case SIOCADDRT:		/* Add a route */
469	case SIOCDELRT:		/* Delete a route */
470		if (!capable(CAP_NET_ADMIN))
471			return -EPERM;
472
473		if (copy_from_user(&rt, arg, sizeof(rt)))
474			return -EFAULT;
475
476		rtnl_lock();
477		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
478		if (err == 0) {
479			struct fib_table *tb;
480
481			if (cmd == SIOCDELRT) {
482				tb = fib_get_table(net, cfg.fc_table);
483				if (tb)
484					err = fib_table_delete(tb, &cfg);
485				else
486					err = -ESRCH;
487			} else {
488				tb = fib_new_table(net, cfg.fc_table);
489				if (tb)
490					err = fib_table_insert(tb, &cfg);
491				else
492					err = -ENOBUFS;
493			}
494
495			/* allocated by rtentry_to_fib_config() */
496			kfree(cfg.fc_mx);
497		}
498		rtnl_unlock();
499		return err;
500	}
501	return -EINVAL;
502}
503
504const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
505	[RTA_DST]		= { .type = NLA_U32 },
506	[RTA_SRC]		= { .type = NLA_U32 },
507	[RTA_IIF]		= { .type = NLA_U32 },
508	[RTA_OIF]		= { .type = NLA_U32 },
509	[RTA_GATEWAY]		= { .type = NLA_U32 },
510	[RTA_PRIORITY]		= { .type = NLA_U32 },
511	[RTA_PREFSRC]		= { .type = NLA_U32 },
512	[RTA_METRICS]		= { .type = NLA_NESTED },
513	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
514	[RTA_FLOW]		= { .type = NLA_U32 },
515};
516
517static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
518			    struct nlmsghdr *nlh, struct fib_config *cfg)
519{
520	struct nlattr *attr;
521	int err, remaining;
522	struct rtmsg *rtm;
523
524	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
525	if (err < 0)
526		goto errout;
527
528	memset(cfg, 0, sizeof(*cfg));
529
530	rtm = nlmsg_data(nlh);
531	cfg->fc_dst_len = rtm->rtm_dst_len;
532	cfg->fc_tos = rtm->rtm_tos;
533	cfg->fc_table = rtm->rtm_table;
534	cfg->fc_protocol = rtm->rtm_protocol;
535	cfg->fc_scope = rtm->rtm_scope;
536	cfg->fc_type = rtm->rtm_type;
537	cfg->fc_flags = rtm->rtm_flags;
538	cfg->fc_nlflags = nlh->nlmsg_flags;
539
540	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
541	cfg->fc_nlinfo.nlh = nlh;
542	cfg->fc_nlinfo.nl_net = net;
543
544	if (cfg->fc_type > RTN_MAX) {
545		err = -EINVAL;
546		goto errout;
547	}
548
549	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
550		switch (nla_type(attr)) {
551		case RTA_DST:
552			cfg->fc_dst = nla_get_be32(attr);
553			break;
554		case RTA_OIF:
555			cfg->fc_oif = nla_get_u32(attr);
556			break;
557		case RTA_GATEWAY:
558			cfg->fc_gw = nla_get_be32(attr);
559			break;
560		case RTA_PRIORITY:
561			cfg->fc_priority = nla_get_u32(attr);
562			break;
563		case RTA_PREFSRC:
564			cfg->fc_prefsrc = nla_get_be32(attr);
565			break;
566		case RTA_METRICS:
567			cfg->fc_mx = nla_data(attr);
568			cfg->fc_mx_len = nla_len(attr);
569			break;
570		case RTA_MULTIPATH:
571			cfg->fc_mp = nla_data(attr);
572			cfg->fc_mp_len = nla_len(attr);
573			break;
574		case RTA_FLOW:
575			cfg->fc_flow = nla_get_u32(attr);
576			break;
577		case RTA_TABLE:
578			cfg->fc_table = nla_get_u32(attr);
579			break;
580		}
581	}
582
583	return 0;
584errout:
585	return err;
586}
587
588static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
589{
590	struct net *net = sock_net(skb->sk);
591	struct fib_config cfg;
592	struct fib_table *tb;
593	int err;
594
595	err = rtm_to_fib_config(net, skb, nlh, &cfg);
596	if (err < 0)
597		goto errout;
598
599	tb = fib_get_table(net, cfg.fc_table);
600	if (tb == NULL) {
601		err = -ESRCH;
602		goto errout;
603	}
604
605	err = fib_table_delete(tb, &cfg);
606errout:
607	return err;
608}
609
610static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
611{
612	struct net *net = sock_net(skb->sk);
613	struct fib_config cfg;
614	struct fib_table *tb;
615	int err;
616
617	err = rtm_to_fib_config(net, skb, nlh, &cfg);
618	if (err < 0)
619		goto errout;
620
621	tb = fib_new_table(net, cfg.fc_table);
622	if (tb == NULL) {
623		err = -ENOBUFS;
624		goto errout;
625	}
626
627	err = fib_table_insert(tb, &cfg);
628errout:
629	return err;
630}
631
632static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
633{
634	struct net *net = sock_net(skb->sk);
635	unsigned int h, s_h;
636	unsigned int e = 0, s_e;
637	struct fib_table *tb;
638	struct hlist_node *node;
639	struct hlist_head *head;
640	int dumped = 0;
641
642	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
643	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
644		return ip_rt_dump(skb, cb);
645
646	s_h = cb->args[0];
647	s_e = cb->args[1];
648
649	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
650		e = 0;
651		head = &net->ipv4.fib_table_hash[h];
652		hlist_for_each_entry(tb, node, head, tb_hlist) {
653			if (e < s_e)
654				goto next;
655			if (dumped)
656				memset(&cb->args[2], 0, sizeof(cb->args) -
657						 2 * sizeof(cb->args[0]));
658			if (fib_table_dump(tb, skb, cb) < 0)
659				goto out;
660			dumped = 1;
661next:
662			e++;
663		}
664	}
665out:
666	cb->args[1] = e;
667	cb->args[0] = h;
668
669	return skb->len;
670}
671
672/* Prepare and feed intra-kernel routing request.
673   Really, it should be netlink message, but :-( netlink
674   can be not configured, so that we feed it directly
675   to fib engine. It is legal, because all events occur
676   only when netlink is already locked.
677 */
678
679static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
680{
681	struct net *net = dev_net(ifa->ifa_dev->dev);
682	struct fib_table *tb;
683	struct fib_config cfg = {
684		.fc_protocol = RTPROT_KERNEL,
685		.fc_type = type,
686		.fc_dst = dst,
687		.fc_dst_len = dst_len,
688		.fc_prefsrc = ifa->ifa_local,
689		.fc_oif = ifa->ifa_dev->dev->ifindex,
690		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
691		.fc_nlinfo = {
692			.nl_net = net,
693		},
694	};
695
696	if (type == RTN_UNICAST)
697		tb = fib_new_table(net, RT_TABLE_MAIN);
698	else
699		tb = fib_new_table(net, RT_TABLE_LOCAL);
700
701	if (tb == NULL)
702		return;
703
704	cfg.fc_table = tb->tb_id;
705
706	if (type != RTN_LOCAL)
707		cfg.fc_scope = RT_SCOPE_LINK;
708	else
709		cfg.fc_scope = RT_SCOPE_HOST;
710
711	if (cmd == RTM_NEWROUTE)
712		fib_table_insert(tb, &cfg);
713	else
714		fib_table_delete(tb, &cfg);
715}
716
717void fib_add_ifaddr(struct in_ifaddr *ifa)
718{
719	struct in_device *in_dev = ifa->ifa_dev;
720	struct net_device *dev = in_dev->dev;
721	struct in_ifaddr *prim = ifa;
722	__be32 mask = ifa->ifa_mask;
723	__be32 addr = ifa->ifa_local;
724	__be32 prefix = ifa->ifa_address&mask;
725
726	if (ifa->ifa_flags&IFA_F_SECONDARY) {
727		prim = inet_ifa_byprefix(in_dev, prefix, mask);
728		if (prim == NULL) {
729			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
730			return;
731		}
732	}
733
734	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
735
736	if (!(dev->flags&IFF_UP))
737		return;
738
739	/* Add broadcast address, if it is explicitly assigned. */
740	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
741		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
742
743	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
744	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
745		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
746			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
747
748		/* Add network specific broadcasts, when it takes a sense */
749		if (ifa->ifa_prefixlen < 31) {
750			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
751			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
752		}
753	}
754}
755
756static void fib_del_ifaddr(struct in_ifaddr *ifa)
757{
758	struct in_device *in_dev = ifa->ifa_dev;
759	struct net_device *dev = in_dev->dev;
760	struct in_ifaddr *ifa1;
761	struct in_ifaddr *prim = ifa;
762	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
763	__be32 any = ifa->ifa_address&ifa->ifa_mask;
764#define LOCAL_OK	1
765#define BRD_OK		2
766#define BRD0_OK		4
767#define BRD1_OK		8
768	unsigned ok = 0;
769
770	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
771		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
772			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
773	else {
774		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
775		if (prim == NULL) {
776			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
777			return;
778		}
779	}
780
781	/* Deletion is more complicated than add.
782	   We should take care of not to delete too much :-)
783
784	   Scan address list to be sure that addresses are really gone.
785	 */
786
787	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
788		if (ifa->ifa_local == ifa1->ifa_local)
789			ok |= LOCAL_OK;
790		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
791			ok |= BRD_OK;
792		if (brd == ifa1->ifa_broadcast)
793			ok |= BRD1_OK;
794		if (any == ifa1->ifa_broadcast)
795			ok |= BRD0_OK;
796	}
797
798	if (!(ok&BRD_OK))
799		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
800	if (!(ok&BRD1_OK))
801		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
802	if (!(ok&BRD0_OK))
803		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
804	if (!(ok&LOCAL_OK)) {
805		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
806
807		/* Check, that this local address finally disappeared. */
808		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
809			/* And the last, but not the least thing.
810			   We must flush stray FIB entries.
811
812			   First of all, we scan fib_info list searching
813			   for stray nexthop entries, then ignite fib_flush.
814			*/
815			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
816				fib_flush(dev_net(dev));
817		}
818	}
819#undef LOCAL_OK
820#undef BRD_OK
821#undef BRD0_OK
822#undef BRD1_OK
823}
824
825static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
826{
827
828	struct fib_result       res;
829	struct flowi            fl = { .mark = frn->fl_mark,
830				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
831							    .tos = frn->fl_tos,
832							    .scope = frn->fl_scope } } };
833
834#ifdef CONFIG_IP_MULTIPLE_TABLES
835	res.r = NULL;
836#endif
837
838	frn->err = -ENOENT;
839	if (tb) {
840		local_bh_disable();
841
842		frn->tb_id = tb->tb_id;
843		frn->err = fib_table_lookup(tb, &fl, &res);
844
845		if (!frn->err) {
846			frn->prefixlen = res.prefixlen;
847			frn->nh_sel = res.nh_sel;
848			frn->type = res.type;
849			frn->scope = res.scope;
850			fib_res_put(&res);
851		}
852		local_bh_enable();
853	}
854}
855
856static void nl_fib_input(struct sk_buff *skb)
857{
858	struct net *net;
859	struct fib_result_nl *frn;
860	struct nlmsghdr *nlh;
861	struct fib_table *tb;
862	u32 pid;
863
864	net = sock_net(skb->sk);
865	nlh = nlmsg_hdr(skb);
866	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
867	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
868		return;
869
870	skb = skb_clone(skb, GFP_KERNEL);
871	if (skb == NULL)
872		return;
873	nlh = nlmsg_hdr(skb);
874
875	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
876	tb = fib_get_table(net, frn->tb_id_in);
877
878	nl_fib_lookup(frn, tb);
879
880	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
881	NETLINK_CB(skb).pid = 0;         /* from kernel */
882	NETLINK_CB(skb).dst_group = 0;  /* unicast */
883	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
884}
885
886static int nl_fib_lookup_init(struct net *net)
887{
888	struct sock *sk;
889	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
890				   nl_fib_input, NULL, THIS_MODULE);
891	if (sk == NULL)
892		return -EAFNOSUPPORT;
893	net->ipv4.fibnl = sk;
894	return 0;
895}
896
897static void nl_fib_lookup_exit(struct net *net)
898{
899	netlink_kernel_release(net->ipv4.fibnl);
900	net->ipv4.fibnl = NULL;
901}
902
/*
 * Take the device's routes down (flushing the tables if anything was
 * affected), flush the routing cache with the given delay and notify
 * ARP that the interface went down.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);
	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
910
911static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
912{
913	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
914	struct net_device *dev = ifa->ifa_dev->dev;
915
916	switch (event) {
917	case NETDEV_UP:
918		fib_add_ifaddr(ifa);
919#ifdef CONFIG_IP_ROUTE_MULTIPATH
920		fib_sync_up(dev);
921#endif
922		rt_cache_flush(dev_net(dev), -1);
923		break;
924	case NETDEV_DOWN:
925		fib_del_ifaddr(ifa);
926		if (ifa->ifa_dev->ifa_list == NULL) {
927			/* Last address was deleted from this interface.
928			   Disable IP.
929			 */
930			fib_disable_ip(dev, 1, 0);
931		} else {
932			rt_cache_flush(dev_net(dev), -1);
933		}
934		break;
935	}
936	return NOTIFY_DONE;
937}
938
939static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
940{
941	struct net_device *dev = ptr;
942	struct in_device *in_dev = __in_dev_get_rtnl(dev);
943
944	if (event == NETDEV_UNREGISTER) {
945		fib_disable_ip(dev, 2, -1);
946		return NOTIFY_DONE;
947	}
948
949	if (!in_dev)
950		return NOTIFY_DONE;
951
952	switch (event) {
953	case NETDEV_UP:
954		for_ifa(in_dev) {
955			fib_add_ifaddr(ifa);
956		} endfor_ifa(in_dev);
957#ifdef CONFIG_IP_ROUTE_MULTIPATH
958		fib_sync_up(dev);
959#endif
960		rt_cache_flush(dev_net(dev), -1);
961		break;
962	case NETDEV_DOWN:
963		fib_disable_ip(dev, 0, 0);
964		break;
965	case NETDEV_CHANGEMTU:
966	case NETDEV_CHANGE:
967		rt_cache_flush(dev_net(dev), 0);
968		break;
969	case NETDEV_UNREGISTER_BATCH:
970		rt_cache_flush_batch();
971		break;
972	}
973	return NOTIFY_DONE;
974}
975
976static struct notifier_block fib_inetaddr_notifier = {
977	.notifier_call = fib_inetaddr_event,
978};
979
980static struct notifier_block fib_netdev_notifier = {
981	.notifier_call = fib_netdev_event,
982};
983
984static int __net_init ip_fib_net_init(struct net *net)
985{
986	int err;
987	unsigned int i;
988
989	net->ipv4.fib_table_hash = kzalloc(
990			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
991	if (net->ipv4.fib_table_hash == NULL)
992		return -ENOMEM;
993
994	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
995		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
996
997	err = fib4_rules_init(net);
998	if (err < 0)
999		goto fail;
1000	return 0;
1001
1002fail:
1003	kfree(net->ipv4.fib_table_hash);
1004	return err;
1005}
1006
1007static void __net_exit ip_fib_net_exit(struct net *net)
1008{
1009	unsigned int i;
1010
1011#ifdef CONFIG_IP_MULTIPLE_TABLES
1012	fib4_rules_exit(net);
1013#endif
1014
1015	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1016		struct fib_table *tb;
1017		struct hlist_head *head;
1018		struct hlist_node *node, *tmp;
1019
1020		head = &net->ipv4.fib_table_hash[i];
1021		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1022			hlist_del(node);
1023			fib_table_flush(tb);
1024			kfree(tb);
1025		}
1026	}
1027	kfree(net->ipv4.fib_table_hash);
1028}
1029
1030static int __net_init fib_net_init(struct net *net)
1031{
1032	int error;
1033
1034	error = ip_fib_net_init(net);
1035	if (error < 0)
1036		goto out;
1037	error = nl_fib_lookup_init(net);
1038	if (error < 0)
1039		goto out_nlfl;
1040	error = fib_proc_init(net);
1041	if (error < 0)
1042		goto out_proc;
1043out:
1044	return error;
1045
1046out_proc:
1047	nl_fib_lookup_exit(net);
1048out_nlfl:
1049	ip_fib_net_exit(net);
1050	goto out;
1051}
1052
1053static void __net_exit fib_net_exit(struct net *net)
1054{
1055	fib_proc_exit(net);
1056	nl_fib_lookup_exit(net);
1057	ip_fib_net_exit(net);
1058}
1059
1060static struct pernet_operations fib_net_ops = {
1061	.init = fib_net_init,
1062	.exit = fib_net_exit,
1063};
1064
1065void __init ip_fib_init(void)
1066{
1067	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1068	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1069	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1070
1071	register_pernet_subsys(&fib_net_ops);
1072	register_netdevice_notifier(&fib_netdev_notifier);
1073	register_inetaddr_notifier(&fib_inetaddr_notifier);
1074
1075	fib_hash_init();
1076}
1077
/* Symbols exported for use by other kernel code. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1081