fib_frontend.c revision 16c6cf8bb471392fd09b48b7c27e7d83a446b4bc
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 *		This program is free software; you can redistribute it and/or
11 *		modify it under the terms of the GNU General Public License
12 *		as published by the Free Software Foundation; either version
13 *		2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
20#include <linux/capability.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
30#include <linux/inetdevice.h>
31#include <linux/netdevice.h>
32#include <linux/if_addr.h>
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
35#include <linux/init.h>
36#include <linux/list.h>
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/arp.h>
44#include <net/ip_fib.h>
45#include <net/rtnetlink.h>
46
47#ifndef CONFIG_IP_MULTIPLE_TABLES
48
49static int __net_init fib4_rules_init(struct net *net)
50{
51	struct fib_table *local_table, *main_table;
52
53	local_table = fib_hash_table(RT_TABLE_LOCAL);
54	if (local_table == NULL)
55		return -ENOMEM;
56
57	main_table  = fib_hash_table(RT_TABLE_MAIN);
58	if (main_table == NULL)
59		goto fail;
60
61	hlist_add_head_rcu(&local_table->tb_hlist,
62				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
63	hlist_add_head_rcu(&main_table->tb_hlist,
64				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
65	return 0;
66
67fail:
68	kfree(local_table);
69	return -ENOMEM;
70}
71#else
72
73struct fib_table *fib_new_table(struct net *net, u32 id)
74{
75	struct fib_table *tb;
76	unsigned int h;
77
78	if (id == 0)
79		id = RT_TABLE_MAIN;
80	tb = fib_get_table(net, id);
81	if (tb)
82		return tb;
83
84	tb = fib_hash_table(id);
85	if (!tb)
86		return NULL;
87	h = id & (FIB_TABLE_HASHSZ - 1);
88	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
89	return tb;
90}
91
92struct fib_table *fib_get_table(struct net *net, u32 id)
93{
94	struct fib_table *tb;
95	struct hlist_node *node;
96	struct hlist_head *head;
97	unsigned int h;
98
99	if (id == 0)
100		id = RT_TABLE_MAIN;
101	h = id & (FIB_TABLE_HASHSZ - 1);
102
103	rcu_read_lock();
104	head = &net->ipv4.fib_table_hash[h];
105	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
106		if (tb->tb_id == id) {
107			rcu_read_unlock();
108			return tb;
109		}
110	}
111	rcu_read_unlock();
112	return NULL;
113}
114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
116void fib_select_default(struct net *net,
117			const struct flowi *flp, struct fib_result *res)
118{
119	struct fib_table *tb;
120	int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123		return;
124	table = res->r->table;
125#endif
126	tb = fib_get_table(net, table);
127	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128		fib_table_select_default(tb, flp, res);
129}
130
131static void fib_flush(struct net *net)
132{
133	int flushed = 0;
134	struct fib_table *tb;
135	struct hlist_node *node;
136	struct hlist_head *head;
137	unsigned int h;
138
139	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
140		head = &net->ipv4.fib_table_hash[h];
141		hlist_for_each_entry(tb, node, head, tb_hlist)
142			flushed += fib_table_flush(tb);
143	}
144
145	if (flushed)
146		rt_cache_flush(net, -1);
147}
148
149/*
150 *	Find the first device with a given source address.
151 */
152
153struct net_device * ip_dev_find(struct net *net, __be32 addr)
154{
155	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156	struct fib_result res;
157	struct net_device *dev = NULL;
158	struct fib_table *local_table;
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161	res.r = NULL;
162#endif
163
164	local_table = fib_get_table(net, RT_TABLE_LOCAL);
165	if (!local_table || fib_table_lookup(local_table, &fl, &res))
166		return NULL;
167	if (res.type != RTN_LOCAL)
168		goto out;
169	dev = FIB_RES_DEV(res);
170
171	if (dev)
172		dev_hold(dev);
173out:
174	fib_res_put(&res);
175	return dev;
176}
177
178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
182static inline unsigned __inet_dev_addr_type(struct net *net,
183					    const struct net_device *dev,
184					    __be32 addr)
185{
186	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187	struct fib_result	res;
188	unsigned ret = RTN_BROADCAST;
189	struct fib_table *local_table;
190
191	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
192		return RTN_BROADCAST;
193	if (ipv4_is_multicast(addr))
194		return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197	res.r = NULL;
198#endif
199
200	local_table = fib_get_table(net, RT_TABLE_LOCAL);
201	if (local_table) {
202		ret = RTN_UNICAST;
203		if (!fib_table_lookup(local_table, &fl, &res)) {
204			if (!dev || dev == res.fi->fib_dev)
205				ret = res.type;
206			fib_res_put(&res);
207		}
208	}
209	return ret;
210}
211
212unsigned int inet_addr_type(struct net *net, __be32 addr)
213{
214	return __inet_dev_addr_type(net, NULL, addr);
215}
216
217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218				__be32 addr)
219{
220       return __inet_dev_addr_type(net, dev, addr);
221}
222
223/* Given (packet source, input interface) and optional (dst, oif, tos):
224   - (main) check, that source is valid i.e. not broadcast or our local
225     address.
226   - figure out what "logical" interface this packet arrived
227     and calculate "specific destination" address.
228   - check, that packet arrived from expected physical interface.
229 */
230
231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
232			struct net_device *dev, __be32 *spec_dst, u32 *itag)
233{
234	struct in_device *in_dev;
235	struct flowi fl = { .nl_u = { .ip4_u =
236				      { .daddr = src,
237					.saddr = dst,
238					.tos = tos } },
239			    .iif = oif };
240	struct fib_result res;
241	int no_addr, rpf;
242	int ret;
243	struct net *net;
244
245	no_addr = rpf = 0;
246	rcu_read_lock();
247	in_dev = __in_dev_get_rcu(dev);
248	if (in_dev) {
249		no_addr = in_dev->ifa_list == NULL;
250		rpf = IN_DEV_RPFILTER(in_dev);
251	}
252	rcu_read_unlock();
253
254	if (in_dev == NULL)
255		goto e_inval;
256
257	net = dev_net(dev);
258	if (fib_lookup(net, &fl, &res))
259		goto last_resort;
260	if (res.type != RTN_UNICAST)
261		goto e_inval_res;
262	*spec_dst = FIB_RES_PREFSRC(res);
263	fib_combine_itag(itag, &res);
264#ifdef CONFIG_IP_ROUTE_MULTIPATH
265	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
266#else
267	if (FIB_RES_DEV(res) == dev)
268#endif
269	{
270		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
271		fib_res_put(&res);
272		return ret;
273	}
274	fib_res_put(&res);
275	if (no_addr)
276		goto last_resort;
277	if (rpf == 1)
278		goto e_inval;
279	fl.oif = dev->ifindex;
280
281	ret = 0;
282	if (fib_lookup(net, &fl, &res) == 0) {
283		if (res.type == RTN_UNICAST) {
284			*spec_dst = FIB_RES_PREFSRC(res);
285			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
286		}
287		fib_res_put(&res);
288	}
289	return ret;
290
291last_resort:
292	if (rpf)
293		goto e_inval;
294	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
295	*itag = 0;
296	return 0;
297
298e_inval_res:
299	fib_res_put(&res);
300e_inval:
301	return -EINVAL;
302}
303
/* Read the IPv4 address out of a sockaddr that is viewed as a sockaddr_in. */
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;

	return sin->sin_addr.s_addr;
}
308
309static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
310{
311	struct nlattr *nla;
312
313	nla = (struct nlattr *) ((char *) mx + len);
314	nla->nla_type = type;
315	nla->nla_len = nla_attr_size(4);
316	*(u32 *) nla_data(nla) = value;
317
318	return len + nla_total_size(4);
319}
320
321static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
322				 struct fib_config *cfg)
323{
324	__be32 addr;
325	int plen;
326
327	memset(cfg, 0, sizeof(*cfg));
328	cfg->fc_nlinfo.nl_net = net;
329
330	if (rt->rt_dst.sa_family != AF_INET)
331		return -EAFNOSUPPORT;
332
333	/*
334	 * Check mask for validity:
335	 * a) it must be contiguous.
336	 * b) destination must have all host bits clear.
337	 * c) if application forgot to set correct family (AF_INET),
338	 *    reject request unless it is absolutely clear i.e.
339	 *    both family and mask are zero.
340	 */
341	plen = 32;
342	addr = sk_extract_addr(&rt->rt_dst);
343	if (!(rt->rt_flags & RTF_HOST)) {
344		__be32 mask = sk_extract_addr(&rt->rt_genmask);
345
346		if (rt->rt_genmask.sa_family != AF_INET) {
347			if (mask || rt->rt_genmask.sa_family)
348				return -EAFNOSUPPORT;
349		}
350
351		if (bad_mask(mask, addr))
352			return -EINVAL;
353
354		plen = inet_mask_len(mask);
355	}
356
357	cfg->fc_dst_len = plen;
358	cfg->fc_dst = addr;
359
360	if (cmd != SIOCDELRT) {
361		cfg->fc_nlflags = NLM_F_CREATE;
362		cfg->fc_protocol = RTPROT_BOOT;
363	}
364
365	if (rt->rt_metric)
366		cfg->fc_priority = rt->rt_metric - 1;
367
368	if (rt->rt_flags & RTF_REJECT) {
369		cfg->fc_scope = RT_SCOPE_HOST;
370		cfg->fc_type = RTN_UNREACHABLE;
371		return 0;
372	}
373
374	cfg->fc_scope = RT_SCOPE_NOWHERE;
375	cfg->fc_type = RTN_UNICAST;
376
377	if (rt->rt_dev) {
378		char *colon;
379		struct net_device *dev;
380		char devname[IFNAMSIZ];
381
382		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
383			return -EFAULT;
384
385		devname[IFNAMSIZ-1] = 0;
386		colon = strchr(devname, ':');
387		if (colon)
388			*colon = 0;
389		dev = __dev_get_by_name(net, devname);
390		if (!dev)
391			return -ENODEV;
392		cfg->fc_oif = dev->ifindex;
393		if (colon) {
394			struct in_ifaddr *ifa;
395			struct in_device *in_dev = __in_dev_get_rtnl(dev);
396			if (!in_dev)
397				return -ENODEV;
398			*colon = ':';
399			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
400				if (strcmp(ifa->ifa_label, devname) == 0)
401					break;
402			if (ifa == NULL)
403				return -ENODEV;
404			cfg->fc_prefsrc = ifa->ifa_local;
405		}
406	}
407
408	addr = sk_extract_addr(&rt->rt_gateway);
409	if (rt->rt_gateway.sa_family == AF_INET && addr) {
410		cfg->fc_gw = addr;
411		if (rt->rt_flags & RTF_GATEWAY &&
412		    inet_addr_type(net, addr) == RTN_UNICAST)
413			cfg->fc_scope = RT_SCOPE_UNIVERSE;
414	}
415
416	if (cmd == SIOCDELRT)
417		return 0;
418
419	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
420		return -EINVAL;
421
422	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
423		cfg->fc_scope = RT_SCOPE_LINK;
424
425	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
426		struct nlattr *mx;
427		int len = 0;
428
429		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
430		if (mx == NULL)
431			return -ENOMEM;
432
433		if (rt->rt_flags & RTF_MTU)
434			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
435
436		if (rt->rt_flags & RTF_WINDOW)
437			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
438
439		if (rt->rt_flags & RTF_IRTT)
440			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
441
442		cfg->fc_mx = mx;
443		cfg->fc_mx_len = len;
444	}
445
446	return 0;
447}
448
449/*
450 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
451 */
452
453int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
454{
455	struct fib_config cfg;
456	struct rtentry rt;
457	int err;
458
459	switch (cmd) {
460	case SIOCADDRT:		/* Add a route */
461	case SIOCDELRT:		/* Delete a route */
462		if (!capable(CAP_NET_ADMIN))
463			return -EPERM;
464
465		if (copy_from_user(&rt, arg, sizeof(rt)))
466			return -EFAULT;
467
468		rtnl_lock();
469		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
470		if (err == 0) {
471			struct fib_table *tb;
472
473			if (cmd == SIOCDELRT) {
474				tb = fib_get_table(net, cfg.fc_table);
475				if (tb)
476					err = fib_table_delete(tb, &cfg);
477				else
478					err = -ESRCH;
479			} else {
480				tb = fib_new_table(net, cfg.fc_table);
481				if (tb)
482					err = fib_table_insert(tb, &cfg);
483				else
484					err = -ENOBUFS;
485			}
486
487			/* allocated by rtentry_to_fib_config() */
488			kfree(cfg.fc_mx);
489		}
490		rtnl_unlock();
491		return err;
492	}
493	return -EINVAL;
494}
495
496const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
497	[RTA_DST]		= { .type = NLA_U32 },
498	[RTA_SRC]		= { .type = NLA_U32 },
499	[RTA_IIF]		= { .type = NLA_U32 },
500	[RTA_OIF]		= { .type = NLA_U32 },
501	[RTA_GATEWAY]		= { .type = NLA_U32 },
502	[RTA_PRIORITY]		= { .type = NLA_U32 },
503	[RTA_PREFSRC]		= { .type = NLA_U32 },
504	[RTA_METRICS]		= { .type = NLA_NESTED },
505	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
506	[RTA_FLOW]		= { .type = NLA_U32 },
507};
508
509static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
510			    struct nlmsghdr *nlh, struct fib_config *cfg)
511{
512	struct nlattr *attr;
513	int err, remaining;
514	struct rtmsg *rtm;
515
516	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
517	if (err < 0)
518		goto errout;
519
520	memset(cfg, 0, sizeof(*cfg));
521
522	rtm = nlmsg_data(nlh);
523	cfg->fc_dst_len = rtm->rtm_dst_len;
524	cfg->fc_tos = rtm->rtm_tos;
525	cfg->fc_table = rtm->rtm_table;
526	cfg->fc_protocol = rtm->rtm_protocol;
527	cfg->fc_scope = rtm->rtm_scope;
528	cfg->fc_type = rtm->rtm_type;
529	cfg->fc_flags = rtm->rtm_flags;
530	cfg->fc_nlflags = nlh->nlmsg_flags;
531
532	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
533	cfg->fc_nlinfo.nlh = nlh;
534	cfg->fc_nlinfo.nl_net = net;
535
536	if (cfg->fc_type > RTN_MAX) {
537		err = -EINVAL;
538		goto errout;
539	}
540
541	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
542		switch (nla_type(attr)) {
543		case RTA_DST:
544			cfg->fc_dst = nla_get_be32(attr);
545			break;
546		case RTA_OIF:
547			cfg->fc_oif = nla_get_u32(attr);
548			break;
549		case RTA_GATEWAY:
550			cfg->fc_gw = nla_get_be32(attr);
551			break;
552		case RTA_PRIORITY:
553			cfg->fc_priority = nla_get_u32(attr);
554			break;
555		case RTA_PREFSRC:
556			cfg->fc_prefsrc = nla_get_be32(attr);
557			break;
558		case RTA_METRICS:
559			cfg->fc_mx = nla_data(attr);
560			cfg->fc_mx_len = nla_len(attr);
561			break;
562		case RTA_MULTIPATH:
563			cfg->fc_mp = nla_data(attr);
564			cfg->fc_mp_len = nla_len(attr);
565			break;
566		case RTA_FLOW:
567			cfg->fc_flow = nla_get_u32(attr);
568			break;
569		case RTA_TABLE:
570			cfg->fc_table = nla_get_u32(attr);
571			break;
572		}
573	}
574
575	return 0;
576errout:
577	return err;
578}
579
580static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
581{
582	struct net *net = sock_net(skb->sk);
583	struct fib_config cfg;
584	struct fib_table *tb;
585	int err;
586
587	err = rtm_to_fib_config(net, skb, nlh, &cfg);
588	if (err < 0)
589		goto errout;
590
591	tb = fib_get_table(net, cfg.fc_table);
592	if (tb == NULL) {
593		err = -ESRCH;
594		goto errout;
595	}
596
597	err = fib_table_delete(tb, &cfg);
598errout:
599	return err;
600}
601
602static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
603{
604	struct net *net = sock_net(skb->sk);
605	struct fib_config cfg;
606	struct fib_table *tb;
607	int err;
608
609	err = rtm_to_fib_config(net, skb, nlh, &cfg);
610	if (err < 0)
611		goto errout;
612
613	tb = fib_new_table(net, cfg.fc_table);
614	if (tb == NULL) {
615		err = -ENOBUFS;
616		goto errout;
617	}
618
619	err = fib_table_insert(tb, &cfg);
620errout:
621	return err;
622}
623
624static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
625{
626	struct net *net = sock_net(skb->sk);
627	unsigned int h, s_h;
628	unsigned int e = 0, s_e;
629	struct fib_table *tb;
630	struct hlist_node *node;
631	struct hlist_head *head;
632	int dumped = 0;
633
634	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
635	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
636		return ip_rt_dump(skb, cb);
637
638	s_h = cb->args[0];
639	s_e = cb->args[1];
640
641	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
642		e = 0;
643		head = &net->ipv4.fib_table_hash[h];
644		hlist_for_each_entry(tb, node, head, tb_hlist) {
645			if (e < s_e)
646				goto next;
647			if (dumped)
648				memset(&cb->args[2], 0, sizeof(cb->args) -
649						 2 * sizeof(cb->args[0]));
650			if (fib_table_dump(tb, skb, cb) < 0)
651				goto out;
652			dumped = 1;
653next:
654			e++;
655		}
656	}
657out:
658	cb->args[1] = e;
659	cb->args[0] = h;
660
661	return skb->len;
662}
663
664/* Prepare and feed intra-kernel routing request.
665   Really, it should be netlink message, but :-( netlink
666   can be not configured, so that we feed it directly
667   to fib engine. It is legal, because all events occur
668   only when netlink is already locked.
669 */
670
671static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
672{
673	struct net *net = dev_net(ifa->ifa_dev->dev);
674	struct fib_table *tb;
675	struct fib_config cfg = {
676		.fc_protocol = RTPROT_KERNEL,
677		.fc_type = type,
678		.fc_dst = dst,
679		.fc_dst_len = dst_len,
680		.fc_prefsrc = ifa->ifa_local,
681		.fc_oif = ifa->ifa_dev->dev->ifindex,
682		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
683		.fc_nlinfo = {
684			.nl_net = net,
685		},
686	};
687
688	if (type == RTN_UNICAST)
689		tb = fib_new_table(net, RT_TABLE_MAIN);
690	else
691		tb = fib_new_table(net, RT_TABLE_LOCAL);
692
693	if (tb == NULL)
694		return;
695
696	cfg.fc_table = tb->tb_id;
697
698	if (type != RTN_LOCAL)
699		cfg.fc_scope = RT_SCOPE_LINK;
700	else
701		cfg.fc_scope = RT_SCOPE_HOST;
702
703	if (cmd == RTM_NEWROUTE)
704		fib_table_insert(tb, &cfg);
705	else
706		fib_table_delete(tb, &cfg);
707}
708
709void fib_add_ifaddr(struct in_ifaddr *ifa)
710{
711	struct in_device *in_dev = ifa->ifa_dev;
712	struct net_device *dev = in_dev->dev;
713	struct in_ifaddr *prim = ifa;
714	__be32 mask = ifa->ifa_mask;
715	__be32 addr = ifa->ifa_local;
716	__be32 prefix = ifa->ifa_address&mask;
717
718	if (ifa->ifa_flags&IFA_F_SECONDARY) {
719		prim = inet_ifa_byprefix(in_dev, prefix, mask);
720		if (prim == NULL) {
721			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
722			return;
723		}
724	}
725
726	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
727
728	if (!(dev->flags&IFF_UP))
729		return;
730
731	/* Add broadcast address, if it is explicitly assigned. */
732	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
733		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
734
735	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
736	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
737		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
738			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
739
740		/* Add network specific broadcasts, when it takes a sense */
741		if (ifa->ifa_prefixlen < 31) {
742			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
743			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
744		}
745	}
746}
747
748static void fib_del_ifaddr(struct in_ifaddr *ifa)
749{
750	struct in_device *in_dev = ifa->ifa_dev;
751	struct net_device *dev = in_dev->dev;
752	struct in_ifaddr *ifa1;
753	struct in_ifaddr *prim = ifa;
754	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
755	__be32 any = ifa->ifa_address&ifa->ifa_mask;
756#define LOCAL_OK	1
757#define BRD_OK		2
758#define BRD0_OK		4
759#define BRD1_OK		8
760	unsigned ok = 0;
761
762	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
763		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
764			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
765	else {
766		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
767		if (prim == NULL) {
768			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
769			return;
770		}
771	}
772
773	/* Deletion is more complicated than add.
774	   We should take care of not to delete too much :-)
775
776	   Scan address list to be sure that addresses are really gone.
777	 */
778
779	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
780		if (ifa->ifa_local == ifa1->ifa_local)
781			ok |= LOCAL_OK;
782		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
783			ok |= BRD_OK;
784		if (brd == ifa1->ifa_broadcast)
785			ok |= BRD1_OK;
786		if (any == ifa1->ifa_broadcast)
787			ok |= BRD0_OK;
788	}
789
790	if (!(ok&BRD_OK))
791		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
792	if (!(ok&BRD1_OK))
793		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
794	if (!(ok&BRD0_OK))
795		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
796	if (!(ok&LOCAL_OK)) {
797		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
798
799		/* Check, that this local address finally disappeared. */
800		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
801			/* And the last, but not the least thing.
802			   We must flush stray FIB entries.
803
804			   First of all, we scan fib_info list searching
805			   for stray nexthop entries, then ignite fib_flush.
806			*/
807			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
808				fib_flush(dev_net(dev));
809		}
810	}
811#undef LOCAL_OK
812#undef BRD_OK
813#undef BRD0_OK
814#undef BRD1_OK
815}
816
817static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
818{
819
820	struct fib_result       res;
821	struct flowi            fl = { .mark = frn->fl_mark,
822				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
823							    .tos = frn->fl_tos,
824							    .scope = frn->fl_scope } } };
825
826#ifdef CONFIG_IP_MULTIPLE_TABLES
827	res.r = NULL;
828#endif
829
830	frn->err = -ENOENT;
831	if (tb) {
832		local_bh_disable();
833
834		frn->tb_id = tb->tb_id;
835		frn->err = fib_table_lookup(tb, &fl, &res);
836
837		if (!frn->err) {
838			frn->prefixlen = res.prefixlen;
839			frn->nh_sel = res.nh_sel;
840			frn->type = res.type;
841			frn->scope = res.scope;
842			fib_res_put(&res);
843		}
844		local_bh_enable();
845	}
846}
847
848static void nl_fib_input(struct sk_buff *skb)
849{
850	struct net *net;
851	struct fib_result_nl *frn;
852	struct nlmsghdr *nlh;
853	struct fib_table *tb;
854	u32 pid;
855
856	net = sock_net(skb->sk);
857	nlh = nlmsg_hdr(skb);
858	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
859	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
860		return;
861
862	skb = skb_clone(skb, GFP_KERNEL);
863	if (skb == NULL)
864		return;
865	nlh = nlmsg_hdr(skb);
866
867	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
868	tb = fib_get_table(net, frn->tb_id_in);
869
870	nl_fib_lookup(frn, tb);
871
872	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
873	NETLINK_CB(skb).pid = 0;         /* from kernel */
874	NETLINK_CB(skb).dst_group = 0;  /* unicast */
875	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
876}
877
878static int nl_fib_lookup_init(struct net *net)
879{
880	struct sock *sk;
881	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
882				   nl_fib_input, NULL, THIS_MODULE);
883	if (sk == NULL)
884		return -EAFNOSUPPORT;
885	net->ipv4.fibnl = sk;
886	return 0;
887}
888
889static void nl_fib_lookup_exit(struct net *net)
890{
891	netlink_kernel_release(net->ipv4.fibnl);
892	net->ipv4.fibnl = NULL;
893}
894
/*
 * Stop using "dev" for IP: sync the FIB down for the device (flushing the
 * tables if that reports a change), flush the routing cache and call
 * arp_ifdown().  "force" is passed through to fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), 0);
	arp_ifdown(dev);
}
902
903static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
904{
905	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
906	struct net_device *dev = ifa->ifa_dev->dev;
907
908	switch (event) {
909	case NETDEV_UP:
910		fib_add_ifaddr(ifa);
911#ifdef CONFIG_IP_ROUTE_MULTIPATH
912		fib_sync_up(dev);
913#endif
914		rt_cache_flush(dev_net(dev), -1);
915		break;
916	case NETDEV_DOWN:
917		fib_del_ifaddr(ifa);
918		if (ifa->ifa_dev->ifa_list == NULL) {
919			/* Last address was deleted from this interface.
920			   Disable IP.
921			 */
922			fib_disable_ip(dev, 1);
923		} else {
924			rt_cache_flush(dev_net(dev), -1);
925		}
926		break;
927	}
928	return NOTIFY_DONE;
929}
930
931static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
932{
933	struct net_device *dev = ptr;
934	struct in_device *in_dev = __in_dev_get_rtnl(dev);
935
936	if (event == NETDEV_UNREGISTER) {
937		fib_disable_ip(dev, 2);
938		return NOTIFY_DONE;
939	}
940
941	if (!in_dev)
942		return NOTIFY_DONE;
943
944	switch (event) {
945	case NETDEV_UP:
946		for_ifa(in_dev) {
947			fib_add_ifaddr(ifa);
948		} endfor_ifa(in_dev);
949#ifdef CONFIG_IP_ROUTE_MULTIPATH
950		fib_sync_up(dev);
951#endif
952		rt_cache_flush(dev_net(dev), -1);
953		break;
954	case NETDEV_DOWN:
955		fib_disable_ip(dev, 0);
956		break;
957	case NETDEV_CHANGEMTU:
958	case NETDEV_CHANGE:
959		rt_cache_flush(dev_net(dev), 0);
960		break;
961	}
962	return NOTIFY_DONE;
963}
964
965static struct notifier_block fib_inetaddr_notifier = {
966	.notifier_call = fib_inetaddr_event,
967};
968
969static struct notifier_block fib_netdev_notifier = {
970	.notifier_call = fib_netdev_event,
971};
972
973static int __net_init ip_fib_net_init(struct net *net)
974{
975	int err;
976	unsigned int i;
977
978	net->ipv4.fib_table_hash = kzalloc(
979			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
980	if (net->ipv4.fib_table_hash == NULL)
981		return -ENOMEM;
982
983	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
984		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
985
986	err = fib4_rules_init(net);
987	if (err < 0)
988		goto fail;
989	return 0;
990
991fail:
992	kfree(net->ipv4.fib_table_hash);
993	return err;
994}
995
996static void __net_exit ip_fib_net_exit(struct net *net)
997{
998	unsigned int i;
999
1000#ifdef CONFIG_IP_MULTIPLE_TABLES
1001	fib4_rules_exit(net);
1002#endif
1003
1004	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1005		struct fib_table *tb;
1006		struct hlist_head *head;
1007		struct hlist_node *node, *tmp;
1008
1009		head = &net->ipv4.fib_table_hash[i];
1010		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1011			hlist_del(node);
1012			fib_table_flush(tb);
1013			kfree(tb);
1014		}
1015	}
1016	kfree(net->ipv4.fib_table_hash);
1017}
1018
1019static int __net_init fib_net_init(struct net *net)
1020{
1021	int error;
1022
1023	error = ip_fib_net_init(net);
1024	if (error < 0)
1025		goto out;
1026	error = nl_fib_lookup_init(net);
1027	if (error < 0)
1028		goto out_nlfl;
1029	error = fib_proc_init(net);
1030	if (error < 0)
1031		goto out_proc;
1032out:
1033	return error;
1034
1035out_proc:
1036	nl_fib_lookup_exit(net);
1037out_nlfl:
1038	ip_fib_net_exit(net);
1039	goto out;
1040}
1041
1042static void __net_exit fib_net_exit(struct net *net)
1043{
1044	fib_proc_exit(net);
1045	nl_fib_lookup_exit(net);
1046	ip_fib_net_exit(net);
1047}
1048
1049static struct pernet_operations fib_net_ops = {
1050	.init = fib_net_init,
1051	.exit = fib_net_exit,
1052};
1053
1054void __init ip_fib_init(void)
1055{
1056	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1057	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1058	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1059
1060	register_pernet_subsys(&fib_net_ops);
1061	register_netdevice_notifier(&fib_netdev_notifier);
1062	register_inetaddr_notifier(&fib_inetaddr_notifier);
1063
1064	fib_hash_init();
1065}
1066
/* Symbols exported from this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1070