fib_frontend.c revision 5b707aaae4ca7b7204eb4a472721c84866d85f0f
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
50#ifndef CONFIG_IP_MULTIPLE_TABLES
51
52static int __net_init fib4_rules_init(struct net *net)
53{
54	struct fib_table *local_table, *main_table;
55
56	local_table = fib_hash_table(RT_TABLE_LOCAL);
57	if (local_table == NULL)
58		return -ENOMEM;
59
60	main_table  = fib_hash_table(RT_TABLE_MAIN);
61	if (main_table == NULL)
62		goto fail;
63
64	hlist_add_head_rcu(&local_table->tb_hlist,
65				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
66	hlist_add_head_rcu(&main_table->tb_hlist,
67				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
68	return 0;
69
70fail:
71	kfree(local_table);
72	return -ENOMEM;
73}
74#else
75
76struct fib_table *fib_new_table(struct net *net, u32 id)
77{
78	struct fib_table *tb;
79	unsigned int h;
80
81	if (id == 0)
82		id = RT_TABLE_MAIN;
83	tb = fib_get_table(net, id);
84	if (tb)
85		return tb;
86
87	tb = fib_hash_table(id);
88	if (!tb)
89		return NULL;
90	h = id & (FIB_TABLE_HASHSZ - 1);
91	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
92	return tb;
93}
94
95struct fib_table *fib_get_table(struct net *net, u32 id)
96{
97	struct fib_table *tb;
98	struct hlist_node *node;
99	struct hlist_head *head;
100	unsigned int h;
101
102	if (id == 0)
103		id = RT_TABLE_MAIN;
104	h = id & (FIB_TABLE_HASHSZ - 1);
105
106	rcu_read_lock();
107	head = &net->ipv4.fib_table_hash[h];
108	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
109		if (tb->tb_id == id) {
110			rcu_read_unlock();
111			return tb;
112		}
113	}
114	rcu_read_unlock();
115	return NULL;
116}
117#endif /* CONFIG_IP_MULTIPLE_TABLES */
118
119static void fib_flush(struct net *net)
120{
121	int flushed = 0;
122	struct fib_table *tb;
123	struct hlist_node *node;
124	struct hlist_head *head;
125	unsigned int h;
126
127	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
128		head = &net->ipv4.fib_table_hash[h];
129		hlist_for_each_entry(tb, node, head, tb_hlist)
130			flushed += tb->tb_flush(tb);
131	}
132
133	if (flushed)
134		rt_cache_flush(-1);
135}
136
137/*
138 *	Find the first device with a given source address.
139 */
140
141struct net_device * ip_dev_find(__be32 addr)
142{
143	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
144	struct fib_result res;
145	struct net_device *dev = NULL;
146	struct fib_table *local_table;
147
148#ifdef CONFIG_IP_MULTIPLE_TABLES
149	res.r = NULL;
150#endif
151
152	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
153	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
154		return NULL;
155	if (res.type != RTN_LOCAL)
156		goto out;
157	dev = FIB_RES_DEV(res);
158
159	if (dev)
160		dev_hold(dev);
161out:
162	fib_res_put(&res);
163	return dev;
164}
165
/*
 * Find address type as if only "dev" was present in the system. If
 * dev is NULL then all interfaces are taken into consideration.
 */
170static inline unsigned __inet_dev_addr_type(struct net *net,
171					    const struct net_device *dev,
172					    __be32 addr)
173{
174	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
175	struct fib_result	res;
176	unsigned ret = RTN_BROADCAST;
177	struct fib_table *local_table;
178
179	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
180		return RTN_BROADCAST;
181	if (ipv4_is_multicast(addr))
182		return RTN_MULTICAST;
183
184#ifdef CONFIG_IP_MULTIPLE_TABLES
185	res.r = NULL;
186#endif
187
188	local_table = fib_get_table(net, RT_TABLE_LOCAL);
189	if (local_table) {
190		ret = RTN_UNICAST;
191		if (!local_table->tb_lookup(local_table, &fl, &res)) {
192			if (!dev || dev == res.fi->fib_dev)
193				ret = res.type;
194			fib_res_put(&res);
195		}
196	}
197	return ret;
198}
199
200unsigned int inet_addr_type(struct net *net, __be32 addr)
201{
202	return __inet_dev_addr_type(net, NULL, addr);
203}
204
205unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
206				__be32 addr)
207{
208       return __inet_dev_addr_type(net, dev, addr);
209}
210
211/* Given (packet source, input interface) and optional (dst, oif, tos):
212   - (main) check, that source is valid i.e. not broadcast or our local
213     address.
214   - figure out what "logical" interface this packet arrived
215     and calculate "specific destination" address.
216   - check, that packet arrived from expected physical interface.
217 */
218
219int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
220			struct net_device *dev, __be32 *spec_dst, u32 *itag)
221{
222	struct in_device *in_dev;
223	struct flowi fl = { .nl_u = { .ip4_u =
224				      { .daddr = src,
225					.saddr = dst,
226					.tos = tos } },
227			    .iif = oif };
228	struct fib_result res;
229	int no_addr, rpf;
230	int ret;
231	struct net *net;
232
233	no_addr = rpf = 0;
234	rcu_read_lock();
235	in_dev = __in_dev_get_rcu(dev);
236	if (in_dev) {
237		no_addr = in_dev->ifa_list == NULL;
238		rpf = IN_DEV_RPFILTER(in_dev);
239	}
240	rcu_read_unlock();
241
242	if (in_dev == NULL)
243		goto e_inval;
244
245	net = dev->nd_net;
246	if (fib_lookup(net, &fl, &res))
247		goto last_resort;
248	if (res.type != RTN_UNICAST)
249		goto e_inval_res;
250	*spec_dst = FIB_RES_PREFSRC(res);
251	fib_combine_itag(itag, &res);
252#ifdef CONFIG_IP_ROUTE_MULTIPATH
253	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
254#else
255	if (FIB_RES_DEV(res) == dev)
256#endif
257	{
258		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
259		fib_res_put(&res);
260		return ret;
261	}
262	fib_res_put(&res);
263	if (no_addr)
264		goto last_resort;
265	if (rpf)
266		goto e_inval;
267	fl.oif = dev->ifindex;
268
269	ret = 0;
270	if (fib_lookup(net, &fl, &res) == 0) {
271		if (res.type == RTN_UNICAST) {
272			*spec_dst = FIB_RES_PREFSRC(res);
273			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
274		}
275		fib_res_put(&res);
276	}
277	return ret;
278
279last_resort:
280	if (rpf)
281		goto e_inval;
282	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
283	*itag = 0;
284	return 0;
285
286e_inval_res:
287	fib_res_put(&res);
288e_inval:
289	return -EINVAL;
290}
291
292static inline __be32 sk_extract_addr(struct sockaddr *addr)
293{
294	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
295}
296
297static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
298{
299	struct nlattr *nla;
300
301	nla = (struct nlattr *) ((char *) mx + len);
302	nla->nla_type = type;
303	nla->nla_len = nla_attr_size(4);
304	*(u32 *) nla_data(nla) = value;
305
306	return len + nla_total_size(4);
307}
308
309static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
310				 struct fib_config *cfg)
311{
312	__be32 addr;
313	int plen;
314
315	memset(cfg, 0, sizeof(*cfg));
316	cfg->fc_nlinfo.nl_net = net;
317
318	if (rt->rt_dst.sa_family != AF_INET)
319		return -EAFNOSUPPORT;
320
321	/*
322	 * Check mask for validity:
323	 * a) it must be contiguous.
324	 * b) destination must have all host bits clear.
325	 * c) if application forgot to set correct family (AF_INET),
326	 *    reject request unless it is absolutely clear i.e.
327	 *    both family and mask are zero.
328	 */
329	plen = 32;
330	addr = sk_extract_addr(&rt->rt_dst);
331	if (!(rt->rt_flags & RTF_HOST)) {
332		__be32 mask = sk_extract_addr(&rt->rt_genmask);
333
334		if (rt->rt_genmask.sa_family != AF_INET) {
335			if (mask || rt->rt_genmask.sa_family)
336				return -EAFNOSUPPORT;
337		}
338
339		if (bad_mask(mask, addr))
340			return -EINVAL;
341
342		plen = inet_mask_len(mask);
343	}
344
345	cfg->fc_dst_len = plen;
346	cfg->fc_dst = addr;
347
348	if (cmd != SIOCDELRT) {
349		cfg->fc_nlflags = NLM_F_CREATE;
350		cfg->fc_protocol = RTPROT_BOOT;
351	}
352
353	if (rt->rt_metric)
354		cfg->fc_priority = rt->rt_metric - 1;
355
356	if (rt->rt_flags & RTF_REJECT) {
357		cfg->fc_scope = RT_SCOPE_HOST;
358		cfg->fc_type = RTN_UNREACHABLE;
359		return 0;
360	}
361
362	cfg->fc_scope = RT_SCOPE_NOWHERE;
363	cfg->fc_type = RTN_UNICAST;
364
365	if (rt->rt_dev) {
366		char *colon;
367		struct net_device *dev;
368		char devname[IFNAMSIZ];
369
370		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
371			return -EFAULT;
372
373		devname[IFNAMSIZ-1] = 0;
374		colon = strchr(devname, ':');
375		if (colon)
376			*colon = 0;
377		dev = __dev_get_by_name(net, devname);
378		if (!dev)
379			return -ENODEV;
380		cfg->fc_oif = dev->ifindex;
381		if (colon) {
382			struct in_ifaddr *ifa;
383			struct in_device *in_dev = __in_dev_get_rtnl(dev);
384			if (!in_dev)
385				return -ENODEV;
386			*colon = ':';
387			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
388				if (strcmp(ifa->ifa_label, devname) == 0)
389					break;
390			if (ifa == NULL)
391				return -ENODEV;
392			cfg->fc_prefsrc = ifa->ifa_local;
393		}
394	}
395
396	addr = sk_extract_addr(&rt->rt_gateway);
397	if (rt->rt_gateway.sa_family == AF_INET && addr) {
398		cfg->fc_gw = addr;
399		if (rt->rt_flags & RTF_GATEWAY &&
400		    inet_addr_type(net, addr) == RTN_UNICAST)
401			cfg->fc_scope = RT_SCOPE_UNIVERSE;
402	}
403
404	if (cmd == SIOCDELRT)
405		return 0;
406
407	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
408		return -EINVAL;
409
410	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
411		cfg->fc_scope = RT_SCOPE_LINK;
412
413	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
414		struct nlattr *mx;
415		int len = 0;
416
417		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
418		if (mx == NULL)
419			return -ENOMEM;
420
421		if (rt->rt_flags & RTF_MTU)
422			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
423
424		if (rt->rt_flags & RTF_WINDOW)
425			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
426
427		if (rt->rt_flags & RTF_IRTT)
428			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
429
430		cfg->fc_mx = mx;
431		cfg->fc_mx_len = len;
432	}
433
434	return 0;
435}
436
437/*
438 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
439 */
440
441int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
442{
443	struct fib_config cfg;
444	struct rtentry rt;
445	int err;
446
447	switch (cmd) {
448	case SIOCADDRT:		/* Add a route */
449	case SIOCDELRT:		/* Delete a route */
450		if (!capable(CAP_NET_ADMIN))
451			return -EPERM;
452
453		if (copy_from_user(&rt, arg, sizeof(rt)))
454			return -EFAULT;
455
456		rtnl_lock();
457		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
458		if (err == 0) {
459			struct fib_table *tb;
460
461			if (cmd == SIOCDELRT) {
462				tb = fib_get_table(net, cfg.fc_table);
463				if (tb)
464					err = tb->tb_delete(tb, &cfg);
465				else
466					err = -ESRCH;
467			} else {
468				tb = fib_new_table(net, cfg.fc_table);
469				if (tb)
470					err = tb->tb_insert(tb, &cfg);
471				else
472					err = -ENOBUFS;
473			}
474
475			/* allocated by rtentry_to_fib_config() */
476			kfree(cfg.fc_mx);
477		}
478		rtnl_unlock();
479		return err;
480	}
481	return -EINVAL;
482}
483
484const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
485	[RTA_DST]		= { .type = NLA_U32 },
486	[RTA_SRC]		= { .type = NLA_U32 },
487	[RTA_IIF]		= { .type = NLA_U32 },
488	[RTA_OIF]		= { .type = NLA_U32 },
489	[RTA_GATEWAY]		= { .type = NLA_U32 },
490	[RTA_PRIORITY]		= { .type = NLA_U32 },
491	[RTA_PREFSRC]		= { .type = NLA_U32 },
492	[RTA_METRICS]		= { .type = NLA_NESTED },
493	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
494	[RTA_PROTOINFO]		= { .type = NLA_U32 },
495	[RTA_FLOW]		= { .type = NLA_U32 },
496};
497
498static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
499			    struct nlmsghdr *nlh, struct fib_config *cfg)
500{
501	struct nlattr *attr;
502	int err, remaining;
503	struct rtmsg *rtm;
504
505	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
506	if (err < 0)
507		goto errout;
508
509	memset(cfg, 0, sizeof(*cfg));
510
511	rtm = nlmsg_data(nlh);
512	cfg->fc_dst_len = rtm->rtm_dst_len;
513	cfg->fc_tos = rtm->rtm_tos;
514	cfg->fc_table = rtm->rtm_table;
515	cfg->fc_protocol = rtm->rtm_protocol;
516	cfg->fc_scope = rtm->rtm_scope;
517	cfg->fc_type = rtm->rtm_type;
518	cfg->fc_flags = rtm->rtm_flags;
519	cfg->fc_nlflags = nlh->nlmsg_flags;
520
521	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
522	cfg->fc_nlinfo.nlh = nlh;
523	cfg->fc_nlinfo.nl_net = net;
524
525	if (cfg->fc_type > RTN_MAX) {
526		err = -EINVAL;
527		goto errout;
528	}
529
530	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
531		switch (nla_type(attr)) {
532		case RTA_DST:
533			cfg->fc_dst = nla_get_be32(attr);
534			break;
535		case RTA_OIF:
536			cfg->fc_oif = nla_get_u32(attr);
537			break;
538		case RTA_GATEWAY:
539			cfg->fc_gw = nla_get_be32(attr);
540			break;
541		case RTA_PRIORITY:
542			cfg->fc_priority = nla_get_u32(attr);
543			break;
544		case RTA_PREFSRC:
545			cfg->fc_prefsrc = nla_get_be32(attr);
546			break;
547		case RTA_METRICS:
548			cfg->fc_mx = nla_data(attr);
549			cfg->fc_mx_len = nla_len(attr);
550			break;
551		case RTA_MULTIPATH:
552			cfg->fc_mp = nla_data(attr);
553			cfg->fc_mp_len = nla_len(attr);
554			break;
555		case RTA_FLOW:
556			cfg->fc_flow = nla_get_u32(attr);
557			break;
558		case RTA_TABLE:
559			cfg->fc_table = nla_get_u32(attr);
560			break;
561		}
562	}
563
564	return 0;
565errout:
566	return err;
567}
568
569static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
570{
571	struct net *net = skb->sk->sk_net;
572	struct fib_config cfg;
573	struct fib_table *tb;
574	int err;
575
576	err = rtm_to_fib_config(net, skb, nlh, &cfg);
577	if (err < 0)
578		goto errout;
579
580	tb = fib_get_table(net, cfg.fc_table);
581	if (tb == NULL) {
582		err = -ESRCH;
583		goto errout;
584	}
585
586	err = tb->tb_delete(tb, &cfg);
587errout:
588	return err;
589}
590
591static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
592{
593	struct net *net = skb->sk->sk_net;
594	struct fib_config cfg;
595	struct fib_table *tb;
596	int err;
597
598	err = rtm_to_fib_config(net, skb, nlh, &cfg);
599	if (err < 0)
600		goto errout;
601
602	tb = fib_new_table(net, cfg.fc_table);
603	if (tb == NULL) {
604		err = -ENOBUFS;
605		goto errout;
606	}
607
608	err = tb->tb_insert(tb, &cfg);
609errout:
610	return err;
611}
612
613static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
614{
615	struct net *net = skb->sk->sk_net;
616	unsigned int h, s_h;
617	unsigned int e = 0, s_e;
618	struct fib_table *tb;
619	struct hlist_node *node;
620	struct hlist_head *head;
621	int dumped = 0;
622
623	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
624	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
625		return ip_rt_dump(skb, cb);
626
627	s_h = cb->args[0];
628	s_e = cb->args[1];
629
630	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
631		e = 0;
632		head = &net->ipv4.fib_table_hash[h];
633		hlist_for_each_entry(tb, node, head, tb_hlist) {
634			if (e < s_e)
635				goto next;
636			if (dumped)
637				memset(&cb->args[2], 0, sizeof(cb->args) -
638						 2 * sizeof(cb->args[0]));
639			if (tb->tb_dump(tb, skb, cb) < 0)
640				goto out;
641			dumped = 1;
642next:
643			e++;
644		}
645	}
646out:
647	cb->args[1] = e;
648	cb->args[0] = h;
649
650	return skb->len;
651}
652
653/* Prepare and feed intra-kernel routing request.
654   Really, it should be netlink message, but :-( netlink
655   can be not configured, so that we feed it directly
656   to fib engine. It is legal, because all events occur
657   only when netlink is already locked.
658 */
659
660static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
661{
662	struct net *net = ifa->ifa_dev->dev->nd_net;
663	struct fib_table *tb;
664	struct fib_config cfg = {
665		.fc_protocol = RTPROT_KERNEL,
666		.fc_type = type,
667		.fc_dst = dst,
668		.fc_dst_len = dst_len,
669		.fc_prefsrc = ifa->ifa_local,
670		.fc_oif = ifa->ifa_dev->dev->ifindex,
671		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
672		.fc_nlinfo = {
673			.nl_net = net,
674		},
675	};
676
677	if (type == RTN_UNICAST)
678		tb = fib_new_table(net, RT_TABLE_MAIN);
679	else
680		tb = fib_new_table(net, RT_TABLE_LOCAL);
681
682	if (tb == NULL)
683		return;
684
685	cfg.fc_table = tb->tb_id;
686
687	if (type != RTN_LOCAL)
688		cfg.fc_scope = RT_SCOPE_LINK;
689	else
690		cfg.fc_scope = RT_SCOPE_HOST;
691
692	if (cmd == RTM_NEWROUTE)
693		tb->tb_insert(tb, &cfg);
694	else
695		tb->tb_delete(tb, &cfg);
696}
697
698void fib_add_ifaddr(struct in_ifaddr *ifa)
699{
700	struct in_device *in_dev = ifa->ifa_dev;
701	struct net_device *dev = in_dev->dev;
702	struct in_ifaddr *prim = ifa;
703	__be32 mask = ifa->ifa_mask;
704	__be32 addr = ifa->ifa_local;
705	__be32 prefix = ifa->ifa_address&mask;
706
707	if (ifa->ifa_flags&IFA_F_SECONDARY) {
708		prim = inet_ifa_byprefix(in_dev, prefix, mask);
709		if (prim == NULL) {
710			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
711			return;
712		}
713	}
714
715	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
716
717	if (!(dev->flags&IFF_UP))
718		return;
719
720	/* Add broadcast address, if it is explicitly assigned. */
721	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
722		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
723
724	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
725	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
726		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
727			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
728
729		/* Add network specific broadcasts, when it takes a sense */
730		if (ifa->ifa_prefixlen < 31) {
731			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
732			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
733		}
734	}
735}
736
737static void fib_del_ifaddr(struct in_ifaddr *ifa)
738{
739	struct in_device *in_dev = ifa->ifa_dev;
740	struct net_device *dev = in_dev->dev;
741	struct in_ifaddr *ifa1;
742	struct in_ifaddr *prim = ifa;
743	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
744	__be32 any = ifa->ifa_address&ifa->ifa_mask;
745#define LOCAL_OK	1
746#define BRD_OK		2
747#define BRD0_OK		4
748#define BRD1_OK		8
749	unsigned ok = 0;
750
751	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
752		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
753			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
754	else {
755		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
756		if (prim == NULL) {
757			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
758			return;
759		}
760	}
761
762	/* Deletion is more complicated than add.
763	   We should take care of not to delete too much :-)
764
765	   Scan address list to be sure that addresses are really gone.
766	 */
767
768	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
769		if (ifa->ifa_local == ifa1->ifa_local)
770			ok |= LOCAL_OK;
771		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
772			ok |= BRD_OK;
773		if (brd == ifa1->ifa_broadcast)
774			ok |= BRD1_OK;
775		if (any == ifa1->ifa_broadcast)
776			ok |= BRD0_OK;
777	}
778
779	if (!(ok&BRD_OK))
780		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
781	if (!(ok&BRD1_OK))
782		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
783	if (!(ok&BRD0_OK))
784		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
785	if (!(ok&LOCAL_OK)) {
786		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
787
788		/* Check, that this local address finally disappeared. */
789		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
790			/* And the last, but not the least thing.
791			   We must flush stray FIB entries.
792
793			   First of all, we scan fib_info list searching
794			   for stray nexthop entries, then ignite fib_flush.
795			*/
796			if (fib_sync_down(ifa->ifa_local, NULL, 0))
797				fib_flush(dev->nd_net);
798		}
799	}
800#undef LOCAL_OK
801#undef BRD_OK
802#undef BRD0_OK
803#undef BRD1_OK
804}
805
806static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
807{
808
809	struct fib_result       res;
810	struct flowi            fl = { .mark = frn->fl_mark,
811				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
812							    .tos = frn->fl_tos,
813							    .scope = frn->fl_scope } } };
814
815#ifdef CONFIG_IP_MULTIPLE_TABLES
816	res.r = NULL;
817#endif
818
819	frn->err = -ENOENT;
820	if (tb) {
821		local_bh_disable();
822
823		frn->tb_id = tb->tb_id;
824		frn->err = tb->tb_lookup(tb, &fl, &res);
825
826		if (!frn->err) {
827			frn->prefixlen = res.prefixlen;
828			frn->nh_sel = res.nh_sel;
829			frn->type = res.type;
830			frn->scope = res.scope;
831			fib_res_put(&res);
832		}
833		local_bh_enable();
834	}
835}
836
837static void nl_fib_input(struct sk_buff *skb)
838{
839	struct net *net;
840	struct fib_result_nl *frn;
841	struct nlmsghdr *nlh;
842	struct fib_table *tb;
843	u32 pid;
844
845	net = skb->sk->sk_net;
846	nlh = nlmsg_hdr(skb);
847	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
848	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
849		return;
850
851	skb = skb_clone(skb, GFP_KERNEL);
852	if (skb == NULL)
853		return;
854	nlh = nlmsg_hdr(skb);
855
856	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
857	tb = fib_get_table(net, frn->tb_id_in);
858
859	nl_fib_lookup(frn, tb);
860
861	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
862	NETLINK_CB(skb).pid = 0;         /* from kernel */
863	NETLINK_CB(skb).dst_group = 0;  /* unicast */
864	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
865}
866
867static int nl_fib_lookup_init(struct net *net)
868{
869	struct sock *sk;
870	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
871				   nl_fib_input, NULL, THIS_MODULE);
872	if (sk == NULL)
873		return -EAFNOSUPPORT;
874	net->ipv4.fibnl = sk;
875	return 0;
876}
877
878static void nl_fib_lookup_exit(struct net *net)
879{
880	netlink_kernel_release(net->ipv4.fibnl);
881	net->ipv4.fibnl = NULL;
882}
883
884static void fib_disable_ip(struct net_device *dev, int force)
885{
886	if (fib_sync_down(0, dev, force))
887		fib_flush(dev->nd_net);
888	rt_cache_flush(0);
889	arp_ifdown(dev);
890}
891
892static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
893{
894	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
895
896	switch (event) {
897	case NETDEV_UP:
898		fib_add_ifaddr(ifa);
899#ifdef CONFIG_IP_ROUTE_MULTIPATH
900		fib_sync_up(ifa->ifa_dev->dev);
901#endif
902		rt_cache_flush(-1);
903		break;
904	case NETDEV_DOWN:
905		fib_del_ifaddr(ifa);
906		if (ifa->ifa_dev->ifa_list == NULL) {
907			/* Last address was deleted from this interface.
908			   Disable IP.
909			 */
910			fib_disable_ip(ifa->ifa_dev->dev, 1);
911		} else {
912			rt_cache_flush(-1);
913		}
914		break;
915	}
916	return NOTIFY_DONE;
917}
918
919static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
920{
921	struct net_device *dev = ptr;
922	struct in_device *in_dev = __in_dev_get_rtnl(dev);
923
924	if (event == NETDEV_UNREGISTER) {
925		fib_disable_ip(dev, 2);
926		return NOTIFY_DONE;
927	}
928
929	if (!in_dev)
930		return NOTIFY_DONE;
931
932	switch (event) {
933	case NETDEV_UP:
934		for_ifa(in_dev) {
935			fib_add_ifaddr(ifa);
936		} endfor_ifa(in_dev);
937#ifdef CONFIG_IP_ROUTE_MULTIPATH
938		fib_sync_up(dev);
939#endif
940		rt_cache_flush(-1);
941		break;
942	case NETDEV_DOWN:
943		fib_disable_ip(dev, 0);
944		break;
945	case NETDEV_CHANGEMTU:
946	case NETDEV_CHANGE:
947		rt_cache_flush(0);
948		break;
949	}
950	return NOTIFY_DONE;
951}
952
953static struct notifier_block fib_inetaddr_notifier = {
954	.notifier_call =fib_inetaddr_event,
955};
956
957static struct notifier_block fib_netdev_notifier = {
958	.notifier_call =fib_netdev_event,
959};
960
961static int __net_init ip_fib_net_init(struct net *net)
962{
963	unsigned int i;
964
965	net->ipv4.fib_table_hash = kzalloc(
966			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
967	if (net->ipv4.fib_table_hash == NULL)
968		return -ENOMEM;
969
970	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
971		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
972
973	return fib4_rules_init(net);
974}
975
976static void __net_exit ip_fib_net_exit(struct net *net)
977{
978	unsigned int i;
979
980#ifdef CONFIG_IP_MULTIPLE_TABLES
981	fib4_rules_exit(net);
982#endif
983
984	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
985		struct fib_table *tb;
986		struct hlist_head *head;
987		struct hlist_node *node, *tmp;
988
989		head = &net->ipv4.fib_table_hash[i];
990		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
991			hlist_del(node);
992			tb->tb_flush(tb);
993			kfree(tb);
994		}
995	}
996	kfree(net->ipv4.fib_table_hash);
997}
998
999static int __net_init fib_net_init(struct net *net)
1000{
1001	int error;
1002
1003	error = ip_fib_net_init(net);
1004	if (error < 0)
1005		goto out;
1006	error = nl_fib_lookup_init(net);
1007	if (error < 0)
1008		goto out_nlfl;
1009	error = fib_proc_init(net);
1010	if (error < 0)
1011		goto out_proc;
1012out:
1013	return error;
1014
1015out_proc:
1016	nl_fib_lookup_exit(net);
1017out_nlfl:
1018	ip_fib_net_exit(net);
1019	goto out;
1020}
1021
1022static void __net_exit fib_net_exit(struct net *net)
1023{
1024	fib_proc_exit(net);
1025	nl_fib_lookup_exit(net);
1026	ip_fib_net_exit(net);
1027}
1028
1029static struct pernet_operations fib_net_ops = {
1030	.init = fib_net_init,
1031	.exit = fib_net_exit,
1032};
1033
1034void __init ip_fib_init(void)
1035{
1036	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1037	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1038	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1039
1040	register_pernet_subsys(&fib_net_ops);
1041	register_netdevice_notifier(&fib_netdev_notifier);
1042	register_inetaddr_notifier(&fib_inetaddr_notifier);
1043
1044	fib_hash_init();
1045}
1046
1047EXPORT_SYMBOL(inet_addr_type);
1048EXPORT_SYMBOL(inet_dev_addr_type);
1049EXPORT_SYMBOL(ip_dev_find);
1050