fib_frontend.c revision 775516bfa2bd7993620c9039191a0c30b8d8a496
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
50#ifndef CONFIG_IP_MULTIPLE_TABLES
51
52static int __net_init fib4_rules_init(struct net *net)
53{
54	struct fib_table *local_table, *main_table;
55
56	local_table = fib_hash_table(RT_TABLE_LOCAL);
57	if (local_table == NULL)
58		return -ENOMEM;
59
60	main_table  = fib_hash_table(RT_TABLE_MAIN);
61	if (main_table == NULL)
62		goto fail;
63
64	hlist_add_head_rcu(&local_table->tb_hlist,
65				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
66	hlist_add_head_rcu(&main_table->tb_hlist,
67				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
68	return 0;
69
70fail:
71	kfree(local_table);
72	return -ENOMEM;
73}
74#else
75
76struct fib_table *fib_new_table(struct net *net, u32 id)
77{
78	struct fib_table *tb;
79	unsigned int h;
80
81	if (id == 0)
82		id = RT_TABLE_MAIN;
83	tb = fib_get_table(net, id);
84	if (tb)
85		return tb;
86
87	tb = fib_hash_table(id);
88	if (!tb)
89		return NULL;
90	h = id & (FIB_TABLE_HASHSZ - 1);
91	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
92	return tb;
93}
94
95struct fib_table *fib_get_table(struct net *net, u32 id)
96{
97	struct fib_table *tb;
98	struct hlist_node *node;
99	struct hlist_head *head;
100	unsigned int h;
101
102	if (id == 0)
103		id = RT_TABLE_MAIN;
104	h = id & (FIB_TABLE_HASHSZ - 1);
105
106	rcu_read_lock();
107	head = &net->ipv4.fib_table_hash[h];
108	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
109		if (tb->tb_id == id) {
110			rcu_read_unlock();
111			return tb;
112		}
113	}
114	rcu_read_unlock();
115	return NULL;
116}
117#endif /* CONFIG_IP_MULTIPLE_TABLES */
118
119static void fib_flush(struct net *net)
120{
121	int flushed = 0;
122	struct fib_table *tb;
123	struct hlist_node *node;
124	struct hlist_head *head;
125	unsigned int h;
126
127	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
128		head = &net->ipv4.fib_table_hash[h];
129		hlist_for_each_entry(tb, node, head, tb_hlist)
130			flushed += tb->tb_flush(tb);
131	}
132
133	if (flushed)
134		rt_cache_flush(-1);
135}
136
137/*
138 *	Find the first device with a given source address.
139 */
140
141struct net_device * ip_dev_find(__be32 addr)
142{
143	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
144	struct fib_result res;
145	struct net_device *dev = NULL;
146	struct fib_table *local_table;
147
148#ifdef CONFIG_IP_MULTIPLE_TABLES
149	res.r = NULL;
150#endif
151
152	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
153	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
154		return NULL;
155	if (res.type != RTN_LOCAL)
156		goto out;
157	dev = FIB_RES_DEV(res);
158
159	if (dev)
160		dev_hold(dev);
161out:
162	fib_res_put(&res);
163	return dev;
164}
165
166/*
167 * Find address type as if only "dev" was present in the system. If
168 * on_dev is NULL then all interfaces are taken into consideration.
169 */
170static inline unsigned __inet_dev_addr_type(struct net *net,
171					    const struct net_device *dev,
172					    __be32 addr)
173{
174	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
175	struct fib_result	res;
176	unsigned ret = RTN_BROADCAST;
177	struct fib_table *local_table;
178
179	if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
180		return RTN_BROADCAST;
181	if (ipv4_is_multicast(addr))
182		return RTN_MULTICAST;
183
184#ifdef CONFIG_IP_MULTIPLE_TABLES
185	res.r = NULL;
186#endif
187
188	local_table = fib_get_table(net, RT_TABLE_LOCAL);
189	if (local_table) {
190		ret = RTN_UNICAST;
191		if (!local_table->tb_lookup(local_table, &fl, &res)) {
192			if (!dev || dev == res.fi->fib_dev)
193				ret = res.type;
194			fib_res_put(&res);
195		}
196	}
197	return ret;
198}
199
200unsigned int inet_addr_type(struct net *net, __be32 addr)
201{
202	return __inet_dev_addr_type(net, NULL, addr);
203}
204
205unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
206				__be32 addr)
207{
208       return __inet_dev_addr_type(net, dev, addr);
209}
210
/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check, that source is valid i.e. not broadcast or our local
     address.
   - figure out what "logical" interface this packet arrived
     and calculate "specific destination" address.
   - check, that packet arrived from expected physical interface.

   Returns -EINVAL when the source is rejected.  Otherwise returns the
   result of "nexthop scope >= RT_SCOPE_HOST" (0 or 1) for the matched
   route, or 0 when the last-resort path is taken or the second lookup
   does not yield a unicast route.  On every non-error return *spec_dst
   has been written; *itag is written by fib_combine_itag() after the
   first successful unicast lookup, or set to 0 on the last-resort path.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
	struct in_device *in_dev;
	/* Reverse lookup key: the packet source is used as the destination. */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .iif = oif };
	struct fib_result res;
	int no_addr, rpf;
	int ret;

	no_addr = rpf = 0;
	/* Sample the per-device settings under RCU; only the copies are used later. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
	}
	rcu_read_unlock();

	/* in_dev is only compared against NULL here, never dereferenced. */
	if (in_dev == NULL)
		goto e_inval;

	if (fib_lookup(&fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	/* Accept when the route back to src leaves through the receiving device
	   (or, with multipath, when the route has more than one nexthop). */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf)
		goto e_inval;
	/* Retry the lookup restricted to the receiving device. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(&fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* No usable route: reject if rp_filter is set, else pick an address of dev. */
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}
289
290static inline __be32 sk_extract_addr(struct sockaddr *addr)
291{
292	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
293}
294
295static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
296{
297	struct nlattr *nla;
298
299	nla = (struct nlattr *) ((char *) mx + len);
300	nla->nla_type = type;
301	nla->nla_len = nla_attr_size(4);
302	*(u32 *) nla_data(nla) = value;
303
304	return len + nla_total_size(4);
305}
306
/*
 * Translate an ioctl-style struct rtentry into a struct fib_config.
 *
 * @net: namespace stored in cfg->fc_nlinfo.nl_net and used for lookups
 * @cmd: SIOCDELRT gets a reduced translation; any other value is
 *       treated as a route addition
 * @rt:  kernel copy of the request (rt->rt_dev is still a user pointer)
 * @cfg: output; zeroed first, then filled in
 *
 * Returns 0 on success or a negative errno (-EAFNOSUPPORT, -EINVAL,
 * -EFAULT, -ENODEV, -ENOMEM).  On success cfg->fc_mx may point to a
 * kzalloc()ed buffer; this function never frees it, so the caller
 * has to.
 */
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	/* Host routes keep the full /32; otherwise derive the length from the mask. */
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* A non-zero ioctl metric maps to priority metric - 1. */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	/* Reject routes need no device or gateway processing. */
	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE acts as "not decided yet"; it is resolved below. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		/* Copy at most IFNAMSIZ-1 bytes from user space, then force termination. */
		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		/* "name:suffix" form: look the device up by the part before ':'. */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/* Restore the full name and match it against address labels. */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(net, addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	/* Deletion stops here: the checks and metrics below apply to additions only. */
	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for the (at most) three 4-byte attributes written below. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/* NOTE(review): rt_mtu below 40 makes this subtraction wrap when
		 * converted to the u32 value parameter - confirm whether callers
		 * can pass such values. */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
434
435/*
436 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
437 */
438
439int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
440{
441	struct fib_config cfg;
442	struct rtentry rt;
443	int err;
444
445	switch (cmd) {
446	case SIOCADDRT:		/* Add a route */
447	case SIOCDELRT:		/* Delete a route */
448		if (!capable(CAP_NET_ADMIN))
449			return -EPERM;
450
451		if (copy_from_user(&rt, arg, sizeof(rt)))
452			return -EFAULT;
453
454		rtnl_lock();
455		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
456		if (err == 0) {
457			struct fib_table *tb;
458
459			if (cmd == SIOCDELRT) {
460				tb = fib_get_table(net, cfg.fc_table);
461				if (tb)
462					err = tb->tb_delete(tb, &cfg);
463				else
464					err = -ESRCH;
465			} else {
466				tb = fib_new_table(net, cfg.fc_table);
467				if (tb)
468					err = tb->tb_insert(tb, &cfg);
469				else
470					err = -ENOBUFS;
471			}
472
473			/* allocated by rtentry_to_fib_config() */
474			kfree(cfg.fc_mx);
475		}
476		rtnl_unlock();
477		return err;
478	}
479	return -EINVAL;
480}
481
482const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
483	[RTA_DST]		= { .type = NLA_U32 },
484	[RTA_SRC]		= { .type = NLA_U32 },
485	[RTA_IIF]		= { .type = NLA_U32 },
486	[RTA_OIF]		= { .type = NLA_U32 },
487	[RTA_GATEWAY]		= { .type = NLA_U32 },
488	[RTA_PRIORITY]		= { .type = NLA_U32 },
489	[RTA_PREFSRC]		= { .type = NLA_U32 },
490	[RTA_METRICS]		= { .type = NLA_NESTED },
491	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
492	[RTA_PROTOINFO]		= { .type = NLA_U32 },
493	[RTA_FLOW]		= { .type = NLA_U32 },
494};
495
496static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
497			    struct nlmsghdr *nlh, struct fib_config *cfg)
498{
499	struct nlattr *attr;
500	int err, remaining;
501	struct rtmsg *rtm;
502
503	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
504	if (err < 0)
505		goto errout;
506
507	memset(cfg, 0, sizeof(*cfg));
508
509	rtm = nlmsg_data(nlh);
510	cfg->fc_dst_len = rtm->rtm_dst_len;
511	cfg->fc_tos = rtm->rtm_tos;
512	cfg->fc_table = rtm->rtm_table;
513	cfg->fc_protocol = rtm->rtm_protocol;
514	cfg->fc_scope = rtm->rtm_scope;
515	cfg->fc_type = rtm->rtm_type;
516	cfg->fc_flags = rtm->rtm_flags;
517	cfg->fc_nlflags = nlh->nlmsg_flags;
518
519	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
520	cfg->fc_nlinfo.nlh = nlh;
521	cfg->fc_nlinfo.nl_net = net;
522
523	if (cfg->fc_type > RTN_MAX) {
524		err = -EINVAL;
525		goto errout;
526	}
527
528	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
529		switch (nla_type(attr)) {
530		case RTA_DST:
531			cfg->fc_dst = nla_get_be32(attr);
532			break;
533		case RTA_OIF:
534			cfg->fc_oif = nla_get_u32(attr);
535			break;
536		case RTA_GATEWAY:
537			cfg->fc_gw = nla_get_be32(attr);
538			break;
539		case RTA_PRIORITY:
540			cfg->fc_priority = nla_get_u32(attr);
541			break;
542		case RTA_PREFSRC:
543			cfg->fc_prefsrc = nla_get_be32(attr);
544			break;
545		case RTA_METRICS:
546			cfg->fc_mx = nla_data(attr);
547			cfg->fc_mx_len = nla_len(attr);
548			break;
549		case RTA_MULTIPATH:
550			cfg->fc_mp = nla_data(attr);
551			cfg->fc_mp_len = nla_len(attr);
552			break;
553		case RTA_FLOW:
554			cfg->fc_flow = nla_get_u32(attr);
555			break;
556		case RTA_TABLE:
557			cfg->fc_table = nla_get_u32(attr);
558			break;
559		}
560	}
561
562	return 0;
563errout:
564	return err;
565}
566
567static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
568{
569	struct net *net = skb->sk->sk_net;
570	struct fib_config cfg;
571	struct fib_table *tb;
572	int err;
573
574	err = rtm_to_fib_config(net, skb, nlh, &cfg);
575	if (err < 0)
576		goto errout;
577
578	tb = fib_get_table(net, cfg.fc_table);
579	if (tb == NULL) {
580		err = -ESRCH;
581		goto errout;
582	}
583
584	err = tb->tb_delete(tb, &cfg);
585errout:
586	return err;
587}
588
589static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
590{
591	struct net *net = skb->sk->sk_net;
592	struct fib_config cfg;
593	struct fib_table *tb;
594	int err;
595
596	err = rtm_to_fib_config(net, skb, nlh, &cfg);
597	if (err < 0)
598		goto errout;
599
600	tb = fib_new_table(net, cfg.fc_table);
601	if (tb == NULL) {
602		err = -ENOBUFS;
603		goto errout;
604	}
605
606	err = tb->tb_insert(tb, &cfg);
607errout:
608	return err;
609}
610
611static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
612{
613	struct net *net = skb->sk->sk_net;
614	unsigned int h, s_h;
615	unsigned int e = 0, s_e;
616	struct fib_table *tb;
617	struct hlist_node *node;
618	struct hlist_head *head;
619	int dumped = 0;
620
621	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
622	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
623		return ip_rt_dump(skb, cb);
624
625	s_h = cb->args[0];
626	s_e = cb->args[1];
627
628	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
629		e = 0;
630		head = &net->ipv4.fib_table_hash[h];
631		hlist_for_each_entry(tb, node, head, tb_hlist) {
632			if (e < s_e)
633				goto next;
634			if (dumped)
635				memset(&cb->args[2], 0, sizeof(cb->args) -
636						 2 * sizeof(cb->args[0]));
637			if (tb->tb_dump(tb, skb, cb) < 0)
638				goto out;
639			dumped = 1;
640next:
641			e++;
642		}
643	}
644out:
645	cb->args[1] = e;
646	cb->args[0] = h;
647
648	return skb->len;
649}
650
651/* Prepare and feed intra-kernel routing request.
652   Really, it should be netlink message, but :-( netlink
653   can be not configured, so that we feed it directly
654   to fib engine. It is legal, because all events occur
655   only when netlink is already locked.
656 */
657
658static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
659{
660	struct net *net = ifa->ifa_dev->dev->nd_net;
661	struct fib_table *tb;
662	struct fib_config cfg = {
663		.fc_protocol = RTPROT_KERNEL,
664		.fc_type = type,
665		.fc_dst = dst,
666		.fc_dst_len = dst_len,
667		.fc_prefsrc = ifa->ifa_local,
668		.fc_oif = ifa->ifa_dev->dev->ifindex,
669		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
670		.fc_nlinfo = {
671			.nl_net = net,
672		},
673	};
674
675	if (type == RTN_UNICAST)
676		tb = fib_new_table(net, RT_TABLE_MAIN);
677	else
678		tb = fib_new_table(net, RT_TABLE_LOCAL);
679
680	if (tb == NULL)
681		return;
682
683	cfg.fc_table = tb->tb_id;
684
685	if (type != RTN_LOCAL)
686		cfg.fc_scope = RT_SCOPE_LINK;
687	else
688		cfg.fc_scope = RT_SCOPE_HOST;
689
690	if (cmd == RTM_NEWROUTE)
691		tb->tb_insert(tb, &cfg);
692	else
693		tb->tb_delete(tb, &cfg);
694}
695
696void fib_add_ifaddr(struct in_ifaddr *ifa)
697{
698	struct in_device *in_dev = ifa->ifa_dev;
699	struct net_device *dev = in_dev->dev;
700	struct in_ifaddr *prim = ifa;
701	__be32 mask = ifa->ifa_mask;
702	__be32 addr = ifa->ifa_local;
703	__be32 prefix = ifa->ifa_address&mask;
704
705	if (ifa->ifa_flags&IFA_F_SECONDARY) {
706		prim = inet_ifa_byprefix(in_dev, prefix, mask);
707		if (prim == NULL) {
708			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
709			return;
710		}
711	}
712
713	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
714
715	if (!(dev->flags&IFF_UP))
716		return;
717
718	/* Add broadcast address, if it is explicitly assigned. */
719	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
720		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
721
722	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
723	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
724		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
725			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
726
727		/* Add network specific broadcasts, when it takes a sense */
728		if (ifa->ifa_prefixlen < 31) {
729			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
730			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
731		}
732	}
733}
734
/*
 * Remove the routes that were implied by interface address @ifa.
 *
 * For a primary address the prefix route is deleted right away; for a
 * secondary address the matching primary is looked up instead (and the
 * function gives up with a warning if none is found).  The local and
 * broadcast /32 routes are deleted only if no address on the device's
 * address list still accounts for them, which is what the "ok" bitmask
 * tracks.
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	/* brd: prefix with all host bits set; any: prefix with all host bits clear. */
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
	/* Bits set in "ok" mean "some listed address still uses this route, keep it". */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than add.
	   We should take care not to delete too much :-)

	   Scan the address list to be sure that the addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	/* Delete each /32 route whose address was not found in the scan above. */
	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check, that this local address finally disappeared. */
		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
			/* And the last, but not the least thing.
			   We must flush stray FIB entries.

			   First of all, we scan fib_info list searching
			   for stray nexthop entries, then ignite fib_flush.
			*/
			if (fib_sync_down(ifa->ifa_local, NULL, 0))
				fib_flush(dev->nd_net);
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
803
804static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
805{
806
807	struct fib_result       res;
808	struct flowi            fl = { .mark = frn->fl_mark,
809				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
810							    .tos = frn->fl_tos,
811							    .scope = frn->fl_scope } } };
812
813#ifdef CONFIG_IP_MULTIPLE_TABLES
814	res.r = NULL;
815#endif
816
817	frn->err = -ENOENT;
818	if (tb) {
819		local_bh_disable();
820
821		frn->tb_id = tb->tb_id;
822		frn->err = tb->tb_lookup(tb, &fl, &res);
823
824		if (!frn->err) {
825			frn->prefixlen = res.prefixlen;
826			frn->nh_sel = res.nh_sel;
827			frn->type = res.type;
828			frn->scope = res.scope;
829			fib_res_put(&res);
830		}
831		local_bh_enable();
832	}
833}
834
835static void nl_fib_input(struct sk_buff *skb)
836{
837	struct net *net;
838	struct fib_result_nl *frn;
839	struct nlmsghdr *nlh;
840	struct fib_table *tb;
841	u32 pid;
842
843	net = skb->sk->sk_net;
844	nlh = nlmsg_hdr(skb);
845	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
846	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
847		return;
848
849	skb = skb_clone(skb, GFP_KERNEL);
850	if (skb == NULL)
851		return;
852	nlh = nlmsg_hdr(skb);
853
854	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
855	tb = fib_get_table(net, frn->tb_id_in);
856
857	nl_fib_lookup(frn, tb);
858
859	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
860	NETLINK_CB(skb).pid = 0;         /* from kernel */
861	NETLINK_CB(skb).dst_group = 0;  /* unicast */
862	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
863}
864
865static int nl_fib_lookup_init(struct net *net)
866{
867	struct sock *sk;
868	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
869				   nl_fib_input, NULL, THIS_MODULE);
870	if (sk == NULL)
871		return -EAFNOSUPPORT;
872	net->ipv4.fibnl = sk;
873	return 0;
874}
875
876static void nl_fib_lookup_exit(struct net *net)
877{
878	netlink_kernel_release(net->ipv4.fibnl);
879	net->ipv4.fibnl = NULL;
880}
881
882static void fib_disable_ip(struct net_device *dev, int force)
883{
884	if (fib_sync_down(0, dev, force))
885		fib_flush(dev->nd_net);
886	rt_cache_flush(0);
887	arp_ifdown(dev);
888}
889
890static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
891{
892	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
893
894	switch (event) {
895	case NETDEV_UP:
896		fib_add_ifaddr(ifa);
897#ifdef CONFIG_IP_ROUTE_MULTIPATH
898		fib_sync_up(ifa->ifa_dev->dev);
899#endif
900		rt_cache_flush(-1);
901		break;
902	case NETDEV_DOWN:
903		fib_del_ifaddr(ifa);
904		if (ifa->ifa_dev->ifa_list == NULL) {
905			/* Last address was deleted from this interface.
906			   Disable IP.
907			 */
908			fib_disable_ip(ifa->ifa_dev->dev, 1);
909		} else {
910			rt_cache_flush(-1);
911		}
912		break;
913	}
914	return NOTIFY_DONE;
915}
916
917static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
918{
919	struct net_device *dev = ptr;
920	struct in_device *in_dev = __in_dev_get_rtnl(dev);
921
922	if (event == NETDEV_UNREGISTER) {
923		fib_disable_ip(dev, 2);
924		return NOTIFY_DONE;
925	}
926
927	if (!in_dev)
928		return NOTIFY_DONE;
929
930	switch (event) {
931	case NETDEV_UP:
932		for_ifa(in_dev) {
933			fib_add_ifaddr(ifa);
934		} endfor_ifa(in_dev);
935#ifdef CONFIG_IP_ROUTE_MULTIPATH
936		fib_sync_up(dev);
937#endif
938		rt_cache_flush(-1);
939		break;
940	case NETDEV_DOWN:
941		fib_disable_ip(dev, 0);
942		break;
943	case NETDEV_CHANGEMTU:
944	case NETDEV_CHANGE:
945		rt_cache_flush(0);
946		break;
947	}
948	return NOTIFY_DONE;
949}
950
951static struct notifier_block fib_inetaddr_notifier = {
952	.notifier_call =fib_inetaddr_event,
953};
954
955static struct notifier_block fib_netdev_notifier = {
956	.notifier_call =fib_netdev_event,
957};
958
959static int __net_init ip_fib_net_init(struct net *net)
960{
961	unsigned int i;
962
963	net->ipv4.fib_table_hash = kzalloc(
964			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
965	if (net->ipv4.fib_table_hash == NULL)
966		return -ENOMEM;
967
968	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
969		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
970
971	return fib4_rules_init(net);
972}
973
974static void __net_exit ip_fib_net_exit(struct net *net)
975{
976	unsigned int i;
977
978#ifdef CONFIG_IP_MULTIPLE_TABLES
979	fib4_rules_exit(net);
980#endif
981
982	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
983		struct fib_table *tb;
984		struct hlist_head *head;
985		struct hlist_node *node, *tmp;
986
987		head = &net->ipv4.fib_table_hash[i];
988		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
989			hlist_del(node);
990			tb->tb_flush(tb);
991			kfree(tb);
992		}
993	}
994	kfree(net->ipv4.fib_table_hash);
995}
996
997static int __net_init fib_net_init(struct net *net)
998{
999	int error;
1000
1001	error = ip_fib_net_init(net);
1002	if (error < 0)
1003		goto out;
1004	error = nl_fib_lookup_init(net);
1005	if (error < 0)
1006		goto out_nlfl;
1007	error = fib_proc_init(net);
1008	if (error < 0)
1009		goto out_proc;
1010out:
1011	return error;
1012
1013out_proc:
1014	nl_fib_lookup_exit(net);
1015out_nlfl:
1016	ip_fib_net_exit(net);
1017	goto out;
1018}
1019
/*
 * Per-namespace exit: undo the steps of fib_net_init() in reverse
 * order (proc entries, lookup netlink socket, FIB tables).
 */
static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}
1026
1027static struct pernet_operations fib_net_ops = {
1028	.init = fib_net_init,
1029	.exit = fib_net_exit,
1030};
1031
/*
 * Boot-time initialisation of the FIB frontend: register the
 * rtnetlink route handlers, the per-namespace operations and the
 * device/address notifiers, then initialise the hash-based FIB.
 * NOTE(review): return values of the register_*() calls are not
 * checked here - confirm that failure is not expected at this point.
 */
void __init ip_fib_init(void)
{
	/* NEWROUTE/DELROUTE are "doit" handlers; GETROUTE only provides a dump. */
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

	register_pernet_subsys(&fib_net_ops);
	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	fib_hash_init();
}
1044
/* Symbols from this file that are exported for use by modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1048