fib_frontend.c revision a6db9010922f2c02db2bbea8c17c50e451be38d9
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
50#ifndef CONFIG_IP_MULTIPLE_TABLES
51
52static int __net_init fib4_rules_init(struct net *net)
53{
54	struct fib_table *local_table, *main_table;
55
56	local_table = fib_hash_init(RT_TABLE_LOCAL);
57	if (local_table == NULL)
58		return -ENOMEM;
59
60	main_table  = fib_hash_init(RT_TABLE_MAIN);
61	if (main_table == NULL)
62		goto fail;
63
64	hlist_add_head_rcu(&local_table->tb_hlist,
65				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
66	hlist_add_head_rcu(&main_table->tb_hlist,
67				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
68	return 0;
69
70fail:
71	kfree(local_table);
72	return -ENOMEM;
73}
74#else
75
76struct fib_table *fib_new_table(struct net *net, u32 id)
77{
78	struct fib_table *tb;
79	unsigned int h;
80
81	if (id == 0)
82		id = RT_TABLE_MAIN;
83	tb = fib_get_table(net, id);
84	if (tb)
85		return tb;
86	tb = fib_hash_init(id);
87	if (!tb)
88		return NULL;
89	h = id & (FIB_TABLE_HASHSZ - 1);
90	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
91	return tb;
92}
93
94struct fib_table *fib_get_table(struct net *net, u32 id)
95{
96	struct fib_table *tb;
97	struct hlist_node *node;
98	struct hlist_head *head;
99	unsigned int h;
100
101	if (id == 0)
102		id = RT_TABLE_MAIN;
103	h = id & (FIB_TABLE_HASHSZ - 1);
104
105	rcu_read_lock();
106	head = &net->ipv4.fib_table_hash[h];
107	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
108		if (tb->tb_id == id) {
109			rcu_read_unlock();
110			return tb;
111		}
112	}
113	rcu_read_unlock();
114	return NULL;
115}
116#endif /* CONFIG_IP_MULTIPLE_TABLES */
117
118static void fib_flush(struct net *net)
119{
120	int flushed = 0;
121	struct fib_table *tb;
122	struct hlist_node *node;
123	struct hlist_head *head;
124	unsigned int h;
125
126	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
127		head = &net->ipv4.fib_table_hash[h];
128		hlist_for_each_entry(tb, node, head, tb_hlist)
129			flushed += tb->tb_flush(tb);
130	}
131
132	if (flushed)
133		rt_cache_flush(-1);
134}
135
136/*
137 *	Find the first device with a given source address.
138 */
139
140struct net_device * ip_dev_find(__be32 addr)
141{
142	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
143	struct fib_result res;
144	struct net_device *dev = NULL;
145	struct fib_table *local_table;
146
147#ifdef CONFIG_IP_MULTIPLE_TABLES
148	res.r = NULL;
149#endif
150
151	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
152	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
153		return NULL;
154	if (res.type != RTN_LOCAL)
155		goto out;
156	dev = FIB_RES_DEV(res);
157
158	if (dev)
159		dev_hold(dev);
160out:
161	fib_res_put(&res);
162	return dev;
163}
164
165/*
166 * Find address type as if only "dev" was present in the system. If
167 * on_dev is NULL then all interfaces are taken into consideration.
168 */
169static inline unsigned __inet_dev_addr_type(struct net *net,
170					    const struct net_device *dev,
171					    __be32 addr)
172{
173	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
174	struct fib_result	res;
175	unsigned ret = RTN_BROADCAST;
176	struct fib_table *local_table;
177
178	if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
179		return RTN_BROADCAST;
180	if (ipv4_is_multicast(addr))
181		return RTN_MULTICAST;
182
183#ifdef CONFIG_IP_MULTIPLE_TABLES
184	res.r = NULL;
185#endif
186
187	local_table = fib_get_table(net, RT_TABLE_LOCAL);
188	if (local_table) {
189		ret = RTN_UNICAST;
190		if (!local_table->tb_lookup(local_table, &fl, &res)) {
191			if (!dev || dev == res.fi->fib_dev)
192				ret = res.type;
193			fib_res_put(&res);
194		}
195	}
196	return ret;
197}
198
199unsigned int inet_addr_type(struct net *net, __be32 addr)
200{
201	return __inet_dev_addr_type(net, NULL, addr);
202}
203
204unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
205				__be32 addr)
206{
207       return __inet_dev_addr_type(net, dev, addr);
208}
209
210/* Given (packet source, input interface) and optional (dst, oif, tos):
211   - (main) check, that source is valid i.e. not broadcast or our local
212     address.
213   - figure out what "logical" interface this packet arrived
214     and calculate "specific destination" address.
215   - check, that packet arrived from expected physical interface.
216 */
217
218int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
219			struct net_device *dev, __be32 *spec_dst, u32 *itag)
220{
221	struct in_device *in_dev;
222	struct flowi fl = { .nl_u = { .ip4_u =
223				      { .daddr = src,
224					.saddr = dst,
225					.tos = tos } },
226			    .iif = oif };
227	struct fib_result res;
228	int no_addr, rpf;
229	int ret;
230
231	no_addr = rpf = 0;
232	rcu_read_lock();
233	in_dev = __in_dev_get_rcu(dev);
234	if (in_dev) {
235		no_addr = in_dev->ifa_list == NULL;
236		rpf = IN_DEV_RPFILTER(in_dev);
237	}
238	rcu_read_unlock();
239
240	if (in_dev == NULL)
241		goto e_inval;
242
243	if (fib_lookup(&fl, &res))
244		goto last_resort;
245	if (res.type != RTN_UNICAST)
246		goto e_inval_res;
247	*spec_dst = FIB_RES_PREFSRC(res);
248	fib_combine_itag(itag, &res);
249#ifdef CONFIG_IP_ROUTE_MULTIPATH
250	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
251#else
252	if (FIB_RES_DEV(res) == dev)
253#endif
254	{
255		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
256		fib_res_put(&res);
257		return ret;
258	}
259	fib_res_put(&res);
260	if (no_addr)
261		goto last_resort;
262	if (rpf)
263		goto e_inval;
264	fl.oif = dev->ifindex;
265
266	ret = 0;
267	if (fib_lookup(&fl, &res) == 0) {
268		if (res.type == RTN_UNICAST) {
269			*spec_dst = FIB_RES_PREFSRC(res);
270			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
271		}
272		fib_res_put(&res);
273	}
274	return ret;
275
276last_resort:
277	if (rpf)
278		goto e_inval;
279	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
280	*itag = 0;
281	return 0;
282
283e_inval_res:
284	fib_res_put(&res);
285e_inval:
286	return -EINVAL;
287}
288
289static inline __be32 sk_extract_addr(struct sockaddr *addr)
290{
291	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
292}
293
294static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
295{
296	struct nlattr *nla;
297
298	nla = (struct nlattr *) ((char *) mx + len);
299	nla->nla_type = type;
300	nla->nla_len = nla_attr_size(4);
301	*(u32 *) nla_data(nla) = value;
302
303	return len + nla_total_size(4);
304}
305
306static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
307				 struct fib_config *cfg)
308{
309	__be32 addr;
310	int plen;
311
312	memset(cfg, 0, sizeof(*cfg));
313	cfg->fc_nlinfo.nl_net = net;
314
315	if (rt->rt_dst.sa_family != AF_INET)
316		return -EAFNOSUPPORT;
317
318	/*
319	 * Check mask for validity:
320	 * a) it must be contiguous.
321	 * b) destination must have all host bits clear.
322	 * c) if application forgot to set correct family (AF_INET),
323	 *    reject request unless it is absolutely clear i.e.
324	 *    both family and mask are zero.
325	 */
326	plen = 32;
327	addr = sk_extract_addr(&rt->rt_dst);
328	if (!(rt->rt_flags & RTF_HOST)) {
329		__be32 mask = sk_extract_addr(&rt->rt_genmask);
330
331		if (rt->rt_genmask.sa_family != AF_INET) {
332			if (mask || rt->rt_genmask.sa_family)
333				return -EAFNOSUPPORT;
334		}
335
336		if (bad_mask(mask, addr))
337			return -EINVAL;
338
339		plen = inet_mask_len(mask);
340	}
341
342	cfg->fc_dst_len = plen;
343	cfg->fc_dst = addr;
344
345	if (cmd != SIOCDELRT) {
346		cfg->fc_nlflags = NLM_F_CREATE;
347		cfg->fc_protocol = RTPROT_BOOT;
348	}
349
350	if (rt->rt_metric)
351		cfg->fc_priority = rt->rt_metric - 1;
352
353	if (rt->rt_flags & RTF_REJECT) {
354		cfg->fc_scope = RT_SCOPE_HOST;
355		cfg->fc_type = RTN_UNREACHABLE;
356		return 0;
357	}
358
359	cfg->fc_scope = RT_SCOPE_NOWHERE;
360	cfg->fc_type = RTN_UNICAST;
361
362	if (rt->rt_dev) {
363		char *colon;
364		struct net_device *dev;
365		char devname[IFNAMSIZ];
366
367		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
368			return -EFAULT;
369
370		devname[IFNAMSIZ-1] = 0;
371		colon = strchr(devname, ':');
372		if (colon)
373			*colon = 0;
374		dev = __dev_get_by_name(net, devname);
375		if (!dev)
376			return -ENODEV;
377		cfg->fc_oif = dev->ifindex;
378		if (colon) {
379			struct in_ifaddr *ifa;
380			struct in_device *in_dev = __in_dev_get_rtnl(dev);
381			if (!in_dev)
382				return -ENODEV;
383			*colon = ':';
384			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
385				if (strcmp(ifa->ifa_label, devname) == 0)
386					break;
387			if (ifa == NULL)
388				return -ENODEV;
389			cfg->fc_prefsrc = ifa->ifa_local;
390		}
391	}
392
393	addr = sk_extract_addr(&rt->rt_gateway);
394	if (rt->rt_gateway.sa_family == AF_INET && addr) {
395		cfg->fc_gw = addr;
396		if (rt->rt_flags & RTF_GATEWAY &&
397		    inet_addr_type(net, addr) == RTN_UNICAST)
398			cfg->fc_scope = RT_SCOPE_UNIVERSE;
399	}
400
401	if (cmd == SIOCDELRT)
402		return 0;
403
404	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
405		return -EINVAL;
406
407	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
408		cfg->fc_scope = RT_SCOPE_LINK;
409
410	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
411		struct nlattr *mx;
412		int len = 0;
413
414		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
415		if (mx == NULL)
416			return -ENOMEM;
417
418		if (rt->rt_flags & RTF_MTU)
419			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
420
421		if (rt->rt_flags & RTF_WINDOW)
422			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
423
424		if (rt->rt_flags & RTF_IRTT)
425			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
426
427		cfg->fc_mx = mx;
428		cfg->fc_mx_len = len;
429	}
430
431	return 0;
432}
433
434/*
435 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
436 */
437
438int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
439{
440	struct fib_config cfg;
441	struct rtentry rt;
442	int err;
443
444	switch (cmd) {
445	case SIOCADDRT:		/* Add a route */
446	case SIOCDELRT:		/* Delete a route */
447		if (!capable(CAP_NET_ADMIN))
448			return -EPERM;
449
450		if (copy_from_user(&rt, arg, sizeof(rt)))
451			return -EFAULT;
452
453		rtnl_lock();
454		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
455		if (err == 0) {
456			struct fib_table *tb;
457
458			if (cmd == SIOCDELRT) {
459				tb = fib_get_table(net, cfg.fc_table);
460				if (tb)
461					err = tb->tb_delete(tb, &cfg);
462				else
463					err = -ESRCH;
464			} else {
465				tb = fib_new_table(net, cfg.fc_table);
466				if (tb)
467					err = tb->tb_insert(tb, &cfg);
468				else
469					err = -ENOBUFS;
470			}
471
472			/* allocated by rtentry_to_fib_config() */
473			kfree(cfg.fc_mx);
474		}
475		rtnl_unlock();
476		return err;
477	}
478	return -EINVAL;
479}
480
481const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
482	[RTA_DST]		= { .type = NLA_U32 },
483	[RTA_SRC]		= { .type = NLA_U32 },
484	[RTA_IIF]		= { .type = NLA_U32 },
485	[RTA_OIF]		= { .type = NLA_U32 },
486	[RTA_GATEWAY]		= { .type = NLA_U32 },
487	[RTA_PRIORITY]		= { .type = NLA_U32 },
488	[RTA_PREFSRC]		= { .type = NLA_U32 },
489	[RTA_METRICS]		= { .type = NLA_NESTED },
490	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
491	[RTA_PROTOINFO]		= { .type = NLA_U32 },
492	[RTA_FLOW]		= { .type = NLA_U32 },
493};
494
495static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
496			    struct nlmsghdr *nlh, struct fib_config *cfg)
497{
498	struct nlattr *attr;
499	int err, remaining;
500	struct rtmsg *rtm;
501
502	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
503	if (err < 0)
504		goto errout;
505
506	memset(cfg, 0, sizeof(*cfg));
507
508	rtm = nlmsg_data(nlh);
509	cfg->fc_dst_len = rtm->rtm_dst_len;
510	cfg->fc_tos = rtm->rtm_tos;
511	cfg->fc_table = rtm->rtm_table;
512	cfg->fc_protocol = rtm->rtm_protocol;
513	cfg->fc_scope = rtm->rtm_scope;
514	cfg->fc_type = rtm->rtm_type;
515	cfg->fc_flags = rtm->rtm_flags;
516	cfg->fc_nlflags = nlh->nlmsg_flags;
517
518	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
519	cfg->fc_nlinfo.nlh = nlh;
520	cfg->fc_nlinfo.nl_net = net;
521
522	if (cfg->fc_type > RTN_MAX) {
523		err = -EINVAL;
524		goto errout;
525	}
526
527	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
528		switch (nla_type(attr)) {
529		case RTA_DST:
530			cfg->fc_dst = nla_get_be32(attr);
531			break;
532		case RTA_OIF:
533			cfg->fc_oif = nla_get_u32(attr);
534			break;
535		case RTA_GATEWAY:
536			cfg->fc_gw = nla_get_be32(attr);
537			break;
538		case RTA_PRIORITY:
539			cfg->fc_priority = nla_get_u32(attr);
540			break;
541		case RTA_PREFSRC:
542			cfg->fc_prefsrc = nla_get_be32(attr);
543			break;
544		case RTA_METRICS:
545			cfg->fc_mx = nla_data(attr);
546			cfg->fc_mx_len = nla_len(attr);
547			break;
548		case RTA_MULTIPATH:
549			cfg->fc_mp = nla_data(attr);
550			cfg->fc_mp_len = nla_len(attr);
551			break;
552		case RTA_FLOW:
553			cfg->fc_flow = nla_get_u32(attr);
554			break;
555		case RTA_TABLE:
556			cfg->fc_table = nla_get_u32(attr);
557			break;
558		}
559	}
560
561	return 0;
562errout:
563	return err;
564}
565
566static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
567{
568	struct net *net = skb->sk->sk_net;
569	struct fib_config cfg;
570	struct fib_table *tb;
571	int err;
572
573	err = rtm_to_fib_config(net, skb, nlh, &cfg);
574	if (err < 0)
575		goto errout;
576
577	tb = fib_get_table(net, cfg.fc_table);
578	if (tb == NULL) {
579		err = -ESRCH;
580		goto errout;
581	}
582
583	err = tb->tb_delete(tb, &cfg);
584errout:
585	return err;
586}
587
588static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
589{
590	struct net *net = skb->sk->sk_net;
591	struct fib_config cfg;
592	struct fib_table *tb;
593	int err;
594
595	err = rtm_to_fib_config(net, skb, nlh, &cfg);
596	if (err < 0)
597		goto errout;
598
599	tb = fib_new_table(net, cfg.fc_table);
600	if (tb == NULL) {
601		err = -ENOBUFS;
602		goto errout;
603	}
604
605	err = tb->tb_insert(tb, &cfg);
606errout:
607	return err;
608}
609
610static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
611{
612	struct net *net = skb->sk->sk_net;
613	unsigned int h, s_h;
614	unsigned int e = 0, s_e;
615	struct fib_table *tb;
616	struct hlist_node *node;
617	struct hlist_head *head;
618	int dumped = 0;
619
620	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
621	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
622		return ip_rt_dump(skb, cb);
623
624	s_h = cb->args[0];
625	s_e = cb->args[1];
626
627	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
628		e = 0;
629		head = &net->ipv4.fib_table_hash[h];
630		hlist_for_each_entry(tb, node, head, tb_hlist) {
631			if (e < s_e)
632				goto next;
633			if (dumped)
634				memset(&cb->args[2], 0, sizeof(cb->args) -
635						 2 * sizeof(cb->args[0]));
636			if (tb->tb_dump(tb, skb, cb) < 0)
637				goto out;
638			dumped = 1;
639next:
640			e++;
641		}
642	}
643out:
644	cb->args[1] = e;
645	cb->args[0] = h;
646
647	return skb->len;
648}
649
650/* Prepare and feed intra-kernel routing request.
651   Really, it should be netlink message, but :-( netlink
652   can be not configured, so that we feed it directly
653   to fib engine. It is legal, because all events occur
654   only when netlink is already locked.
655 */
656
657static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
658{
659	struct net *net = ifa->ifa_dev->dev->nd_net;
660	struct fib_table *tb;
661	struct fib_config cfg = {
662		.fc_protocol = RTPROT_KERNEL,
663		.fc_type = type,
664		.fc_dst = dst,
665		.fc_dst_len = dst_len,
666		.fc_prefsrc = ifa->ifa_local,
667		.fc_oif = ifa->ifa_dev->dev->ifindex,
668		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
669		.fc_nlinfo = {
670			.nl_net = net,
671		},
672	};
673
674	if (type == RTN_UNICAST)
675		tb = fib_new_table(net, RT_TABLE_MAIN);
676	else
677		tb = fib_new_table(net, RT_TABLE_LOCAL);
678
679	if (tb == NULL)
680		return;
681
682	cfg.fc_table = tb->tb_id;
683
684	if (type != RTN_LOCAL)
685		cfg.fc_scope = RT_SCOPE_LINK;
686	else
687		cfg.fc_scope = RT_SCOPE_HOST;
688
689	if (cmd == RTM_NEWROUTE)
690		tb->tb_insert(tb, &cfg);
691	else
692		tb->tb_delete(tb, &cfg);
693}
694
695void fib_add_ifaddr(struct in_ifaddr *ifa)
696{
697	struct in_device *in_dev = ifa->ifa_dev;
698	struct net_device *dev = in_dev->dev;
699	struct in_ifaddr *prim = ifa;
700	__be32 mask = ifa->ifa_mask;
701	__be32 addr = ifa->ifa_local;
702	__be32 prefix = ifa->ifa_address&mask;
703
704	if (ifa->ifa_flags&IFA_F_SECONDARY) {
705		prim = inet_ifa_byprefix(in_dev, prefix, mask);
706		if (prim == NULL) {
707			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
708			return;
709		}
710	}
711
712	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
713
714	if (!(dev->flags&IFF_UP))
715		return;
716
717	/* Add broadcast address, if it is explicitly assigned. */
718	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
719		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
720
721	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
722	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
723		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
724			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
725
726		/* Add network specific broadcasts, when it takes a sense */
727		if (ifa->ifa_prefixlen < 31) {
728			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
729			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
730		}
731	}
732}
733
734static void fib_del_ifaddr(struct in_ifaddr *ifa)
735{
736	struct in_device *in_dev = ifa->ifa_dev;
737	struct net_device *dev = in_dev->dev;
738	struct in_ifaddr *ifa1;
739	struct in_ifaddr *prim = ifa;
740	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
741	__be32 any = ifa->ifa_address&ifa->ifa_mask;
742#define LOCAL_OK	1
743#define BRD_OK		2
744#define BRD0_OK		4
745#define BRD1_OK		8
746	unsigned ok = 0;
747
748	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
749		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
750			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
751	else {
752		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
753		if (prim == NULL) {
754			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
755			return;
756		}
757	}
758
759	/* Deletion is more complicated than add.
760	   We should take care of not to delete too much :-)
761
762	   Scan address list to be sure that addresses are really gone.
763	 */
764
765	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
766		if (ifa->ifa_local == ifa1->ifa_local)
767			ok |= LOCAL_OK;
768		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
769			ok |= BRD_OK;
770		if (brd == ifa1->ifa_broadcast)
771			ok |= BRD1_OK;
772		if (any == ifa1->ifa_broadcast)
773			ok |= BRD0_OK;
774	}
775
776	if (!(ok&BRD_OK))
777		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
778	if (!(ok&BRD1_OK))
779		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
780	if (!(ok&BRD0_OK))
781		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
782	if (!(ok&LOCAL_OK)) {
783		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
784
785		/* Check, that this local address finally disappeared. */
786		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
787			/* And the last, but not the least thing.
788			   We must flush stray FIB entries.
789
790			   First of all, we scan fib_info list searching
791			   for stray nexthop entries, then ignite fib_flush.
792			*/
793			if (fib_sync_down(ifa->ifa_local, NULL, 0))
794				fib_flush(dev->nd_net);
795		}
796	}
797#undef LOCAL_OK
798#undef BRD_OK
799#undef BRD0_OK
800#undef BRD1_OK
801}
802
803static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
804{
805
806	struct fib_result       res;
807	struct flowi            fl = { .mark = frn->fl_mark,
808				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
809							    .tos = frn->fl_tos,
810							    .scope = frn->fl_scope } } };
811
812#ifdef CONFIG_IP_MULTIPLE_TABLES
813	res.r = NULL;
814#endif
815
816	frn->err = -ENOENT;
817	if (tb) {
818		local_bh_disable();
819
820		frn->tb_id = tb->tb_id;
821		frn->err = tb->tb_lookup(tb, &fl, &res);
822
823		if (!frn->err) {
824			frn->prefixlen = res.prefixlen;
825			frn->nh_sel = res.nh_sel;
826			frn->type = res.type;
827			frn->scope = res.scope;
828			fib_res_put(&res);
829		}
830		local_bh_enable();
831	}
832}
833
834static void nl_fib_input(struct sk_buff *skb)
835{
836	struct net *net;
837	struct fib_result_nl *frn;
838	struct nlmsghdr *nlh;
839	struct fib_table *tb;
840	u32 pid;
841
842	net = skb->sk->sk_net;
843	nlh = nlmsg_hdr(skb);
844	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
845	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
846		return;
847
848	skb = skb_clone(skb, GFP_KERNEL);
849	if (skb == NULL)
850		return;
851	nlh = nlmsg_hdr(skb);
852
853	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
854	tb = fib_get_table(net, frn->tb_id_in);
855
856	nl_fib_lookup(frn, tb);
857
858	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
859	NETLINK_CB(skb).pid = 0;         /* from kernel */
860	NETLINK_CB(skb).dst_group = 0;  /* unicast */
861	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
862}
863
864static int nl_fib_lookup_init(struct net *net)
865{
866	struct sock *sk;
867	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
868				   nl_fib_input, NULL, THIS_MODULE);
869	if (sk == NULL)
870		return -EAFNOSUPPORT;
871	/* Don't hold an extra reference on the namespace */
872	put_net(sk->sk_net);
873	net->ipv4.fibnl = sk;
874	return 0;
875}
876
877static void nl_fib_lookup_exit(struct net *net)
878{
879	/* At the last minute lie and say this is a socket for the
880	 * initial network namespace. So the socket will  be safe to free.
881	 */
882	net->ipv4.fibnl->sk_net = get_net(&init_net);
883	sock_put(net->ipv4.fibnl);
884}
885
886static void fib_disable_ip(struct net_device *dev, int force)
887{
888	if (fib_sync_down(0, dev, force))
889		fib_flush(dev->nd_net);
890	rt_cache_flush(0);
891	arp_ifdown(dev);
892}
893
894static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
895{
896	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
897
898	switch (event) {
899	case NETDEV_UP:
900		fib_add_ifaddr(ifa);
901#ifdef CONFIG_IP_ROUTE_MULTIPATH
902		fib_sync_up(ifa->ifa_dev->dev);
903#endif
904		rt_cache_flush(-1);
905		break;
906	case NETDEV_DOWN:
907		fib_del_ifaddr(ifa);
908		if (ifa->ifa_dev->ifa_list == NULL) {
909			/* Last address was deleted from this interface.
910			   Disable IP.
911			 */
912			fib_disable_ip(ifa->ifa_dev->dev, 1);
913		} else {
914			rt_cache_flush(-1);
915		}
916		break;
917	}
918	return NOTIFY_DONE;
919}
920
921static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
922{
923	struct net_device *dev = ptr;
924	struct in_device *in_dev = __in_dev_get_rtnl(dev);
925
926	if (event == NETDEV_UNREGISTER) {
927		fib_disable_ip(dev, 2);
928		return NOTIFY_DONE;
929	}
930
931	if (!in_dev)
932		return NOTIFY_DONE;
933
934	switch (event) {
935	case NETDEV_UP:
936		for_ifa(in_dev) {
937			fib_add_ifaddr(ifa);
938		} endfor_ifa(in_dev);
939#ifdef CONFIG_IP_ROUTE_MULTIPATH
940		fib_sync_up(dev);
941#endif
942		rt_cache_flush(-1);
943		break;
944	case NETDEV_DOWN:
945		fib_disable_ip(dev, 0);
946		break;
947	case NETDEV_CHANGEMTU:
948	case NETDEV_CHANGE:
949		rt_cache_flush(0);
950		break;
951	}
952	return NOTIFY_DONE;
953}
954
955static struct notifier_block fib_inetaddr_notifier = {
956	.notifier_call =fib_inetaddr_event,
957};
958
959static struct notifier_block fib_netdev_notifier = {
960	.notifier_call =fib_netdev_event,
961};
962
963static int __net_init ip_fib_net_init(struct net *net)
964{
965	unsigned int i;
966
967	net->ipv4.fib_table_hash = kzalloc(
968			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
969	if (net->ipv4.fib_table_hash == NULL)
970		return -ENOMEM;
971
972	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
973		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
974
975	return fib4_rules_init(net);
976}
977
978static void __net_exit ip_fib_net_exit(struct net *net)
979{
980	unsigned int i;
981
982#ifdef CONFIG_IP_MULTIPLE_TABLES
983	fib4_rules_exit(net);
984#endif
985
986	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
987		struct fib_table *tb;
988		struct hlist_head *head;
989		struct hlist_node *node, *tmp;
990
991		head = &net->ipv4.fib_table_hash[i];
992		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
993			hlist_del(node);
994			tb->tb_flush(tb);
995			kfree(tb);
996		}
997	}
998	kfree(net->ipv4.fib_table_hash);
999}
1000
1001static int __net_init fib_net_init(struct net *net)
1002{
1003	int error;
1004
1005	error = ip_fib_net_init(net);
1006	if (error < 0)
1007		goto out;
1008	error = nl_fib_lookup_init(net);
1009	if (error < 0)
1010		goto out_nlfl;
1011	error = fib_proc_init(net);
1012	if (error < 0)
1013		goto out_proc;
1014out:
1015	return error;
1016
1017out_proc:
1018	nl_fib_lookup_exit(net);
1019out_nlfl:
1020	ip_fib_net_exit(net);
1021	goto out;
1022}
1023
1024static void __net_exit fib_net_exit(struct net *net)
1025{
1026	fib_proc_exit(net);
1027	nl_fib_lookup_exit(net);
1028	ip_fib_net_exit(net);
1029}
1030
1031static struct pernet_operations fib_net_ops = {
1032	.init = fib_net_init,
1033	.exit = fib_net_exit,
1034};
1035
1036void __init ip_fib_init(void)
1037{
1038	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1039	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1040	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1041
1042	register_pernet_subsys(&fib_net_ops);
1043	register_netdevice_notifier(&fib_netdev_notifier);
1044	register_inetaddr_notifier(&fib_inetaddr_notifier);
1045}
1046
/* Symbols exported from this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1050