fib_frontend.c revision 03cf786c4e83dba404ad23ca58f49147ae52dffd
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
/* Debug print helper: forwards its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)
51
/*
 * Kernel-side NETLINK_FIB_LOOKUP socket.  Assigned once by
 * nl_fib_lookup_init() and used by nl_fib_input() to send replies.
 */
static struct sock *fibnl;
53
54#ifndef CONFIG_IP_MULTIPLE_TABLES
55
/*
 * Without CONFIG_IP_MULTIPLE_TABLES there are exactly two tables; both are
 * allocated by ip_fib_init() with fib_hash_init(RT_TABLE_LOCAL) and
 * fib_hash_init(RT_TABLE_MAIN) respectively.
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;
58
/*
 * Single-bucket table hash: ip_fib_init() links both the local and the main
 * table into fib_table_hash[0].
 */
#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61
62#else
63
/*
 * Table hash for the multiple-tables configuration.  The bucket for a table
 * is selected with "id & (FIB_TABLE_HASHSZ - 1)" in fib_new_table() and
 * fib_get_table().
 */
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
66
67struct fib_table *fib_new_table(u32 id)
68{
69	struct fib_table *tb;
70	unsigned int h;
71
72	if (id == 0)
73		id = RT_TABLE_MAIN;
74	tb = fib_get_table(id);
75	if (tb)
76		return tb;
77	tb = fib_hash_init(id);
78	if (!tb)
79		return NULL;
80	h = id & (FIB_TABLE_HASHSZ - 1);
81	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
82	return tb;
83}
84
85struct fib_table *fib_get_table(u32 id)
86{
87	struct fib_table *tb;
88	struct hlist_node *node;
89	unsigned int h;
90
91	if (id == 0)
92		id = RT_TABLE_MAIN;
93	h = id & (FIB_TABLE_HASHSZ - 1);
94	rcu_read_lock();
95	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
96		if (tb->tb_id == id) {
97			rcu_read_unlock();
98			return tb;
99		}
100	}
101	rcu_read_unlock();
102	return NULL;
103}
104#endif /* CONFIG_IP_MULTIPLE_TABLES */
105
106static void fib_flush(void)
107{
108	int flushed = 0;
109	struct fib_table *tb;
110	struct hlist_node *node;
111	unsigned int h;
112
113	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
114		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
115			flushed += tb->tb_flush(tb);
116	}
117
118	if (flushed)
119		rt_cache_flush(-1);
120}
121
122/*
123 *	Find the first device with a given source address.
124 */
125
126struct net_device * ip_dev_find(__be32 addr)
127{
128	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
129	struct fib_result res;
130	struct net_device *dev = NULL;
131	struct fib_table *local_table;
132
133#ifdef CONFIG_IP_MULTIPLE_TABLES
134	res.r = NULL;
135#endif
136
137	local_table = fib_get_table(RT_TABLE_LOCAL);
138	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
139		return NULL;
140	if (res.type != RTN_LOCAL)
141		goto out;
142	dev = FIB_RES_DEV(res);
143
144	if (dev)
145		dev_hold(dev);
146out:
147	fib_res_put(&res);
148	return dev;
149}
150
151unsigned inet_addr_type(__be32 addr)
152{
153	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
154	struct fib_result	res;
155	unsigned ret = RTN_BROADCAST;
156	struct fib_table *local_table;
157
158	if (ZERONET(addr) || BADCLASS(addr))
159		return RTN_BROADCAST;
160	if (MULTICAST(addr))
161		return RTN_MULTICAST;
162
163#ifdef CONFIG_IP_MULTIPLE_TABLES
164	res.r = NULL;
165#endif
166
167	local_table = fib_get_table(RT_TABLE_LOCAL);
168	if (local_table) {
169		ret = RTN_UNICAST;
170		if (!local_table->tb_lookup(local_table, &fl, &res)) {
171			ret = res.type;
172			fib_res_put(&res);
173		}
174	}
175	return ret;
176}
177
/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check that the source is valid, i.e. not a broadcast or one of
     our local addresses.
   - figure out on which "logical" interface this packet arrived
     and calculate the "specific destination" address.
   - check that the packet arrived from the expected physical interface.

   The check is a reverse lookup: the flow key uses src as destination and
   dst as source.  *spec_dst is written on every non-error return; *itag is
   handed to fib_combine_itag() on a successful first lookup and set to 0 in
   the last-resort path.

   Returns -EINVAL when the device has no in_device, when the reverse route
   is not RTN_UNICAST, or when the device's rp_filter setting rejects the
   packet.  Otherwise returns 0 or 1, where 1 means the scope of the
   selected nexthop is >= RT_SCOPE_HOST.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
	struct in_device *in_dev;
	/* Reverse-path key: source and destination are swapped. */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .iif = oif };
	struct fib_result res;
	int no_addr, rpf;
	int ret;

	/* Snapshot the per-device settings inside the RCU read section;
	   afterwards in_dev is only compared against NULL. */
	no_addr = rpf = 0;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
	}
	rcu_read_unlock();

	if (in_dev == NULL)
		goto e_inval;

	/* No reverse route at all: fall back to the last-resort path. */
	if (fib_lookup(&fl, &res))
		goto last_resort;
	/* From here on res is held and must be released on every exit. */
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	/* Accept when the reverse route points back at the arrival device
	   (or, with multipath, when the route has more than one nexthop). */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf)
		goto e_inval;
	/* Retry the reverse lookup restricted to the arrival device. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(&fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* Reached without holding res. */
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}
256
257static inline __be32 sk_extract_addr(struct sockaddr *addr)
258{
259	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
260}
261
262static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
263{
264	struct nlattr *nla;
265
266	nla = (struct nlattr *) ((char *) mx + len);
267	nla->nla_type = type;
268	nla->nla_len = nla_attr_size(4);
269	*(u32 *) nla_data(nla) = value;
270
271	return len + nla_total_size(4);
272}
273
/*
 *	Translate a struct rtentry, as passed to the SIOCADDRT/SIOCDELRT
 *	ioctls, into a struct fib_config.
 *
 *	cmd is the ioctl command; rt is the kernel copy of the request
 *	(only rt->rt_dev is still a user pointer); cfg is zeroed and then
 *	filled in.
 *
 *	Returns 0 on success, otherwise -EAFNOSUPPORT, -EINVAL, -EFAULT,
 *	-ENODEV or -ENOMEM.  On success cfg->fc_mx may point to memory
 *	obtained from kzalloc(); ip_rt_ioctl() releases it with kfree().
 */
static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	/* Host routes (RTF_HOST) keep the full 32-bit prefix length. */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	/* Additions are flagged as creations with protocol RTPROT_BOOT. */
	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* A non-zero rt_metric is stored minus one as the priority. */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	/* Reject routes need no device, gateway or metrics. */
	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE acts as "not decided yet"; it is replaced below. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		/* Copies IFNAMSIZ-1 bytes from user space; the last byte of
		   devname is then forced to NUL. */
		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		/* "dev:label" form: look the device up by the part before
		   the colon. */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(&init_net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/* Restore the full label and use the address carrying
			   it as the preferred source. */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		/* Universe scope only for an RTF_GATEWAY route whose gateway
		   address classifies as RTN_UNICAST. */
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	/* Deletions skip the gateway check, the scope default and metrics. */
	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for at most three 4-byte attributes. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/* The MTU is stored as RTAX_ADVMSS after subtracting 40. */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		/* The initial RTT is stored shifted left by 3. */
		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
400
401/*
402 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
403 */
404
405int ip_rt_ioctl(unsigned int cmd, void __user *arg)
406{
407	struct fib_config cfg;
408	struct rtentry rt;
409	int err;
410
411	switch (cmd) {
412	case SIOCADDRT:		/* Add a route */
413	case SIOCDELRT:		/* Delete a route */
414		if (!capable(CAP_NET_ADMIN))
415			return -EPERM;
416
417		if (copy_from_user(&rt, arg, sizeof(rt)))
418			return -EFAULT;
419
420		rtnl_lock();
421		err = rtentry_to_fib_config(cmd, &rt, &cfg);
422		if (err == 0) {
423			struct fib_table *tb;
424
425			if (cmd == SIOCDELRT) {
426				tb = fib_get_table(cfg.fc_table);
427				if (tb)
428					err = tb->tb_delete(tb, &cfg);
429				else
430					err = -ESRCH;
431			} else {
432				tb = fib_new_table(cfg.fc_table);
433				if (tb)
434					err = tb->tb_insert(tb, &cfg);
435				else
436					err = -ENOBUFS;
437			}
438
439			/* allocated by rtentry_to_fib_config() */
440			kfree(cfg.fc_mx);
441		}
442		rtnl_unlock();
443		return err;
444	}
445	return -EINVAL;
446}
447
448const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
449	[RTA_DST]		= { .type = NLA_U32 },
450	[RTA_SRC]		= { .type = NLA_U32 },
451	[RTA_IIF]		= { .type = NLA_U32 },
452	[RTA_OIF]		= { .type = NLA_U32 },
453	[RTA_GATEWAY]		= { .type = NLA_U32 },
454	[RTA_PRIORITY]		= { .type = NLA_U32 },
455	[RTA_PREFSRC]		= { .type = NLA_U32 },
456	[RTA_METRICS]		= { .type = NLA_NESTED },
457	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
458	[RTA_PROTOINFO]		= { .type = NLA_U32 },
459	[RTA_FLOW]		= { .type = NLA_U32 },
460};
461
462static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
463			     struct fib_config *cfg)
464{
465	struct nlattr *attr;
466	int err, remaining;
467	struct rtmsg *rtm;
468
469	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
470	if (err < 0)
471		goto errout;
472
473	memset(cfg, 0, sizeof(*cfg));
474
475	rtm = nlmsg_data(nlh);
476	cfg->fc_dst_len = rtm->rtm_dst_len;
477	cfg->fc_tos = rtm->rtm_tos;
478	cfg->fc_table = rtm->rtm_table;
479	cfg->fc_protocol = rtm->rtm_protocol;
480	cfg->fc_scope = rtm->rtm_scope;
481	cfg->fc_type = rtm->rtm_type;
482	cfg->fc_flags = rtm->rtm_flags;
483	cfg->fc_nlflags = nlh->nlmsg_flags;
484
485	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
486	cfg->fc_nlinfo.nlh = nlh;
487
488	if (cfg->fc_type > RTN_MAX) {
489		err = -EINVAL;
490		goto errout;
491	}
492
493	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
494		switch (nla_type(attr)) {
495		case RTA_DST:
496			cfg->fc_dst = nla_get_be32(attr);
497			break;
498		case RTA_OIF:
499			cfg->fc_oif = nla_get_u32(attr);
500			break;
501		case RTA_GATEWAY:
502			cfg->fc_gw = nla_get_be32(attr);
503			break;
504		case RTA_PRIORITY:
505			cfg->fc_priority = nla_get_u32(attr);
506			break;
507		case RTA_PREFSRC:
508			cfg->fc_prefsrc = nla_get_be32(attr);
509			break;
510		case RTA_METRICS:
511			cfg->fc_mx = nla_data(attr);
512			cfg->fc_mx_len = nla_len(attr);
513			break;
514		case RTA_MULTIPATH:
515			cfg->fc_mp = nla_data(attr);
516			cfg->fc_mp_len = nla_len(attr);
517			break;
518		case RTA_FLOW:
519			cfg->fc_flow = nla_get_u32(attr);
520			break;
521		case RTA_TABLE:
522			cfg->fc_table = nla_get_u32(attr);
523			break;
524		}
525	}
526
527	return 0;
528errout:
529	return err;
530}
531
532static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
533{
534	struct fib_config cfg;
535	struct fib_table *tb;
536	int err;
537
538	err = rtm_to_fib_config(skb, nlh, &cfg);
539	if (err < 0)
540		goto errout;
541
542	tb = fib_get_table(cfg.fc_table);
543	if (tb == NULL) {
544		err = -ESRCH;
545		goto errout;
546	}
547
548	err = tb->tb_delete(tb, &cfg);
549errout:
550	return err;
551}
552
553static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
554{
555	struct fib_config cfg;
556	struct fib_table *tb;
557	int err;
558
559	err = rtm_to_fib_config(skb, nlh, &cfg);
560	if (err < 0)
561		goto errout;
562
563	tb = fib_new_table(cfg.fc_table);
564	if (tb == NULL) {
565		err = -ENOBUFS;
566		goto errout;
567	}
568
569	err = tb->tb_insert(tb, &cfg);
570errout:
571	return err;
572}
573
574static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
575{
576	unsigned int h, s_h;
577	unsigned int e = 0, s_e;
578	struct fib_table *tb;
579	struct hlist_node *node;
580	int dumped = 0;
581
582	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
583	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
584		return ip_rt_dump(skb, cb);
585
586	s_h = cb->args[0];
587	s_e = cb->args[1];
588
589	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
590		e = 0;
591		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
592			if (e < s_e)
593				goto next;
594			if (dumped)
595				memset(&cb->args[2], 0, sizeof(cb->args) -
596						 2 * sizeof(cb->args[0]));
597			if (tb->tb_dump(tb, skb, cb) < 0)
598				goto out;
599			dumped = 1;
600next:
601			e++;
602		}
603	}
604out:
605	cb->args[1] = e;
606	cb->args[0] = h;
607
608	return skb->len;
609}
610
611/* Prepare and feed intra-kernel routing request.
612   Really, it should be netlink message, but :-( netlink
613   can be not configured, so that we feed it directly
614   to fib engine. It is legal, because all events occur
615   only when netlink is already locked.
616 */
617
618static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
619{
620	struct fib_table *tb;
621	struct fib_config cfg = {
622		.fc_protocol = RTPROT_KERNEL,
623		.fc_type = type,
624		.fc_dst = dst,
625		.fc_dst_len = dst_len,
626		.fc_prefsrc = ifa->ifa_local,
627		.fc_oif = ifa->ifa_dev->dev->ifindex,
628		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
629	};
630
631	if (type == RTN_UNICAST)
632		tb = fib_new_table(RT_TABLE_MAIN);
633	else
634		tb = fib_new_table(RT_TABLE_LOCAL);
635
636	if (tb == NULL)
637		return;
638
639	cfg.fc_table = tb->tb_id;
640
641	if (type != RTN_LOCAL)
642		cfg.fc_scope = RT_SCOPE_LINK;
643	else
644		cfg.fc_scope = RT_SCOPE_HOST;
645
646	if (cmd == RTM_NEWROUTE)
647		tb->tb_insert(tb, &cfg);
648	else
649		tb->tb_delete(tb, &cfg);
650}
651
652void fib_add_ifaddr(struct in_ifaddr *ifa)
653{
654	struct in_device *in_dev = ifa->ifa_dev;
655	struct net_device *dev = in_dev->dev;
656	struct in_ifaddr *prim = ifa;
657	__be32 mask = ifa->ifa_mask;
658	__be32 addr = ifa->ifa_local;
659	__be32 prefix = ifa->ifa_address&mask;
660
661	if (ifa->ifa_flags&IFA_F_SECONDARY) {
662		prim = inet_ifa_byprefix(in_dev, prefix, mask);
663		if (prim == NULL) {
664			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
665			return;
666		}
667	}
668
669	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
670
671	if (!(dev->flags&IFF_UP))
672		return;
673
674	/* Add broadcast address, if it is explicitly assigned. */
675	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
676		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
677
678	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
679	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
680		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
681			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
682
683		/* Add network specific broadcasts, when it takes a sense */
684		if (ifa->ifa_prefixlen < 31) {
685			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
686			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
687		}
688	}
689}
690
/*
 *	Remove the routes that belonged to an interface address, keeping
 *	any route that another address on the same device still needs.
 *
 *	NOTE(review): the scan below only makes sense if ifa itself is no
 *	longer on in_dev->ifa_list when this runs - confirm with the caller.
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	/* Broadcast addresses derived from the prefix: host bits all ones
	   (brd) and host bits all zero (any). */
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
	/* Bits of "ok": a set bit means some address still on the list uses
	   that value, so the corresponding route must be kept. */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	/* A primary address owns the prefix route; a secondary address only
	   needs its primary for the deletions further down. */
	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than addition.
	   We must take care not to delete too much :-)

	   Scan the address list to be sure that the addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	/* Delete only the routes nobody else on the device still uses. */
	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check that this local address has finally disappeared. */
		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
			/* And last, but not least:
			   we must flush stray FIB entries.

			   First we scan the fib_info list searching
			   for stray nexthop entries, then trigger fib_flush.
			*/
			if (fib_sync_down(ifa->ifa_local, NULL, 0))
				fib_flush();
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
759
760static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
761{
762
763	struct fib_result       res;
764	struct flowi            fl = { .mark = frn->fl_mark,
765				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
766							    .tos = frn->fl_tos,
767							    .scope = frn->fl_scope } } };
768
769#ifdef CONFIG_IP_MULTIPLE_TABLES
770	res.r = NULL;
771#endif
772
773	frn->err = -ENOENT;
774	if (tb) {
775		local_bh_disable();
776
777		frn->tb_id = tb->tb_id;
778		frn->err = tb->tb_lookup(tb, &fl, &res);
779
780		if (!frn->err) {
781			frn->prefixlen = res.prefixlen;
782			frn->nh_sel = res.nh_sel;
783			frn->type = res.type;
784			frn->scope = res.scope;
785			fib_res_put(&res);
786		}
787		local_bh_enable();
788	}
789}
790
791static void nl_fib_input(struct sk_buff *skb)
792{
793	struct fib_result_nl *frn;
794	struct nlmsghdr *nlh;
795	struct fib_table *tb;
796	u32 pid;
797
798	nlh = nlmsg_hdr(skb);
799	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
800	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
801		kfree_skb(skb);
802		return;
803	}
804
805	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
806	tb = fib_get_table(frn->tb_id_in);
807
808	nl_fib_lookup(frn, tb);
809
810	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
811	NETLINK_CB(skb).pid = 0;         /* from kernel */
812	NETLINK_CB(skb).dst_group = 0;  /* unicast */
813	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
814}
815
816static void nl_fib_lookup_init(void)
817{
818	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
819				      nl_fib_input, NULL, THIS_MODULE);
820}
821
/*
 *	Take IP routing state for a device down: sync the FIB with
 *	fib_sync_down() (flushing the tables when it returns non-zero),
 *	then flush the routing cache and call arp_ifdown() for the device.
 *	force is passed through to fib_sync_down() unchanged.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
829
830static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
831{
832	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
833
834	switch (event) {
835	case NETDEV_UP:
836		fib_add_ifaddr(ifa);
837#ifdef CONFIG_IP_ROUTE_MULTIPATH
838		fib_sync_up(ifa->ifa_dev->dev);
839#endif
840		rt_cache_flush(-1);
841		break;
842	case NETDEV_DOWN:
843		fib_del_ifaddr(ifa);
844		if (ifa->ifa_dev->ifa_list == NULL) {
845			/* Last address was deleted from this interface.
846			   Disable IP.
847			 */
848			fib_disable_ip(ifa->ifa_dev->dev, 1);
849		} else {
850			rt_cache_flush(-1);
851		}
852		break;
853	}
854	return NOTIFY_DONE;
855}
856
857static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
858{
859	struct net_device *dev = ptr;
860	struct in_device *in_dev = __in_dev_get_rtnl(dev);
861
862	if (dev->nd_net != &init_net)
863		return NOTIFY_DONE;
864
865	if (event == NETDEV_UNREGISTER) {
866		fib_disable_ip(dev, 2);
867		return NOTIFY_DONE;
868	}
869
870	if (!in_dev)
871		return NOTIFY_DONE;
872
873	switch (event) {
874	case NETDEV_UP:
875		for_ifa(in_dev) {
876			fib_add_ifaddr(ifa);
877		} endfor_ifa(in_dev);
878#ifdef CONFIG_IP_ROUTE_MULTIPATH
879		fib_sync_up(dev);
880#endif
881		rt_cache_flush(-1);
882		break;
883	case NETDEV_DOWN:
884		fib_disable_ip(dev, 0);
885		break;
886	case NETDEV_CHANGEMTU:
887	case NETDEV_CHANGE:
888		rt_cache_flush(0);
889		break;
890	}
891	return NOTIFY_DONE;
892}
893
894static struct notifier_block fib_inetaddr_notifier = {
895	.notifier_call =fib_inetaddr_event,
896};
897
898static struct notifier_block fib_netdev_notifier = {
899	.notifier_call =fib_netdev_event,
900};
901
902void __init ip_fib_init(void)
903{
904	unsigned int i;
905
906	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
907		INIT_HLIST_HEAD(&fib_table_hash[i]);
908#ifndef CONFIG_IP_MULTIPLE_TABLES
909	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
910	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
911	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
912	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
913#else
914	fib4_rules_init();
915#endif
916
917	register_netdevice_notifier(&fib_netdev_notifier);
918	register_inetaddr_notifier(&fib_inetaddr_notifier);
919	nl_fib_lookup_init();
920
921	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
922	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
923	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
924}
925
926EXPORT_SYMBOL(inet_addr_type);
927EXPORT_SYMBOL(ip_dev_find);
928