fib_frontend.c revision 010278ec4cdf404aefc0bbd5e7406674fec95286
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
50#ifndef CONFIG_IP_MULTIPLE_TABLES
51
52static int __net_init fib4_rules_init(struct net *net)
53{
54	struct fib_table *local_table, *main_table;
55
56	local_table = fib_hash_table(RT_TABLE_LOCAL);
57	if (local_table == NULL)
58		return -ENOMEM;
59
60	main_table  = fib_hash_table(RT_TABLE_MAIN);
61	if (main_table == NULL)
62		goto fail;
63
64	hlist_add_head_rcu(&local_table->tb_hlist,
65				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
66	hlist_add_head_rcu(&main_table->tb_hlist,
67				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
68	return 0;
69
70fail:
71	kfree(local_table);
72	return -ENOMEM;
73}
74#else
75
76struct fib_table *fib_new_table(struct net *net, u32 id)
77{
78	struct fib_table *tb;
79	unsigned int h;
80
81	if (id == 0)
82		id = RT_TABLE_MAIN;
83	tb = fib_get_table(net, id);
84	if (tb)
85		return tb;
86
87	tb = fib_hash_table(id);
88	if (!tb)
89		return NULL;
90	h = id & (FIB_TABLE_HASHSZ - 1);
91	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
92	return tb;
93}
94
95struct fib_table *fib_get_table(struct net *net, u32 id)
96{
97	struct fib_table *tb;
98	struct hlist_node *node;
99	struct hlist_head *head;
100	unsigned int h;
101
102	if (id == 0)
103		id = RT_TABLE_MAIN;
104	h = id & (FIB_TABLE_HASHSZ - 1);
105
106	rcu_read_lock();
107	head = &net->ipv4.fib_table_hash[h];
108	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
109		if (tb->tb_id == id) {
110			rcu_read_unlock();
111			return tb;
112		}
113	}
114	rcu_read_unlock();
115	return NULL;
116}
117#endif /* CONFIG_IP_MULTIPLE_TABLES */
118
119void fib_select_default(struct net *net,
120			const struct flowi *flp, struct fib_result *res)
121{
122	struct fib_table *tb;
123	int table = RT_TABLE_MAIN;
124#ifdef CONFIG_IP_MULTIPLE_TABLES
125	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
126		return;
127	table = res->r->table;
128#endif
129	tb = fib_get_table(net, table);
130	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
131		tb->tb_select_default(tb, flp, res);
132}
133
134static void fib_flush(struct net *net)
135{
136	int flushed = 0;
137	struct fib_table *tb;
138	struct hlist_node *node;
139	struct hlist_head *head;
140	unsigned int h;
141
142	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
143		head = &net->ipv4.fib_table_hash[h];
144		hlist_for_each_entry(tb, node, head, tb_hlist)
145			flushed += tb->tb_flush(tb);
146	}
147
148	if (flushed)
149		rt_cache_flush(-1);
150}
151
152/*
153 *	Find the first device with a given source address.
154 */
155
156struct net_device * ip_dev_find(__be32 addr)
157{
158	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
159	struct fib_result res;
160	struct net_device *dev = NULL;
161	struct fib_table *local_table;
162
163#ifdef CONFIG_IP_MULTIPLE_TABLES
164	res.r = NULL;
165#endif
166
167	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
168	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
169		return NULL;
170	if (res.type != RTN_LOCAL)
171		goto out;
172	dev = FIB_RES_DEV(res);
173
174	if (dev)
175		dev_hold(dev);
176out:
177	fib_res_put(&res);
178	return dev;
179}
180
181/*
182 * Find address type as if only "dev" was present in the system. If
183 * on_dev is NULL then all interfaces are taken into consideration.
184 */
185static inline unsigned __inet_dev_addr_type(struct net *net,
186					    const struct net_device *dev,
187					    __be32 addr)
188{
189	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
190	struct fib_result	res;
191	unsigned ret = RTN_BROADCAST;
192	struct fib_table *local_table;
193
194	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
195		return RTN_BROADCAST;
196	if (ipv4_is_multicast(addr))
197		return RTN_MULTICAST;
198
199#ifdef CONFIG_IP_MULTIPLE_TABLES
200	res.r = NULL;
201#endif
202
203	local_table = fib_get_table(net, RT_TABLE_LOCAL);
204	if (local_table) {
205		ret = RTN_UNICAST;
206		if (!local_table->tb_lookup(local_table, &fl, &res)) {
207			if (!dev || dev == res.fi->fib_dev)
208				ret = res.type;
209			fib_res_put(&res);
210		}
211	}
212	return ret;
213}
214
215unsigned int inet_addr_type(struct net *net, __be32 addr)
216{
217	return __inet_dev_addr_type(net, NULL, addr);
218}
219
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221				__be32 addr)
222{
223       return __inet_dev_addr_type(net, dev, addr);
224}
225
226/* Given (packet source, input interface) and optional (dst, oif, tos):
227   - (main) check, that source is valid i.e. not broadcast or our local
228     address.
229   - figure out what "logical" interface this packet arrived
230     and calculate "specific destination" address.
231   - check, that packet arrived from expected physical interface.
232 */
233
234int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
235			struct net_device *dev, __be32 *spec_dst, u32 *itag)
236{
237	struct in_device *in_dev;
238	struct flowi fl = { .nl_u = { .ip4_u =
239				      { .daddr = src,
240					.saddr = dst,
241					.tos = tos } },
242			    .iif = oif };
243	struct fib_result res;
244	int no_addr, rpf;
245	int ret;
246	struct net *net;
247
248	no_addr = rpf = 0;
249	rcu_read_lock();
250	in_dev = __in_dev_get_rcu(dev);
251	if (in_dev) {
252		no_addr = in_dev->ifa_list == NULL;
253		rpf = IN_DEV_RPFILTER(in_dev);
254	}
255	rcu_read_unlock();
256
257	if (in_dev == NULL)
258		goto e_inval;
259
260	net = dev->nd_net;
261	if (fib_lookup(net, &fl, &res))
262		goto last_resort;
263	if (res.type != RTN_UNICAST)
264		goto e_inval_res;
265	*spec_dst = FIB_RES_PREFSRC(res);
266	fib_combine_itag(itag, &res);
267#ifdef CONFIG_IP_ROUTE_MULTIPATH
268	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
269#else
270	if (FIB_RES_DEV(res) == dev)
271#endif
272	{
273		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
274		fib_res_put(&res);
275		return ret;
276	}
277	fib_res_put(&res);
278	if (no_addr)
279		goto last_resort;
280	if (rpf)
281		goto e_inval;
282	fl.oif = dev->ifindex;
283
284	ret = 0;
285	if (fib_lookup(net, &fl, &res) == 0) {
286		if (res.type == RTN_UNICAST) {
287			*spec_dst = FIB_RES_PREFSRC(res);
288			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
289		}
290		fib_res_put(&res);
291	}
292	return ret;
293
294last_resort:
295	if (rpf)
296		goto e_inval;
297	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
298	*itag = 0;
299	return 0;
300
301e_inval_res:
302	fib_res_put(&res);
303e_inval:
304	return -EINVAL;
305}
306
307static inline __be32 sk_extract_addr(struct sockaddr *addr)
308{
309	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
310}
311
312static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
313{
314	struct nlattr *nla;
315
316	nla = (struct nlattr *) ((char *) mx + len);
317	nla->nla_type = type;
318	nla->nla_len = nla_attr_size(4);
319	*(u32 *) nla_data(nla) = value;
320
321	return len + nla_total_size(4);
322}
323
324static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
325				 struct fib_config *cfg)
326{
327	__be32 addr;
328	int plen;
329
330	memset(cfg, 0, sizeof(*cfg));
331	cfg->fc_nlinfo.nl_net = net;
332
333	if (rt->rt_dst.sa_family != AF_INET)
334		return -EAFNOSUPPORT;
335
336	/*
337	 * Check mask for validity:
338	 * a) it must be contiguous.
339	 * b) destination must have all host bits clear.
340	 * c) if application forgot to set correct family (AF_INET),
341	 *    reject request unless it is absolutely clear i.e.
342	 *    both family and mask are zero.
343	 */
344	plen = 32;
345	addr = sk_extract_addr(&rt->rt_dst);
346	if (!(rt->rt_flags & RTF_HOST)) {
347		__be32 mask = sk_extract_addr(&rt->rt_genmask);
348
349		if (rt->rt_genmask.sa_family != AF_INET) {
350			if (mask || rt->rt_genmask.sa_family)
351				return -EAFNOSUPPORT;
352		}
353
354		if (bad_mask(mask, addr))
355			return -EINVAL;
356
357		plen = inet_mask_len(mask);
358	}
359
360	cfg->fc_dst_len = plen;
361	cfg->fc_dst = addr;
362
363	if (cmd != SIOCDELRT) {
364		cfg->fc_nlflags = NLM_F_CREATE;
365		cfg->fc_protocol = RTPROT_BOOT;
366	}
367
368	if (rt->rt_metric)
369		cfg->fc_priority = rt->rt_metric - 1;
370
371	if (rt->rt_flags & RTF_REJECT) {
372		cfg->fc_scope = RT_SCOPE_HOST;
373		cfg->fc_type = RTN_UNREACHABLE;
374		return 0;
375	}
376
377	cfg->fc_scope = RT_SCOPE_NOWHERE;
378	cfg->fc_type = RTN_UNICAST;
379
380	if (rt->rt_dev) {
381		char *colon;
382		struct net_device *dev;
383		char devname[IFNAMSIZ];
384
385		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
386			return -EFAULT;
387
388		devname[IFNAMSIZ-1] = 0;
389		colon = strchr(devname, ':');
390		if (colon)
391			*colon = 0;
392		dev = __dev_get_by_name(net, devname);
393		if (!dev)
394			return -ENODEV;
395		cfg->fc_oif = dev->ifindex;
396		if (colon) {
397			struct in_ifaddr *ifa;
398			struct in_device *in_dev = __in_dev_get_rtnl(dev);
399			if (!in_dev)
400				return -ENODEV;
401			*colon = ':';
402			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
403				if (strcmp(ifa->ifa_label, devname) == 0)
404					break;
405			if (ifa == NULL)
406				return -ENODEV;
407			cfg->fc_prefsrc = ifa->ifa_local;
408		}
409	}
410
411	addr = sk_extract_addr(&rt->rt_gateway);
412	if (rt->rt_gateway.sa_family == AF_INET && addr) {
413		cfg->fc_gw = addr;
414		if (rt->rt_flags & RTF_GATEWAY &&
415		    inet_addr_type(net, addr) == RTN_UNICAST)
416			cfg->fc_scope = RT_SCOPE_UNIVERSE;
417	}
418
419	if (cmd == SIOCDELRT)
420		return 0;
421
422	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
423		return -EINVAL;
424
425	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
426		cfg->fc_scope = RT_SCOPE_LINK;
427
428	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
429		struct nlattr *mx;
430		int len = 0;
431
432		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
433		if (mx == NULL)
434			return -ENOMEM;
435
436		if (rt->rt_flags & RTF_MTU)
437			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
438
439		if (rt->rt_flags & RTF_WINDOW)
440			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
441
442		if (rt->rt_flags & RTF_IRTT)
443			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
444
445		cfg->fc_mx = mx;
446		cfg->fc_mx_len = len;
447	}
448
449	return 0;
450}
451
452/*
453 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
454 */
455
456int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
457{
458	struct fib_config cfg;
459	struct rtentry rt;
460	int err;
461
462	switch (cmd) {
463	case SIOCADDRT:		/* Add a route */
464	case SIOCDELRT:		/* Delete a route */
465		if (!capable(CAP_NET_ADMIN))
466			return -EPERM;
467
468		if (copy_from_user(&rt, arg, sizeof(rt)))
469			return -EFAULT;
470
471		rtnl_lock();
472		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
473		if (err == 0) {
474			struct fib_table *tb;
475
476			if (cmd == SIOCDELRT) {
477				tb = fib_get_table(net, cfg.fc_table);
478				if (tb)
479					err = tb->tb_delete(tb, &cfg);
480				else
481					err = -ESRCH;
482			} else {
483				tb = fib_new_table(net, cfg.fc_table);
484				if (tb)
485					err = tb->tb_insert(tb, &cfg);
486				else
487					err = -ENOBUFS;
488			}
489
490			/* allocated by rtentry_to_fib_config() */
491			kfree(cfg.fc_mx);
492		}
493		rtnl_unlock();
494		return err;
495	}
496	return -EINVAL;
497}
498
499const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
500	[RTA_DST]		= { .type = NLA_U32 },
501	[RTA_SRC]		= { .type = NLA_U32 },
502	[RTA_IIF]		= { .type = NLA_U32 },
503	[RTA_OIF]		= { .type = NLA_U32 },
504	[RTA_GATEWAY]		= { .type = NLA_U32 },
505	[RTA_PRIORITY]		= { .type = NLA_U32 },
506	[RTA_PREFSRC]		= { .type = NLA_U32 },
507	[RTA_METRICS]		= { .type = NLA_NESTED },
508	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
509	[RTA_PROTOINFO]		= { .type = NLA_U32 },
510	[RTA_FLOW]		= { .type = NLA_U32 },
511};
512
513static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
514			    struct nlmsghdr *nlh, struct fib_config *cfg)
515{
516	struct nlattr *attr;
517	int err, remaining;
518	struct rtmsg *rtm;
519
520	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
521	if (err < 0)
522		goto errout;
523
524	memset(cfg, 0, sizeof(*cfg));
525
526	rtm = nlmsg_data(nlh);
527	cfg->fc_dst_len = rtm->rtm_dst_len;
528	cfg->fc_tos = rtm->rtm_tos;
529	cfg->fc_table = rtm->rtm_table;
530	cfg->fc_protocol = rtm->rtm_protocol;
531	cfg->fc_scope = rtm->rtm_scope;
532	cfg->fc_type = rtm->rtm_type;
533	cfg->fc_flags = rtm->rtm_flags;
534	cfg->fc_nlflags = nlh->nlmsg_flags;
535
536	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
537	cfg->fc_nlinfo.nlh = nlh;
538	cfg->fc_nlinfo.nl_net = net;
539
540	if (cfg->fc_type > RTN_MAX) {
541		err = -EINVAL;
542		goto errout;
543	}
544
545	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
546		switch (nla_type(attr)) {
547		case RTA_DST:
548			cfg->fc_dst = nla_get_be32(attr);
549			break;
550		case RTA_OIF:
551			cfg->fc_oif = nla_get_u32(attr);
552			break;
553		case RTA_GATEWAY:
554			cfg->fc_gw = nla_get_be32(attr);
555			break;
556		case RTA_PRIORITY:
557			cfg->fc_priority = nla_get_u32(attr);
558			break;
559		case RTA_PREFSRC:
560			cfg->fc_prefsrc = nla_get_be32(attr);
561			break;
562		case RTA_METRICS:
563			cfg->fc_mx = nla_data(attr);
564			cfg->fc_mx_len = nla_len(attr);
565			break;
566		case RTA_MULTIPATH:
567			cfg->fc_mp = nla_data(attr);
568			cfg->fc_mp_len = nla_len(attr);
569			break;
570		case RTA_FLOW:
571			cfg->fc_flow = nla_get_u32(attr);
572			break;
573		case RTA_TABLE:
574			cfg->fc_table = nla_get_u32(attr);
575			break;
576		}
577	}
578
579	return 0;
580errout:
581	return err;
582}
583
584static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
585{
586	struct net *net = skb->sk->sk_net;
587	struct fib_config cfg;
588	struct fib_table *tb;
589	int err;
590
591	err = rtm_to_fib_config(net, skb, nlh, &cfg);
592	if (err < 0)
593		goto errout;
594
595	tb = fib_get_table(net, cfg.fc_table);
596	if (tb == NULL) {
597		err = -ESRCH;
598		goto errout;
599	}
600
601	err = tb->tb_delete(tb, &cfg);
602errout:
603	return err;
604}
605
606static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
607{
608	struct net *net = skb->sk->sk_net;
609	struct fib_config cfg;
610	struct fib_table *tb;
611	int err;
612
613	err = rtm_to_fib_config(net, skb, nlh, &cfg);
614	if (err < 0)
615		goto errout;
616
617	tb = fib_new_table(net, cfg.fc_table);
618	if (tb == NULL) {
619		err = -ENOBUFS;
620		goto errout;
621	}
622
623	err = tb->tb_insert(tb, &cfg);
624errout:
625	return err;
626}
627
628static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
629{
630	struct net *net = skb->sk->sk_net;
631	unsigned int h, s_h;
632	unsigned int e = 0, s_e;
633	struct fib_table *tb;
634	struct hlist_node *node;
635	struct hlist_head *head;
636	int dumped = 0;
637
638	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
639	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
640		return ip_rt_dump(skb, cb);
641
642	s_h = cb->args[0];
643	s_e = cb->args[1];
644
645	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
646		e = 0;
647		head = &net->ipv4.fib_table_hash[h];
648		hlist_for_each_entry(tb, node, head, tb_hlist) {
649			if (e < s_e)
650				goto next;
651			if (dumped)
652				memset(&cb->args[2], 0, sizeof(cb->args) -
653						 2 * sizeof(cb->args[0]));
654			if (tb->tb_dump(tb, skb, cb) < 0)
655				goto out;
656			dumped = 1;
657next:
658			e++;
659		}
660	}
661out:
662	cb->args[1] = e;
663	cb->args[0] = h;
664
665	return skb->len;
666}
667
668/* Prepare and feed intra-kernel routing request.
669   Really, it should be netlink message, but :-( netlink
670   can be not configured, so that we feed it directly
671   to fib engine. It is legal, because all events occur
672   only when netlink is already locked.
673 */
674
675static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
676{
677	struct net *net = ifa->ifa_dev->dev->nd_net;
678	struct fib_table *tb;
679	struct fib_config cfg = {
680		.fc_protocol = RTPROT_KERNEL,
681		.fc_type = type,
682		.fc_dst = dst,
683		.fc_dst_len = dst_len,
684		.fc_prefsrc = ifa->ifa_local,
685		.fc_oif = ifa->ifa_dev->dev->ifindex,
686		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
687		.fc_nlinfo = {
688			.nl_net = net,
689		},
690	};
691
692	if (type == RTN_UNICAST)
693		tb = fib_new_table(net, RT_TABLE_MAIN);
694	else
695		tb = fib_new_table(net, RT_TABLE_LOCAL);
696
697	if (tb == NULL)
698		return;
699
700	cfg.fc_table = tb->tb_id;
701
702	if (type != RTN_LOCAL)
703		cfg.fc_scope = RT_SCOPE_LINK;
704	else
705		cfg.fc_scope = RT_SCOPE_HOST;
706
707	if (cmd == RTM_NEWROUTE)
708		tb->tb_insert(tb, &cfg);
709	else
710		tb->tb_delete(tb, &cfg);
711}
712
713void fib_add_ifaddr(struct in_ifaddr *ifa)
714{
715	struct in_device *in_dev = ifa->ifa_dev;
716	struct net_device *dev = in_dev->dev;
717	struct in_ifaddr *prim = ifa;
718	__be32 mask = ifa->ifa_mask;
719	__be32 addr = ifa->ifa_local;
720	__be32 prefix = ifa->ifa_address&mask;
721
722	if (ifa->ifa_flags&IFA_F_SECONDARY) {
723		prim = inet_ifa_byprefix(in_dev, prefix, mask);
724		if (prim == NULL) {
725			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
726			return;
727		}
728	}
729
730	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
731
732	if (!(dev->flags&IFF_UP))
733		return;
734
735	/* Add broadcast address, if it is explicitly assigned. */
736	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
737		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
738
739	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
740	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
741		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
742			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
743
744		/* Add network specific broadcasts, when it takes a sense */
745		if (ifa->ifa_prefixlen < 31) {
746			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
747			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
748		}
749	}
750}
751
752static void fib_del_ifaddr(struct in_ifaddr *ifa)
753{
754	struct in_device *in_dev = ifa->ifa_dev;
755	struct net_device *dev = in_dev->dev;
756	struct in_ifaddr *ifa1;
757	struct in_ifaddr *prim = ifa;
758	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
759	__be32 any = ifa->ifa_address&ifa->ifa_mask;
760#define LOCAL_OK	1
761#define BRD_OK		2
762#define BRD0_OK		4
763#define BRD1_OK		8
764	unsigned ok = 0;
765
766	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
767		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
768			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
769	else {
770		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
771		if (prim == NULL) {
772			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
773			return;
774		}
775	}
776
777	/* Deletion is more complicated than add.
778	   We should take care of not to delete too much :-)
779
780	   Scan address list to be sure that addresses are really gone.
781	 */
782
783	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
784		if (ifa->ifa_local == ifa1->ifa_local)
785			ok |= LOCAL_OK;
786		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
787			ok |= BRD_OK;
788		if (brd == ifa1->ifa_broadcast)
789			ok |= BRD1_OK;
790		if (any == ifa1->ifa_broadcast)
791			ok |= BRD0_OK;
792	}
793
794	if (!(ok&BRD_OK))
795		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
796	if (!(ok&BRD1_OK))
797		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
798	if (!(ok&BRD0_OK))
799		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
800	if (!(ok&LOCAL_OK)) {
801		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
802
803		/* Check, that this local address finally disappeared. */
804		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
805			/* And the last, but not the least thing.
806			   We must flush stray FIB entries.
807
808			   First of all, we scan fib_info list searching
809			   for stray nexthop entries, then ignite fib_flush.
810			*/
811			if (fib_sync_down(ifa->ifa_local, NULL, 0))
812				fib_flush(dev->nd_net);
813		}
814	}
815#undef LOCAL_OK
816#undef BRD_OK
817#undef BRD0_OK
818#undef BRD1_OK
819}
820
821static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
822{
823
824	struct fib_result       res;
825	struct flowi            fl = { .mark = frn->fl_mark,
826				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
827							    .tos = frn->fl_tos,
828							    .scope = frn->fl_scope } } };
829
830#ifdef CONFIG_IP_MULTIPLE_TABLES
831	res.r = NULL;
832#endif
833
834	frn->err = -ENOENT;
835	if (tb) {
836		local_bh_disable();
837
838		frn->tb_id = tb->tb_id;
839		frn->err = tb->tb_lookup(tb, &fl, &res);
840
841		if (!frn->err) {
842			frn->prefixlen = res.prefixlen;
843			frn->nh_sel = res.nh_sel;
844			frn->type = res.type;
845			frn->scope = res.scope;
846			fib_res_put(&res);
847		}
848		local_bh_enable();
849	}
850}
851
852static void nl_fib_input(struct sk_buff *skb)
853{
854	struct net *net;
855	struct fib_result_nl *frn;
856	struct nlmsghdr *nlh;
857	struct fib_table *tb;
858	u32 pid;
859
860	net = skb->sk->sk_net;
861	nlh = nlmsg_hdr(skb);
862	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
863	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
864		return;
865
866	skb = skb_clone(skb, GFP_KERNEL);
867	if (skb == NULL)
868		return;
869	nlh = nlmsg_hdr(skb);
870
871	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
872	tb = fib_get_table(net, frn->tb_id_in);
873
874	nl_fib_lookup(frn, tb);
875
876	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
877	NETLINK_CB(skb).pid = 0;         /* from kernel */
878	NETLINK_CB(skb).dst_group = 0;  /* unicast */
879	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
880}
881
882static int nl_fib_lookup_init(struct net *net)
883{
884	struct sock *sk;
885	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
886				   nl_fib_input, NULL, THIS_MODULE);
887	if (sk == NULL)
888		return -EAFNOSUPPORT;
889	net->ipv4.fibnl = sk;
890	return 0;
891}
892
893static void nl_fib_lookup_exit(struct net *net)
894{
895	netlink_kernel_release(net->ipv4.fibnl);
896	net->ipv4.fibnl = NULL;
897}
898
899static void fib_disable_ip(struct net_device *dev, int force)
900{
901	if (fib_sync_down(0, dev, force))
902		fib_flush(dev->nd_net);
903	rt_cache_flush(0);
904	arp_ifdown(dev);
905}
906
907static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
908{
909	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
910
911	switch (event) {
912	case NETDEV_UP:
913		fib_add_ifaddr(ifa);
914#ifdef CONFIG_IP_ROUTE_MULTIPATH
915		fib_sync_up(ifa->ifa_dev->dev);
916#endif
917		rt_cache_flush(-1);
918		break;
919	case NETDEV_DOWN:
920		fib_del_ifaddr(ifa);
921		if (ifa->ifa_dev->ifa_list == NULL) {
922			/* Last address was deleted from this interface.
923			   Disable IP.
924			 */
925			fib_disable_ip(ifa->ifa_dev->dev, 1);
926		} else {
927			rt_cache_flush(-1);
928		}
929		break;
930	}
931	return NOTIFY_DONE;
932}
933
934static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
935{
936	struct net_device *dev = ptr;
937	struct in_device *in_dev = __in_dev_get_rtnl(dev);
938
939	if (event == NETDEV_UNREGISTER) {
940		fib_disable_ip(dev, 2);
941		return NOTIFY_DONE;
942	}
943
944	if (!in_dev)
945		return NOTIFY_DONE;
946
947	switch (event) {
948	case NETDEV_UP:
949		for_ifa(in_dev) {
950			fib_add_ifaddr(ifa);
951		} endfor_ifa(in_dev);
952#ifdef CONFIG_IP_ROUTE_MULTIPATH
953		fib_sync_up(dev);
954#endif
955		rt_cache_flush(-1);
956		break;
957	case NETDEV_DOWN:
958		fib_disable_ip(dev, 0);
959		break;
960	case NETDEV_CHANGEMTU:
961	case NETDEV_CHANGE:
962		rt_cache_flush(0);
963		break;
964	}
965	return NOTIFY_DONE;
966}
967
968static struct notifier_block fib_inetaddr_notifier = {
969	.notifier_call =fib_inetaddr_event,
970};
971
972static struct notifier_block fib_netdev_notifier = {
973	.notifier_call =fib_netdev_event,
974};
975
976static int __net_init ip_fib_net_init(struct net *net)
977{
978	unsigned int i;
979
980	net->ipv4.fib_table_hash = kzalloc(
981			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
982	if (net->ipv4.fib_table_hash == NULL)
983		return -ENOMEM;
984
985	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
986		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
987
988	return fib4_rules_init(net);
989}
990
991static void __net_exit ip_fib_net_exit(struct net *net)
992{
993	unsigned int i;
994
995#ifdef CONFIG_IP_MULTIPLE_TABLES
996	fib4_rules_exit(net);
997#endif
998
999	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1000		struct fib_table *tb;
1001		struct hlist_head *head;
1002		struct hlist_node *node, *tmp;
1003
1004		head = &net->ipv4.fib_table_hash[i];
1005		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1006			hlist_del(node);
1007			tb->tb_flush(tb);
1008			kfree(tb);
1009		}
1010	}
1011	kfree(net->ipv4.fib_table_hash);
1012}
1013
1014static int __net_init fib_net_init(struct net *net)
1015{
1016	int error;
1017
1018	error = ip_fib_net_init(net);
1019	if (error < 0)
1020		goto out;
1021	error = nl_fib_lookup_init(net);
1022	if (error < 0)
1023		goto out_nlfl;
1024	error = fib_proc_init(net);
1025	if (error < 0)
1026		goto out_proc;
1027out:
1028	return error;
1029
1030out_proc:
1031	nl_fib_lookup_exit(net);
1032out_nlfl:
1033	ip_fib_net_exit(net);
1034	goto out;
1035}
1036
1037static void __net_exit fib_net_exit(struct net *net)
1038{
1039	fib_proc_exit(net);
1040	nl_fib_lookup_exit(net);
1041	ip_fib_net_exit(net);
1042}
1043
1044static struct pernet_operations fib_net_ops = {
1045	.init = fib_net_init,
1046	.exit = fib_net_exit,
1047};
1048
1049void __init ip_fib_init(void)
1050{
1051	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1052	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1053	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1054
1055	register_pernet_subsys(&fib_net_ops);
1056	register_netdevice_notifier(&fib_netdev_notifier);
1057	register_inetaddr_notifier(&fib_inetaddr_notifier);
1058
1059	fib_hash_init();
1060}
1061
1062EXPORT_SYMBOL(inet_addr_type);
1063EXPORT_SYMBOL(inet_dev_addr_type);
1064EXPORT_SYMBOL(ip_dev_find);
1065