fib_frontend.c revision 64c2d5382954ccf6054424653f4c7f4f04c1ff21
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/capability.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/inetdevice.h>
33#include <linux/netdevice.h>
34#include <linux/if_addr.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/init.h>
38#include <linux/list.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49
50#ifndef CONFIG_IP_MULTIPLE_TABLES
51
52static int __net_init fib4_rules_init(struct net *net)
53{
54	struct fib_table *local_table, *main_table;
55
56	local_table = fib_hash_table(RT_TABLE_LOCAL);
57	if (local_table == NULL)
58		return -ENOMEM;
59
60	main_table  = fib_hash_table(RT_TABLE_MAIN);
61	if (main_table == NULL)
62		goto fail;
63
64	hlist_add_head_rcu(&local_table->tb_hlist,
65				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
66	hlist_add_head_rcu(&main_table->tb_hlist,
67				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
68	return 0;
69
70fail:
71	kfree(local_table);
72	return -ENOMEM;
73}
74#else
75
76struct fib_table *fib_new_table(struct net *net, u32 id)
77{
78	struct fib_table *tb;
79	unsigned int h;
80
81	if (id == 0)
82		id = RT_TABLE_MAIN;
83	tb = fib_get_table(net, id);
84	if (tb)
85		return tb;
86
87	tb = fib_hash_table(id);
88	if (!tb)
89		return NULL;
90	h = id & (FIB_TABLE_HASHSZ - 1);
91	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
92	return tb;
93}
94
95struct fib_table *fib_get_table(struct net *net, u32 id)
96{
97	struct fib_table *tb;
98	struct hlist_node *node;
99	struct hlist_head *head;
100	unsigned int h;
101
102	if (id == 0)
103		id = RT_TABLE_MAIN;
104	h = id & (FIB_TABLE_HASHSZ - 1);
105
106	rcu_read_lock();
107	head = &net->ipv4.fib_table_hash[h];
108	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
109		if (tb->tb_id == id) {
110			rcu_read_unlock();
111			return tb;
112		}
113	}
114	rcu_read_unlock();
115	return NULL;
116}
117#endif /* CONFIG_IP_MULTIPLE_TABLES */
118
119void fib_select_default(const struct flowi *flp, struct fib_result *res)
120{
121	struct fib_table *tb;
122	int table = RT_TABLE_MAIN;
123#ifdef CONFIG_IP_MULTIPLE_TABLES
124	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
125		return;
126	table = res->r->table;
127#endif
128	tb = fib_get_table(&init_net, table);
129	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
130		tb->tb_select_default(tb, flp, res);
131}
132
133static void fib_flush(struct net *net)
134{
135	int flushed = 0;
136	struct fib_table *tb;
137	struct hlist_node *node;
138	struct hlist_head *head;
139	unsigned int h;
140
141	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
142		head = &net->ipv4.fib_table_hash[h];
143		hlist_for_each_entry(tb, node, head, tb_hlist)
144			flushed += tb->tb_flush(tb);
145	}
146
147	if (flushed)
148		rt_cache_flush(-1);
149}
150
151/*
152 *	Find the first device with a given source address.
153 */
154
155struct net_device * ip_dev_find(__be32 addr)
156{
157	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
158	struct fib_result res;
159	struct net_device *dev = NULL;
160	struct fib_table *local_table;
161
162#ifdef CONFIG_IP_MULTIPLE_TABLES
163	res.r = NULL;
164#endif
165
166	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
167	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
168		return NULL;
169	if (res.type != RTN_LOCAL)
170		goto out;
171	dev = FIB_RES_DEV(res);
172
173	if (dev)
174		dev_hold(dev);
175out:
176	fib_res_put(&res);
177	return dev;
178}
179
/*
 * Find address type as if only "dev" was present in the system. If
 * dev is NULL then all interfaces are taken into consideration.
 */
184static inline unsigned __inet_dev_addr_type(struct net *net,
185					    const struct net_device *dev,
186					    __be32 addr)
187{
188	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
189	struct fib_result	res;
190	unsigned ret = RTN_BROADCAST;
191	struct fib_table *local_table;
192
193	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
194		return RTN_BROADCAST;
195	if (ipv4_is_multicast(addr))
196		return RTN_MULTICAST;
197
198#ifdef CONFIG_IP_MULTIPLE_TABLES
199	res.r = NULL;
200#endif
201
202	local_table = fib_get_table(net, RT_TABLE_LOCAL);
203	if (local_table) {
204		ret = RTN_UNICAST;
205		if (!local_table->tb_lookup(local_table, &fl, &res)) {
206			if (!dev || dev == res.fi->fib_dev)
207				ret = res.type;
208			fib_res_put(&res);
209		}
210	}
211	return ret;
212}
213
214unsigned int inet_addr_type(struct net *net, __be32 addr)
215{
216	return __inet_dev_addr_type(net, NULL, addr);
217}
218
219unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
220				__be32 addr)
221{
222       return __inet_dev_addr_type(net, dev, addr);
223}
224
225/* Given (packet source, input interface) and optional (dst, oif, tos):
226   - (main) check, that source is valid i.e. not broadcast or our local
227     address.
228   - figure out what "logical" interface this packet arrived
229     and calculate "specific destination" address.
230   - check, that packet arrived from expected physical interface.
231 */
232
233int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
234			struct net_device *dev, __be32 *spec_dst, u32 *itag)
235{
236	struct in_device *in_dev;
237	struct flowi fl = { .nl_u = { .ip4_u =
238				      { .daddr = src,
239					.saddr = dst,
240					.tos = tos } },
241			    .iif = oif };
242	struct fib_result res;
243	int no_addr, rpf;
244	int ret;
245	struct net *net;
246
247	no_addr = rpf = 0;
248	rcu_read_lock();
249	in_dev = __in_dev_get_rcu(dev);
250	if (in_dev) {
251		no_addr = in_dev->ifa_list == NULL;
252		rpf = IN_DEV_RPFILTER(in_dev);
253	}
254	rcu_read_unlock();
255
256	if (in_dev == NULL)
257		goto e_inval;
258
259	net = dev->nd_net;
260	if (fib_lookup(net, &fl, &res))
261		goto last_resort;
262	if (res.type != RTN_UNICAST)
263		goto e_inval_res;
264	*spec_dst = FIB_RES_PREFSRC(res);
265	fib_combine_itag(itag, &res);
266#ifdef CONFIG_IP_ROUTE_MULTIPATH
267	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
268#else
269	if (FIB_RES_DEV(res) == dev)
270#endif
271	{
272		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
273		fib_res_put(&res);
274		return ret;
275	}
276	fib_res_put(&res);
277	if (no_addr)
278		goto last_resort;
279	if (rpf)
280		goto e_inval;
281	fl.oif = dev->ifindex;
282
283	ret = 0;
284	if (fib_lookup(net, &fl, &res) == 0) {
285		if (res.type == RTN_UNICAST) {
286			*spec_dst = FIB_RES_PREFSRC(res);
287			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
288		}
289		fib_res_put(&res);
290	}
291	return ret;
292
293last_resort:
294	if (rpf)
295		goto e_inval;
296	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
297	*itag = 0;
298	return 0;
299
300e_inval_res:
301	fib_res_put(&res);
302e_inval:
303	return -EINVAL;
304}
305
306static inline __be32 sk_extract_addr(struct sockaddr *addr)
307{
308	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
309}
310
311static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
312{
313	struct nlattr *nla;
314
315	nla = (struct nlattr *) ((char *) mx + len);
316	nla->nla_type = type;
317	nla->nla_len = nla_attr_size(4);
318	*(u32 *) nla_data(nla) = value;
319
320	return len + nla_total_size(4);
321}
322
323static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
324				 struct fib_config *cfg)
325{
326	__be32 addr;
327	int plen;
328
329	memset(cfg, 0, sizeof(*cfg));
330	cfg->fc_nlinfo.nl_net = net;
331
332	if (rt->rt_dst.sa_family != AF_INET)
333		return -EAFNOSUPPORT;
334
335	/*
336	 * Check mask for validity:
337	 * a) it must be contiguous.
338	 * b) destination must have all host bits clear.
339	 * c) if application forgot to set correct family (AF_INET),
340	 *    reject request unless it is absolutely clear i.e.
341	 *    both family and mask are zero.
342	 */
343	plen = 32;
344	addr = sk_extract_addr(&rt->rt_dst);
345	if (!(rt->rt_flags & RTF_HOST)) {
346		__be32 mask = sk_extract_addr(&rt->rt_genmask);
347
348		if (rt->rt_genmask.sa_family != AF_INET) {
349			if (mask || rt->rt_genmask.sa_family)
350				return -EAFNOSUPPORT;
351		}
352
353		if (bad_mask(mask, addr))
354			return -EINVAL;
355
356		plen = inet_mask_len(mask);
357	}
358
359	cfg->fc_dst_len = plen;
360	cfg->fc_dst = addr;
361
362	if (cmd != SIOCDELRT) {
363		cfg->fc_nlflags = NLM_F_CREATE;
364		cfg->fc_protocol = RTPROT_BOOT;
365	}
366
367	if (rt->rt_metric)
368		cfg->fc_priority = rt->rt_metric - 1;
369
370	if (rt->rt_flags & RTF_REJECT) {
371		cfg->fc_scope = RT_SCOPE_HOST;
372		cfg->fc_type = RTN_UNREACHABLE;
373		return 0;
374	}
375
376	cfg->fc_scope = RT_SCOPE_NOWHERE;
377	cfg->fc_type = RTN_UNICAST;
378
379	if (rt->rt_dev) {
380		char *colon;
381		struct net_device *dev;
382		char devname[IFNAMSIZ];
383
384		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
385			return -EFAULT;
386
387		devname[IFNAMSIZ-1] = 0;
388		colon = strchr(devname, ':');
389		if (colon)
390			*colon = 0;
391		dev = __dev_get_by_name(net, devname);
392		if (!dev)
393			return -ENODEV;
394		cfg->fc_oif = dev->ifindex;
395		if (colon) {
396			struct in_ifaddr *ifa;
397			struct in_device *in_dev = __in_dev_get_rtnl(dev);
398			if (!in_dev)
399				return -ENODEV;
400			*colon = ':';
401			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
402				if (strcmp(ifa->ifa_label, devname) == 0)
403					break;
404			if (ifa == NULL)
405				return -ENODEV;
406			cfg->fc_prefsrc = ifa->ifa_local;
407		}
408	}
409
410	addr = sk_extract_addr(&rt->rt_gateway);
411	if (rt->rt_gateway.sa_family == AF_INET && addr) {
412		cfg->fc_gw = addr;
413		if (rt->rt_flags & RTF_GATEWAY &&
414		    inet_addr_type(net, addr) == RTN_UNICAST)
415			cfg->fc_scope = RT_SCOPE_UNIVERSE;
416	}
417
418	if (cmd == SIOCDELRT)
419		return 0;
420
421	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
422		return -EINVAL;
423
424	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
425		cfg->fc_scope = RT_SCOPE_LINK;
426
427	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
428		struct nlattr *mx;
429		int len = 0;
430
431		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
432		if (mx == NULL)
433			return -ENOMEM;
434
435		if (rt->rt_flags & RTF_MTU)
436			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
437
438		if (rt->rt_flags & RTF_WINDOW)
439			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
440
441		if (rt->rt_flags & RTF_IRTT)
442			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
443
444		cfg->fc_mx = mx;
445		cfg->fc_mx_len = len;
446	}
447
448	return 0;
449}
450
451/*
452 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
453 */
454
455int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
456{
457	struct fib_config cfg;
458	struct rtentry rt;
459	int err;
460
461	switch (cmd) {
462	case SIOCADDRT:		/* Add a route */
463	case SIOCDELRT:		/* Delete a route */
464		if (!capable(CAP_NET_ADMIN))
465			return -EPERM;
466
467		if (copy_from_user(&rt, arg, sizeof(rt)))
468			return -EFAULT;
469
470		rtnl_lock();
471		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
472		if (err == 0) {
473			struct fib_table *tb;
474
475			if (cmd == SIOCDELRT) {
476				tb = fib_get_table(net, cfg.fc_table);
477				if (tb)
478					err = tb->tb_delete(tb, &cfg);
479				else
480					err = -ESRCH;
481			} else {
482				tb = fib_new_table(net, cfg.fc_table);
483				if (tb)
484					err = tb->tb_insert(tb, &cfg);
485				else
486					err = -ENOBUFS;
487			}
488
489			/* allocated by rtentry_to_fib_config() */
490			kfree(cfg.fc_mx);
491		}
492		rtnl_unlock();
493		return err;
494	}
495	return -EINVAL;
496}
497
498const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
499	[RTA_DST]		= { .type = NLA_U32 },
500	[RTA_SRC]		= { .type = NLA_U32 },
501	[RTA_IIF]		= { .type = NLA_U32 },
502	[RTA_OIF]		= { .type = NLA_U32 },
503	[RTA_GATEWAY]		= { .type = NLA_U32 },
504	[RTA_PRIORITY]		= { .type = NLA_U32 },
505	[RTA_PREFSRC]		= { .type = NLA_U32 },
506	[RTA_METRICS]		= { .type = NLA_NESTED },
507	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
508	[RTA_PROTOINFO]		= { .type = NLA_U32 },
509	[RTA_FLOW]		= { .type = NLA_U32 },
510};
511
512static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
513			    struct nlmsghdr *nlh, struct fib_config *cfg)
514{
515	struct nlattr *attr;
516	int err, remaining;
517	struct rtmsg *rtm;
518
519	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
520	if (err < 0)
521		goto errout;
522
523	memset(cfg, 0, sizeof(*cfg));
524
525	rtm = nlmsg_data(nlh);
526	cfg->fc_dst_len = rtm->rtm_dst_len;
527	cfg->fc_tos = rtm->rtm_tos;
528	cfg->fc_table = rtm->rtm_table;
529	cfg->fc_protocol = rtm->rtm_protocol;
530	cfg->fc_scope = rtm->rtm_scope;
531	cfg->fc_type = rtm->rtm_type;
532	cfg->fc_flags = rtm->rtm_flags;
533	cfg->fc_nlflags = nlh->nlmsg_flags;
534
535	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
536	cfg->fc_nlinfo.nlh = nlh;
537	cfg->fc_nlinfo.nl_net = net;
538
539	if (cfg->fc_type > RTN_MAX) {
540		err = -EINVAL;
541		goto errout;
542	}
543
544	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
545		switch (nla_type(attr)) {
546		case RTA_DST:
547			cfg->fc_dst = nla_get_be32(attr);
548			break;
549		case RTA_OIF:
550			cfg->fc_oif = nla_get_u32(attr);
551			break;
552		case RTA_GATEWAY:
553			cfg->fc_gw = nla_get_be32(attr);
554			break;
555		case RTA_PRIORITY:
556			cfg->fc_priority = nla_get_u32(attr);
557			break;
558		case RTA_PREFSRC:
559			cfg->fc_prefsrc = nla_get_be32(attr);
560			break;
561		case RTA_METRICS:
562			cfg->fc_mx = nla_data(attr);
563			cfg->fc_mx_len = nla_len(attr);
564			break;
565		case RTA_MULTIPATH:
566			cfg->fc_mp = nla_data(attr);
567			cfg->fc_mp_len = nla_len(attr);
568			break;
569		case RTA_FLOW:
570			cfg->fc_flow = nla_get_u32(attr);
571			break;
572		case RTA_TABLE:
573			cfg->fc_table = nla_get_u32(attr);
574			break;
575		}
576	}
577
578	return 0;
579errout:
580	return err;
581}
582
583static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
584{
585	struct net *net = skb->sk->sk_net;
586	struct fib_config cfg;
587	struct fib_table *tb;
588	int err;
589
590	err = rtm_to_fib_config(net, skb, nlh, &cfg);
591	if (err < 0)
592		goto errout;
593
594	tb = fib_get_table(net, cfg.fc_table);
595	if (tb == NULL) {
596		err = -ESRCH;
597		goto errout;
598	}
599
600	err = tb->tb_delete(tb, &cfg);
601errout:
602	return err;
603}
604
605static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
606{
607	struct net *net = skb->sk->sk_net;
608	struct fib_config cfg;
609	struct fib_table *tb;
610	int err;
611
612	err = rtm_to_fib_config(net, skb, nlh, &cfg);
613	if (err < 0)
614		goto errout;
615
616	tb = fib_new_table(net, cfg.fc_table);
617	if (tb == NULL) {
618		err = -ENOBUFS;
619		goto errout;
620	}
621
622	err = tb->tb_insert(tb, &cfg);
623errout:
624	return err;
625}
626
627static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
628{
629	struct net *net = skb->sk->sk_net;
630	unsigned int h, s_h;
631	unsigned int e = 0, s_e;
632	struct fib_table *tb;
633	struct hlist_node *node;
634	struct hlist_head *head;
635	int dumped = 0;
636
637	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
638	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
639		return ip_rt_dump(skb, cb);
640
641	s_h = cb->args[0];
642	s_e = cb->args[1];
643
644	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
645		e = 0;
646		head = &net->ipv4.fib_table_hash[h];
647		hlist_for_each_entry(tb, node, head, tb_hlist) {
648			if (e < s_e)
649				goto next;
650			if (dumped)
651				memset(&cb->args[2], 0, sizeof(cb->args) -
652						 2 * sizeof(cb->args[0]));
653			if (tb->tb_dump(tb, skb, cb) < 0)
654				goto out;
655			dumped = 1;
656next:
657			e++;
658		}
659	}
660out:
661	cb->args[1] = e;
662	cb->args[0] = h;
663
664	return skb->len;
665}
666
/* Prepare and feed an intra-kernel routing request.
   Really, it should be a netlink message, but :-( netlink
   may not be configured, so we feed it directly
   to the fib engine. This is legal, because all events occur
   only when netlink is already locked.
 */
673
674static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
675{
676	struct net *net = ifa->ifa_dev->dev->nd_net;
677	struct fib_table *tb;
678	struct fib_config cfg = {
679		.fc_protocol = RTPROT_KERNEL,
680		.fc_type = type,
681		.fc_dst = dst,
682		.fc_dst_len = dst_len,
683		.fc_prefsrc = ifa->ifa_local,
684		.fc_oif = ifa->ifa_dev->dev->ifindex,
685		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
686		.fc_nlinfo = {
687			.nl_net = net,
688		},
689	};
690
691	if (type == RTN_UNICAST)
692		tb = fib_new_table(net, RT_TABLE_MAIN);
693	else
694		tb = fib_new_table(net, RT_TABLE_LOCAL);
695
696	if (tb == NULL)
697		return;
698
699	cfg.fc_table = tb->tb_id;
700
701	if (type != RTN_LOCAL)
702		cfg.fc_scope = RT_SCOPE_LINK;
703	else
704		cfg.fc_scope = RT_SCOPE_HOST;
705
706	if (cmd == RTM_NEWROUTE)
707		tb->tb_insert(tb, &cfg);
708	else
709		tb->tb_delete(tb, &cfg);
710}
711
712void fib_add_ifaddr(struct in_ifaddr *ifa)
713{
714	struct in_device *in_dev = ifa->ifa_dev;
715	struct net_device *dev = in_dev->dev;
716	struct in_ifaddr *prim = ifa;
717	__be32 mask = ifa->ifa_mask;
718	__be32 addr = ifa->ifa_local;
719	__be32 prefix = ifa->ifa_address&mask;
720
721	if (ifa->ifa_flags&IFA_F_SECONDARY) {
722		prim = inet_ifa_byprefix(in_dev, prefix, mask);
723		if (prim == NULL) {
724			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
725			return;
726		}
727	}
728
729	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
730
731	if (!(dev->flags&IFF_UP))
732		return;
733
734	/* Add broadcast address, if it is explicitly assigned. */
735	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
736		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
737
738	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
739	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
740		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
741			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
742
743		/* Add network specific broadcasts, when it takes a sense */
744		if (ifa->ifa_prefixlen < 31) {
745			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
746			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
747		}
748	}
749}
750
751static void fib_del_ifaddr(struct in_ifaddr *ifa)
752{
753	struct in_device *in_dev = ifa->ifa_dev;
754	struct net_device *dev = in_dev->dev;
755	struct in_ifaddr *ifa1;
756	struct in_ifaddr *prim = ifa;
757	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
758	__be32 any = ifa->ifa_address&ifa->ifa_mask;
759#define LOCAL_OK	1
760#define BRD_OK		2
761#define BRD0_OK		4
762#define BRD1_OK		8
763	unsigned ok = 0;
764
765	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
766		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
767			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
768	else {
769		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
770		if (prim == NULL) {
771			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
772			return;
773		}
774	}
775
776	/* Deletion is more complicated than add.
777	   We should take care of not to delete too much :-)
778
779	   Scan address list to be sure that addresses are really gone.
780	 */
781
782	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
783		if (ifa->ifa_local == ifa1->ifa_local)
784			ok |= LOCAL_OK;
785		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
786			ok |= BRD_OK;
787		if (brd == ifa1->ifa_broadcast)
788			ok |= BRD1_OK;
789		if (any == ifa1->ifa_broadcast)
790			ok |= BRD0_OK;
791	}
792
793	if (!(ok&BRD_OK))
794		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
795	if (!(ok&BRD1_OK))
796		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
797	if (!(ok&BRD0_OK))
798		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
799	if (!(ok&LOCAL_OK)) {
800		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
801
802		/* Check, that this local address finally disappeared. */
803		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
804			/* And the last, but not the least thing.
805			   We must flush stray FIB entries.
806
807			   First of all, we scan fib_info list searching
808			   for stray nexthop entries, then ignite fib_flush.
809			*/
810			if (fib_sync_down(ifa->ifa_local, NULL, 0))
811				fib_flush(dev->nd_net);
812		}
813	}
814#undef LOCAL_OK
815#undef BRD_OK
816#undef BRD0_OK
817#undef BRD1_OK
818}
819
820static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
821{
822
823	struct fib_result       res;
824	struct flowi            fl = { .mark = frn->fl_mark,
825				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
826							    .tos = frn->fl_tos,
827							    .scope = frn->fl_scope } } };
828
829#ifdef CONFIG_IP_MULTIPLE_TABLES
830	res.r = NULL;
831#endif
832
833	frn->err = -ENOENT;
834	if (tb) {
835		local_bh_disable();
836
837		frn->tb_id = tb->tb_id;
838		frn->err = tb->tb_lookup(tb, &fl, &res);
839
840		if (!frn->err) {
841			frn->prefixlen = res.prefixlen;
842			frn->nh_sel = res.nh_sel;
843			frn->type = res.type;
844			frn->scope = res.scope;
845			fib_res_put(&res);
846		}
847		local_bh_enable();
848	}
849}
850
851static void nl_fib_input(struct sk_buff *skb)
852{
853	struct net *net;
854	struct fib_result_nl *frn;
855	struct nlmsghdr *nlh;
856	struct fib_table *tb;
857	u32 pid;
858
859	net = skb->sk->sk_net;
860	nlh = nlmsg_hdr(skb);
861	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
862	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
863		return;
864
865	skb = skb_clone(skb, GFP_KERNEL);
866	if (skb == NULL)
867		return;
868	nlh = nlmsg_hdr(skb);
869
870	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
871	tb = fib_get_table(net, frn->tb_id_in);
872
873	nl_fib_lookup(frn, tb);
874
875	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
876	NETLINK_CB(skb).pid = 0;         /* from kernel */
877	NETLINK_CB(skb).dst_group = 0;  /* unicast */
878	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
879}
880
881static int nl_fib_lookup_init(struct net *net)
882{
883	struct sock *sk;
884	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
885				   nl_fib_input, NULL, THIS_MODULE);
886	if (sk == NULL)
887		return -EAFNOSUPPORT;
888	net->ipv4.fibnl = sk;
889	return 0;
890}
891
892static void nl_fib_lookup_exit(struct net *net)
893{
894	netlink_kernel_release(net->ipv4.fibnl);
895	net->ipv4.fibnl = NULL;
896}
897
898static void fib_disable_ip(struct net_device *dev, int force)
899{
900	if (fib_sync_down(0, dev, force))
901		fib_flush(dev->nd_net);
902	rt_cache_flush(0);
903	arp_ifdown(dev);
904}
905
906static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
907{
908	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
909
910	switch (event) {
911	case NETDEV_UP:
912		fib_add_ifaddr(ifa);
913#ifdef CONFIG_IP_ROUTE_MULTIPATH
914		fib_sync_up(ifa->ifa_dev->dev);
915#endif
916		rt_cache_flush(-1);
917		break;
918	case NETDEV_DOWN:
919		fib_del_ifaddr(ifa);
920		if (ifa->ifa_dev->ifa_list == NULL) {
921			/* Last address was deleted from this interface.
922			   Disable IP.
923			 */
924			fib_disable_ip(ifa->ifa_dev->dev, 1);
925		} else {
926			rt_cache_flush(-1);
927		}
928		break;
929	}
930	return NOTIFY_DONE;
931}
932
933static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
934{
935	struct net_device *dev = ptr;
936	struct in_device *in_dev = __in_dev_get_rtnl(dev);
937
938	if (event == NETDEV_UNREGISTER) {
939		fib_disable_ip(dev, 2);
940		return NOTIFY_DONE;
941	}
942
943	if (!in_dev)
944		return NOTIFY_DONE;
945
946	switch (event) {
947	case NETDEV_UP:
948		for_ifa(in_dev) {
949			fib_add_ifaddr(ifa);
950		} endfor_ifa(in_dev);
951#ifdef CONFIG_IP_ROUTE_MULTIPATH
952		fib_sync_up(dev);
953#endif
954		rt_cache_flush(-1);
955		break;
956	case NETDEV_DOWN:
957		fib_disable_ip(dev, 0);
958		break;
959	case NETDEV_CHANGEMTU:
960	case NETDEV_CHANGE:
961		rt_cache_flush(0);
962		break;
963	}
964	return NOTIFY_DONE;
965}
966
967static struct notifier_block fib_inetaddr_notifier = {
968	.notifier_call =fib_inetaddr_event,
969};
970
971static struct notifier_block fib_netdev_notifier = {
972	.notifier_call =fib_netdev_event,
973};
974
975static int __net_init ip_fib_net_init(struct net *net)
976{
977	unsigned int i;
978
979	net->ipv4.fib_table_hash = kzalloc(
980			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
981	if (net->ipv4.fib_table_hash == NULL)
982		return -ENOMEM;
983
984	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
985		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
986
987	return fib4_rules_init(net);
988}
989
990static void __net_exit ip_fib_net_exit(struct net *net)
991{
992	unsigned int i;
993
994#ifdef CONFIG_IP_MULTIPLE_TABLES
995	fib4_rules_exit(net);
996#endif
997
998	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
999		struct fib_table *tb;
1000		struct hlist_head *head;
1001		struct hlist_node *node, *tmp;
1002
1003		head = &net->ipv4.fib_table_hash[i];
1004		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1005			hlist_del(node);
1006			tb->tb_flush(tb);
1007			kfree(tb);
1008		}
1009	}
1010	kfree(net->ipv4.fib_table_hash);
1011}
1012
1013static int __net_init fib_net_init(struct net *net)
1014{
1015	int error;
1016
1017	error = ip_fib_net_init(net);
1018	if (error < 0)
1019		goto out;
1020	error = nl_fib_lookup_init(net);
1021	if (error < 0)
1022		goto out_nlfl;
1023	error = fib_proc_init(net);
1024	if (error < 0)
1025		goto out_proc;
1026out:
1027	return error;
1028
1029out_proc:
1030	nl_fib_lookup_exit(net);
1031out_nlfl:
1032	ip_fib_net_exit(net);
1033	goto out;
1034}
1035
1036static void __net_exit fib_net_exit(struct net *net)
1037{
1038	fib_proc_exit(net);
1039	nl_fib_lookup_exit(net);
1040	ip_fib_net_exit(net);
1041}
1042
1043static struct pernet_operations fib_net_ops = {
1044	.init = fib_net_init,
1045	.exit = fib_net_exit,
1046};
1047
1048void __init ip_fib_init(void)
1049{
1050	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1051	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1052	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1053
1054	register_pernet_subsys(&fib_net_ops);
1055	register_netdevice_notifier(&fib_netdev_notifier);
1056	register_inetaddr_notifier(&fib_inetaddr_notifier);
1057
1058	fib_hash_init();
1059}
1060
1061EXPORT_SYMBOL(inet_addr_type);
1062EXPORT_SYMBOL(inet_dev_addr_type);
1063EXPORT_SYMBOL(ip_dev_find);
1064