fib_frontend.c revision 0b040829952d84bf2a62526f0e24b624e0699447
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
15
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
20#include <linux/capability.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
30#include <linux/inetdevice.h>
31#include <linux/netdevice.h>
32#include <linux/if_addr.h>
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
35#include <linux/init.h>
36#include <linux/list.h>
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/icmp.h>
44#include <net/arp.h>
45#include <net/ip_fib.h>
46#include <net/rtnetlink.h>
47
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
50static int __net_init fib4_rules_init(struct net *net)
51{
52	struct fib_table *local_table, *main_table;
53
54	local_table = fib_hash_table(RT_TABLE_LOCAL);
55	if (local_table == NULL)
56		return -ENOMEM;
57
58	main_table  = fib_hash_table(RT_TABLE_MAIN);
59	if (main_table == NULL)
60		goto fail;
61
62	hlist_add_head_rcu(&local_table->tb_hlist,
63				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
64	hlist_add_head_rcu(&main_table->tb_hlist,
65				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
66	return 0;
67
68fail:
69	kfree(local_table);
70	return -ENOMEM;
71}
72#else
73
74struct fib_table *fib_new_table(struct net *net, u32 id)
75{
76	struct fib_table *tb;
77	unsigned int h;
78
79	if (id == 0)
80		id = RT_TABLE_MAIN;
81	tb = fib_get_table(net, id);
82	if (tb)
83		return tb;
84
85	tb = fib_hash_table(id);
86	if (!tb)
87		return NULL;
88	h = id & (FIB_TABLE_HASHSZ - 1);
89	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
90	return tb;
91}
92
93struct fib_table *fib_get_table(struct net *net, u32 id)
94{
95	struct fib_table *tb;
96	struct hlist_node *node;
97	struct hlist_head *head;
98	unsigned int h;
99
100	if (id == 0)
101		id = RT_TABLE_MAIN;
102	h = id & (FIB_TABLE_HASHSZ - 1);
103
104	rcu_read_lock();
105	head = &net->ipv4.fib_table_hash[h];
106	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
107		if (tb->tb_id == id) {
108			rcu_read_unlock();
109			return tb;
110		}
111	}
112	rcu_read_unlock();
113	return NULL;
114}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
117void fib_select_default(struct net *net,
118			const struct flowi *flp, struct fib_result *res)
119{
120	struct fib_table *tb;
121	int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124		return;
125	table = res->r->table;
126#endif
127	tb = fib_get_table(net, table);
128	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129		tb->tb_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net)
133{
134	int flushed = 0;
135	struct fib_table *tb;
136	struct hlist_node *node;
137	struct hlist_head *head;
138	unsigned int h;
139
140	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
141		head = &net->ipv4.fib_table_hash[h];
142		hlist_for_each_entry(tb, node, head, tb_hlist)
143			flushed += tb->tb_flush(tb);
144	}
145
146	if (flushed)
147		rt_cache_flush(-1);
148}
149
150/*
151 *	Find the first device with a given source address.
152 */
153
154struct net_device * ip_dev_find(struct net *net, __be32 addr)
155{
156	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157	struct fib_result res;
158	struct net_device *dev = NULL;
159	struct fib_table *local_table;
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162	res.r = NULL;
163#endif
164
165	local_table = fib_get_table(net, RT_TABLE_LOCAL);
166	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
167		return NULL;
168	if (res.type != RTN_LOCAL)
169		goto out;
170	dev = FIB_RES_DEV(res);
171
172	if (dev)
173		dev_hold(dev);
174out:
175	fib_res_put(&res);
176	return dev;
177}
178
179/*
180 * Find address type as if only "dev" was present in the system. If
181 * on_dev is NULL then all interfaces are taken into consideration.
182 */
183static inline unsigned __inet_dev_addr_type(struct net *net,
184					    const struct net_device *dev,
185					    __be32 addr)
186{
187	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188	struct fib_result	res;
189	unsigned ret = RTN_BROADCAST;
190	struct fib_table *local_table;
191
192	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
193		return RTN_BROADCAST;
194	if (ipv4_is_multicast(addr))
195		return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198	res.r = NULL;
199#endif
200
201	local_table = fib_get_table(net, RT_TABLE_LOCAL);
202	if (local_table) {
203		ret = RTN_UNICAST;
204		if (!local_table->tb_lookup(local_table, &fl, &res)) {
205			if (!dev || dev == res.fi->fib_dev)
206				ret = res.type;
207			fib_res_put(&res);
208		}
209	}
210	return ret;
211}
212
213unsigned int inet_addr_type(struct net *net, __be32 addr)
214{
215	return __inet_dev_addr_type(net, NULL, addr);
216}
217
218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219				__be32 addr)
220{
221       return __inet_dev_addr_type(net, dev, addr);
222}
223
/*
 * Given (packet source, input interface) and optional (dst, oif, tos):
 *  - (main) check that the source is valid, i.e. the reverse route to it
 *    is a unicast route (not broadcast or one of our local addresses);
 *  - figure out which "logical" interface this packet arrived on and
 *    calculate the "specific destination" address;
 *  - check that the packet arrived from the expected physical interface.
 *
 * The lookup is done on the reversed flow (daddr = src, saddr = dst).
 *
 * On success *spec_dst is filled in and the return value is 0 or 1 (the
 * value of "nexthop scope >= RT_SCOPE_HOST", or 0 on the fallback paths).
 * *itag is set through fib_combine_itag() or reset to 0 on the
 * last-resort path.
 * Returns -EINVAL if @dev has no in_device, if the first lookup yields a
 * non-unicast route, or if the device's rp_filter setting rejects the
 * packet.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
	struct in_device *in_dev;
	/* Reversed flow: route back towards the packet's source. */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .iif = oif };
	struct fib_result res;
	int no_addr, rpf;
	int ret;
	struct net *net;

	no_addr = rpf = 0;
	/* Sample the per-device settings under RCU. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
	}
	rcu_read_unlock();

	/* in_dev is only compared against NULL from here on. */
	if (in_dev == NULL)
		goto e_inval;

	net = dev_net(dev);
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	/*
	 * Accept if the reverse route leaves through the arrival device
	 * (or, with multipath, if the route has more than one nexthop).
	 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf)
		goto e_inval;
	/* Retry the lookup restricted to the arrival device. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* No usable route: rp_filter rejects, otherwise pick any address. */
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}
304
305static inline __be32 sk_extract_addr(struct sockaddr *addr)
306{
307	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
308}
309
310static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
311{
312	struct nlattr *nla;
313
314	nla = (struct nlattr *) ((char *) mx + len);
315	nla->nla_type = type;
316	nla->nla_len = nla_attr_size(4);
317	*(u32 *) nla_data(nla) = value;
318
319	return len + nla_total_size(4);
320}
321
/*
 * Translate a "struct rtentry" (as handed to ip_rt_ioctl() for
 * SIOCADDRT/SIOCDELRT) into a struct fib_config.
 *
 * @cfg is zeroed first.  rt->rt_dev is read with copy_from_user(), i.e. it
 * is treated as a user-space pointer.  The only caller in this file holds
 * the RTNL lock around this function.
 *
 * Returns 0 on success or a negative errno (-EAFNOSUPPORT, -EINVAL,
 * -EFAULT, -ENODEV, -ENOMEM).  On success cfg->fc_mx may point to a
 * kzalloc()ed metrics buffer that the caller has to kfree().
 */
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	/* Only additions carry the create flag and a protocol. */
	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* A non-zero rt_metric is stored off by one as the priority. */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	/* Reject routes need neither device nor gateway: done. */
	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE acts as "not decided yet"; fixed up below. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		/*
		 * A name of the form "dev:label" selects an address label:
		 * look the device up by the part before the colon.
		 */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/* Restore the full label and find the matching address. */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(net, addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	/* Deletions do not need the remaining checks or any metrics. */
	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for at most three 4-byte attributes, one per flag. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/*
		 * NOTE(review): 40 is presumably the IPv4 + TCP header size;
		 * an rt_mtu below 40 is not checked here - confirm.
		 */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		/* NOTE(review): the << 3 looks like a unit scaling - confirm. */
		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
449
450/*
451 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
452 */
453
454int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
455{
456	struct fib_config cfg;
457	struct rtentry rt;
458	int err;
459
460	switch (cmd) {
461	case SIOCADDRT:		/* Add a route */
462	case SIOCDELRT:		/* Delete a route */
463		if (!capable(CAP_NET_ADMIN))
464			return -EPERM;
465
466		if (copy_from_user(&rt, arg, sizeof(rt)))
467			return -EFAULT;
468
469		rtnl_lock();
470		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
471		if (err == 0) {
472			struct fib_table *tb;
473
474			if (cmd == SIOCDELRT) {
475				tb = fib_get_table(net, cfg.fc_table);
476				if (tb)
477					err = tb->tb_delete(tb, &cfg);
478				else
479					err = -ESRCH;
480			} else {
481				tb = fib_new_table(net, cfg.fc_table);
482				if (tb)
483					err = tb->tb_insert(tb, &cfg);
484				else
485					err = -ENOBUFS;
486			}
487
488			/* allocated by rtentry_to_fib_config() */
489			kfree(cfg.fc_mx);
490		}
491		rtnl_unlock();
492		return err;
493	}
494	return -EINVAL;
495}
496
497const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
498	[RTA_DST]		= { .type = NLA_U32 },
499	[RTA_SRC]		= { .type = NLA_U32 },
500	[RTA_IIF]		= { .type = NLA_U32 },
501	[RTA_OIF]		= { .type = NLA_U32 },
502	[RTA_GATEWAY]		= { .type = NLA_U32 },
503	[RTA_PRIORITY]		= { .type = NLA_U32 },
504	[RTA_PREFSRC]		= { .type = NLA_U32 },
505	[RTA_METRICS]		= { .type = NLA_NESTED },
506	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
507	[RTA_FLOW]		= { .type = NLA_U32 },
508};
509
510static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
511			    struct nlmsghdr *nlh, struct fib_config *cfg)
512{
513	struct nlattr *attr;
514	int err, remaining;
515	struct rtmsg *rtm;
516
517	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
518	if (err < 0)
519		goto errout;
520
521	memset(cfg, 0, sizeof(*cfg));
522
523	rtm = nlmsg_data(nlh);
524	cfg->fc_dst_len = rtm->rtm_dst_len;
525	cfg->fc_tos = rtm->rtm_tos;
526	cfg->fc_table = rtm->rtm_table;
527	cfg->fc_protocol = rtm->rtm_protocol;
528	cfg->fc_scope = rtm->rtm_scope;
529	cfg->fc_type = rtm->rtm_type;
530	cfg->fc_flags = rtm->rtm_flags;
531	cfg->fc_nlflags = nlh->nlmsg_flags;
532
533	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
534	cfg->fc_nlinfo.nlh = nlh;
535	cfg->fc_nlinfo.nl_net = net;
536
537	if (cfg->fc_type > RTN_MAX) {
538		err = -EINVAL;
539		goto errout;
540	}
541
542	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
543		switch (nla_type(attr)) {
544		case RTA_DST:
545			cfg->fc_dst = nla_get_be32(attr);
546			break;
547		case RTA_OIF:
548			cfg->fc_oif = nla_get_u32(attr);
549			break;
550		case RTA_GATEWAY:
551			cfg->fc_gw = nla_get_be32(attr);
552			break;
553		case RTA_PRIORITY:
554			cfg->fc_priority = nla_get_u32(attr);
555			break;
556		case RTA_PREFSRC:
557			cfg->fc_prefsrc = nla_get_be32(attr);
558			break;
559		case RTA_METRICS:
560			cfg->fc_mx = nla_data(attr);
561			cfg->fc_mx_len = nla_len(attr);
562			break;
563		case RTA_MULTIPATH:
564			cfg->fc_mp = nla_data(attr);
565			cfg->fc_mp_len = nla_len(attr);
566			break;
567		case RTA_FLOW:
568			cfg->fc_flow = nla_get_u32(attr);
569			break;
570		case RTA_TABLE:
571			cfg->fc_table = nla_get_u32(attr);
572			break;
573		}
574	}
575
576	return 0;
577errout:
578	return err;
579}
580
581static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
582{
583	struct net *net = sock_net(skb->sk);
584	struct fib_config cfg;
585	struct fib_table *tb;
586	int err;
587
588	err = rtm_to_fib_config(net, skb, nlh, &cfg);
589	if (err < 0)
590		goto errout;
591
592	tb = fib_get_table(net, cfg.fc_table);
593	if (tb == NULL) {
594		err = -ESRCH;
595		goto errout;
596	}
597
598	err = tb->tb_delete(tb, &cfg);
599errout:
600	return err;
601}
602
603static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
604{
605	struct net *net = sock_net(skb->sk);
606	struct fib_config cfg;
607	struct fib_table *tb;
608	int err;
609
610	err = rtm_to_fib_config(net, skb, nlh, &cfg);
611	if (err < 0)
612		goto errout;
613
614	tb = fib_new_table(net, cfg.fc_table);
615	if (tb == NULL) {
616		err = -ENOBUFS;
617		goto errout;
618	}
619
620	err = tb->tb_insert(tb, &cfg);
621errout:
622	return err;
623}
624
625static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
626{
627	struct net *net = sock_net(skb->sk);
628	unsigned int h, s_h;
629	unsigned int e = 0, s_e;
630	struct fib_table *tb;
631	struct hlist_node *node;
632	struct hlist_head *head;
633	int dumped = 0;
634
635	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
636	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
637		return ip_rt_dump(skb, cb);
638
639	s_h = cb->args[0];
640	s_e = cb->args[1];
641
642	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
643		e = 0;
644		head = &net->ipv4.fib_table_hash[h];
645		hlist_for_each_entry(tb, node, head, tb_hlist) {
646			if (e < s_e)
647				goto next;
648			if (dumped)
649				memset(&cb->args[2], 0, sizeof(cb->args) -
650						 2 * sizeof(cb->args[0]));
651			if (tb->tb_dump(tb, skb, cb) < 0)
652				goto out;
653			dumped = 1;
654next:
655			e++;
656		}
657	}
658out:
659	cb->args[1] = e;
660	cb->args[0] = h;
661
662	return skb->len;
663}
664
665/* Prepare and feed intra-kernel routing request.
666   Really, it should be netlink message, but :-( netlink
667   can be not configured, so that we feed it directly
668   to fib engine. It is legal, because all events occur
669   only when netlink is already locked.
670 */
671
672static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
673{
674	struct net *net = dev_net(ifa->ifa_dev->dev);
675	struct fib_table *tb;
676	struct fib_config cfg = {
677		.fc_protocol = RTPROT_KERNEL,
678		.fc_type = type,
679		.fc_dst = dst,
680		.fc_dst_len = dst_len,
681		.fc_prefsrc = ifa->ifa_local,
682		.fc_oif = ifa->ifa_dev->dev->ifindex,
683		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
684		.fc_nlinfo = {
685			.nl_net = net,
686		},
687	};
688
689	if (type == RTN_UNICAST)
690		tb = fib_new_table(net, RT_TABLE_MAIN);
691	else
692		tb = fib_new_table(net, RT_TABLE_LOCAL);
693
694	if (tb == NULL)
695		return;
696
697	cfg.fc_table = tb->tb_id;
698
699	if (type != RTN_LOCAL)
700		cfg.fc_scope = RT_SCOPE_LINK;
701	else
702		cfg.fc_scope = RT_SCOPE_HOST;
703
704	if (cmd == RTM_NEWROUTE)
705		tb->tb_insert(tb, &cfg);
706	else
707		tb->tb_delete(tb, &cfg);
708}
709
710void fib_add_ifaddr(struct in_ifaddr *ifa)
711{
712	struct in_device *in_dev = ifa->ifa_dev;
713	struct net_device *dev = in_dev->dev;
714	struct in_ifaddr *prim = ifa;
715	__be32 mask = ifa->ifa_mask;
716	__be32 addr = ifa->ifa_local;
717	__be32 prefix = ifa->ifa_address&mask;
718
719	if (ifa->ifa_flags&IFA_F_SECONDARY) {
720		prim = inet_ifa_byprefix(in_dev, prefix, mask);
721		if (prim == NULL) {
722			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
723			return;
724		}
725	}
726
727	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
728
729	if (!(dev->flags&IFF_UP))
730		return;
731
732	/* Add broadcast address, if it is explicitly assigned. */
733	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
734		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
735
736	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
737	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
738		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
739			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
740
741		/* Add network specific broadcasts, when it takes a sense */
742		if (ifa->ifa_prefixlen < 31) {
743			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
744			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
745		}
746	}
747}
748
/*
 * Remove the kernel routes that belong to interface address @ifa.
 *
 * For a primary address the prefix route is deleted right away.  The
 * local and broadcast routes are only deleted if no address on the
 * device's address list still needs them; the "ok" bits below record
 * which ones are still in use.  If the local address is gone for good,
 * stale nexthops are synced down and the FIB is flushed.
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	/* All-ones and all-zeros host part of the address' network. */
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
	/* Bits of "ok": the corresponding route is still needed. */
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		/* Routes of a secondary address reference its primary. */
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than add.
	   We should take care not to delete too much :-)

	   Scan address list to be sure that addresses are really gone.
	   NOTE(review): this presumably runs after ifa was unlinked from
	   in_dev->ifa_list, otherwise ifa would match itself - confirm.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check, that this local address finally disappeared. */
		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
			/* And the last, but not the least thing.
			   We must flush stray FIB entries.

			   First of all, we scan fib_info list searching
			   for stray nexthop entries, then ignite fib_flush.
			*/
			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
				fib_flush(dev_net(dev));
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
817
818static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
819{
820
821	struct fib_result       res;
822	struct flowi            fl = { .mark = frn->fl_mark,
823				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
824							    .tos = frn->fl_tos,
825							    .scope = frn->fl_scope } } };
826
827#ifdef CONFIG_IP_MULTIPLE_TABLES
828	res.r = NULL;
829#endif
830
831	frn->err = -ENOENT;
832	if (tb) {
833		local_bh_disable();
834
835		frn->tb_id = tb->tb_id;
836		frn->err = tb->tb_lookup(tb, &fl, &res);
837
838		if (!frn->err) {
839			frn->prefixlen = res.prefixlen;
840			frn->nh_sel = res.nh_sel;
841			frn->type = res.type;
842			frn->scope = res.scope;
843			fib_res_put(&res);
844		}
845		local_bh_enable();
846	}
847}
848
849static void nl_fib_input(struct sk_buff *skb)
850{
851	struct net *net;
852	struct fib_result_nl *frn;
853	struct nlmsghdr *nlh;
854	struct fib_table *tb;
855	u32 pid;
856
857	net = sock_net(skb->sk);
858	nlh = nlmsg_hdr(skb);
859	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
860	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
861		return;
862
863	skb = skb_clone(skb, GFP_KERNEL);
864	if (skb == NULL)
865		return;
866	nlh = nlmsg_hdr(skb);
867
868	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
869	tb = fib_get_table(net, frn->tb_id_in);
870
871	nl_fib_lookup(frn, tb);
872
873	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
874	NETLINK_CB(skb).pid = 0;         /* from kernel */
875	NETLINK_CB(skb).dst_group = 0;  /* unicast */
876	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
877}
878
879static int nl_fib_lookup_init(struct net *net)
880{
881	struct sock *sk;
882	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
883				   nl_fib_input, NULL, THIS_MODULE);
884	if (sk == NULL)
885		return -EAFNOSUPPORT;
886	net->ipv4.fibnl = sk;
887	return 0;
888}
889
890static void nl_fib_lookup_exit(struct net *net)
891{
892	netlink_kernel_release(net->ipv4.fibnl);
893	net->ipv4.fibnl = NULL;
894}
895
/*
 * Take IPv4 routing down on @dev: sync the FIB nexthops using the device
 * down (flushing the FIB if that reports any change), flush the routing
 * cache and bring ARP down for the device.  @force is passed through to
 * fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(0);
	arp_ifdown(dev);
}
903
904static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
905{
906	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
907
908	switch (event) {
909	case NETDEV_UP:
910		fib_add_ifaddr(ifa);
911#ifdef CONFIG_IP_ROUTE_MULTIPATH
912		fib_sync_up(ifa->ifa_dev->dev);
913#endif
914		rt_cache_flush(-1);
915		break;
916	case NETDEV_DOWN:
917		fib_del_ifaddr(ifa);
918		if (ifa->ifa_dev->ifa_list == NULL) {
919			/* Last address was deleted from this interface.
920			   Disable IP.
921			 */
922			fib_disable_ip(ifa->ifa_dev->dev, 1);
923		} else {
924			rt_cache_flush(-1);
925		}
926		break;
927	}
928	return NOTIFY_DONE;
929}
930
931static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
932{
933	struct net_device *dev = ptr;
934	struct in_device *in_dev = __in_dev_get_rtnl(dev);
935
936	if (event == NETDEV_UNREGISTER) {
937		fib_disable_ip(dev, 2);
938		return NOTIFY_DONE;
939	}
940
941	if (!in_dev)
942		return NOTIFY_DONE;
943
944	switch (event) {
945	case NETDEV_UP:
946		for_ifa(in_dev) {
947			fib_add_ifaddr(ifa);
948		} endfor_ifa(in_dev);
949#ifdef CONFIG_IP_ROUTE_MULTIPATH
950		fib_sync_up(dev);
951#endif
952		rt_cache_flush(-1);
953		break;
954	case NETDEV_DOWN:
955		fib_disable_ip(dev, 0);
956		break;
957	case NETDEV_CHANGEMTU:
958	case NETDEV_CHANGE:
959		rt_cache_flush(0);
960		break;
961	}
962	return NOTIFY_DONE;
963}
964
965static struct notifier_block fib_inetaddr_notifier = {
966	.notifier_call =fib_inetaddr_event,
967};
968
969static struct notifier_block fib_netdev_notifier = {
970	.notifier_call =fib_netdev_event,
971};
972
973static int __net_init ip_fib_net_init(struct net *net)
974{
975	int err;
976	unsigned int i;
977
978	net->ipv4.fib_table_hash = kzalloc(
979			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
980	if (net->ipv4.fib_table_hash == NULL)
981		return -ENOMEM;
982
983	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
984		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
985
986	err = fib4_rules_init(net);
987	if (err < 0)
988		goto fail;
989	return 0;
990
991fail:
992	kfree(net->ipv4.fib_table_hash);
993	return err;
994}
995
996static void __net_exit ip_fib_net_exit(struct net *net)
997{
998	unsigned int i;
999
1000#ifdef CONFIG_IP_MULTIPLE_TABLES
1001	fib4_rules_exit(net);
1002#endif
1003
1004	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1005		struct fib_table *tb;
1006		struct hlist_head *head;
1007		struct hlist_node *node, *tmp;
1008
1009		head = &net->ipv4.fib_table_hash[i];
1010		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1011			hlist_del(node);
1012			tb->tb_flush(tb);
1013			kfree(tb);
1014		}
1015	}
1016	kfree(net->ipv4.fib_table_hash);
1017}
1018
1019static int __net_init fib_net_init(struct net *net)
1020{
1021	int error;
1022
1023	error = ip_fib_net_init(net);
1024	if (error < 0)
1025		goto out;
1026	error = nl_fib_lookup_init(net);
1027	if (error < 0)
1028		goto out_nlfl;
1029	error = fib_proc_init(net);
1030	if (error < 0)
1031		goto out_proc;
1032out:
1033	return error;
1034
1035out_proc:
1036	nl_fib_lookup_exit(net);
1037out_nlfl:
1038	ip_fib_net_exit(net);
1039	goto out;
1040}
1041
/*
 * Per-namespace teardown; undoes fib_net_init() in reverse order:
 * proc entries, FIB lookup netlink socket, FIB tables.
 */
static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}
1048
/* Per-network-namespace init/exit hooks, registered by ip_fib_init(). */
static struct pernet_operations fib_net_ops = {
	.init = fib_net_init,
	.exit = fib_net_exit,
};
1053
/*
 * Boot-time initialisation of the FIB frontend: register the rtnetlink
 * route handlers, the per-namespace operations and the device/address
 * notifiers, then initialise the hash-based FIB backend.
 *
 * NOTE(review): the return values of the register_*() calls are not
 * checked here.
 */
void __init ip_fib_init(void)
{
	/* RTM_GETROUTE only has a dump handler, no doit handler. */
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

	register_pernet_subsys(&fib_net_ops);
	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	fib_hash_init();
}
1066
/* Symbols made available to other kernel code, including modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1070