fib_frontend.c revision e2ce146848c81af2f6d42e67990191c284bf0c33
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 *		This program is free software; you can redistribute it and/or
11 *		modify it under the terms of the GNU General Public License
12 *		as published by the Free Software Foundation; either version
13 *		2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
20#include <linux/capability.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
30#include <linux/inetdevice.h>
31#include <linux/netdevice.h>
32#include <linux/if_addr.h>
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
35#include <linux/init.h>
36#include <linux/list.h>
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/arp.h>
44#include <net/ip_fib.h>
45#include <net/rtnetlink.h>
46
47#ifndef CONFIG_IP_MULTIPLE_TABLES
48
49static int __net_init fib4_rules_init(struct net *net)
50{
51	struct fib_table *local_table, *main_table;
52
53	local_table = fib_hash_table(RT_TABLE_LOCAL);
54	if (local_table == NULL)
55		return -ENOMEM;
56
57	main_table  = fib_hash_table(RT_TABLE_MAIN);
58	if (main_table == NULL)
59		goto fail;
60
61	hlist_add_head_rcu(&local_table->tb_hlist,
62				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
63	hlist_add_head_rcu(&main_table->tb_hlist,
64				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
65	return 0;
66
67fail:
68	kfree(local_table);
69	return -ENOMEM;
70}
71#else
72
73struct fib_table *fib_new_table(struct net *net, u32 id)
74{
75	struct fib_table *tb;
76	unsigned int h;
77
78	if (id == 0)
79		id = RT_TABLE_MAIN;
80	tb = fib_get_table(net, id);
81	if (tb)
82		return tb;
83
84	tb = fib_hash_table(id);
85	if (!tb)
86		return NULL;
87	h = id & (FIB_TABLE_HASHSZ - 1);
88	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
89	return tb;
90}
91
92struct fib_table *fib_get_table(struct net *net, u32 id)
93{
94	struct fib_table *tb;
95	struct hlist_node *node;
96	struct hlist_head *head;
97	unsigned int h;
98
99	if (id == 0)
100		id = RT_TABLE_MAIN;
101	h = id & (FIB_TABLE_HASHSZ - 1);
102
103	rcu_read_lock();
104	head = &net->ipv4.fib_table_hash[h];
105	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
106		if (tb->tb_id == id) {
107			rcu_read_unlock();
108			return tb;
109		}
110	}
111	rcu_read_unlock();
112	return NULL;
113}
114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
116void fib_select_default(struct net *net,
117			const struct flowi *flp, struct fib_result *res)
118{
119	struct fib_table *tb;
120	int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123		return;
124	table = res->r->table;
125#endif
126	tb = fib_get_table(net, table);
127	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128		fib_table_select_default(tb, flp, res);
129}
130
131static void fib_flush(struct net *net)
132{
133	int flushed = 0;
134	struct fib_table *tb;
135	struct hlist_node *node;
136	struct hlist_head *head;
137	unsigned int h;
138
139	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
140		head = &net->ipv4.fib_table_hash[h];
141		hlist_for_each_entry(tb, node, head, tb_hlist)
142			flushed += fib_table_flush(tb);
143	}
144
145	if (flushed)
146		rt_cache_flush(net, -1);
147}
148
149/*
150 *	Find the first device with a given source address.
151 */
152
153struct net_device * ip_dev_find(struct net *net, __be32 addr)
154{
155	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156	struct fib_result res;
157	struct net_device *dev = NULL;
158	struct fib_table *local_table;
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161	res.r = NULL;
162#endif
163
164	local_table = fib_get_table(net, RT_TABLE_LOCAL);
165	if (!local_table || fib_table_lookup(local_table, &fl, &res))
166		return NULL;
167	if (res.type != RTN_LOCAL)
168		goto out;
169	dev = FIB_RES_DEV(res);
170
171	if (dev)
172		dev_hold(dev);
173out:
174	fib_res_put(&res);
175	return dev;
176}
177
178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
182static inline unsigned __inet_dev_addr_type(struct net *net,
183					    const struct net_device *dev,
184					    __be32 addr)
185{
186	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187	struct fib_result	res;
188	unsigned ret = RTN_BROADCAST;
189	struct fib_table *local_table;
190
191	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
192		return RTN_BROADCAST;
193	if (ipv4_is_multicast(addr))
194		return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197	res.r = NULL;
198#endif
199
200	local_table = fib_get_table(net, RT_TABLE_LOCAL);
201	if (local_table) {
202		ret = RTN_UNICAST;
203		if (!fib_table_lookup(local_table, &fl, &res)) {
204			if (!dev || dev == res.fi->fib_dev)
205				ret = res.type;
206			fib_res_put(&res);
207		}
208	}
209	return ret;
210}
211
212unsigned int inet_addr_type(struct net *net, __be32 addr)
213{
214	return __inet_dev_addr_type(net, NULL, addr);
215}
216
217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218				__be32 addr)
219{
220       return __inet_dev_addr_type(net, dev, addr);
221}
222
223/* Given (packet source, input interface) and optional (dst, oif, tos):
224   - (main) check, that source is valid i.e. not broadcast or our local
225     address.
226   - figure out what "logical" interface this packet arrived
227     and calculate "specific destination" address.
228   - check, that packet arrived from expected physical interface.
229 */
230
231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
232			struct net_device *dev, __be32 *spec_dst,
233			u32 *itag, u32 mark)
234{
235	struct in_device *in_dev;
236	struct flowi fl = { .nl_u = { .ip4_u =
237				      { .daddr = src,
238					.saddr = dst,
239					.tos = tos } },
240			    .mark = mark,
241			    .iif = oif };
242
243	struct fib_result res;
244	int no_addr, rpf;
245	int ret;
246	struct net *net;
247
248	no_addr = rpf = 0;
249	rcu_read_lock();
250	in_dev = __in_dev_get_rcu(dev);
251	if (in_dev) {
252		no_addr = in_dev->ifa_list == NULL;
253		rpf = IN_DEV_RPFILTER(in_dev);
254	}
255	rcu_read_unlock();
256
257	if (in_dev == NULL)
258		goto e_inval;
259
260	net = dev_net(dev);
261	if (fib_lookup(net, &fl, &res))
262		goto last_resort;
263	if (res.type != RTN_UNICAST)
264		goto e_inval_res;
265	*spec_dst = FIB_RES_PREFSRC(res);
266	fib_combine_itag(itag, &res);
267#ifdef CONFIG_IP_ROUTE_MULTIPATH
268	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
269#else
270	if (FIB_RES_DEV(res) == dev)
271#endif
272	{
273		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
274		fib_res_put(&res);
275		return ret;
276	}
277	fib_res_put(&res);
278	if (no_addr)
279		goto last_resort;
280	if (rpf == 1)
281		goto e_inval;
282	fl.oif = dev->ifindex;
283
284	ret = 0;
285	if (fib_lookup(net, &fl, &res) == 0) {
286		if (res.type == RTN_UNICAST) {
287			*spec_dst = FIB_RES_PREFSRC(res);
288			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
289		}
290		fib_res_put(&res);
291	}
292	return ret;
293
294last_resort:
295	if (rpf)
296		goto e_inval;
297	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
298	*itag = 0;
299	return 0;
300
301e_inval_res:
302	fib_res_put(&res);
303e_inval:
304	return -EINVAL;
305}
306
307static inline __be32 sk_extract_addr(struct sockaddr *addr)
308{
309	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
310}
311
312static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
313{
314	struct nlattr *nla;
315
316	nla = (struct nlattr *) ((char *) mx + len);
317	nla->nla_type = type;
318	nla->nla_len = nla_attr_size(4);
319	*(u32 *) nla_data(nla) = value;
320
321	return len + nla_total_size(4);
322}
323
324static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
325				 struct fib_config *cfg)
326{
327	__be32 addr;
328	int plen;
329
330	memset(cfg, 0, sizeof(*cfg));
331	cfg->fc_nlinfo.nl_net = net;
332
333	if (rt->rt_dst.sa_family != AF_INET)
334		return -EAFNOSUPPORT;
335
336	/*
337	 * Check mask for validity:
338	 * a) it must be contiguous.
339	 * b) destination must have all host bits clear.
340	 * c) if application forgot to set correct family (AF_INET),
341	 *    reject request unless it is absolutely clear i.e.
342	 *    both family and mask are zero.
343	 */
344	plen = 32;
345	addr = sk_extract_addr(&rt->rt_dst);
346	if (!(rt->rt_flags & RTF_HOST)) {
347		__be32 mask = sk_extract_addr(&rt->rt_genmask);
348
349		if (rt->rt_genmask.sa_family != AF_INET) {
350			if (mask || rt->rt_genmask.sa_family)
351				return -EAFNOSUPPORT;
352		}
353
354		if (bad_mask(mask, addr))
355			return -EINVAL;
356
357		plen = inet_mask_len(mask);
358	}
359
360	cfg->fc_dst_len = plen;
361	cfg->fc_dst = addr;
362
363	if (cmd != SIOCDELRT) {
364		cfg->fc_nlflags = NLM_F_CREATE;
365		cfg->fc_protocol = RTPROT_BOOT;
366	}
367
368	if (rt->rt_metric)
369		cfg->fc_priority = rt->rt_metric - 1;
370
371	if (rt->rt_flags & RTF_REJECT) {
372		cfg->fc_scope = RT_SCOPE_HOST;
373		cfg->fc_type = RTN_UNREACHABLE;
374		return 0;
375	}
376
377	cfg->fc_scope = RT_SCOPE_NOWHERE;
378	cfg->fc_type = RTN_UNICAST;
379
380	if (rt->rt_dev) {
381		char *colon;
382		struct net_device *dev;
383		char devname[IFNAMSIZ];
384
385		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
386			return -EFAULT;
387
388		devname[IFNAMSIZ-1] = 0;
389		colon = strchr(devname, ':');
390		if (colon)
391			*colon = 0;
392		dev = __dev_get_by_name(net, devname);
393		if (!dev)
394			return -ENODEV;
395		cfg->fc_oif = dev->ifindex;
396		if (colon) {
397			struct in_ifaddr *ifa;
398			struct in_device *in_dev = __in_dev_get_rtnl(dev);
399			if (!in_dev)
400				return -ENODEV;
401			*colon = ':';
402			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
403				if (strcmp(ifa->ifa_label, devname) == 0)
404					break;
405			if (ifa == NULL)
406				return -ENODEV;
407			cfg->fc_prefsrc = ifa->ifa_local;
408		}
409	}
410
411	addr = sk_extract_addr(&rt->rt_gateway);
412	if (rt->rt_gateway.sa_family == AF_INET && addr) {
413		cfg->fc_gw = addr;
414		if (rt->rt_flags & RTF_GATEWAY &&
415		    inet_addr_type(net, addr) == RTN_UNICAST)
416			cfg->fc_scope = RT_SCOPE_UNIVERSE;
417	}
418
419	if (cmd == SIOCDELRT)
420		return 0;
421
422	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
423		return -EINVAL;
424
425	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
426		cfg->fc_scope = RT_SCOPE_LINK;
427
428	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
429		struct nlattr *mx;
430		int len = 0;
431
432		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
433		if (mx == NULL)
434			return -ENOMEM;
435
436		if (rt->rt_flags & RTF_MTU)
437			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
438
439		if (rt->rt_flags & RTF_WINDOW)
440			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
441
442		if (rt->rt_flags & RTF_IRTT)
443			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
444
445		cfg->fc_mx = mx;
446		cfg->fc_mx_len = len;
447	}
448
449	return 0;
450}
451
452/*
453 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
454 */
455
456int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
457{
458	struct fib_config cfg;
459	struct rtentry rt;
460	int err;
461
462	switch (cmd) {
463	case SIOCADDRT:		/* Add a route */
464	case SIOCDELRT:		/* Delete a route */
465		if (!capable(CAP_NET_ADMIN))
466			return -EPERM;
467
468		if (copy_from_user(&rt, arg, sizeof(rt)))
469			return -EFAULT;
470
471		rtnl_lock();
472		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
473		if (err == 0) {
474			struct fib_table *tb;
475
476			if (cmd == SIOCDELRT) {
477				tb = fib_get_table(net, cfg.fc_table);
478				if (tb)
479					err = fib_table_delete(tb, &cfg);
480				else
481					err = -ESRCH;
482			} else {
483				tb = fib_new_table(net, cfg.fc_table);
484				if (tb)
485					err = fib_table_insert(tb, &cfg);
486				else
487					err = -ENOBUFS;
488			}
489
490			/* allocated by rtentry_to_fib_config() */
491			kfree(cfg.fc_mx);
492		}
493		rtnl_unlock();
494		return err;
495	}
496	return -EINVAL;
497}
498
499const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
500	[RTA_DST]		= { .type = NLA_U32 },
501	[RTA_SRC]		= { .type = NLA_U32 },
502	[RTA_IIF]		= { .type = NLA_U32 },
503	[RTA_OIF]		= { .type = NLA_U32 },
504	[RTA_GATEWAY]		= { .type = NLA_U32 },
505	[RTA_PRIORITY]		= { .type = NLA_U32 },
506	[RTA_PREFSRC]		= { .type = NLA_U32 },
507	[RTA_METRICS]		= { .type = NLA_NESTED },
508	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
509	[RTA_FLOW]		= { .type = NLA_U32 },
510};
511
512static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
513			    struct nlmsghdr *nlh, struct fib_config *cfg)
514{
515	struct nlattr *attr;
516	int err, remaining;
517	struct rtmsg *rtm;
518
519	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
520	if (err < 0)
521		goto errout;
522
523	memset(cfg, 0, sizeof(*cfg));
524
525	rtm = nlmsg_data(nlh);
526	cfg->fc_dst_len = rtm->rtm_dst_len;
527	cfg->fc_tos = rtm->rtm_tos;
528	cfg->fc_table = rtm->rtm_table;
529	cfg->fc_protocol = rtm->rtm_protocol;
530	cfg->fc_scope = rtm->rtm_scope;
531	cfg->fc_type = rtm->rtm_type;
532	cfg->fc_flags = rtm->rtm_flags;
533	cfg->fc_nlflags = nlh->nlmsg_flags;
534
535	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
536	cfg->fc_nlinfo.nlh = nlh;
537	cfg->fc_nlinfo.nl_net = net;
538
539	if (cfg->fc_type > RTN_MAX) {
540		err = -EINVAL;
541		goto errout;
542	}
543
544	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
545		switch (nla_type(attr)) {
546		case RTA_DST:
547			cfg->fc_dst = nla_get_be32(attr);
548			break;
549		case RTA_OIF:
550			cfg->fc_oif = nla_get_u32(attr);
551			break;
552		case RTA_GATEWAY:
553			cfg->fc_gw = nla_get_be32(attr);
554			break;
555		case RTA_PRIORITY:
556			cfg->fc_priority = nla_get_u32(attr);
557			break;
558		case RTA_PREFSRC:
559			cfg->fc_prefsrc = nla_get_be32(attr);
560			break;
561		case RTA_METRICS:
562			cfg->fc_mx = nla_data(attr);
563			cfg->fc_mx_len = nla_len(attr);
564			break;
565		case RTA_MULTIPATH:
566			cfg->fc_mp = nla_data(attr);
567			cfg->fc_mp_len = nla_len(attr);
568			break;
569		case RTA_FLOW:
570			cfg->fc_flow = nla_get_u32(attr);
571			break;
572		case RTA_TABLE:
573			cfg->fc_table = nla_get_u32(attr);
574			break;
575		}
576	}
577
578	return 0;
579errout:
580	return err;
581}
582
583static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
584{
585	struct net *net = sock_net(skb->sk);
586	struct fib_config cfg;
587	struct fib_table *tb;
588	int err;
589
590	err = rtm_to_fib_config(net, skb, nlh, &cfg);
591	if (err < 0)
592		goto errout;
593
594	tb = fib_get_table(net, cfg.fc_table);
595	if (tb == NULL) {
596		err = -ESRCH;
597		goto errout;
598	}
599
600	err = fib_table_delete(tb, &cfg);
601errout:
602	return err;
603}
604
605static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
606{
607	struct net *net = sock_net(skb->sk);
608	struct fib_config cfg;
609	struct fib_table *tb;
610	int err;
611
612	err = rtm_to_fib_config(net, skb, nlh, &cfg);
613	if (err < 0)
614		goto errout;
615
616	tb = fib_new_table(net, cfg.fc_table);
617	if (tb == NULL) {
618		err = -ENOBUFS;
619		goto errout;
620	}
621
622	err = fib_table_insert(tb, &cfg);
623errout:
624	return err;
625}
626
627static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
628{
629	struct net *net = sock_net(skb->sk);
630	unsigned int h, s_h;
631	unsigned int e = 0, s_e;
632	struct fib_table *tb;
633	struct hlist_node *node;
634	struct hlist_head *head;
635	int dumped = 0;
636
637	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
638	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
639		return ip_rt_dump(skb, cb);
640
641	s_h = cb->args[0];
642	s_e = cb->args[1];
643
644	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
645		e = 0;
646		head = &net->ipv4.fib_table_hash[h];
647		hlist_for_each_entry(tb, node, head, tb_hlist) {
648			if (e < s_e)
649				goto next;
650			if (dumped)
651				memset(&cb->args[2], 0, sizeof(cb->args) -
652						 2 * sizeof(cb->args[0]));
653			if (fib_table_dump(tb, skb, cb) < 0)
654				goto out;
655			dumped = 1;
656next:
657			e++;
658		}
659	}
660out:
661	cb->args[1] = e;
662	cb->args[0] = h;
663
664	return skb->len;
665}
666
667/* Prepare and feed intra-kernel routing request.
668   Really, it should be netlink message, but :-( netlink
669   can be not configured, so that we feed it directly
670   to fib engine. It is legal, because all events occur
671   only when netlink is already locked.
672 */
673
674static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
675{
676	struct net *net = dev_net(ifa->ifa_dev->dev);
677	struct fib_table *tb;
678	struct fib_config cfg = {
679		.fc_protocol = RTPROT_KERNEL,
680		.fc_type = type,
681		.fc_dst = dst,
682		.fc_dst_len = dst_len,
683		.fc_prefsrc = ifa->ifa_local,
684		.fc_oif = ifa->ifa_dev->dev->ifindex,
685		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
686		.fc_nlinfo = {
687			.nl_net = net,
688		},
689	};
690
691	if (type == RTN_UNICAST)
692		tb = fib_new_table(net, RT_TABLE_MAIN);
693	else
694		tb = fib_new_table(net, RT_TABLE_LOCAL);
695
696	if (tb == NULL)
697		return;
698
699	cfg.fc_table = tb->tb_id;
700
701	if (type != RTN_LOCAL)
702		cfg.fc_scope = RT_SCOPE_LINK;
703	else
704		cfg.fc_scope = RT_SCOPE_HOST;
705
706	if (cmd == RTM_NEWROUTE)
707		fib_table_insert(tb, &cfg);
708	else
709		fib_table_delete(tb, &cfg);
710}
711
712void fib_add_ifaddr(struct in_ifaddr *ifa)
713{
714	struct in_device *in_dev = ifa->ifa_dev;
715	struct net_device *dev = in_dev->dev;
716	struct in_ifaddr *prim = ifa;
717	__be32 mask = ifa->ifa_mask;
718	__be32 addr = ifa->ifa_local;
719	__be32 prefix = ifa->ifa_address&mask;
720
721	if (ifa->ifa_flags&IFA_F_SECONDARY) {
722		prim = inet_ifa_byprefix(in_dev, prefix, mask);
723		if (prim == NULL) {
724			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
725			return;
726		}
727	}
728
729	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
730
731	if (!(dev->flags&IFF_UP))
732		return;
733
734	/* Add broadcast address, if it is explicitly assigned. */
735	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
736		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
737
738	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
739	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
740		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
741			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
742
743		/* Add network specific broadcasts, when it takes a sense */
744		if (ifa->ifa_prefixlen < 31) {
745			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
746			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
747		}
748	}
749}
750
751static void fib_del_ifaddr(struct in_ifaddr *ifa)
752{
753	struct in_device *in_dev = ifa->ifa_dev;
754	struct net_device *dev = in_dev->dev;
755	struct in_ifaddr *ifa1;
756	struct in_ifaddr *prim = ifa;
757	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
758	__be32 any = ifa->ifa_address&ifa->ifa_mask;
759#define LOCAL_OK	1
760#define BRD_OK		2
761#define BRD0_OK		4
762#define BRD1_OK		8
763	unsigned ok = 0;
764
765	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
766		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
767			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
768	else {
769		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
770		if (prim == NULL) {
771			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
772			return;
773		}
774	}
775
776	/* Deletion is more complicated than add.
777	   We should take care of not to delete too much :-)
778
779	   Scan address list to be sure that addresses are really gone.
780	 */
781
782	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
783		if (ifa->ifa_local == ifa1->ifa_local)
784			ok |= LOCAL_OK;
785		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
786			ok |= BRD_OK;
787		if (brd == ifa1->ifa_broadcast)
788			ok |= BRD1_OK;
789		if (any == ifa1->ifa_broadcast)
790			ok |= BRD0_OK;
791	}
792
793	if (!(ok&BRD_OK))
794		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
795	if (!(ok&BRD1_OK))
796		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
797	if (!(ok&BRD0_OK))
798		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
799	if (!(ok&LOCAL_OK)) {
800		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
801
802		/* Check, that this local address finally disappeared. */
803		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
804			/* And the last, but not the least thing.
805			   We must flush stray FIB entries.
806
807			   First of all, we scan fib_info list searching
808			   for stray nexthop entries, then ignite fib_flush.
809			*/
810			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
811				fib_flush(dev_net(dev));
812		}
813	}
814#undef LOCAL_OK
815#undef BRD_OK
816#undef BRD0_OK
817#undef BRD1_OK
818}
819
820static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
821{
822
823	struct fib_result       res;
824	struct flowi            fl = { .mark = frn->fl_mark,
825				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
826							    .tos = frn->fl_tos,
827							    .scope = frn->fl_scope } } };
828
829#ifdef CONFIG_IP_MULTIPLE_TABLES
830	res.r = NULL;
831#endif
832
833	frn->err = -ENOENT;
834	if (tb) {
835		local_bh_disable();
836
837		frn->tb_id = tb->tb_id;
838		frn->err = fib_table_lookup(tb, &fl, &res);
839
840		if (!frn->err) {
841			frn->prefixlen = res.prefixlen;
842			frn->nh_sel = res.nh_sel;
843			frn->type = res.type;
844			frn->scope = res.scope;
845			fib_res_put(&res);
846		}
847		local_bh_enable();
848	}
849}
850
851static void nl_fib_input(struct sk_buff *skb)
852{
853	struct net *net;
854	struct fib_result_nl *frn;
855	struct nlmsghdr *nlh;
856	struct fib_table *tb;
857	u32 pid;
858
859	net = sock_net(skb->sk);
860	nlh = nlmsg_hdr(skb);
861	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
862	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
863		return;
864
865	skb = skb_clone(skb, GFP_KERNEL);
866	if (skb == NULL)
867		return;
868	nlh = nlmsg_hdr(skb);
869
870	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
871	tb = fib_get_table(net, frn->tb_id_in);
872
873	nl_fib_lookup(frn, tb);
874
875	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
876	NETLINK_CB(skb).pid = 0;         /* from kernel */
877	NETLINK_CB(skb).dst_group = 0;  /* unicast */
878	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
879}
880
881static int nl_fib_lookup_init(struct net *net)
882{
883	struct sock *sk;
884	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
885				   nl_fib_input, NULL, THIS_MODULE);
886	if (sk == NULL)
887		return -EAFNOSUPPORT;
888	net->ipv4.fibnl = sk;
889	return 0;
890}
891
892static void nl_fib_lookup_exit(struct net *net)
893{
894	netlink_kernel_release(net->ipv4.fibnl);
895	net->ipv4.fibnl = NULL;
896}
897
/*
 * Take IP routing state for @dev down: sync the FIB for the device
 * (flushing the tables when fib_sync_down_dev() returns non-zero), flush
 * the route cache with the given @delay and call arp_ifdown() for the
 * device.  @force is handed to fib_sync_down_dev() unchanged.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
905
906static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
907{
908	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
909	struct net_device *dev = ifa->ifa_dev->dev;
910
911	switch (event) {
912	case NETDEV_UP:
913		fib_add_ifaddr(ifa);
914#ifdef CONFIG_IP_ROUTE_MULTIPATH
915		fib_sync_up(dev);
916#endif
917		rt_cache_flush(dev_net(dev), -1);
918		break;
919	case NETDEV_DOWN:
920		fib_del_ifaddr(ifa);
921		if (ifa->ifa_dev->ifa_list == NULL) {
922			/* Last address was deleted from this interface.
923			   Disable IP.
924			 */
925			fib_disable_ip(dev, 1, 0);
926		} else {
927			rt_cache_flush(dev_net(dev), -1);
928		}
929		break;
930	}
931	return NOTIFY_DONE;
932}
933
934static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
935{
936	struct net_device *dev = ptr;
937	struct in_device *in_dev = __in_dev_get_rtnl(dev);
938
939	if (event == NETDEV_UNREGISTER) {
940		fib_disable_ip(dev, 2, -1);
941		return NOTIFY_DONE;
942	}
943
944	if (!in_dev)
945		return NOTIFY_DONE;
946
947	switch (event) {
948	case NETDEV_UP:
949		for_ifa(in_dev) {
950			fib_add_ifaddr(ifa);
951		} endfor_ifa(in_dev);
952#ifdef CONFIG_IP_ROUTE_MULTIPATH
953		fib_sync_up(dev);
954#endif
955		rt_cache_flush(dev_net(dev), -1);
956		break;
957	case NETDEV_DOWN:
958		fib_disable_ip(dev, 0, 0);
959		break;
960	case NETDEV_CHANGEMTU:
961	case NETDEV_CHANGE:
962	case NETDEV_UNREGISTER_PERNET:
963		rt_cache_flush(dev_net(dev), 0);
964		break;
965	}
966	return NOTIFY_DONE;
967}
968
969static struct notifier_block fib_inetaddr_notifier = {
970	.notifier_call = fib_inetaddr_event,
971};
972
973static struct notifier_block fib_netdev_notifier = {
974	.notifier_call = fib_netdev_event,
975};
976
977static int __net_init ip_fib_net_init(struct net *net)
978{
979	int err;
980	unsigned int i;
981
982	net->ipv4.fib_table_hash = kzalloc(
983			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
984	if (net->ipv4.fib_table_hash == NULL)
985		return -ENOMEM;
986
987	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
988		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
989
990	err = fib4_rules_init(net);
991	if (err < 0)
992		goto fail;
993	return 0;
994
995fail:
996	kfree(net->ipv4.fib_table_hash);
997	return err;
998}
999
1000static void __net_exit ip_fib_net_exit(struct net *net)
1001{
1002	unsigned int i;
1003
1004#ifdef CONFIG_IP_MULTIPLE_TABLES
1005	fib4_rules_exit(net);
1006#endif
1007
1008	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1009		struct fib_table *tb;
1010		struct hlist_head *head;
1011		struct hlist_node *node, *tmp;
1012
1013		head = &net->ipv4.fib_table_hash[i];
1014		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1015			hlist_del(node);
1016			fib_table_flush(tb);
1017			kfree(tb);
1018		}
1019	}
1020	kfree(net->ipv4.fib_table_hash);
1021}
1022
1023static int __net_init fib_net_init(struct net *net)
1024{
1025	int error;
1026
1027	error = ip_fib_net_init(net);
1028	if (error < 0)
1029		goto out;
1030	error = nl_fib_lookup_init(net);
1031	if (error < 0)
1032		goto out_nlfl;
1033	error = fib_proc_init(net);
1034	if (error < 0)
1035		goto out_proc;
1036out:
1037	return error;
1038
1039out_proc:
1040	nl_fib_lookup_exit(net);
1041out_nlfl:
1042	ip_fib_net_exit(net);
1043	goto out;
1044}
1045
1046static void __net_exit fib_net_exit(struct net *net)
1047{
1048	fib_proc_exit(net);
1049	nl_fib_lookup_exit(net);
1050	ip_fib_net_exit(net);
1051}
1052
1053static struct pernet_operations fib_net_ops = {
1054	.init = fib_net_init,
1055	.exit = fib_net_exit,
1056};
1057
1058void __init ip_fib_init(void)
1059{
1060	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1061	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1062	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1063
1064	register_pernet_subsys(&fib_net_ops);
1065	register_netdevice_notifier(&fib_netdev_notifier);
1066	register_inetaddr_notifier(&fib_inetaddr_notifier);
1067
1068	fib_hash_init();
1069}
1070
/* Address classification and device lookup helpers exported by this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1074