addr.c revision e4dd23d753c3cb0d8533d353069e8b2e8a666360
1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/mutex.h>
37#include <linux/inetdevice.h>
38#include <linux/slab.h>
39#include <linux/workqueue.h>
40#include <linux/module.h>
41#include <net/arp.h>
42#include <net/neighbour.h>
43#include <net/route.h>
44#include <net/netevent.h>
45#include <net/addrconf.h>
46#include <net/ip6_route.h>
47#include <rdma/ib_addr.h>
48
49MODULE_AUTHOR("Sean Hefty");
50MODULE_DESCRIPTION("IB Address Translation");
51MODULE_LICENSE("Dual BSD/GPL");
52
53struct addr_req {
54	struct list_head list;
55	struct sockaddr_storage src_addr;
56	struct sockaddr_storage dst_addr;
57	struct rdma_dev_addr *addr;
58	struct rdma_addr_client *client;
59	void *context;
60	void (*callback)(int status, struct sockaddr *src_addr,
61			 struct rdma_dev_addr *addr, void *context);
62	unsigned long timeout;
63	int status;
64};
65
static void process_req(struct work_struct *work);

/* Workqueue that runs the delayed work item driving process_req(). */
static struct workqueue_struct *addr_wq;
static DECLARE_DELAYED_WORK(work, process_req);

/* Pending requests, kept sorted by timeout; guarded by 'lock'. */
static LIST_HEAD(req_list);
static DEFINE_MUTEX(lock);
72
73void rdma_addr_register_client(struct rdma_addr_client *client)
74{
75	atomic_set(&client->refcount, 1);
76	init_completion(&client->comp);
77}
78EXPORT_SYMBOL(rdma_addr_register_client);
79
80static inline void put_client(struct rdma_addr_client *client)
81{
82	if (atomic_dec_and_test(&client->refcount))
83		complete(&client->comp);
84}
85
86void rdma_addr_unregister_client(struct rdma_addr_client *client)
87{
88	put_client(client);
89	wait_for_completion(&client->comp);
90}
91EXPORT_SYMBOL(rdma_addr_unregister_client);
92
93int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
94		     const unsigned char *dst_dev_addr)
95{
96	dev_addr->dev_type = dev->type;
97	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
98	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
99	if (dst_dev_addr)
100		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
101	dev_addr->bound_dev_if = dev->ifindex;
102	return 0;
103}
104EXPORT_SYMBOL(rdma_copy_addr);
105
106int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
107{
108	struct net_device *dev;
109	int ret = -EADDRNOTAVAIL;
110
111	if (dev_addr->bound_dev_if) {
112		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
113		if (!dev)
114			return -ENODEV;
115		ret = rdma_copy_addr(dev_addr, dev, NULL);
116		dev_put(dev);
117		return ret;
118	}
119
120	switch (addr->sa_family) {
121	case AF_INET:
122		dev = ip_dev_find(&init_net,
123			((struct sockaddr_in *) addr)->sin_addr.s_addr);
124
125		if (!dev)
126			return ret;
127
128		ret = rdma_copy_addr(dev_addr, dev, NULL);
129		dev_put(dev);
130		break;
131
132#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
133	case AF_INET6:
134		rcu_read_lock();
135		for_each_netdev_rcu(&init_net, dev) {
136			if (ipv6_chk_addr(&init_net,
137					  &((struct sockaddr_in6 *) addr)->sin6_addr,
138					  dev, 1)) {
139				ret = rdma_copy_addr(dev_addr, dev, NULL);
140				break;
141			}
142		}
143		rcu_read_unlock();
144		break;
145#endif
146	}
147	return ret;
148}
149EXPORT_SYMBOL(rdma_translate_ip);
150
151static void set_timeout(unsigned long time)
152{
153	unsigned long delay;
154
155	cancel_delayed_work(&work);
156
157	delay = time - jiffies;
158	if ((long)delay <= 0)
159		delay = 1;
160
161	queue_delayed_work(addr_wq, &work, delay);
162}
163
164static void queue_req(struct addr_req *req)
165{
166	struct addr_req *temp_req;
167
168	mutex_lock(&lock);
169	list_for_each_entry_reverse(temp_req, &req_list, list) {
170		if (time_after_eq(req->timeout, temp_req->timeout))
171			break;
172	}
173
174	list_add(&req->list, &temp_req->list);
175
176	if (req_list.next == &req->list)
177		set_timeout(req->timeout);
178	mutex_unlock(&lock);
179}
180
181static int addr4_resolve(struct sockaddr_in *src_in,
182			 struct sockaddr_in *dst_in,
183			 struct rdma_dev_addr *addr)
184{
185	__be32 src_ip = src_in->sin_addr.s_addr;
186	__be32 dst_ip = dst_in->sin_addr.s_addr;
187	struct rtable *rt;
188	struct neighbour *neigh;
189	struct flowi4 fl4;
190	int ret;
191
192	memset(&fl4, 0, sizeof(fl4));
193	fl4.daddr = dst_ip;
194	fl4.saddr = src_ip;
195	fl4.flowi4_oif = addr->bound_dev_if;
196	rt = ip_route_output_key(&init_net, &fl4);
197	if (IS_ERR(rt)) {
198		ret = PTR_ERR(rt);
199		goto out;
200	}
201	src_in->sin_family = AF_INET;
202	src_in->sin_addr.s_addr = fl4.saddr;
203
204	if (rt->dst.dev->flags & IFF_LOOPBACK) {
205		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
206		if (!ret)
207			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
208		goto put;
209	}
210
211	/* If the device does ARP internally, return 'done' */
212	if (rt->dst.dev->flags & IFF_NOARP) {
213		ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
214		goto put;
215	}
216
217	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev);
218	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
219		neigh_event_send(dst_get_neighbour(&rt->dst), NULL);
220		ret = -ENODATA;
221		if (neigh)
222			goto release;
223		goto put;
224	}
225
226	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
227release:
228	neigh_release(neigh);
229put:
230	ip_rt_put(rt);
231out:
232	return ret;
233}
234
235#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
236static int addr6_resolve(struct sockaddr_in6 *src_in,
237			 struct sockaddr_in6 *dst_in,
238			 struct rdma_dev_addr *addr)
239{
240	struct flowi6 fl6;
241	struct neighbour *neigh;
242	struct dst_entry *dst;
243	int ret;
244
245	memset(&fl6, 0, sizeof fl6);
246	ipv6_addr_copy(&fl6.daddr, &dst_in->sin6_addr);
247	ipv6_addr_copy(&fl6.saddr, &src_in->sin6_addr);
248	fl6.flowi6_oif = addr->bound_dev_if;
249
250	dst = ip6_route_output(&init_net, NULL, &fl6);
251	if ((ret = dst->error))
252		goto put;
253
254	if (ipv6_addr_any(&fl6.saddr)) {
255		ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
256					 &fl6.daddr, 0, &fl6.saddr);
257		if (ret)
258			goto put;
259
260		src_in->sin6_family = AF_INET6;
261		ipv6_addr_copy(&src_in->sin6_addr, &fl6.saddr);
262	}
263
264	if (dst->dev->flags & IFF_LOOPBACK) {
265		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
266		if (!ret)
267			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
268		goto put;
269	}
270
271	/* If the device does ARP internally, return 'done' */
272	if (dst->dev->flags & IFF_NOARP) {
273		ret = rdma_copy_addr(addr, dst->dev, NULL);
274		goto put;
275	}
276
277	neigh = dst_get_neighbour(dst);
278	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
279		if (neigh)
280			neigh_event_send(neigh, NULL);
281		ret = -ENODATA;
282		goto put;
283	}
284
285	ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
286put:
287	dst_release(dst);
288	return ret;
289}
290#else
291static int addr6_resolve(struct sockaddr_in6 *src_in,
292			 struct sockaddr_in6 *dst_in,
293			 struct rdma_dev_addr *addr)
294{
295	return -EADDRNOTAVAIL;
296}
297#endif
298
299static int addr_resolve(struct sockaddr *src_in,
300			struct sockaddr *dst_in,
301			struct rdma_dev_addr *addr)
302{
303	if (src_in->sa_family == AF_INET) {
304		return addr4_resolve((struct sockaddr_in *) src_in,
305			(struct sockaddr_in *) dst_in, addr);
306	} else
307		return addr6_resolve((struct sockaddr_in6 *) src_in,
308			(struct sockaddr_in6 *) dst_in, addr);
309}
310
311static void process_req(struct work_struct *work)
312{
313	struct addr_req *req, *temp_req;
314	struct sockaddr *src_in, *dst_in;
315	struct list_head done_list;
316
317	INIT_LIST_HEAD(&done_list);
318
319	mutex_lock(&lock);
320	list_for_each_entry_safe(req, temp_req, &req_list, list) {
321		if (req->status == -ENODATA) {
322			src_in = (struct sockaddr *) &req->src_addr;
323			dst_in = (struct sockaddr *) &req->dst_addr;
324			req->status = addr_resolve(src_in, dst_in, req->addr);
325			if (req->status && time_after_eq(jiffies, req->timeout))
326				req->status = -ETIMEDOUT;
327			else if (req->status == -ENODATA)
328				continue;
329		}
330		list_move_tail(&req->list, &done_list);
331	}
332
333	if (!list_empty(&req_list)) {
334		req = list_entry(req_list.next, struct addr_req, list);
335		set_timeout(req->timeout);
336	}
337	mutex_unlock(&lock);
338
339	list_for_each_entry_safe(req, temp_req, &done_list, list) {
340		list_del(&req->list);
341		req->callback(req->status, (struct sockaddr *) &req->src_addr,
342			req->addr, req->context);
343		put_client(req->client);
344		kfree(req);
345	}
346}
347
348int rdma_resolve_ip(struct rdma_addr_client *client,
349		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
350		    struct rdma_dev_addr *addr, int timeout_ms,
351		    void (*callback)(int status, struct sockaddr *src_addr,
352				     struct rdma_dev_addr *addr, void *context),
353		    void *context)
354{
355	struct sockaddr *src_in, *dst_in;
356	struct addr_req *req;
357	int ret = 0;
358
359	req = kzalloc(sizeof *req, GFP_KERNEL);
360	if (!req)
361		return -ENOMEM;
362
363	src_in = (struct sockaddr *) &req->src_addr;
364	dst_in = (struct sockaddr *) &req->dst_addr;
365
366	if (src_addr) {
367		if (src_addr->sa_family != dst_addr->sa_family) {
368			ret = -EINVAL;
369			goto err;
370		}
371
372		memcpy(src_in, src_addr, ip_addr_size(src_addr));
373	} else {
374		src_in->sa_family = dst_addr->sa_family;
375	}
376
377	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
378	req->addr = addr;
379	req->callback = callback;
380	req->context = context;
381	req->client = client;
382	atomic_inc(&client->refcount);
383
384	req->status = addr_resolve(src_in, dst_in, addr);
385	switch (req->status) {
386	case 0:
387		req->timeout = jiffies;
388		queue_req(req);
389		break;
390	case -ENODATA:
391		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
392		queue_req(req);
393		break;
394	default:
395		ret = req->status;
396		atomic_dec(&client->refcount);
397		goto err;
398	}
399	return ret;
400err:
401	kfree(req);
402	return ret;
403}
404EXPORT_SYMBOL(rdma_resolve_ip);
405
406void rdma_addr_cancel(struct rdma_dev_addr *addr)
407{
408	struct addr_req *req, *temp_req;
409
410	mutex_lock(&lock);
411	list_for_each_entry_safe(req, temp_req, &req_list, list) {
412		if (req->addr == addr) {
413			req->status = -ECANCELED;
414			req->timeout = jiffies;
415			list_move(&req->list, &req_list);
416			set_timeout(req->timeout);
417			break;
418		}
419	}
420	mutex_unlock(&lock);
421}
422EXPORT_SYMBOL(rdma_addr_cancel);
423
424static int netevent_callback(struct notifier_block *self, unsigned long event,
425	void *ctx)
426{
427	if (event == NETEVENT_NEIGH_UPDATE) {
428		struct neighbour *neigh = ctx;
429
430		if (neigh->nud_state & NUD_VALID) {
431			set_timeout(jiffies);
432		}
433	}
434	return 0;
435}
436
437static struct notifier_block nb = {
438	.notifier_call = netevent_callback
439};
440
441static int __init addr_init(void)
442{
443	addr_wq = create_singlethread_workqueue("ib_addr");
444	if (!addr_wq)
445		return -ENOMEM;
446
447	register_netevent_notifier(&nb);
448	return 0;
449}
450
451static void __exit addr_cleanup(void)
452{
453	unregister_netevent_notifier(&nb);
454	destroy_workqueue(addr_wq);
455}
456
/* Module entry and exit points. */
module_init(addr_init);
module_exit(addr_cleanup);
459