1/*
2 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	copyright notice, this list of conditions and the following
16 *	disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	copyright notice, this list of conditions and the following
20 *	disclaimer in the documentation and/or other materials
21 *	provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/completion.h>
34#include <linux/file.h>
35#include <linux/mutex.h>
36#include <linux/poll.h>
37#include <linux/sched.h>
38#include <linux/idr.h>
39#include <linux/in.h>
40#include <linux/in6.h>
41#include <linux/miscdevice.h>
42#include <linux/slab.h>
43#include <linux/sysctl.h>
44#include <linux/module.h>
45
46#include <rdma/rdma_user_cm.h>
47#include <rdma/ib_marshall.h>
48#include <rdma/rdma_cm.h>
49#include <rdma/rdma_cm_ib.h>
50
51MODULE_AUTHOR("Sean Hefty");
52MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
53MODULE_LICENSE("Dual BSD/GPL");
54
55static unsigned int max_backlog = 1024;
56
57static struct ctl_table_header *ucma_ctl_table_hdr;
58static ctl_table ucma_ctl_table[] = {
59	{
60		.procname	= "max_backlog",
61		.data		= &max_backlog,
62		.maxlen		= sizeof max_backlog,
63		.mode		= 0644,
64		.proc_handler	= proc_dointvec,
65	},
66	{ }
67};
68
69static struct ctl_path ucma_ctl_path[] = {
70	{ .procname = "net" },
71	{ .procname = "rdma_ucm" },
72	{ }
73};
74
75struct ucma_file {
76	struct mutex		mut;
77	struct file		*filp;
78	struct list_head	ctx_list;
79	struct list_head	event_list;
80	wait_queue_head_t	poll_wait;
81};
82
83struct ucma_context {
84	int			id;
85	struct completion	comp;
86	atomic_t		ref;
87	int			events_reported;
88	int			backlog;
89
90	struct ucma_file	*file;
91	struct rdma_cm_id	*cm_id;
92	u64			uid;
93
94	struct list_head	list;
95	struct list_head	mc_list;
96};
97
98struct ucma_multicast {
99	struct ucma_context	*ctx;
100	int			id;
101	int			events_reported;
102
103	u64			uid;
104	struct list_head	list;
105	struct sockaddr_storage	addr;
106};
107
108struct ucma_event {
109	struct ucma_context	*ctx;
110	struct ucma_multicast	*mc;
111	struct list_head	list;
112	struct rdma_cm_id	*cm_id;
113	struct rdma_ucm_event_resp resp;
114};
115
/* Global lock taken around every lookup, insert and removal on the idrs below. */
static DEFINE_MUTEX(mut);
/* id -> struct ucma_context */
static DEFINE_IDR(ctx_idr);
/* id -> struct ucma_multicast */
static DEFINE_IDR(multicast_idr);
119
120static inline struct ucma_context *_ucma_find_context(int id,
121						      struct ucma_file *file)
122{
123	struct ucma_context *ctx;
124
125	ctx = idr_find(&ctx_idr, id);
126	if (!ctx)
127		ctx = ERR_PTR(-ENOENT);
128	else if (ctx->file != file)
129		ctx = ERR_PTR(-EINVAL);
130	return ctx;
131}
132
133static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
134{
135	struct ucma_context *ctx;
136
137	mutex_lock(&mut);
138	ctx = _ucma_find_context(id, file);
139	if (!IS_ERR(ctx))
140		atomic_inc(&ctx->ref);
141	mutex_unlock(&mut);
142	return ctx;
143}
144
145static void ucma_put_ctx(struct ucma_context *ctx)
146{
147	if (atomic_dec_and_test(&ctx->ref))
148		complete(&ctx->comp);
149}
150
151static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
152{
153	struct ucma_context *ctx;
154	int ret;
155
156	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
157	if (!ctx)
158		return NULL;
159
160	atomic_set(&ctx->ref, 1);
161	init_completion(&ctx->comp);
162	INIT_LIST_HEAD(&ctx->mc_list);
163	ctx->file = file;
164
165	do {
166		ret = idr_pre_get(&ctx_idr, GFP_KERNEL);
167		if (!ret)
168			goto error;
169
170		mutex_lock(&mut);
171		ret = idr_get_new(&ctx_idr, ctx, &ctx->id);
172		mutex_unlock(&mut);
173	} while (ret == -EAGAIN);
174
175	if (ret)
176		goto error;
177
178	list_add_tail(&ctx->list, &file->ctx_list);
179	return ctx;
180
181error:
182	kfree(ctx);
183	return NULL;
184}
185
186static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
187{
188	struct ucma_multicast *mc;
189	int ret;
190
191	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
192	if (!mc)
193		return NULL;
194
195	do {
196		ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
197		if (!ret)
198			goto error;
199
200		mutex_lock(&mut);
201		ret = idr_get_new(&multicast_idr, mc, &mc->id);
202		mutex_unlock(&mut);
203	} while (ret == -EAGAIN);
204
205	if (ret)
206		goto error;
207
208	mc->ctx = ctx;
209	list_add_tail(&mc->list, &ctx->mc_list);
210	return mc;
211
212error:
213	kfree(mc);
214	return NULL;
215}
216
217static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
218				 struct rdma_conn_param *src)
219{
220	if (src->private_data_len)
221		memcpy(dst->private_data, src->private_data,
222		       src->private_data_len);
223	dst->private_data_len = src->private_data_len;
224	dst->responder_resources =src->responder_resources;
225	dst->initiator_depth = src->initiator_depth;
226	dst->flow_control = src->flow_control;
227	dst->retry_count = src->retry_count;
228	dst->rnr_retry_count = src->rnr_retry_count;
229	dst->srq = src->srq;
230	dst->qp_num = src->qp_num;
231}
232
233static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
234			       struct rdma_ud_param *src)
235{
236	if (src->private_data_len)
237		memcpy(dst->private_data, src->private_data,
238		       src->private_data_len);
239	dst->private_data_len = src->private_data_len;
240	ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
241	dst->qp_num = src->qp_num;
242	dst->qkey = src->qkey;
243}
244
245static void ucma_set_event_context(struct ucma_context *ctx,
246				   struct rdma_cm_event *event,
247				   struct ucma_event *uevent)
248{
249	uevent->ctx = ctx;
250	switch (event->event) {
251	case RDMA_CM_EVENT_MULTICAST_JOIN:
252	case RDMA_CM_EVENT_MULTICAST_ERROR:
253		uevent->mc = (struct ucma_multicast *)
254			     event->param.ud.private_data;
255		uevent->resp.uid = uevent->mc->uid;
256		uevent->resp.id = uevent->mc->id;
257		break;
258	default:
259		uevent->resp.uid = ctx->uid;
260		uevent->resp.id = ctx->id;
261		break;
262	}
263}
264
265static int ucma_event_handler(struct rdma_cm_id *cm_id,
266			      struct rdma_cm_event *event)
267{
268	struct ucma_event *uevent;
269	struct ucma_context *ctx = cm_id->context;
270	int ret = 0;
271
272	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
273	if (!uevent)
274		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
275
276	uevent->cm_id = cm_id;
277	ucma_set_event_context(ctx, event, uevent);
278	uevent->resp.event = event->event;
279	uevent->resp.status = event->status;
280	if (cm_id->qp_type == IB_QPT_UD)
281		ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
282	else
283		ucma_copy_conn_event(&uevent->resp.param.conn,
284				     &event->param.conn);
285
286	mutex_lock(&ctx->file->mut);
287	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
288		if (!ctx->backlog) {
289			ret = -ENOMEM;
290			kfree(uevent);
291			goto out;
292		}
293		ctx->backlog--;
294	} else if (!ctx->uid) {
295		/*
296		 * We ignore events for new connections until userspace has set
297		 * their context.  This can only happen if an error occurs on a
298		 * new connection before the user accepts it.  This is okay,
299		 * since the accept will just fail later.
300		 */
301		kfree(uevent);
302		goto out;
303	}
304
305	list_add_tail(&uevent->list, &ctx->file->event_list);
306	wake_up_interruptible(&ctx->file->poll_wait);
307out:
308	mutex_unlock(&ctx->file->mut);
309	return ret;
310}
311
312static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
313			      int in_len, int out_len)
314{
315	struct ucma_context *ctx;
316	struct rdma_ucm_get_event cmd;
317	struct ucma_event *uevent;
318	int ret = 0;
319	DEFINE_WAIT(wait);
320
321	if (out_len < sizeof uevent->resp)
322		return -ENOSPC;
323
324	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
325		return -EFAULT;
326
327	mutex_lock(&file->mut);
328	while (list_empty(&file->event_list)) {
329		mutex_unlock(&file->mut);
330
331		if (file->filp->f_flags & O_NONBLOCK)
332			return -EAGAIN;
333
334		if (wait_event_interruptible(file->poll_wait,
335					     !list_empty(&file->event_list)))
336			return -ERESTARTSYS;
337
338		mutex_lock(&file->mut);
339	}
340
341	uevent = list_entry(file->event_list.next, struct ucma_event, list);
342
343	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
344		ctx = ucma_alloc_ctx(file);
345		if (!ctx) {
346			ret = -ENOMEM;
347			goto done;
348		}
349		uevent->ctx->backlog++;
350		ctx->cm_id = uevent->cm_id;
351		ctx->cm_id->context = ctx;
352		uevent->resp.id = ctx->id;
353	}
354
355	if (copy_to_user((void __user *)(unsigned long)cmd.response,
356			 &uevent->resp, sizeof uevent->resp)) {
357		ret = -EFAULT;
358		goto done;
359	}
360
361	list_del(&uevent->list);
362	uevent->ctx->events_reported++;
363	if (uevent->mc)
364		uevent->mc->events_reported++;
365	kfree(uevent);
366done:
367	mutex_unlock(&file->mut);
368	return ret;
369}
370
371static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
372{
373	switch (cmd->ps) {
374	case RDMA_PS_TCP:
375		*qp_type = IB_QPT_RC;
376		return 0;
377	case RDMA_PS_UDP:
378	case RDMA_PS_IPOIB:
379		*qp_type = IB_QPT_UD;
380		return 0;
381	case RDMA_PS_IB:
382		*qp_type = cmd->qp_type;
383		return 0;
384	default:
385		return -EINVAL;
386	}
387}
388
389static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
390			      int in_len, int out_len)
391{
392	struct rdma_ucm_create_id cmd;
393	struct rdma_ucm_create_id_resp resp;
394	struct ucma_context *ctx;
395	enum ib_qp_type qp_type;
396	int ret;
397
398	if (out_len < sizeof(resp))
399		return -ENOSPC;
400
401	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
402		return -EFAULT;
403
404	ret = ucma_get_qp_type(&cmd, &qp_type);
405	if (ret)
406		return ret;
407
408	mutex_lock(&file->mut);
409	ctx = ucma_alloc_ctx(file);
410	mutex_unlock(&file->mut);
411	if (!ctx)
412		return -ENOMEM;
413
414	ctx->uid = cmd.uid;
415	ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);
416	if (IS_ERR(ctx->cm_id)) {
417		ret = PTR_ERR(ctx->cm_id);
418		goto err1;
419	}
420
421	resp.id = ctx->id;
422	if (copy_to_user((void __user *)(unsigned long)cmd.response,
423			 &resp, sizeof(resp))) {
424		ret = -EFAULT;
425		goto err2;
426	}
427	return 0;
428
429err2:
430	rdma_destroy_id(ctx->cm_id);
431err1:
432	mutex_lock(&mut);
433	idr_remove(&ctx_idr, ctx->id);
434	mutex_unlock(&mut);
435	kfree(ctx);
436	return ret;
437}
438
439static void ucma_cleanup_multicast(struct ucma_context *ctx)
440{
441	struct ucma_multicast *mc, *tmp;
442
443	mutex_lock(&mut);
444	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
445		list_del(&mc->list);
446		idr_remove(&multicast_idr, mc->id);
447		kfree(mc);
448	}
449	mutex_unlock(&mut);
450}
451
452static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
453{
454	struct ucma_event *uevent, *tmp;
455
456	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
457		if (uevent->mc != mc)
458			continue;
459
460		list_del(&uevent->list);
461		kfree(uevent);
462	}
463}
464
465/*
466 * We cannot hold file->mut when calling rdma_destroy_id() or we can
467 * deadlock.  We also acquire file->mut in ucma_event_handler(), and
468 * rdma_destroy_id() will wait until all callbacks have completed.
469 */
470static int ucma_free_ctx(struct ucma_context *ctx)
471{
472	int events_reported;
473	struct ucma_event *uevent, *tmp;
474	LIST_HEAD(list);
475
476	/* No new events will be generated after destroying the id. */
477	rdma_destroy_id(ctx->cm_id);
478
479	ucma_cleanup_multicast(ctx);
480
481	/* Cleanup events not yet reported to the user. */
482	mutex_lock(&ctx->file->mut);
483	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
484		if (uevent->ctx == ctx)
485			list_move_tail(&uevent->list, &list);
486	}
487	list_del(&ctx->list);
488	mutex_unlock(&ctx->file->mut);
489
490	list_for_each_entry_safe(uevent, tmp, &list, list) {
491		list_del(&uevent->list);
492		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
493			rdma_destroy_id(uevent->cm_id);
494		kfree(uevent);
495	}
496
497	events_reported = ctx->events_reported;
498	kfree(ctx);
499	return events_reported;
500}
501
502static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
503			       int in_len, int out_len)
504{
505	struct rdma_ucm_destroy_id cmd;
506	struct rdma_ucm_destroy_id_resp resp;
507	struct ucma_context *ctx;
508	int ret = 0;
509
510	if (out_len < sizeof(resp))
511		return -ENOSPC;
512
513	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
514		return -EFAULT;
515
516	mutex_lock(&mut);
517	ctx = _ucma_find_context(cmd.id, file);
518	if (!IS_ERR(ctx))
519		idr_remove(&ctx_idr, ctx->id);
520	mutex_unlock(&mut);
521
522	if (IS_ERR(ctx))
523		return PTR_ERR(ctx);
524
525	ucma_put_ctx(ctx);
526	wait_for_completion(&ctx->comp);
527	resp.events_reported = ucma_free_ctx(ctx);
528
529	if (copy_to_user((void __user *)(unsigned long)cmd.response,
530			 &resp, sizeof(resp)))
531		ret = -EFAULT;
532
533	return ret;
534}
535
536static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
537			      int in_len, int out_len)
538{
539	struct rdma_ucm_bind_addr cmd;
540	struct ucma_context *ctx;
541	int ret;
542
543	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
544		return -EFAULT;
545
546	ctx = ucma_get_ctx(file, cmd.id);
547	if (IS_ERR(ctx))
548		return PTR_ERR(ctx);
549
550	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
551	ucma_put_ctx(ctx);
552	return ret;
553}
554
555static ssize_t ucma_resolve_addr(struct ucma_file *file,
556				 const char __user *inbuf,
557				 int in_len, int out_len)
558{
559	struct rdma_ucm_resolve_addr cmd;
560	struct ucma_context *ctx;
561	int ret;
562
563	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
564		return -EFAULT;
565
566	ctx = ucma_get_ctx(file, cmd.id);
567	if (IS_ERR(ctx))
568		return PTR_ERR(ctx);
569
570	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
571				(struct sockaddr *) &cmd.dst_addr,
572				cmd.timeout_ms);
573	ucma_put_ctx(ctx);
574	return ret;
575}
576
577static ssize_t ucma_resolve_route(struct ucma_file *file,
578				  const char __user *inbuf,
579				  int in_len, int out_len)
580{
581	struct rdma_ucm_resolve_route cmd;
582	struct ucma_context *ctx;
583	int ret;
584
585	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
586		return -EFAULT;
587
588	ctx = ucma_get_ctx(file, cmd.id);
589	if (IS_ERR(ctx))
590		return PTR_ERR(ctx);
591
592	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
593	ucma_put_ctx(ctx);
594	return ret;
595}
596
597static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
598			       struct rdma_route *route)
599{
600	struct rdma_dev_addr *dev_addr;
601
602	resp->num_paths = route->num_paths;
603	switch (route->num_paths) {
604	case 0:
605		dev_addr = &route->addr.dev_addr;
606		rdma_addr_get_dgid(dev_addr,
607				   (union ib_gid *) &resp->ib_route[0].dgid);
608		rdma_addr_get_sgid(dev_addr,
609				   (union ib_gid *) &resp->ib_route[0].sgid);
610		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
611		break;
612	case 2:
613		ib_copy_path_rec_to_user(&resp->ib_route[1],
614					 &route->path_rec[1]);
615		/* fall through */
616	case 1:
617		ib_copy_path_rec_to_user(&resp->ib_route[0],
618					 &route->path_rec[0]);
619		break;
620	default:
621		break;
622	}
623}
624
625static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
626				 struct rdma_route *route)
627{
628	struct rdma_dev_addr *dev_addr;
629	struct net_device *dev;
630	u16 vid = 0;
631
632	resp->num_paths = route->num_paths;
633	switch (route->num_paths) {
634	case 0:
635		dev_addr = &route->addr.dev_addr;
636		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
637			if (dev) {
638				vid = rdma_vlan_dev_vlan_id(dev);
639				dev_put(dev);
640			}
641
642		iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
643				    dev_addr->dst_dev_addr, vid);
644		iboe_addr_get_sgid(dev_addr,
645				   (union ib_gid *) &resp->ib_route[0].sgid);
646		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
647		break;
648	case 2:
649		ib_copy_path_rec_to_user(&resp->ib_route[1],
650					 &route->path_rec[1]);
651		/* fall through */
652	case 1:
653		ib_copy_path_rec_to_user(&resp->ib_route[0],
654					 &route->path_rec[0]);
655		break;
656	default:
657		break;
658	}
659}
660
661static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
662			       struct rdma_route *route)
663{
664	struct rdma_dev_addr *dev_addr;
665
666	dev_addr = &route->addr.dev_addr;
667	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
668	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
669}
670
671static ssize_t ucma_query_route(struct ucma_file *file,
672				const char __user *inbuf,
673				int in_len, int out_len)
674{
675	struct rdma_ucm_query_route cmd;
676	struct rdma_ucm_query_route_resp resp;
677	struct ucma_context *ctx;
678	struct sockaddr *addr;
679	int ret = 0;
680
681	if (out_len < sizeof(resp))
682		return -ENOSPC;
683
684	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
685		return -EFAULT;
686
687	ctx = ucma_get_ctx(file, cmd.id);
688	if (IS_ERR(ctx))
689		return PTR_ERR(ctx);
690
691	memset(&resp, 0, sizeof resp);
692	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
693	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
694				     sizeof(struct sockaddr_in) :
695				     sizeof(struct sockaddr_in6));
696	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
697	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
698				     sizeof(struct sockaddr_in) :
699				     sizeof(struct sockaddr_in6));
700	if (!ctx->cm_id->device)
701		goto out;
702
703	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
704	resp.port_num = ctx->cm_id->port_num;
705	switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
706	case RDMA_TRANSPORT_IB:
707		switch (rdma_port_get_link_layer(ctx->cm_id->device,
708			ctx->cm_id->port_num)) {
709		case IB_LINK_LAYER_INFINIBAND:
710			ucma_copy_ib_route(&resp, &ctx->cm_id->route);
711			break;
712		case IB_LINK_LAYER_ETHERNET:
713			ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
714			break;
715		default:
716			break;
717		}
718		break;
719	case RDMA_TRANSPORT_IWARP:
720		ucma_copy_iw_route(&resp, &ctx->cm_id->route);
721		break;
722	default:
723		break;
724	}
725
726out:
727	if (copy_to_user((void __user *)(unsigned long)cmd.response,
728			 &resp, sizeof(resp)))
729		ret = -EFAULT;
730
731	ucma_put_ctx(ctx);
732	return ret;
733}
734
735static void ucma_copy_conn_param(struct rdma_conn_param *dst,
736				 struct rdma_ucm_conn_param *src)
737{
738	dst->private_data = src->private_data;
739	dst->private_data_len = src->private_data_len;
740	dst->responder_resources =src->responder_resources;
741	dst->initiator_depth = src->initiator_depth;
742	dst->flow_control = src->flow_control;
743	dst->retry_count = src->retry_count;
744	dst->rnr_retry_count = src->rnr_retry_count;
745	dst->srq = src->srq;
746	dst->qp_num = src->qp_num;
747}
748
749static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
750			    int in_len, int out_len)
751{
752	struct rdma_ucm_connect cmd;
753	struct rdma_conn_param conn_param;
754	struct ucma_context *ctx;
755	int ret;
756
757	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
758		return -EFAULT;
759
760	if (!cmd.conn_param.valid)
761		return -EINVAL;
762
763	ctx = ucma_get_ctx(file, cmd.id);
764	if (IS_ERR(ctx))
765		return PTR_ERR(ctx);
766
767	ucma_copy_conn_param(&conn_param, &cmd.conn_param);
768	ret = rdma_connect(ctx->cm_id, &conn_param);
769	ucma_put_ctx(ctx);
770	return ret;
771}
772
773static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
774			   int in_len, int out_len)
775{
776	struct rdma_ucm_listen cmd;
777	struct ucma_context *ctx;
778	int ret;
779
780	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
781		return -EFAULT;
782
783	ctx = ucma_get_ctx(file, cmd.id);
784	if (IS_ERR(ctx))
785		return PTR_ERR(ctx);
786
787	ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
788		       cmd.backlog : max_backlog;
789	ret = rdma_listen(ctx->cm_id, ctx->backlog);
790	ucma_put_ctx(ctx);
791	return ret;
792}
793
794static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
795			   int in_len, int out_len)
796{
797	struct rdma_ucm_accept cmd;
798	struct rdma_conn_param conn_param;
799	struct ucma_context *ctx;
800	int ret;
801
802	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
803		return -EFAULT;
804
805	ctx = ucma_get_ctx(file, cmd.id);
806	if (IS_ERR(ctx))
807		return PTR_ERR(ctx);
808
809	if (cmd.conn_param.valid) {
810		ucma_copy_conn_param(&conn_param, &cmd.conn_param);
811		mutex_lock(&file->mut);
812		ret = rdma_accept(ctx->cm_id, &conn_param);
813		if (!ret)
814			ctx->uid = cmd.uid;
815		mutex_unlock(&file->mut);
816	} else
817		ret = rdma_accept(ctx->cm_id, NULL);
818
819	ucma_put_ctx(ctx);
820	return ret;
821}
822
823static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
824			   int in_len, int out_len)
825{
826	struct rdma_ucm_reject cmd;
827	struct ucma_context *ctx;
828	int ret;
829
830	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
831		return -EFAULT;
832
833	ctx = ucma_get_ctx(file, cmd.id);
834	if (IS_ERR(ctx))
835		return PTR_ERR(ctx);
836
837	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
838	ucma_put_ctx(ctx);
839	return ret;
840}
841
842static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
843			       int in_len, int out_len)
844{
845	struct rdma_ucm_disconnect cmd;
846	struct ucma_context *ctx;
847	int ret;
848
849	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
850		return -EFAULT;
851
852	ctx = ucma_get_ctx(file, cmd.id);
853	if (IS_ERR(ctx))
854		return PTR_ERR(ctx);
855
856	ret = rdma_disconnect(ctx->cm_id);
857	ucma_put_ctx(ctx);
858	return ret;
859}
860
861static ssize_t ucma_init_qp_attr(struct ucma_file *file,
862				 const char __user *inbuf,
863				 int in_len, int out_len)
864{
865	struct rdma_ucm_init_qp_attr cmd;
866	struct ib_uverbs_qp_attr resp;
867	struct ucma_context *ctx;
868	struct ib_qp_attr qp_attr;
869	int ret;
870
871	if (out_len < sizeof(resp))
872		return -ENOSPC;
873
874	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
875		return -EFAULT;
876
877	ctx = ucma_get_ctx(file, cmd.id);
878	if (IS_ERR(ctx))
879		return PTR_ERR(ctx);
880
881	resp.qp_attr_mask = 0;
882	memset(&qp_attr, 0, sizeof qp_attr);
883	qp_attr.qp_state = cmd.qp_state;
884	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
885	if (ret)
886		goto out;
887
888	ib_copy_qp_attr_to_user(&resp, &qp_attr);
889	if (copy_to_user((void __user *)(unsigned long)cmd.response,
890			 &resp, sizeof(resp)))
891		ret = -EFAULT;
892
893out:
894	ucma_put_ctx(ctx);
895	return ret;
896}
897
898static int ucma_set_option_id(struct ucma_context *ctx, int optname,
899			      void *optval, size_t optlen)
900{
901	int ret = 0;
902
903	switch (optname) {
904	case RDMA_OPTION_ID_TOS:
905		if (optlen != sizeof(u8)) {
906			ret = -EINVAL;
907			break;
908		}
909		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
910		break;
911	case RDMA_OPTION_ID_REUSEADDR:
912		if (optlen != sizeof(int)) {
913			ret = -EINVAL;
914			break;
915		}
916		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
917		break;
918	default:
919		ret = -ENOSYS;
920	}
921
922	return ret;
923}
924
925static int ucma_set_ib_path(struct ucma_context *ctx,
926			    struct ib_path_rec_data *path_data, size_t optlen)
927{
928	struct ib_sa_path_rec sa_path;
929	struct rdma_cm_event event;
930	int ret;
931
932	if (optlen % sizeof(*path_data))
933		return -EINVAL;
934
935	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
936		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
937					 IB_PATH_BIDIRECTIONAL))
938			break;
939	}
940
941	if (!optlen)
942		return -EINVAL;
943
944	ib_sa_unpack_path(path_data->path_rec, &sa_path);
945	ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
946	if (ret)
947		return ret;
948
949	memset(&event, 0, sizeof event);
950	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
951	return ucma_event_handler(ctx->cm_id, &event);
952}
953
954static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
955			      void *optval, size_t optlen)
956{
957	int ret;
958
959	switch (optname) {
960	case RDMA_OPTION_IB_PATH:
961		ret = ucma_set_ib_path(ctx, optval, optlen);
962		break;
963	default:
964		ret = -ENOSYS;
965	}
966
967	return ret;
968}
969
970static int ucma_set_option_level(struct ucma_context *ctx, int level,
971				 int optname, void *optval, size_t optlen)
972{
973	int ret;
974
975	switch (level) {
976	case RDMA_OPTION_ID:
977		ret = ucma_set_option_id(ctx, optname, optval, optlen);
978		break;
979	case RDMA_OPTION_IB:
980		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
981		break;
982	default:
983		ret = -ENOSYS;
984	}
985
986	return ret;
987}
988
989static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
990			       int in_len, int out_len)
991{
992	struct rdma_ucm_set_option cmd;
993	struct ucma_context *ctx;
994	void *optval;
995	int ret;
996
997	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
998		return -EFAULT;
999
1000	ctx = ucma_get_ctx(file, cmd.id);
1001	if (IS_ERR(ctx))
1002		return PTR_ERR(ctx);
1003
1004	optval = kmalloc(cmd.optlen, GFP_KERNEL);
1005	if (!optval) {
1006		ret = -ENOMEM;
1007		goto out1;
1008	}
1009
1010	if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
1011			   cmd.optlen)) {
1012		ret = -EFAULT;
1013		goto out2;
1014	}
1015
1016	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
1017				    cmd.optlen);
1018out2:
1019	kfree(optval);
1020out1:
1021	ucma_put_ctx(ctx);
1022	return ret;
1023}
1024
1025static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
1026			   int in_len, int out_len)
1027{
1028	struct rdma_ucm_notify cmd;
1029	struct ucma_context *ctx;
1030	int ret;
1031
1032	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1033		return -EFAULT;
1034
1035	ctx = ucma_get_ctx(file, cmd.id);
1036	if (IS_ERR(ctx))
1037		return PTR_ERR(ctx);
1038
1039	ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
1040	ucma_put_ctx(ctx);
1041	return ret;
1042}
1043
1044static ssize_t ucma_join_multicast(struct ucma_file *file,
1045				   const char __user *inbuf,
1046				   int in_len, int out_len)
1047{
1048	struct rdma_ucm_join_mcast cmd;
1049	struct rdma_ucm_create_id_resp resp;
1050	struct ucma_context *ctx;
1051	struct ucma_multicast *mc;
1052	int ret;
1053
1054	if (out_len < sizeof(resp))
1055		return -ENOSPC;
1056
1057	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1058		return -EFAULT;
1059
1060	ctx = ucma_get_ctx(file, cmd.id);
1061	if (IS_ERR(ctx))
1062		return PTR_ERR(ctx);
1063
1064	mutex_lock(&file->mut);
1065	mc = ucma_alloc_multicast(ctx);
1066	if (!mc) {
1067		ret = -ENOMEM;
1068		goto err1;
1069	}
1070
1071	mc->uid = cmd.uid;
1072	memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
1073	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
1074	if (ret)
1075		goto err2;
1076
1077	resp.id = mc->id;
1078	if (copy_to_user((void __user *)(unsigned long)cmd.response,
1079			 &resp, sizeof(resp))) {
1080		ret = -EFAULT;
1081		goto err3;
1082	}
1083
1084	mutex_unlock(&file->mut);
1085	ucma_put_ctx(ctx);
1086	return 0;
1087
1088err3:
1089	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
1090	ucma_cleanup_mc_events(mc);
1091err2:
1092	mutex_lock(&mut);
1093	idr_remove(&multicast_idr, mc->id);
1094	mutex_unlock(&mut);
1095	list_del(&mc->list);
1096	kfree(mc);
1097err1:
1098	mutex_unlock(&file->mut);
1099	ucma_put_ctx(ctx);
1100	return ret;
1101}
1102
1103static ssize_t ucma_leave_multicast(struct ucma_file *file,
1104				    const char __user *inbuf,
1105				    int in_len, int out_len)
1106{
1107	struct rdma_ucm_destroy_id cmd;
1108	struct rdma_ucm_destroy_id_resp resp;
1109	struct ucma_multicast *mc;
1110	int ret = 0;
1111
1112	if (out_len < sizeof(resp))
1113		return -ENOSPC;
1114
1115	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1116		return -EFAULT;
1117
1118	mutex_lock(&mut);
1119	mc = idr_find(&multicast_idr, cmd.id);
1120	if (!mc)
1121		mc = ERR_PTR(-ENOENT);
1122	else if (mc->ctx->file != file)
1123		mc = ERR_PTR(-EINVAL);
1124	else {
1125		idr_remove(&multicast_idr, mc->id);
1126		atomic_inc(&mc->ctx->ref);
1127	}
1128	mutex_unlock(&mut);
1129
1130	if (IS_ERR(mc)) {
1131		ret = PTR_ERR(mc);
1132		goto out;
1133	}
1134
1135	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
1136	mutex_lock(&mc->ctx->file->mut);
1137	ucma_cleanup_mc_events(mc);
1138	list_del(&mc->list);
1139	mutex_unlock(&mc->ctx->file->mut);
1140
1141	ucma_put_ctx(mc->ctx);
1142	resp.events_reported = mc->events_reported;
1143	kfree(mc);
1144
1145	if (copy_to_user((void __user *)(unsigned long)cmd.response,
1146			 &resp, sizeof(resp)))
1147		ret = -EFAULT;
1148out:
1149	return ret;
1150}
1151
1152static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
1153{
1154	/* Acquire mutex's based on pointer comparison to prevent deadlock. */
1155	if (file1 < file2) {
1156		mutex_lock(&file1->mut);
1157		mutex_lock(&file2->mut);
1158	} else {
1159		mutex_lock(&file2->mut);
1160		mutex_lock(&file1->mut);
1161	}
1162}
1163
1164static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
1165{
1166	if (file1 < file2) {
1167		mutex_unlock(&file2->mut);
1168		mutex_unlock(&file1->mut);
1169	} else {
1170		mutex_unlock(&file1->mut);
1171		mutex_unlock(&file2->mut);
1172	}
1173}
1174
1175static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
1176{
1177	struct ucma_event *uevent, *tmp;
1178
1179	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
1180		if (uevent->ctx == ctx)
1181			list_move_tail(&uevent->list, &file->event_list);
1182}
1183
1184static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1185			       const char __user *inbuf,
1186			       int in_len, int out_len)
1187{
1188	struct rdma_ucm_migrate_id cmd;
1189	struct rdma_ucm_migrate_resp resp;
1190	struct ucma_context *ctx;
1191	struct file *filp;
1192	struct ucma_file *cur_file;
1193	int ret = 0;
1194
1195	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1196		return -EFAULT;
1197
1198	/* Get current fd to protect against it being closed */
1199	filp = fget(cmd.fd);
1200	if (!filp)
1201		return -ENOENT;
1202
1203	/* Validate current fd and prevent destruction of id. */
1204	ctx = ucma_get_ctx(filp->private_data, cmd.id);
1205	if (IS_ERR(ctx)) {
1206		ret = PTR_ERR(ctx);
1207		goto file_put;
1208	}
1209
1210	cur_file = ctx->file;
1211	if (cur_file == new_file) {
1212		resp.events_reported = ctx->events_reported;
1213		goto response;
1214	}
1215
1216	/*
1217	 * Migrate events between fd's, maintaining order, and avoiding new
1218	 * events being added before existing events.
1219	 */
1220	ucma_lock_files(cur_file, new_file);
1221	mutex_lock(&mut);
1222
1223	list_move_tail(&ctx->list, &new_file->ctx_list);
1224	ucma_move_events(ctx, new_file);
1225	ctx->file = new_file;
1226	resp.events_reported = ctx->events_reported;
1227
1228	mutex_unlock(&mut);
1229	ucma_unlock_files(cur_file, new_file);
1230
1231response:
1232	if (copy_to_user((void __user *)(unsigned long)cmd.response,
1233			 &resp, sizeof(resp)))
1234		ret = -EFAULT;
1235
1236	ucma_put_ctx(ctx);
1237file_put:
1238	fput(filp);
1239	return ret;
1240}
1241
/*
 * Command dispatch table, indexed by the RDMA_USER_CM_CMD_* value taken from
 * the command header in ucma_write().  Each handler receives the ucma file
 * the command was written to, a user pointer to the payload that follows the
 * header, and the header's in/out lengths.
 *
 * A NULL slot marks a command number that has no handler; ucma_write()
 * answers such commands with -ENOSYS.
 */
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID]	= ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID]	= ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_ADDR]	= ucma_bind_addr,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	= ucma_resolve_addr,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE]	= ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT]	= ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN]	= ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT]	= ucma_accept,
	[RDMA_USER_CM_CMD_REJECT]	= ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT]	= ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	= ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT]	= ucma_get_event,
	/* No handler is provided for GET_OPTION. */
	[RDMA_USER_CM_CMD_GET_OPTION]	= NULL,
	[RDMA_USER_CM_CMD_SET_OPTION]	= ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY]	= ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_MCAST]	= ucma_join_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST]	= ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID]	= ucma_migrate_id
};
1265
1266static ssize_t ucma_write(struct file *filp, const char __user *buf,
1267			  size_t len, loff_t *pos)
1268{
1269	struct ucma_file *file = filp->private_data;
1270	struct rdma_ucm_cmd_hdr hdr;
1271	ssize_t ret;
1272
1273	if (len < sizeof(hdr))
1274		return -EINVAL;
1275
1276	if (copy_from_user(&hdr, buf, sizeof(hdr)))
1277		return -EFAULT;
1278
1279	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
1280		return -EINVAL;
1281
1282	if (hdr.in + sizeof(hdr) > len)
1283		return -EINVAL;
1284
1285	if (!ucma_cmd_table[hdr.cmd])
1286		return -ENOSYS;
1287
1288	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
1289	if (!ret)
1290		ret = len;
1291
1292	return ret;
1293}
1294
1295static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait)
1296{
1297	struct ucma_file *file = filp->private_data;
1298	unsigned int mask = 0;
1299
1300	poll_wait(filp, &file->poll_wait, wait);
1301
1302	if (!list_empty(&file->event_list))
1303		mask = POLLIN | POLLRDNORM;
1304
1305	return mask;
1306}
1307
1308/*
1309 * ucma_open() does not need the BKL:
1310 *
1311 *  - no global state is referred to;
1312 *  - there is no ioctl method to race against;
1313 *  - no further module initialization is required for open to work
1314 *    after the device is registered.
1315 */
1316static int ucma_open(struct inode *inode, struct file *filp)
1317{
1318	struct ucma_file *file;
1319
1320	file = kmalloc(sizeof *file, GFP_KERNEL);
1321	if (!file)
1322		return -ENOMEM;
1323
1324	INIT_LIST_HEAD(&file->event_list);
1325	INIT_LIST_HEAD(&file->ctx_list);
1326	init_waitqueue_head(&file->poll_wait);
1327	mutex_init(&file->mut);
1328
1329	filp->private_data = file;
1330	file->filp = filp;
1331
1332	return nonseekable_open(inode, filp);
1333}
1334
/*
 * release() entry point: tear down every context still attached to the file
 * and free the struct ucma_file itself.  Always returns 0.
 *
 * For each context on file->ctx_list the id is removed from ctx_idr under
 * the global mutex and the context is handed to ucma_free_ctx().
 */
static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;
	struct ucma_context *ctx, *tmp;

	mutex_lock(&file->mut);
	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
		/*
		 * file->mut is dropped for the duration of the per-context
		 * teardown and re-taken before the walk continues.
		 * NOTE(review): presumably ucma_free_ctx() needs file->mut
		 * itself and unlinks ctx from ctx_list - confirm against its
		 * definition.
		 */
		mutex_unlock(&file->mut);

		/* Unpublish the id first; ctx_idr is guarded by the global mutex here. */
		mutex_lock(&mut);
		idr_remove(&ctx_idr, ctx->id);
		mutex_unlock(&mut);

		ucma_free_ctx(ctx);
		mutex_lock(&file->mut);
	}
	mutex_unlock(&file->mut);
	kfree(file);
	return 0;
}
1355
1356static const struct file_operations ucma_fops = {
1357	.owner 	 = THIS_MODULE,
1358	.open 	 = ucma_open,
1359	.release = ucma_close,
1360	.write	 = ucma_write,
1361	.poll    = ucma_poll,
1362	.llseek	 = no_llseek,
1363};
1364
/*
 * Misc device descriptor registered by ucma_init().  The minor number is
 * assigned dynamically and all file operations go through ucma_fops.
 */
static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	/* NOTE(review): presumably the device node path relative to /dev - confirm. */
	.nodename	= "infiniband/rdma_cm",
	/* Requested node permissions: read/write for owner, group and others. */
	.mode		= 0666,
	.fops		= &ucma_fops,
};
1372
1373static ssize_t show_abi_version(struct device *dev,
1374				struct device_attribute *attr,
1375				char *buf)
1376{
1377	return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
1378}
1379static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
1380
1381static int __init ucma_init(void)
1382{
1383	int ret;
1384
1385	ret = misc_register(&ucma_misc);
1386	if (ret)
1387		return ret;
1388
1389	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
1390	if (ret) {
1391		printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
1392		goto err1;
1393	}
1394
1395	ucma_ctl_table_hdr = register_sysctl_paths(ucma_ctl_path, ucma_ctl_table);
1396	if (!ucma_ctl_table_hdr) {
1397		printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n");
1398		ret = -ENOMEM;
1399		goto err2;
1400	}
1401	return 0;
1402err2:
1403	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
1404err1:
1405	misc_deregister(&ucma_misc);
1406	return ret;
1407}
1408
/*
 * Module exit: undo ucma_init() in reverse order (sysctl table, abi_version
 * attribute, misc device) and finally destroy the context idr.
 */
static void __exit ucma_cleanup(void)
{
	unregister_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
	idr_destroy(&ctx_idr);
}
1416
/* Register ucma_init() and ucma_cleanup() as the module's entry and exit points. */
module_init(ucma_init);
module_exit(ucma_cleanup);
1419