cma.c revision a81c994d5eef87ed77cb30d8343d6be296528b3f
1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6 *
7 * This Software is licensed under one of the following licenses:
8 *
9 * 1) under the terms of the "Common Public License 1.0" a copy of which is
10 *    available from the Open Source Initiative, see
11 *    http://www.opensource.org/licenses/cpl.php.
12 *
13 * 2) under the terms of the "The BSD License" a copy of which is
14 *    available from the Open Source Initiative, see
15 *    http://www.opensource.org/licenses/bsd-license.php.
16 *
17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18 *    copy of which is available from the Open Source Initiative, see
19 *    http://www.opensource.org/licenses/gpl-license.php.
20 *
21 * Licensee has the right to choose one of the above licenses.
22 *
23 * Redistributions of source code must retain the above copyright
24 * notice and one of the license notices.
25 *
26 * Redistributions in binary form must reproduce both the above copyright
27 * notice, one of the license notices in the documentation
28 * and/or other materials provided with the distribution.
29 *
30 */
31
32#include <linux/completion.h>
33#include <linux/in.h>
34#include <linux/in6.h>
35#include <linux/mutex.h>
36#include <linux/random.h>
37#include <linux/idr.h>
38#include <linux/inetdevice.h>
39
40#include <net/tcp.h>
41
42#include <rdma/rdma_cm.h>
43#include <rdma/rdma_cm_ib.h>
44#include <rdma/ib_cache.h>
45#include <rdma/ib_cm.h>
46#include <rdma/ib_sa.h>
47#include <rdma/iw_cm.h>
48
49MODULE_AUTHOR("Sean Hefty");
50MODULE_DESCRIPTION("Generic RDMA CM Agent");
51MODULE_LICENSE("Dual BSD/GPL");
52
53#define CMA_CM_RESPONSE_TIMEOUT 20
54#define CMA_MAX_CM_RETRIES 15
55
56static void cma_add_one(struct ib_device *device);
57static void cma_remove_one(struct ib_device *device);
58
59static struct ib_client cma_client = {
60	.name   = "cma",
61	.add    = cma_add_one,
62	.remove = cma_remove_one
63};
64
65static struct ib_sa_client sa_client;
66static struct rdma_addr_client addr_client;
67static LIST_HEAD(dev_list);
68static LIST_HEAD(listen_any_list);
69static DEFINE_MUTEX(lock);
70static struct workqueue_struct *cma_wq;
71static DEFINE_IDR(sdp_ps);
72static DEFINE_IDR(tcp_ps);
73static DEFINE_IDR(udp_ps);
74static DEFINE_IDR(ipoib_ps);
75static int next_port;
76
77struct cma_device {
78	struct list_head	list;
79	struct ib_device	*device;
80	struct completion	comp;
81	atomic_t		refcount;
82	struct list_head	id_list;
83};
84
85enum cma_state {
86	CMA_IDLE,
87	CMA_ADDR_QUERY,
88	CMA_ADDR_RESOLVED,
89	CMA_ROUTE_QUERY,
90	CMA_ROUTE_RESOLVED,
91	CMA_CONNECT,
92	CMA_DISCONNECT,
93	CMA_ADDR_BOUND,
94	CMA_LISTEN,
95	CMA_DEVICE_REMOVAL,
96	CMA_DESTROYING
97};
98
99struct rdma_bind_list {
100	struct idr		*ps;
101	struct hlist_head	owners;
102	unsigned short		port;
103};
104
105/*
106 * Device removal can occur at anytime, so we need extra handling to
107 * serialize notifying the user of device removal with other callbacks.
108 * We do this by disabling removal notification while a callback is in process,
109 * and reporting it after the callback completes.
110 */
111struct rdma_id_private {
112	struct rdma_cm_id	id;
113
114	struct rdma_bind_list	*bind_list;
115	struct hlist_node	node;
116	struct list_head	list;
117	struct list_head	listen_list;
118	struct cma_device	*cma_dev;
119	struct list_head	mc_list;
120
121	enum cma_state		state;
122	spinlock_t		lock;
123	struct completion	comp;
124	atomic_t		refcount;
125	wait_queue_head_t	wait_remove;
126	atomic_t		dev_remove;
127
128	int			backlog;
129	int			timeout_ms;
130	struct ib_sa_query	*query;
131	int			query_id;
132	union {
133		struct ib_cm_id	*ib;
134		struct iw_cm_id	*iw;
135	} cm_id;
136
137	u32			seq_num;
138	u32			qkey;
139	u32			qp_num;
140	u8			srq;
141	u8			tos;
142};
143
144struct cma_multicast {
145	struct rdma_id_private *id_priv;
146	union {
147		struct ib_sa_multicast *ib;
148	} multicast;
149	struct list_head	list;
150	void			*context;
151	struct sockaddr		addr;
152	u8			pad[sizeof(struct sockaddr_in6) -
153				    sizeof(struct sockaddr)];
154};
155
156struct cma_work {
157	struct work_struct	work;
158	struct rdma_id_private	*id;
159	enum cma_state		old_state;
160	enum cma_state		new_state;
161	struct rdma_cm_event	event;
162};
163
164union cma_ip_addr {
165	struct in6_addr ip6;
166	struct {
167		__u32 pad[3];
168		__u32 addr;
169	} ip4;
170};
171
172struct cma_hdr {
173	u8 cma_version;
174	u8 ip_version;	/* IP version: 7:4 */
175	__u16 port;
176	union cma_ip_addr src_addr;
177	union cma_ip_addr dst_addr;
178};
179
180struct sdp_hh {
181	u8 bsdh[16];
182	u8 sdp_version; /* Major version: 7:4 */
183	u8 ip_version;	/* IP version: 7:4 */
184	u8 sdp_specific1[10];
185	__u16 port;
186	__u16 sdp_specific2;
187	union cma_ip_addr src_addr;
188	union cma_ip_addr dst_addr;
189};
190
191struct sdp_hah {
192	u8 bsdh[16];
193	u8 sdp_version;
194};
195
196#define CMA_VERSION 0x00
197#define SDP_MAJ_VERSION 0x2
198
199static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
200{
201	unsigned long flags;
202	int ret;
203
204	spin_lock_irqsave(&id_priv->lock, flags);
205	ret = (id_priv->state == comp);
206	spin_unlock_irqrestore(&id_priv->lock, flags);
207	return ret;
208}
209
210static int cma_comp_exch(struct rdma_id_private *id_priv,
211			 enum cma_state comp, enum cma_state exch)
212{
213	unsigned long flags;
214	int ret;
215
216	spin_lock_irqsave(&id_priv->lock, flags);
217	if ((ret = (id_priv->state == comp)))
218		id_priv->state = exch;
219	spin_unlock_irqrestore(&id_priv->lock, flags);
220	return ret;
221}
222
223static enum cma_state cma_exch(struct rdma_id_private *id_priv,
224			       enum cma_state exch)
225{
226	unsigned long flags;
227	enum cma_state old;
228
229	spin_lock_irqsave(&id_priv->lock, flags);
230	old = id_priv->state;
231	id_priv->state = exch;
232	spin_unlock_irqrestore(&id_priv->lock, flags);
233	return old;
234}
235
236static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
237{
238	return hdr->ip_version >> 4;
239}
240
241static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
242{
243	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
244}
245
246static inline u8 sdp_get_majv(u8 sdp_version)
247{
248	return sdp_version >> 4;
249}
250
251static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
252{
253	return hh->ip_version >> 4;
254}
255
256static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
257{
258	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
259}
260
261static inline int cma_is_ud_ps(enum rdma_port_space ps)
262{
263	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
264}
265
266static void cma_attach_to_dev(struct rdma_id_private *id_priv,
267			      struct cma_device *cma_dev)
268{
269	atomic_inc(&cma_dev->refcount);
270	id_priv->cma_dev = cma_dev;
271	id_priv->id.device = cma_dev->device;
272	list_add_tail(&id_priv->list, &cma_dev->id_list);
273}
274
275static inline void cma_deref_dev(struct cma_device *cma_dev)
276{
277	if (atomic_dec_and_test(&cma_dev->refcount))
278		complete(&cma_dev->comp);
279}
280
281static void cma_detach_from_dev(struct rdma_id_private *id_priv)
282{
283	list_del(&id_priv->list);
284	cma_deref_dev(id_priv->cma_dev);
285	id_priv->cma_dev = NULL;
286}
287
288static int cma_set_qkey(struct ib_device *device, u8 port_num,
289			enum rdma_port_space ps,
290			struct rdma_dev_addr *dev_addr, u32 *qkey)
291{
292	struct ib_sa_mcmember_rec rec;
293	int ret = 0;
294
295	switch (ps) {
296	case RDMA_PS_UDP:
297		*qkey = RDMA_UDP_QKEY;
298		break;
299	case RDMA_PS_IPOIB:
300		ib_addr_get_mgid(dev_addr, &rec.mgid);
301		ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
302		*qkey = be32_to_cpu(rec.qkey);
303		break;
304	default:
305		break;
306	}
307	return ret;
308}
309
310static int cma_acquire_dev(struct rdma_id_private *id_priv)
311{
312	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
313	struct cma_device *cma_dev;
314	union ib_gid gid;
315	int ret = -ENODEV;
316
317	switch (rdma_node_get_transport(dev_addr->dev_type)) {
318	case RDMA_TRANSPORT_IB:
319		ib_addr_get_sgid(dev_addr, &gid);
320		break;
321	case RDMA_TRANSPORT_IWARP:
322		iw_addr_get_sgid(dev_addr, &gid);
323		break;
324	default:
325		return -ENODEV;
326	}
327
328	list_for_each_entry(cma_dev, &dev_list, list) {
329		ret = ib_find_cached_gid(cma_dev->device, &gid,
330					 &id_priv->id.port_num, NULL);
331		if (!ret) {
332			ret = cma_set_qkey(cma_dev->device,
333					   id_priv->id.port_num,
334					   id_priv->id.ps, dev_addr,
335					   &id_priv->qkey);
336			if (!ret)
337				cma_attach_to_dev(id_priv, cma_dev);
338			break;
339		}
340	}
341	return ret;
342}
343
344static void cma_deref_id(struct rdma_id_private *id_priv)
345{
346	if (atomic_dec_and_test(&id_priv->refcount))
347		complete(&id_priv->comp);
348}
349
350static int cma_disable_remove(struct rdma_id_private *id_priv,
351			      enum cma_state state)
352{
353	unsigned long flags;
354	int ret;
355
356	spin_lock_irqsave(&id_priv->lock, flags);
357	if (id_priv->state == state) {
358		atomic_inc(&id_priv->dev_remove);
359		ret = 0;
360	} else
361		ret = -EINVAL;
362	spin_unlock_irqrestore(&id_priv->lock, flags);
363	return ret;
364}
365
366static void cma_enable_remove(struct rdma_id_private *id_priv)
367{
368	if (atomic_dec_and_test(&id_priv->dev_remove))
369		wake_up(&id_priv->wait_remove);
370}
371
372static int cma_has_cm_dev(struct rdma_id_private *id_priv)
373{
374	return (id_priv->id.device && id_priv->cm_id.ib);
375}
376
377struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
378				  void *context, enum rdma_port_space ps)
379{
380	struct rdma_id_private *id_priv;
381
382	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
383	if (!id_priv)
384		return ERR_PTR(-ENOMEM);
385
386	id_priv->state = CMA_IDLE;
387	id_priv->id.context = context;
388	id_priv->id.event_handler = event_handler;
389	id_priv->id.ps = ps;
390	spin_lock_init(&id_priv->lock);
391	init_completion(&id_priv->comp);
392	atomic_set(&id_priv->refcount, 1);
393	init_waitqueue_head(&id_priv->wait_remove);
394	atomic_set(&id_priv->dev_remove, 0);
395	INIT_LIST_HEAD(&id_priv->listen_list);
396	INIT_LIST_HEAD(&id_priv->mc_list);
397	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
398
399	return &id_priv->id;
400}
401EXPORT_SYMBOL(rdma_create_id);
402
403static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
404{
405	struct ib_qp_attr qp_attr;
406	int qp_attr_mask, ret;
407
408	qp_attr.qp_state = IB_QPS_INIT;
409	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
410	if (ret)
411		return ret;
412
413	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
414	if (ret)
415		return ret;
416
417	qp_attr.qp_state = IB_QPS_RTR;
418	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
419	if (ret)
420		return ret;
421
422	qp_attr.qp_state = IB_QPS_RTS;
423	qp_attr.sq_psn = 0;
424	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
425
426	return ret;
427}
428
429static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
430{
431	struct ib_qp_attr qp_attr;
432	int qp_attr_mask, ret;
433
434	qp_attr.qp_state = IB_QPS_INIT;
435	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
436	if (ret)
437		return ret;
438
439	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
440}
441
442int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
443		   struct ib_qp_init_attr *qp_init_attr)
444{
445	struct rdma_id_private *id_priv;
446	struct ib_qp *qp;
447	int ret;
448
449	id_priv = container_of(id, struct rdma_id_private, id);
450	if (id->device != pd->device)
451		return -EINVAL;
452
453	qp = ib_create_qp(pd, qp_init_attr);
454	if (IS_ERR(qp))
455		return PTR_ERR(qp);
456
457	if (cma_is_ud_ps(id_priv->id.ps))
458		ret = cma_init_ud_qp(id_priv, qp);
459	else
460		ret = cma_init_conn_qp(id_priv, qp);
461	if (ret)
462		goto err;
463
464	id->qp = qp;
465	id_priv->qp_num = qp->qp_num;
466	id_priv->srq = (qp->srq != NULL);
467	return 0;
468err:
469	ib_destroy_qp(qp);
470	return ret;
471}
472EXPORT_SYMBOL(rdma_create_qp);
473
474void rdma_destroy_qp(struct rdma_cm_id *id)
475{
476	ib_destroy_qp(id->qp);
477}
478EXPORT_SYMBOL(rdma_destroy_qp);
479
480static int cma_modify_qp_rtr(struct rdma_cm_id *id)
481{
482	struct ib_qp_attr qp_attr;
483	int qp_attr_mask, ret;
484
485	if (!id->qp)
486		return 0;
487
488	/* Need to update QP attributes from default values. */
489	qp_attr.qp_state = IB_QPS_INIT;
490	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
491	if (ret)
492		return ret;
493
494	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
495	if (ret)
496		return ret;
497
498	qp_attr.qp_state = IB_QPS_RTR;
499	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
500	if (ret)
501		return ret;
502
503	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
504}
505
506static int cma_modify_qp_rts(struct rdma_cm_id *id)
507{
508	struct ib_qp_attr qp_attr;
509	int qp_attr_mask, ret;
510
511	if (!id->qp)
512		return 0;
513
514	qp_attr.qp_state = IB_QPS_RTS;
515	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
516	if (ret)
517		return ret;
518
519	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
520}
521
522static int cma_modify_qp_err(struct rdma_cm_id *id)
523{
524	struct ib_qp_attr qp_attr;
525
526	if (!id->qp)
527		return 0;
528
529	qp_attr.qp_state = IB_QPS_ERR;
530	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
531}
532
533static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
534			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
535{
536	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
537	int ret;
538
539	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
540				  ib_addr_get_pkey(dev_addr),
541				  &qp_attr->pkey_index);
542	if (ret)
543		return ret;
544
545	qp_attr->port_num = id_priv->id.port_num;
546	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
547
548	if (cma_is_ud_ps(id_priv->id.ps)) {
549		qp_attr->qkey = id_priv->qkey;
550		*qp_attr_mask |= IB_QP_QKEY;
551	} else {
552		qp_attr->qp_access_flags = 0;
553		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
554	}
555	return 0;
556}
557
558int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
559		       int *qp_attr_mask)
560{
561	struct rdma_id_private *id_priv;
562	int ret = 0;
563
564	id_priv = container_of(id, struct rdma_id_private, id);
565	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
566	case RDMA_TRANSPORT_IB:
567		if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
568			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
569		else
570			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
571						 qp_attr_mask);
572		if (qp_attr->qp_state == IB_QPS_RTR)
573			qp_attr->rq_psn = id_priv->seq_num;
574		break;
575	case RDMA_TRANSPORT_IWARP:
576		if (!id_priv->cm_id.iw) {
577			qp_attr->qp_access_flags = 0;
578			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
579		} else
580			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
581						 qp_attr_mask);
582		break;
583	default:
584		ret = -ENOSYS;
585		break;
586	}
587
588	return ret;
589}
590EXPORT_SYMBOL(rdma_init_qp_attr);
591
592static inline int cma_zero_addr(struct sockaddr *addr)
593{
594	struct in6_addr *ip6;
595
596	if (addr->sa_family == AF_INET)
597		return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
598	else {
599		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
600		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
601			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
602	}
603}
604
605static inline int cma_loopback_addr(struct sockaddr *addr)
606{
607	return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
608}
609
610static inline int cma_any_addr(struct sockaddr *addr)
611{
612	return cma_zero_addr(addr) || cma_loopback_addr(addr);
613}
614
615static inline __be16 cma_port(struct sockaddr *addr)
616{
617	if (addr->sa_family == AF_INET)
618		return ((struct sockaddr_in *) addr)->sin_port;
619	else
620		return ((struct sockaddr_in6 *) addr)->sin6_port;
621}
622
623static inline int cma_any_port(struct sockaddr *addr)
624{
625	return !cma_port(addr);
626}
627
628static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
629			    u8 *ip_ver, __u16 *port,
630			    union cma_ip_addr **src, union cma_ip_addr **dst)
631{
632	switch (ps) {
633	case RDMA_PS_SDP:
634		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
635		    SDP_MAJ_VERSION)
636			return -EINVAL;
637
638		*ip_ver	= sdp_get_ip_ver(hdr);
639		*port	= ((struct sdp_hh *) hdr)->port;
640		*src	= &((struct sdp_hh *) hdr)->src_addr;
641		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
642		break;
643	default:
644		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
645			return -EINVAL;
646
647		*ip_ver	= cma_get_ip_ver(hdr);
648		*port	= ((struct cma_hdr *) hdr)->port;
649		*src	= &((struct cma_hdr *) hdr)->src_addr;
650		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
651		break;
652	}
653
654	if (*ip_ver != 4 && *ip_ver != 6)
655		return -EINVAL;
656	return 0;
657}
658
659static void cma_save_net_info(struct rdma_addr *addr,
660			      struct rdma_addr *listen_addr,
661			      u8 ip_ver, __u16 port,
662			      union cma_ip_addr *src, union cma_ip_addr *dst)
663{
664	struct sockaddr_in *listen4, *ip4;
665	struct sockaddr_in6 *listen6, *ip6;
666
667	switch (ip_ver) {
668	case 4:
669		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
670		ip4 = (struct sockaddr_in *) &addr->src_addr;
671		ip4->sin_family = listen4->sin_family;
672		ip4->sin_addr.s_addr = dst->ip4.addr;
673		ip4->sin_port = listen4->sin_port;
674
675		ip4 = (struct sockaddr_in *) &addr->dst_addr;
676		ip4->sin_family = listen4->sin_family;
677		ip4->sin_addr.s_addr = src->ip4.addr;
678		ip4->sin_port = port;
679		break;
680	case 6:
681		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
682		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
683		ip6->sin6_family = listen6->sin6_family;
684		ip6->sin6_addr = dst->ip6;
685		ip6->sin6_port = listen6->sin6_port;
686
687		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
688		ip6->sin6_family = listen6->sin6_family;
689		ip6->sin6_addr = src->ip6;
690		ip6->sin6_port = port;
691		break;
692	default:
693		break;
694	}
695}
696
697static inline int cma_user_data_offset(enum rdma_port_space ps)
698{
699	switch (ps) {
700	case RDMA_PS_SDP:
701		return 0;
702	default:
703		return sizeof(struct cma_hdr);
704	}
705}
706
707static void cma_cancel_route(struct rdma_id_private *id_priv)
708{
709	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
710	case RDMA_TRANSPORT_IB:
711		if (id_priv->query)
712			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
713		break;
714	default:
715		break;
716	}
717}
718
719static inline int cma_internal_listen(struct rdma_id_private *id_priv)
720{
721	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
722	       cma_any_addr(&id_priv->id.route.addr.src_addr);
723}
724
725static void cma_destroy_listen(struct rdma_id_private *id_priv)
726{
727	cma_exch(id_priv, CMA_DESTROYING);
728
729	if (id_priv->cma_dev) {
730		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
731		case RDMA_TRANSPORT_IB:
732			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
733				ib_destroy_cm_id(id_priv->cm_id.ib);
734			break;
735		case RDMA_TRANSPORT_IWARP:
736			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
737				iw_destroy_cm_id(id_priv->cm_id.iw);
738			break;
739		default:
740			break;
741		}
742		cma_detach_from_dev(id_priv);
743	}
744	list_del(&id_priv->listen_list);
745
746	cma_deref_id(id_priv);
747	wait_for_completion(&id_priv->comp);
748
749	kfree(id_priv);
750}
751
752static void cma_cancel_listens(struct rdma_id_private *id_priv)
753{
754	struct rdma_id_private *dev_id_priv;
755
756	mutex_lock(&lock);
757	list_del(&id_priv->list);
758
759	while (!list_empty(&id_priv->listen_list)) {
760		dev_id_priv = list_entry(id_priv->listen_list.next,
761					 struct rdma_id_private, listen_list);
762		cma_destroy_listen(dev_id_priv);
763	}
764	mutex_unlock(&lock);
765}
766
767static void cma_cancel_operation(struct rdma_id_private *id_priv,
768				 enum cma_state state)
769{
770	switch (state) {
771	case CMA_ADDR_QUERY:
772		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
773		break;
774	case CMA_ROUTE_QUERY:
775		cma_cancel_route(id_priv);
776		break;
777	case CMA_LISTEN:
778		if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
779		    !id_priv->cma_dev)
780			cma_cancel_listens(id_priv);
781		break;
782	default:
783		break;
784	}
785}
786
787static void cma_release_port(struct rdma_id_private *id_priv)
788{
789	struct rdma_bind_list *bind_list = id_priv->bind_list;
790
791	if (!bind_list)
792		return;
793
794	mutex_lock(&lock);
795	hlist_del(&id_priv->node);
796	if (hlist_empty(&bind_list->owners)) {
797		idr_remove(bind_list->ps, bind_list->port);
798		kfree(bind_list);
799	}
800	mutex_unlock(&lock);
801}
802
803static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
804{
805	struct cma_multicast *mc;
806
807	while (!list_empty(&id_priv->mc_list)) {
808		mc = container_of(id_priv->mc_list.next,
809				  struct cma_multicast, list);
810		list_del(&mc->list);
811		ib_sa_free_multicast(mc->multicast.ib);
812		kfree(mc);
813	}
814}
815
816void rdma_destroy_id(struct rdma_cm_id *id)
817{
818	struct rdma_id_private *id_priv;
819	enum cma_state state;
820
821	id_priv = container_of(id, struct rdma_id_private, id);
822	state = cma_exch(id_priv, CMA_DESTROYING);
823	cma_cancel_operation(id_priv, state);
824
825	mutex_lock(&lock);
826	if (id_priv->cma_dev) {
827		mutex_unlock(&lock);
828		switch (rdma_node_get_transport(id->device->node_type)) {
829		case RDMA_TRANSPORT_IB:
830			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
831				ib_destroy_cm_id(id_priv->cm_id.ib);
832			break;
833		case RDMA_TRANSPORT_IWARP:
834			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
835				iw_destroy_cm_id(id_priv->cm_id.iw);
836			break;
837		default:
838			break;
839		}
840		cma_leave_mc_groups(id_priv);
841		mutex_lock(&lock);
842		cma_detach_from_dev(id_priv);
843	}
844	mutex_unlock(&lock);
845
846	cma_release_port(id_priv);
847	cma_deref_id(id_priv);
848	wait_for_completion(&id_priv->comp);
849
850	kfree(id_priv->id.route.path_rec);
851	kfree(id_priv);
852}
853EXPORT_SYMBOL(rdma_destroy_id);
854
855static int cma_rep_recv(struct rdma_id_private *id_priv)
856{
857	int ret;
858
859	ret = cma_modify_qp_rtr(&id_priv->id);
860	if (ret)
861		goto reject;
862
863	ret = cma_modify_qp_rts(&id_priv->id);
864	if (ret)
865		goto reject;
866
867	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
868	if (ret)
869		goto reject;
870
871	return 0;
872reject:
873	cma_modify_qp_err(&id_priv->id);
874	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
875		       NULL, 0, NULL, 0);
876	return ret;
877}
878
879static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
880{
881	if (id_priv->id.ps == RDMA_PS_SDP &&
882	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
883	    SDP_MAJ_VERSION)
884		return -EINVAL;
885
886	return 0;
887}
888
889static void cma_set_rep_event_data(struct rdma_cm_event *event,
890				   struct ib_cm_rep_event_param *rep_data,
891				   void *private_data)
892{
893	event->param.conn.private_data = private_data;
894	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
895	event->param.conn.responder_resources = rep_data->responder_resources;
896	event->param.conn.initiator_depth = rep_data->initiator_depth;
897	event->param.conn.flow_control = rep_data->flow_control;
898	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
899	event->param.conn.srq = rep_data->srq;
900	event->param.conn.qp_num = rep_data->remote_qpn;
901}
902
903static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
904{
905	struct rdma_id_private *id_priv = cm_id->context;
906	struct rdma_cm_event event;
907	int ret = 0;
908
909	if (cma_disable_remove(id_priv, CMA_CONNECT))
910		return 0;
911
912	memset(&event, 0, sizeof event);
913	switch (ib_event->event) {
914	case IB_CM_REQ_ERROR:
915	case IB_CM_REP_ERROR:
916		event.event = RDMA_CM_EVENT_UNREACHABLE;
917		event.status = -ETIMEDOUT;
918		break;
919	case IB_CM_REP_RECEIVED:
920		event.status = cma_verify_rep(id_priv, ib_event->private_data);
921		if (event.status)
922			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
923		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
924			event.status = cma_rep_recv(id_priv);
925			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
926						     RDMA_CM_EVENT_ESTABLISHED;
927		} else
928			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
929		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
930				       ib_event->private_data);
931		break;
932	case IB_CM_RTU_RECEIVED:
933	case IB_CM_USER_ESTABLISHED:
934		event.event = RDMA_CM_EVENT_ESTABLISHED;
935		break;
936	case IB_CM_DREQ_ERROR:
937		event.status = -ETIMEDOUT; /* fall through */
938	case IB_CM_DREQ_RECEIVED:
939	case IB_CM_DREP_RECEIVED:
940		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
941			goto out;
942		event.event = RDMA_CM_EVENT_DISCONNECTED;
943		break;
944	case IB_CM_TIMEWAIT_EXIT:
945	case IB_CM_MRA_RECEIVED:
946		/* ignore event */
947		goto out;
948	case IB_CM_REJ_RECEIVED:
949		cma_modify_qp_err(&id_priv->id);
950		event.status = ib_event->param.rej_rcvd.reason;
951		event.event = RDMA_CM_EVENT_REJECTED;
952		event.param.conn.private_data = ib_event->private_data;
953		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
954		break;
955	default:
956		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
957		       ib_event->event);
958		goto out;
959	}
960
961	ret = id_priv->id.event_handler(&id_priv->id, &event);
962	if (ret) {
963		/* Destroy the CM ID by returning a non-zero value. */
964		id_priv->cm_id.ib = NULL;
965		cma_exch(id_priv, CMA_DESTROYING);
966		cma_enable_remove(id_priv);
967		rdma_destroy_id(&id_priv->id);
968		return ret;
969	}
970out:
971	cma_enable_remove(id_priv);
972	return ret;
973}
974
975static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
976					       struct ib_cm_event *ib_event)
977{
978	struct rdma_id_private *id_priv;
979	struct rdma_cm_id *id;
980	struct rdma_route *rt;
981	union cma_ip_addr *src, *dst;
982	__u16 port;
983	u8 ip_ver;
984
985	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
986			     &ip_ver, &port, &src, &dst))
987		goto err;
988
989	id = rdma_create_id(listen_id->event_handler, listen_id->context,
990			    listen_id->ps);
991	if (IS_ERR(id))
992		goto err;
993
994	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
995			  ip_ver, port, src, dst);
996
997	rt = &id->route;
998	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
999	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1000			       GFP_KERNEL);
1001	if (!rt->path_rec)
1002		goto destroy_id;
1003
1004	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1005	if (rt->num_paths == 2)
1006		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1007
1008	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1009	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1010	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1011	rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
1012
1013	id_priv = container_of(id, struct rdma_id_private, id);
1014	id_priv->state = CMA_CONNECT;
1015	return id_priv;
1016
1017destroy_id:
1018	rdma_destroy_id(id);
1019err:
1020	return NULL;
1021}
1022
1023static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1024					      struct ib_cm_event *ib_event)
1025{
1026	struct rdma_id_private *id_priv;
1027	struct rdma_cm_id *id;
1028	union cma_ip_addr *src, *dst;
1029	__u16 port;
1030	u8 ip_ver;
1031	int ret;
1032
1033	id = rdma_create_id(listen_id->event_handler, listen_id->context,
1034			    listen_id->ps);
1035	if (IS_ERR(id))
1036		return NULL;
1037
1038
1039	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1040			     &ip_ver, &port, &src, &dst))
1041		goto err;
1042
1043	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1044			  ip_ver, port, src, dst);
1045
1046	ret = rdma_translate_ip(&id->route.addr.src_addr,
1047				&id->route.addr.dev_addr);
1048	if (ret)
1049		goto err;
1050
1051	id_priv = container_of(id, struct rdma_id_private, id);
1052	id_priv->state = CMA_CONNECT;
1053	return id_priv;
1054err:
1055	rdma_destroy_id(id);
1056	return NULL;
1057}
1058
1059static void cma_set_req_event_data(struct rdma_cm_event *event,
1060				   struct ib_cm_req_event_param *req_data,
1061				   void *private_data, int offset)
1062{
1063	event->param.conn.private_data = private_data + offset;
1064	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1065	event->param.conn.responder_resources = req_data->responder_resources;
1066	event->param.conn.initiator_depth = req_data->initiator_depth;
1067	event->param.conn.flow_control = req_data->flow_control;
1068	event->param.conn.retry_count = req_data->retry_count;
1069	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1070	event->param.conn.srq = req_data->srq;
1071	event->param.conn.qp_num = req_data->remote_qpn;
1072}
1073
1074static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1075{
1076	struct rdma_id_private *listen_id, *conn_id;
1077	struct rdma_cm_event event;
1078	int offset, ret;
1079
1080	listen_id = cm_id->context;
1081	if (cma_disable_remove(listen_id, CMA_LISTEN))
1082		return -ECONNABORTED;
1083
1084	memset(&event, 0, sizeof event);
1085	offset = cma_user_data_offset(listen_id->id.ps);
1086	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1087	if (cma_is_ud_ps(listen_id->id.ps)) {
1088		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1089		event.param.ud.private_data = ib_event->private_data + offset;
1090		event.param.ud.private_data_len =
1091				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1092	} else {
1093		conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1094		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1095				       ib_event->private_data, offset);
1096	}
1097	if (!conn_id) {
1098		ret = -ENOMEM;
1099		goto out;
1100	}
1101
1102	atomic_inc(&conn_id->dev_remove);
1103	mutex_lock(&lock);
1104	ret = cma_acquire_dev(conn_id);
1105	mutex_unlock(&lock);
1106	if (ret)
1107		goto release_conn_id;
1108
1109	conn_id->cm_id.ib = cm_id;
1110	cm_id->context = conn_id;
1111	cm_id->cm_handler = cma_ib_handler;
1112
1113	ret = conn_id->id.event_handler(&conn_id->id, &event);
1114	if (!ret)
1115		goto out;
1116
1117	/* Destroy the CM ID by returning a non-zero value. */
1118	conn_id->cm_id.ib = NULL;
1119
1120release_conn_id:
1121	cma_exch(conn_id, CMA_DESTROYING);
1122	cma_enable_remove(conn_id);
1123	rdma_destroy_id(&conn_id->id);
1124
1125out:
1126	cma_enable_remove(listen_id);
1127	return ret;
1128}
1129
1130static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1131{
1132	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
1133}
1134
1135static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1136				 struct ib_cm_compare_data *compare)
1137{
1138	struct cma_hdr *cma_data, *cma_mask;
1139	struct sdp_hh *sdp_data, *sdp_mask;
1140	__u32 ip4_addr;
1141	struct in6_addr ip6_addr;
1142
1143	memset(compare, 0, sizeof *compare);
1144	cma_data = (void *) compare->data;
1145	cma_mask = (void *) compare->mask;
1146	sdp_data = (void *) compare->data;
1147	sdp_mask = (void *) compare->mask;
1148
1149	switch (addr->sa_family) {
1150	case AF_INET:
1151		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1152		if (ps == RDMA_PS_SDP) {
1153			sdp_set_ip_ver(sdp_data, 4);
1154			sdp_set_ip_ver(sdp_mask, 0xF);
1155			sdp_data->dst_addr.ip4.addr = ip4_addr;
1156			sdp_mask->dst_addr.ip4.addr = ~0;
1157		} else {
1158			cma_set_ip_ver(cma_data, 4);
1159			cma_set_ip_ver(cma_mask, 0xF);
1160			cma_data->dst_addr.ip4.addr = ip4_addr;
1161			cma_mask->dst_addr.ip4.addr = ~0;
1162		}
1163		break;
1164	case AF_INET6:
1165		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1166		if (ps == RDMA_PS_SDP) {
1167			sdp_set_ip_ver(sdp_data, 6);
1168			sdp_set_ip_ver(sdp_mask, 0xF);
1169			sdp_data->dst_addr.ip6 = ip6_addr;
1170			memset(&sdp_mask->dst_addr.ip6, 0xFF,
1171			       sizeof sdp_mask->dst_addr.ip6);
1172		} else {
1173			cma_set_ip_ver(cma_data, 6);
1174			cma_set_ip_ver(cma_mask, 0xF);
1175			cma_data->dst_addr.ip6 = ip6_addr;
1176			memset(&cma_mask->dst_addr.ip6, 0xFF,
1177			       sizeof cma_mask->dst_addr.ip6);
1178		}
1179		break;
1180	default:
1181		break;
1182	}
1183}
1184
1185static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1186{
1187	struct rdma_id_private *id_priv = iw_id->context;
1188	struct rdma_cm_event event;
1189	struct sockaddr_in *sin;
1190	int ret = 0;
1191
1192	if (cma_disable_remove(id_priv, CMA_CONNECT))
1193		return 0;
1194
1195	memset(&event, 0, sizeof event);
1196	switch (iw_event->event) {
1197	case IW_CM_EVENT_CLOSE:
1198		event.event = RDMA_CM_EVENT_DISCONNECTED;
1199		break;
1200	case IW_CM_EVENT_CONNECT_REPLY:
1201		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1202		*sin = iw_event->local_addr;
1203		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
1204		*sin = iw_event->remote_addr;
1205		switch (iw_event->status) {
1206		case 0:
1207			event.event = RDMA_CM_EVENT_ESTABLISHED;
1208			break;
1209		case -ECONNRESET:
1210		case -ECONNREFUSED:
1211			event.event = RDMA_CM_EVENT_REJECTED;
1212			break;
1213		case -ETIMEDOUT:
1214			event.event = RDMA_CM_EVENT_UNREACHABLE;
1215			break;
1216		default:
1217			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1218			break;
1219		}
1220		break;
1221	case IW_CM_EVENT_ESTABLISHED:
1222		event.event = RDMA_CM_EVENT_ESTABLISHED;
1223		break;
1224	default:
1225		BUG_ON(1);
1226	}
1227
1228	event.status = iw_event->status;
1229	event.param.conn.private_data = iw_event->private_data;
1230	event.param.conn.private_data_len = iw_event->private_data_len;
1231	ret = id_priv->id.event_handler(&id_priv->id, &event);
1232	if (ret) {
1233		/* Destroy the CM ID by returning a non-zero value. */
1234		id_priv->cm_id.iw = NULL;
1235		cma_exch(id_priv, CMA_DESTROYING);
1236		cma_enable_remove(id_priv);
1237		rdma_destroy_id(&id_priv->id);
1238		return ret;
1239	}
1240
1241	cma_enable_remove(id_priv);
1242	return ret;
1243}
1244
1245static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1246			       struct iw_cm_event *iw_event)
1247{
1248	struct rdma_cm_id *new_cm_id;
1249	struct rdma_id_private *listen_id, *conn_id;
1250	struct sockaddr_in *sin;
1251	struct net_device *dev = NULL;
1252	struct rdma_cm_event event;
1253	int ret;
1254
1255	listen_id = cm_id->context;
1256	if (cma_disable_remove(listen_id, CMA_LISTEN))
1257		return -ECONNABORTED;
1258
1259	/* Create a new RDMA id for the new IW CM ID */
1260	new_cm_id = rdma_create_id(listen_id->id.event_handler,
1261				   listen_id->id.context,
1262				   RDMA_PS_TCP);
1263	if (!new_cm_id) {
1264		ret = -ENOMEM;
1265		goto out;
1266	}
1267	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1268	atomic_inc(&conn_id->dev_remove);
1269	conn_id->state = CMA_CONNECT;
1270
1271	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
1272	if (!dev) {
1273		ret = -EADDRNOTAVAIL;
1274		cma_enable_remove(conn_id);
1275		rdma_destroy_id(new_cm_id);
1276		goto out;
1277	}
1278	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1279	if (ret) {
1280		cma_enable_remove(conn_id);
1281		rdma_destroy_id(new_cm_id);
1282		goto out;
1283	}
1284
1285	mutex_lock(&lock);
1286	ret = cma_acquire_dev(conn_id);
1287	mutex_unlock(&lock);
1288	if (ret) {
1289		cma_enable_remove(conn_id);
1290		rdma_destroy_id(new_cm_id);
1291		goto out;
1292	}
1293
1294	conn_id->cm_id.iw = cm_id;
1295	cm_id->context = conn_id;
1296	cm_id->cm_handler = cma_iw_handler;
1297
1298	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1299	*sin = iw_event->local_addr;
1300	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1301	*sin = iw_event->remote_addr;
1302
1303	memset(&event, 0, sizeof event);
1304	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1305	event.param.conn.private_data = iw_event->private_data;
1306	event.param.conn.private_data_len = iw_event->private_data_len;
1307	ret = conn_id->id.event_handler(&conn_id->id, &event);
1308	if (ret) {
1309		/* User wants to destroy the CM ID */
1310		conn_id->cm_id.iw = NULL;
1311		cma_exch(conn_id, CMA_DESTROYING);
1312		cma_enable_remove(conn_id);
1313		rdma_destroy_id(&conn_id->id);
1314	}
1315
1316out:
1317	if (dev)
1318		dev_put(dev);
1319	cma_enable_remove(listen_id);
1320	return ret;
1321}
1322
1323static int cma_ib_listen(struct rdma_id_private *id_priv)
1324{
1325	struct ib_cm_compare_data compare_data;
1326	struct sockaddr *addr;
1327	__be64 svc_id;
1328	int ret;
1329
1330	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
1331					    id_priv);
1332	if (IS_ERR(id_priv->cm_id.ib))
1333		return PTR_ERR(id_priv->cm_id.ib);
1334
1335	addr = &id_priv->id.route.addr.src_addr;
1336	svc_id = cma_get_service_id(id_priv->id.ps, addr);
1337	if (cma_any_addr(addr))
1338		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1339	else {
1340		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1341		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1342	}
1343
1344	if (ret) {
1345		ib_destroy_cm_id(id_priv->cm_id.ib);
1346		id_priv->cm_id.ib = NULL;
1347	}
1348
1349	return ret;
1350}
1351
1352static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1353{
1354	int ret;
1355	struct sockaddr_in *sin;
1356
1357	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
1358					    iw_conn_req_handler,
1359					    id_priv);
1360	if (IS_ERR(id_priv->cm_id.iw))
1361		return PTR_ERR(id_priv->cm_id.iw);
1362
1363	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1364	id_priv->cm_id.iw->local_addr = *sin;
1365
1366	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1367
1368	if (ret) {
1369		iw_destroy_cm_id(id_priv->cm_id.iw);
1370		id_priv->cm_id.iw = NULL;
1371	}
1372
1373	return ret;
1374}
1375
1376static int cma_listen_handler(struct rdma_cm_id *id,
1377			      struct rdma_cm_event *event)
1378{
1379	struct rdma_id_private *id_priv = id->context;
1380
1381	id->context = id_priv->id.context;
1382	id->event_handler = id_priv->id.event_handler;
1383	return id_priv->id.event_handler(id, event);
1384}
1385
1386static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1387			      struct cma_device *cma_dev)
1388{
1389	struct rdma_id_private *dev_id_priv;
1390	struct rdma_cm_id *id;
1391	int ret;
1392
1393	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1394	if (IS_ERR(id))
1395		return;
1396
1397	dev_id_priv = container_of(id, struct rdma_id_private, id);
1398
1399	dev_id_priv->state = CMA_ADDR_BOUND;
1400	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1401	       ip_addr_size(&id_priv->id.route.addr.src_addr));
1402
1403	cma_attach_to_dev(dev_id_priv, cma_dev);
1404	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1405
1406	ret = rdma_listen(id, id_priv->backlog);
1407	if (ret)
1408		goto err;
1409
1410	return;
1411err:
1412	cma_destroy_listen(dev_id_priv);
1413}
1414
1415static void cma_listen_on_all(struct rdma_id_private *id_priv)
1416{
1417	struct cma_device *cma_dev;
1418
1419	mutex_lock(&lock);
1420	list_add_tail(&id_priv->list, &listen_any_list);
1421	list_for_each_entry(cma_dev, &dev_list, list)
1422		cma_listen_on_dev(id_priv, cma_dev);
1423	mutex_unlock(&lock);
1424}
1425
1426static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1427{
1428	struct sockaddr_in addr_in;
1429
1430	memset(&addr_in, 0, sizeof addr_in);
1431	addr_in.sin_family = af;
1432	return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1433}
1434
1435int rdma_listen(struct rdma_cm_id *id, int backlog)
1436{
1437	struct rdma_id_private *id_priv;
1438	int ret;
1439
1440	id_priv = container_of(id, struct rdma_id_private, id);
1441	if (id_priv->state == CMA_IDLE) {
1442		ret = cma_bind_any(id, AF_INET);
1443		if (ret)
1444			return ret;
1445	}
1446
1447	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1448		return -EINVAL;
1449
1450	id_priv->backlog = backlog;
1451	if (id->device) {
1452		switch (rdma_node_get_transport(id->device->node_type)) {
1453		case RDMA_TRANSPORT_IB:
1454			ret = cma_ib_listen(id_priv);
1455			if (ret)
1456				goto err;
1457			break;
1458		case RDMA_TRANSPORT_IWARP:
1459			ret = cma_iw_listen(id_priv, backlog);
1460			if (ret)
1461				goto err;
1462			break;
1463		default:
1464			ret = -ENOSYS;
1465			goto err;
1466		}
1467	} else
1468		cma_listen_on_all(id_priv);
1469
1470	return 0;
1471err:
1472	id_priv->backlog = 0;
1473	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1474	return ret;
1475}
1476EXPORT_SYMBOL(rdma_listen);
1477
1478void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1479{
1480	struct rdma_id_private *id_priv;
1481
1482	id_priv = container_of(id, struct rdma_id_private, id);
1483	id_priv->tos = (u8) tos;
1484}
1485EXPORT_SYMBOL(rdma_set_service_type);
1486
1487static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1488			      void *context)
1489{
1490	struct cma_work *work = context;
1491	struct rdma_route *route;
1492
1493	route = &work->id->id.route;
1494
1495	if (!status) {
1496		route->num_paths = 1;
1497		*route->path_rec = *path_rec;
1498	} else {
1499		work->old_state = CMA_ROUTE_QUERY;
1500		work->new_state = CMA_ADDR_RESOLVED;
1501		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1502		work->event.status = status;
1503	}
1504
1505	queue_work(cma_wq, &work->work);
1506}
1507
1508static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1509			      struct cma_work *work)
1510{
1511	struct rdma_addr *addr = &id_priv->id.route.addr;
1512	struct ib_sa_path_rec path_rec;
1513	ib_sa_comp_mask comp_mask;
1514	struct sockaddr_in6 *sin6;
1515
1516	memset(&path_rec, 0, sizeof path_rec);
1517	ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1518	ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1519	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1520	path_rec.numb_path = 1;
1521	path_rec.reversible = 1;
1522	path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr);
1523
1524	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1525		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1526		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1527
1528	if (addr->src_addr.sa_family == AF_INET) {
1529		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1530		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1531	} else {
1532		sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1533		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1534		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1535	}
1536
1537	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1538					       id_priv->id.port_num, &path_rec,
1539					       comp_mask, timeout_ms,
1540					       GFP_KERNEL, cma_query_handler,
1541					       work, &id_priv->query);
1542
1543	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1544}
1545
1546static void cma_work_handler(struct work_struct *_work)
1547{
1548	struct cma_work *work = container_of(_work, struct cma_work, work);
1549	struct rdma_id_private *id_priv = work->id;
1550	int destroy = 0;
1551
1552	atomic_inc(&id_priv->dev_remove);
1553	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1554		goto out;
1555
1556	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1557		cma_exch(id_priv, CMA_DESTROYING);
1558		destroy = 1;
1559	}
1560out:
1561	cma_enable_remove(id_priv);
1562	cma_deref_id(id_priv);
1563	if (destroy)
1564		rdma_destroy_id(&id_priv->id);
1565	kfree(work);
1566}
1567
1568static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1569{
1570	struct rdma_route *route = &id_priv->id.route;
1571	struct cma_work *work;
1572	int ret;
1573
1574	work = kzalloc(sizeof *work, GFP_KERNEL);
1575	if (!work)
1576		return -ENOMEM;
1577
1578	work->id = id_priv;
1579	INIT_WORK(&work->work, cma_work_handler);
1580	work->old_state = CMA_ROUTE_QUERY;
1581	work->new_state = CMA_ROUTE_RESOLVED;
1582	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1583
1584	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1585	if (!route->path_rec) {
1586		ret = -ENOMEM;
1587		goto err1;
1588	}
1589
1590	ret = cma_query_ib_route(id_priv, timeout_ms, work);
1591	if (ret)
1592		goto err2;
1593
1594	return 0;
1595err2:
1596	kfree(route->path_rec);
1597	route->path_rec = NULL;
1598err1:
1599	kfree(work);
1600	return ret;
1601}
1602
1603int rdma_set_ib_paths(struct rdma_cm_id *id,
1604		      struct ib_sa_path_rec *path_rec, int num_paths)
1605{
1606	struct rdma_id_private *id_priv;
1607	int ret;
1608
1609	id_priv = container_of(id, struct rdma_id_private, id);
1610	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1611		return -EINVAL;
1612
1613	id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1614	if (!id->route.path_rec) {
1615		ret = -ENOMEM;
1616		goto err;
1617	}
1618
1619	memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1620	return 0;
1621err:
1622	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1623	return ret;
1624}
1625EXPORT_SYMBOL(rdma_set_ib_paths);
1626
1627static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1628{
1629	struct cma_work *work;
1630
1631	work = kzalloc(sizeof *work, GFP_KERNEL);
1632	if (!work)
1633		return -ENOMEM;
1634
1635	work->id = id_priv;
1636	INIT_WORK(&work->work, cma_work_handler);
1637	work->old_state = CMA_ROUTE_QUERY;
1638	work->new_state = CMA_ROUTE_RESOLVED;
1639	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1640	queue_work(cma_wq, &work->work);
1641	return 0;
1642}
1643
1644int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1645{
1646	struct rdma_id_private *id_priv;
1647	int ret;
1648
1649	id_priv = container_of(id, struct rdma_id_private, id);
1650	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1651		return -EINVAL;
1652
1653	atomic_inc(&id_priv->refcount);
1654	switch (rdma_node_get_transport(id->device->node_type)) {
1655	case RDMA_TRANSPORT_IB:
1656		ret = cma_resolve_ib_route(id_priv, timeout_ms);
1657		break;
1658	case RDMA_TRANSPORT_IWARP:
1659		ret = cma_resolve_iw_route(id_priv, timeout_ms);
1660		break;
1661	default:
1662		ret = -ENOSYS;
1663		break;
1664	}
1665	if (ret)
1666		goto err;
1667
1668	return 0;
1669err:
1670	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1671	cma_deref_id(id_priv);
1672	return ret;
1673}
1674EXPORT_SYMBOL(rdma_resolve_route);
1675
1676static int cma_bind_loopback(struct rdma_id_private *id_priv)
1677{
1678	struct cma_device *cma_dev;
1679	struct ib_port_attr port_attr;
1680	union ib_gid gid;
1681	u16 pkey;
1682	int ret;
1683	u8 p;
1684
1685	mutex_lock(&lock);
1686	if (list_empty(&dev_list)) {
1687		ret = -ENODEV;
1688		goto out;
1689	}
1690	list_for_each_entry(cma_dev, &dev_list, list)
1691		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1692			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1693			    port_attr.state == IB_PORT_ACTIVE)
1694				goto port_found;
1695
1696	p = 1;
1697	cma_dev = list_entry(dev_list.next, struct cma_device, list);
1698
1699port_found:
1700	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1701	if (ret)
1702		goto out;
1703
1704	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1705	if (ret)
1706		goto out;
1707
1708	ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1709	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1710	id_priv->id.port_num = p;
1711	cma_attach_to_dev(id_priv, cma_dev);
1712out:
1713	mutex_unlock(&lock);
1714	return ret;
1715}
1716
1717static void addr_handler(int status, struct sockaddr *src_addr,
1718			 struct rdma_dev_addr *dev_addr, void *context)
1719{
1720	struct rdma_id_private *id_priv = context;
1721	struct rdma_cm_event event;
1722
1723	memset(&event, 0, sizeof event);
1724	atomic_inc(&id_priv->dev_remove);
1725
1726	/*
1727	 * Grab mutex to block rdma_destroy_id() from removing the device while
1728	 * we're trying to acquire it.
1729	 */
1730	mutex_lock(&lock);
1731	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1732		mutex_unlock(&lock);
1733		goto out;
1734	}
1735
1736	if (!status && !id_priv->cma_dev)
1737		status = cma_acquire_dev(id_priv);
1738	mutex_unlock(&lock);
1739
1740	if (status) {
1741		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1742			goto out;
1743		event.event = RDMA_CM_EVENT_ADDR_ERROR;
1744		event.status = status;
1745	} else {
1746		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1747		       ip_addr_size(src_addr));
1748		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1749	}
1750
1751	if (id_priv->id.event_handler(&id_priv->id, &event)) {
1752		cma_exch(id_priv, CMA_DESTROYING);
1753		cma_enable_remove(id_priv);
1754		cma_deref_id(id_priv);
1755		rdma_destroy_id(&id_priv->id);
1756		return;
1757	}
1758out:
1759	cma_enable_remove(id_priv);
1760	cma_deref_id(id_priv);
1761}
1762
1763static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1764{
1765	struct cma_work *work;
1766	struct sockaddr_in *src_in, *dst_in;
1767	union ib_gid gid;
1768	int ret;
1769
1770	work = kzalloc(sizeof *work, GFP_KERNEL);
1771	if (!work)
1772		return -ENOMEM;
1773
1774	if (!id_priv->cma_dev) {
1775		ret = cma_bind_loopback(id_priv);
1776		if (ret)
1777			goto err;
1778	}
1779
1780	ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1781	ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1782
1783	if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1784		src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1785		dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1786		src_in->sin_family = dst_in->sin_family;
1787		src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1788	}
1789
1790	work->id = id_priv;
1791	INIT_WORK(&work->work, cma_work_handler);
1792	work->old_state = CMA_ADDR_QUERY;
1793	work->new_state = CMA_ADDR_RESOLVED;
1794	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1795	queue_work(cma_wq, &work->work);
1796	return 0;
1797err:
1798	kfree(work);
1799	return ret;
1800}
1801
1802static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1803			 struct sockaddr *dst_addr)
1804{
1805	if (src_addr && src_addr->sa_family)
1806		return rdma_bind_addr(id, src_addr);
1807	else
1808		return cma_bind_any(id, dst_addr->sa_family);
1809}
1810
1811int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1812		      struct sockaddr *dst_addr, int timeout_ms)
1813{
1814	struct rdma_id_private *id_priv;
1815	int ret;
1816
1817	id_priv = container_of(id, struct rdma_id_private, id);
1818	if (id_priv->state == CMA_IDLE) {
1819		ret = cma_bind_addr(id, src_addr, dst_addr);
1820		if (ret)
1821			return ret;
1822	}
1823
1824	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1825		return -EINVAL;
1826
1827	atomic_inc(&id_priv->refcount);
1828	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1829	if (cma_any_addr(dst_addr))
1830		ret = cma_resolve_loopback(id_priv);
1831	else
1832		ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr,
1833				      dst_addr, &id->route.addr.dev_addr,
1834				      timeout_ms, addr_handler, id_priv);
1835	if (ret)
1836		goto err;
1837
1838	return 0;
1839err:
1840	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1841	cma_deref_id(id_priv);
1842	return ret;
1843}
1844EXPORT_SYMBOL(rdma_resolve_addr);
1845
1846static void cma_bind_port(struct rdma_bind_list *bind_list,
1847			  struct rdma_id_private *id_priv)
1848{
1849	struct sockaddr_in *sin;
1850
1851	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1852	sin->sin_port = htons(bind_list->port);
1853	id_priv->bind_list = bind_list;
1854	hlist_add_head(&id_priv->node, &bind_list->owners);
1855}
1856
1857static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1858			  unsigned short snum)
1859{
1860	struct rdma_bind_list *bind_list;
1861	int port, ret;
1862
1863	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1864	if (!bind_list)
1865		return -ENOMEM;
1866
1867	do {
1868		ret = idr_get_new_above(ps, bind_list, snum, &port);
1869	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1870
1871	if (ret)
1872		goto err1;
1873
1874	if (port != snum) {
1875		ret = -EADDRNOTAVAIL;
1876		goto err2;
1877	}
1878
1879	bind_list->ps = ps;
1880	bind_list->port = (unsigned short) port;
1881	cma_bind_port(bind_list, id_priv);
1882	return 0;
1883err2:
1884	idr_remove(ps, port);
1885err1:
1886	kfree(bind_list);
1887	return ret;
1888}
1889
1890static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
1891{
1892	struct rdma_bind_list *bind_list;
1893	int port, ret;
1894
1895	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1896	if (!bind_list)
1897		return -ENOMEM;
1898
1899retry:
1900	do {
1901		ret = idr_get_new_above(ps, bind_list, next_port, &port);
1902	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1903
1904	if (ret)
1905		goto err1;
1906
1907	if (port > sysctl_local_port_range[1]) {
1908		if (next_port != sysctl_local_port_range[0]) {
1909			idr_remove(ps, port);
1910			next_port = sysctl_local_port_range[0];
1911			goto retry;
1912		}
1913		ret = -EADDRNOTAVAIL;
1914		goto err2;
1915	}
1916
1917	if (port == sysctl_local_port_range[1])
1918		next_port = sysctl_local_port_range[0];
1919	else
1920		next_port = port + 1;
1921
1922	bind_list->ps = ps;
1923	bind_list->port = (unsigned short) port;
1924	cma_bind_port(bind_list, id_priv);
1925	return 0;
1926err2:
1927	idr_remove(ps, port);
1928err1:
1929	kfree(bind_list);
1930	return ret;
1931}
1932
1933static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1934{
1935	struct rdma_id_private *cur_id;
1936	struct sockaddr_in *sin, *cur_sin;
1937	struct rdma_bind_list *bind_list;
1938	struct hlist_node *node;
1939	unsigned short snum;
1940
1941	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1942	snum = ntohs(sin->sin_port);
1943	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1944		return -EACCES;
1945
1946	bind_list = idr_find(ps, snum);
1947	if (!bind_list)
1948		return cma_alloc_port(ps, id_priv, snum);
1949
1950	/*
1951	 * We don't support binding to any address if anyone is bound to
1952	 * a specific address on the same port.
1953	 */
1954	if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1955		return -EADDRNOTAVAIL;
1956
1957	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1958		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1959			return -EADDRNOTAVAIL;
1960
1961		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1962		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1963			return -EADDRINUSE;
1964	}
1965
1966	cma_bind_port(bind_list, id_priv);
1967	return 0;
1968}
1969
1970static int cma_get_port(struct rdma_id_private *id_priv)
1971{
1972	struct idr *ps;
1973	int ret;
1974
1975	switch (id_priv->id.ps) {
1976	case RDMA_PS_SDP:
1977		ps = &sdp_ps;
1978		break;
1979	case RDMA_PS_TCP:
1980		ps = &tcp_ps;
1981		break;
1982	case RDMA_PS_UDP:
1983		ps = &udp_ps;
1984		break;
1985	case RDMA_PS_IPOIB:
1986		ps = &ipoib_ps;
1987		break;
1988	default:
1989		return -EPROTONOSUPPORT;
1990	}
1991
1992	mutex_lock(&lock);
1993	if (cma_any_port(&id_priv->id.route.addr.src_addr))
1994		ret = cma_alloc_any_port(ps, id_priv);
1995	else
1996		ret = cma_use_port(ps, id_priv);
1997	mutex_unlock(&lock);
1998
1999	return ret;
2000}
2001
2002int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2003{
2004	struct rdma_id_private *id_priv;
2005	int ret;
2006
2007	if (addr->sa_family != AF_INET)
2008		return -EAFNOSUPPORT;
2009
2010	id_priv = container_of(id, struct rdma_id_private, id);
2011	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2012		return -EINVAL;
2013
2014	if (!cma_any_addr(addr)) {
2015		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2016		if (ret)
2017			goto err1;
2018
2019		mutex_lock(&lock);
2020		ret = cma_acquire_dev(id_priv);
2021		mutex_unlock(&lock);
2022		if (ret)
2023			goto err1;
2024	}
2025
2026	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2027	ret = cma_get_port(id_priv);
2028	if (ret)
2029		goto err2;
2030
2031	return 0;
2032err2:
2033	if (!cma_any_addr(addr)) {
2034		mutex_lock(&lock);
2035		cma_detach_from_dev(id_priv);
2036		mutex_unlock(&lock);
2037	}
2038err1:
2039	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2040	return ret;
2041}
2042EXPORT_SYMBOL(rdma_bind_addr);
2043
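/*
 * Build the private data header that precedes user data in the CM messages:
 * SDP connections get an SDP hello header, every other port space gets the
 * generic cma_hdr carrying the IPv4 addresses and source port.
 */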
2044static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2045			  struct rdma_route *route)
2046{
2047	struct sockaddr_in *src4, *dst4;
2048	struct cma_hdr *cma_hdr;
2049	struct sdp_hh *sdp_hdr;
2050
2051	src4 = (struct sockaddr_in *) &route->addr.src_addr;
2052	dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2053
2054	switch (ps) {
2055	case RDMA_PS_SDP:
2056		sdp_hdr = hdr;
2057		if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2058			return -EINVAL;
2059		sdp_set_ip_ver(sdp_hdr, 4);
2060		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2061		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2062		sdp_hdr->port = src4->sin_port;
2063		break;
2064	default:
2065		cma_hdr = hdr;
2066		cma_hdr->cma_version = CMA_VERSION;
2067		cma_set_ip_ver(cma_hdr, 4);
2068		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2069		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2070		cma_hdr->port = src4->sin_port;
2071		break;
2072	}
2073	return 0;
2074}
2075
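/*
 * IB CM callback for the active side of a SIDR (UD) exchange.  Timeouts,
 * rejected replies, and Q_Key mismatches are reported to the user as
 * RDMA_CM_EVENT_UNREACHABLE; a successful reply becomes
 * RDMA_CM_EVENT_ESTABLISHED along with the address handle attributes,
 * remote QPN, and Q_Key needed to reach the peer.
 */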
2076static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2077				struct ib_cm_event *ib_event)
2078{
2079	struct rdma_id_private *id_priv = cm_id->context;
2080	struct rdma_cm_event event;
2081	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2082	int ret = 0;
2083
2084	if (cma_disable_remove(id_priv, CMA_CONNECT))
2085		return 0;
2086
2087	memset(&event, 0, sizeof event);
2088	switch (ib_event->event) {
2089	case IB_CM_SIDR_REQ_ERROR:
2090		event.event = RDMA_CM_EVENT_UNREACHABLE;
2091		event.status = -ETIMEDOUT;
2092		break;
2093	case IB_CM_SIDR_REP_RECEIVED:
2094		event.param.ud.private_data = ib_event->private_data;
2095		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2096		if (rep->status != IB_SIDR_SUCCESS) {
2097			event.event = RDMA_CM_EVENT_UNREACHABLE;
2098			event.status = rep->status;
2099			break;
2100		}
2101		if (id_priv->qkey != rep->qkey) {
2102			event.event = RDMA_CM_EVENT_UNREACHABLE;
2103			event.status = -EINVAL;
2104			break;
2105		}
2106		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2107				     id_priv->id.route.path_rec,
2108				     &event.param.ud.ah_attr);
2109		event.param.ud.qp_num = rep->qpn;
2110		event.param.ud.qkey = rep->qkey;
2111		event.event = RDMA_CM_EVENT_ESTABLISHED;
2112		event.status = 0;
2113		break;
2114	default:
2115		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2116		       ib_event->event);
2117		goto out;
2118	}
2119
2120	ret = id_priv->id.event_handler(&id_priv->id, &event);
2121	if (ret) {
2122		/* Destroy the CM ID by returning a non-zero value. */
2123		id_priv->cm_id.ib = NULL;
2124		cma_exch(id_priv, CMA_DESTROYING);
2125		cma_enable_remove(id_priv);
2126		rdma_destroy_id(&id_priv->id);
2127		return ret;
2128	}
2129out:
2130	cma_enable_remove(id_priv);
2131	return ret;
2132}
2133
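/*
 * Active side of a UD exchange: prefix the user's private data with the
 * cma header and issue an IB CM SIDR REQ to the remote service.
 */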
2134static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2135			      struct rdma_conn_param *conn_param)
2136{
2137	struct ib_cm_sidr_req_param req;
2138	struct rdma_route *route;
2139	int ret;
2140
	memset(&req, 0, sizeof req);
2141	req.private_data_len = sizeof(struct cma_hdr) +
2142			       conn_param->private_data_len;
2143	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2144	if (!req.private_data)
2145		return -ENOMEM;
2146
2147	if (conn_param->private_data && conn_param->private_data_len)
2148		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2149		       conn_param->private_data, conn_param->private_data_len);
2150
2151	route = &id_priv->id.route;
2152	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2153	if (ret)
2154		goto out;
2155
2156	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2157					    cma_sidr_rep_handler, id_priv);
2158	if (IS_ERR(id_priv->cm_id.ib)) {
2159		ret = PTR_ERR(id_priv->cm_id.ib);
2160		goto out;
2161	}
2162
2163	req.path = route->path_rec;
2164	req.service_id = cma_get_service_id(id_priv->id.ps,
2165					    &route->addr.dst_addr);
2166	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2167	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2168
2169	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2170	if (ret) {
2171		ib_destroy_cm_id(id_priv->cm_id.ib);
2172		id_priv->cm_id.ib = NULL;
2173	}
2174out:
2175	kfree(req.private_data);
2176	return ret;
2177}
2178
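/*
 * Active side of an IB connection: allocate the private data buffer (header
 * plus user data), create the IB CM id, and send a CM REQ built from
 * conn_param and the resolved path records.
 */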
2179static int cma_connect_ib(struct rdma_id_private *id_priv,
2180			  struct rdma_conn_param *conn_param)
2181{
2182	struct ib_cm_req_param req;
2183	struct rdma_route *route;
2184	void *private_data;
2185	int offset, ret;
2186
2187	memset(&req, 0, sizeof req);
2188	offset = cma_user_data_offset(id_priv->id.ps);
2189	req.private_data_len = offset + conn_param->private_data_len;
2190	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2191	if (!private_data)
2192		return -ENOMEM;
2193
2194	if (conn_param->private_data && conn_param->private_data_len)
2195		memcpy(private_data + offset, conn_param->private_data,
2196		       conn_param->private_data_len);
2197
2198	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2199					    id_priv);
2200	if (IS_ERR(id_priv->cm_id.ib)) {
2201		ret = PTR_ERR(id_priv->cm_id.ib);
2202		goto out;
2203	}
2204
2205	route = &id_priv->id.route;
2206	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2207	if (ret)
2208		goto out;
2209	req.private_data = private_data;
2210
2211	req.primary_path = &route->path_rec[0];
2212	if (route->num_paths == 2)
2213		req.alternate_path = &route->path_rec[1];
2214
2215	req.service_id = cma_get_service_id(id_priv->id.ps,
2216					    &route->addr.dst_addr);
2217	req.qp_num = id_priv->qp_num;
2218	req.qp_type = IB_QPT_RC;
2219	req.starting_psn = id_priv->seq_num;
2220	req.responder_resources = conn_param->responder_resources;
2221	req.initiator_depth = conn_param->initiator_depth;
2222	req.flow_control = conn_param->flow_control;
2223	req.retry_count = conn_param->retry_count;
2224	req.rnr_retry_count = conn_param->rnr_retry_count;
2225	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2226	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2227	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2228	req.srq = id_priv->srq ? 1 : 0;
2229
2230	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2231out:
2232	if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2233		ib_destroy_cm_id(id_priv->cm_id.ib);
2234		id_priv->cm_id.ib = NULL;
2235	}
2236
2237	kfree(private_data);
2238	return ret;
2239}
2240
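/*
 * Active side of an iWARP connection: create the iw_cm id, copy the bound
 * addresses into it, move any QP attached to the id to RTR, and hand off
 * to iw_cm_connect().
 */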
2241static int cma_connect_iw(struct rdma_id_private *id_priv,
2242			  struct rdma_conn_param *conn_param)
2243{
2244	struct iw_cm_id *cm_id;
2245	struct sockaddr_in *sin;
2246	int ret;
2247	struct iw_cm_conn_param iw_param;
2248
2249	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2250	if (IS_ERR(cm_id)) {
2251		ret = PTR_ERR(cm_id);
2252		goto out;
2253	}
2254
2255	id_priv->cm_id.iw = cm_id;
2256
2257	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2258	cm_id->local_addr = *sin;
2259
2260	sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
2261	cm_id->remote_addr = *sin;
2262
2263	ret = cma_modify_qp_rtr(&id_priv->id);
2264	if (ret)
2265		goto out;
2266
2267	iw_param.ord = conn_param->initiator_depth;
2268	iw_param.ird = conn_param->responder_resources;
2269	iw_param.private_data = conn_param->private_data;
2270	iw_param.private_data_len = conn_param->private_data_len;
2271	if (id_priv->id.qp)
2272		iw_param.qpn = id_priv->qp_num;
2273	else
2274		iw_param.qpn = conn_param->qp_num;
2275	ret = iw_cm_connect(cm_id, &iw_param);
2276out:
2277	if (ret && !IS_ERR(cm_id)) {
2278		iw_destroy_cm_id(cm_id);
2279		id_priv->cm_id.iw = NULL;
2280	}
2281	return ret;
2282}
2283
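/*
 * Initiate an outgoing connection (or UD service resolution) on an id whose
 * route has been resolved.  The transport of the bound device selects the
 * path: SIDR for UD port spaces over IB, a CM REQ for connected IB, or an
 * iWARP connect.  A rough sketch of the active-side call sequence, with
 * illustrative names and all error handling and event waiting omitted:
 *
 *	id = rdma_create_id(handler, ctx, RDMA_PS_TCP);
 *	rdma_resolve_addr(id, NULL, dst_addr, timeout_ms);
 *	... RDMA_CM_EVENT_ADDR_RESOLVED ...
 *	rdma_resolve_route(id, timeout_ms);
 *	... RDMA_CM_EVENT_ROUTE_RESOLVED ...
 *	rdma_connect(id, &conn_param);
 */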
2284int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2285{
2286	struct rdma_id_private *id_priv;
2287	int ret;
2288
2289	id_priv = container_of(id, struct rdma_id_private, id);
2290	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2291		return -EINVAL;
2292
2293	if (!id->qp) {
2294		id_priv->qp_num = conn_param->qp_num;
2295		id_priv->srq = conn_param->srq;
2296	}
2297
2298	switch (rdma_node_get_transport(id->device->node_type)) {
2299	case RDMA_TRANSPORT_IB:
2300		if (cma_is_ud_ps(id->ps))
2301			ret = cma_resolve_ib_udp(id_priv, conn_param);
2302		else
2303			ret = cma_connect_ib(id_priv, conn_param);
2304		break;
2305	case RDMA_TRANSPORT_IWARP:
2306		ret = cma_connect_iw(id_priv, conn_param);
2307		break;
2308	default:
2309		ret = -ENOSYS;
2310		break;
2311	}
2312	if (ret)
2313		goto err;
2314
2315	return 0;
2316err:
2317	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2318	return ret;
2319}
2320EXPORT_SYMBOL(rdma_connect);
2321
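/*
 * Passive side accept for connected IB: if the user created the QP through
 * the id, transition it to RTR and then RTS here, then send the CM REP
 * built from conn_param.
 */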
2322static int cma_accept_ib(struct rdma_id_private *id_priv,
2323			 struct rdma_conn_param *conn_param)
2324{
2325	struct ib_cm_rep_param rep;
2326	struct ib_qp_attr qp_attr;
2327	int qp_attr_mask, ret;
2328
2329	if (id_priv->id.qp) {
2330		ret = cma_modify_qp_rtr(&id_priv->id);
2331		if (ret)
2332			goto out;
2333
2334		qp_attr.qp_state = IB_QPS_RTS;
2335		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr,
2336					 &qp_attr_mask);
2337		if (ret)
2338			goto out;
2339
2340		qp_attr.max_rd_atomic = conn_param->initiator_depth;
2341		ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
2342		if (ret)
2343			goto out;
2344	}
2345
2346	memset(&rep, 0, sizeof rep);
2347	rep.qp_num = id_priv->qp_num;
2348	rep.starting_psn = id_priv->seq_num;
2349	rep.private_data = conn_param->private_data;
2350	rep.private_data_len = conn_param->private_data_len;
2351	rep.responder_resources = conn_param->responder_resources;
2352	rep.initiator_depth = conn_param->initiator_depth;
2353	rep.failover_accepted = 0;
2354	rep.flow_control = conn_param->flow_control;
2355	rep.rnr_retry_count = conn_param->rnr_retry_count;
2356	rep.srq = id_priv->srq ? 1 : 0;
2357
2358	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2359out:
2360	return ret;
2361}
2362
2363static int cma_accept_iw(struct rdma_id_private *id_priv,
2364		  struct rdma_conn_param *conn_param)
2365{
2366	struct iw_cm_conn_param iw_param;
2367	int ret;
2368
	if (!conn_param)
		return -EINVAL;

2369	ret = cma_modify_qp_rtr(&id_priv->id);
2370	if (ret)
2371		return ret;
2372
2373	iw_param.ord = conn_param->initiator_depth;
2374	iw_param.ird = conn_param->responder_resources;
2375	iw_param.private_data = conn_param->private_data;
2376	iw_param.private_data_len = conn_param->private_data_len;
2377	if (id_priv->id.qp)
2378		iw_param.qpn = id_priv->qp_num;
2379	else
2380		iw_param.qpn = conn_param->qp_num;
2381
2382	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2383}
2384
2385static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2386			     enum ib_cm_sidr_status status,
2387			     const void *private_data, int private_data_len)
2388{
2389	struct ib_cm_sidr_rep_param rep;
2390
2391	memset(&rep, 0, sizeof rep);
2392	rep.status = status;
2393	if (status == IB_SIDR_SUCCESS) {
2394		rep.qp_num = id_priv->qp_num;
2395		rep.qkey = id_priv->qkey;
2396	}
2397	rep.private_data = private_data;
2398	rep.private_data_len = private_data_len;
2399
2400	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2401}
2402
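/*
 * Accept an incoming connection request.  UD port spaces answer with a SIDR
 * REP carrying the local QPN and Q_Key; connected IB sends a CM REP built
 * from conn_param, or falls back to cma_rep_recv() when no conn_param is
 * given; iWARP hands off to iw_cm_accept().  On failure the request is
 * rejected and the QP is moved to the error state.
 */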
2403int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2404{
2405	struct rdma_id_private *id_priv;
2406	int ret;
2407
2408	id_priv = container_of(id, struct rdma_id_private, id);
2409	if (!cma_comp(id_priv, CMA_CONNECT))
2410		return -EINVAL;
2411
2412	if (!id->qp && conn_param) {
2413		id_priv->qp_num = conn_param->qp_num;
2414		id_priv->srq = conn_param->srq;
2415	}
2416
2417	switch (rdma_node_get_transport(id->device->node_type)) {
2418	case RDMA_TRANSPORT_IB:
2419		if (cma_is_ud_ps(id->ps))
2420			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2421						conn_param ? conn_param->private_data : NULL,
2422						conn_param ? conn_param->private_data_len : 0);
2423		else if (conn_param)
2424			ret = cma_accept_ib(id_priv, conn_param);
2425		else
2426			ret = cma_rep_recv(id_priv);
2427		break;
2428	case RDMA_TRANSPORT_IWARP:
2429		ret = cma_accept_iw(id_priv, conn_param);
2430		break;
2431	default:
2432		ret = -ENOSYS;
2433		break;
2434	}
2435
2436	if (ret)
2437		goto reject;
2438
2439	return 0;
2440reject:
2441	cma_modify_qp_err(id);
2442	rdma_reject(id, NULL, 0);
2443	return ret;
2444}
2445EXPORT_SYMBOL(rdma_accept);
2446
2447int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2448{
2449	struct rdma_id_private *id_priv;
2450	int ret;
2451
2452	id_priv = container_of(id, struct rdma_id_private, id);
2453	if (!cma_has_cm_dev(id_priv))
2454		return -EINVAL;
2455
2456	switch (id->device->node_type) {
2457	case RDMA_NODE_IB_CA:
2458		ret = ib_cm_notify(id_priv->cm_id.ib, event);
2459		break;
2460	default:
2461		ret = 0;
2462		break;
2463	}
2464	return ret;
2465}
2466EXPORT_SYMBOL(rdma_notify);
2467
2468int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2469		u8 private_data_len)
2470{
2471	struct rdma_id_private *id_priv;
2472	int ret;
2473
2474	id_priv = container_of(id, struct rdma_id_private, id);
2475	if (!cma_has_cm_dev(id_priv))
2476		return -EINVAL;
2477
2478	switch (rdma_node_get_transport(id->device->node_type)) {
2479	case RDMA_TRANSPORT_IB:
2480		if (cma_is_ud_ps(id->ps))
2481			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2482						private_data, private_data_len);
2483		else
2484			ret = ib_send_cm_rej(id_priv->cm_id.ib,
2485					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
2486					     0, private_data, private_data_len);
2487		break;
2488	case RDMA_TRANSPORT_IWARP:
2489		ret = iw_cm_reject(id_priv->cm_id.iw,
2490				   private_data, private_data_len);
2491		break;
2492	default:
2493		ret = -ENOSYS;
2494		break;
2495	}
2496	return ret;
2497}
2498EXPORT_SYMBOL(rdma_reject);
2499
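/*
 * Tear down an established connection.  For IB the QP is moved to the error
 * state and a DREQ is sent (or a DREP, if the peer's DREQ already arrived);
 * for iWARP the teardown is delegated to iw_cm_disconnect().
 */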
2500int rdma_disconnect(struct rdma_cm_id *id)
2501{
2502	struct rdma_id_private *id_priv;
2503	int ret;
2504
2505	id_priv = container_of(id, struct rdma_id_private, id);
2506	if (!cma_has_cm_dev(id_priv))
2507		return -EINVAL;
2508
2509	switch (rdma_node_get_transport(id->device->node_type)) {
2510	case RDMA_TRANSPORT_IB:
2511		ret = cma_modify_qp_err(id);
2512		if (ret)
2513			goto out;
2514		/* Initiate or respond to a disconnect. */
2515		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2516			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2517		break;
2518	case RDMA_TRANSPORT_IWARP:
2519		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2520		break;
2521	default:
2522		ret = -EINVAL;
2523		break;
2524	}
2525out:
2526	return ret;
2527}
2528EXPORT_SYMBOL(rdma_disconnect);
2529
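/*
 * Completion handler for an SA multicast join.  On success the id's QP (if
 * any) is attached to the group and RDMA_CM_EVENT_MULTICAST_JOIN is reported
 * with the AH attributes, multicast QPN, and Q_Key; on failure the user sees
 * RDMA_CM_EVENT_MULTICAST_ERROR.
 */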
2530static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2531{
2532	struct rdma_id_private *id_priv;
2533	struct cma_multicast *mc = multicast->context;
2534	struct rdma_cm_event event;
2535	int ret;
2536
2537	id_priv = mc->id_priv;
2538	if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
2539	    cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
2540		return 0;
2541
2542	if (!status && id_priv->id.qp)
2543		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2544					 multicast->rec.mlid);
2545
2546	memset(&event, 0, sizeof event);
2547	event.status = status;
2548	event.param.ud.private_data = mc->context;
2549	if (!status) {
2550		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2551		ib_init_ah_from_mcmember(id_priv->id.device,
2552					 id_priv->id.port_num, &multicast->rec,
2553					 &event.param.ud.ah_attr);
2554		event.param.ud.qp_num = 0xFFFFFF;
2555		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2556	} else
2557		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2558
2559	ret = id_priv->id.event_handler(&id_priv->id, &event);
2560	if (ret) {
2561		cma_exch(id_priv, CMA_DESTROYING);
2562		cma_enable_remove(id_priv);
2563		rdma_destroy_id(&id_priv->id);
2564		return 0;
2565	}
2566
2567	cma_enable_remove(id_priv);
2568	return 0;
2569}
2570
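/*
 * Derive the MGID to join: zero for a wildcard address, the address itself
 * when it is already an SA-assigned IPv6 MGID, otherwise the IPoIB mapping
 * of the IPv4 address (tagged with the RDMA CM signature byte for
 * RDMA_PS_UDP and the partition key of the bound device).
 */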
2571static void cma_set_mgid(struct rdma_id_private *id_priv,
2572			 struct sockaddr *addr, union ib_gid *mgid)
2573{
2574	unsigned char mc_map[MAX_ADDR_LEN];
2575	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2576	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2577	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2578
2579	if (cma_any_addr(addr)) {
2580		memset(mgid, 0, sizeof *mgid);
2581	} else if ((addr->sa_family == AF_INET6) &&
2582		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
2583								 0xFF10A01B)) {
2584		/* IPv6 address is an SA assigned MGID. */
2585		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2586	} else {
2587		ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
2588		if (id_priv->id.ps == RDMA_PS_UDP)
2589			mc_map[7] = 0x01;	/* Use RDMA CM signature */
2590		mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
2591		mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
2592		*mgid = *(union ib_gid *) (mc_map + 4);
2593	}
2594}
2595
2596static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2597				 struct cma_multicast *mc)
2598{
2599	struct ib_sa_mcmember_rec rec;
2600	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2601	ib_sa_comp_mask comp_mask;
2602	int ret;
2603
2604	ib_addr_get_mgid(dev_addr, &rec.mgid);
2605	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2606				     &rec.mgid, &rec);
2607	if (ret)
2608		return ret;
2609
2610	cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
2611	if (id_priv->id.ps == RDMA_PS_UDP)
2612		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2613	ib_addr_get_sgid(dev_addr, &rec.port_gid);
2614	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2615	rec.join_state = 1;
2616
2617	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2618		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2619		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2620		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
2621		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2622
2623	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2624						id_priv->id.port_num, &rec,
2625						comp_mask, GFP_KERNEL,
2626						cma_ib_mc_handler, mc);
2627	if (IS_ERR(mc->multicast.ib))
2628		return PTR_ERR(mc->multicast.ib);
2629
2630	return 0;
2631}
2632
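/*
 * Join the multicast group identified by addr.  The id must be bound to or
 * have resolved an address; the join is only implemented for IB transports
 * and completes asynchronously through cma_ib_mc_handler().
 */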
2633int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
2634			void *context)
2635{
2636	struct rdma_id_private *id_priv;
2637	struct cma_multicast *mc;
2638	int ret;
2639
2640	id_priv = container_of(id, struct rdma_id_private, id);
2641	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2642	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2643		return -EINVAL;
2644
2645	mc = kmalloc(sizeof *mc, GFP_KERNEL);
2646	if (!mc)
2647		return -ENOMEM;
2648
2649	memcpy(&mc->addr, addr, ip_addr_size(addr));
2650	mc->context = context;
2651	mc->id_priv = id_priv;
2652
2653	spin_lock(&id_priv->lock);
2654	list_add(&mc->list, &id_priv->mc_list);
2655	spin_unlock(&id_priv->lock);
2656
2657	switch (rdma_node_get_transport(id->device->node_type)) {
2658	case RDMA_TRANSPORT_IB:
2659		ret = cma_join_ib_multicast(id_priv, mc);
2660		break;
2661	default:
2662		ret = -ENOSYS;
2663		break;
2664	}
2665
2666	if (ret) {
2667		spin_lock_irq(&id_priv->lock);
2668		list_del(&mc->list);
2669		spin_unlock_irq(&id_priv->lock);
2670		kfree(mc);
2671	}
2672	return ret;
2673}
2674EXPORT_SYMBOL(rdma_join_multicast);
2675
2676void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2677{
2678	struct rdma_id_private *id_priv;
2679	struct cma_multicast *mc;
2680
2681	id_priv = container_of(id, struct rdma_id_private, id);
2682	spin_lock_irq(&id_priv->lock);
2683	list_for_each_entry(mc, &id_priv->mc_list, list) {
2684		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
2685			list_del(&mc->list);
2686			spin_unlock_irq(&id_priv->lock);
2687
2688			if (id->qp)
2689				ib_detach_mcast(id->qp,
2690						&mc->multicast.ib->rec.mgid,
2691						mc->multicast.ib->rec.mlid);
2692			ib_sa_free_multicast(mc->multicast.ib);
2693			kfree(mc);
2694			return;
2695		}
2696	}
2697	spin_unlock_irq(&id_priv->lock);
2698}
2699EXPORT_SYMBOL(rdma_leave_multicast);
2700
2701static void cma_add_one(struct ib_device *device)
2702{
2703	struct cma_device *cma_dev;
2704	struct rdma_id_private *id_priv;
2705
2706	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
2707	if (!cma_dev)
2708		return;
2709
2710	cma_dev->device = device;
2711
2712	init_completion(&cma_dev->comp);
2713	atomic_set(&cma_dev->refcount, 1);
2714	INIT_LIST_HEAD(&cma_dev->id_list);
2715	ib_set_client_data(device, &cma_client, cma_dev);
2716
2717	mutex_lock(&lock);
2718	list_add_tail(&cma_dev->list, &dev_list);
2719	list_for_each_entry(id_priv, &listen_any_list, list)
2720		cma_listen_on_dev(id_priv, cma_dev);
2721	mutex_unlock(&lock);
2722}
2723
2724static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2725{
2726	struct rdma_cm_event event;
2727	enum cma_state state;
2728
2729	/* Record that we want to remove the device */
2730	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
2731	if (state == CMA_DESTROYING)
2732		return 0;
2733
2734	cma_cancel_operation(id_priv, state);
2735	wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
2736
2737	/* Check for destruction from another callback. */
2738	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
2739		return 0;
2740
2741	memset(&event, 0, sizeof event);
2742	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
2743	return id_priv->id.event_handler(&id_priv->id, &event);
2744}
2745
2746static void cma_process_remove(struct cma_device *cma_dev)
2747{
2748	struct rdma_id_private *id_priv;
2749	int ret;
2750
2751	mutex_lock(&lock);
2752	while (!list_empty(&cma_dev->id_list)) {
2753		id_priv = list_entry(cma_dev->id_list.next,
2754				     struct rdma_id_private, list);
2755
2756		if (cma_internal_listen(id_priv)) {
2757			cma_destroy_listen(id_priv);
2758			continue;
2759		}
2760
2761		list_del_init(&id_priv->list);
2762		atomic_inc(&id_priv->refcount);
2763		mutex_unlock(&lock);
2764
2765		ret = cma_remove_id_dev(id_priv);
2766		cma_deref_id(id_priv);
2767		if (ret)
2768			rdma_destroy_id(&id_priv->id);
2769
2770		mutex_lock(&lock);
2771	}
2772	mutex_unlock(&lock);
2773
2774	cma_deref_dev(cma_dev);
2775	wait_for_completion(&cma_dev->comp);
2776}
2777
2778static void cma_remove_one(struct ib_device *device)
2779{
2780	struct cma_device *cma_dev;
2781
2782	cma_dev = ib_get_client_data(device, &cma_client);
2783	if (!cma_dev)
2784		return;
2785
2786	mutex_lock(&lock);
2787	list_del(&cma_dev->list);
2788	mutex_unlock(&lock);
2789
2790	cma_process_remove(cma_dev);
2791	kfree(cma_dev);
2792}
2793
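/*
 * Module setup: seed next_port at a random offset within the local port
 * range, create the rdma_cm workqueue, and register with the SA, address
 * resolution, and IB client frameworks.
 */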
2794static int cma_init(void)
2795{
2796	int ret;
2797
2798	get_random_bytes(&next_port, sizeof next_port);
2799	next_port = ((unsigned int) next_port %
2800		    (sysctl_local_port_range[1] - sysctl_local_port_range[0])) +
2801		    sysctl_local_port_range[0];
2802	cma_wq = create_singlethread_workqueue("rdma_cm");
2803	if (!cma_wq)
2804		return -ENOMEM;
2805
2806	ib_sa_register_client(&sa_client);
2807	rdma_addr_register_client(&addr_client);
2808
2809	ret = ib_register_client(&cma_client);
2810	if (ret)
2811		goto err;
2812	return 0;
2813
2814err:
2815	rdma_addr_unregister_client(&addr_client);
2816	ib_sa_unregister_client(&sa_client);
2817	destroy_workqueue(cma_wq);
2818	return ret;
2819}
2820
2821static void cma_cleanup(void)
2822{
2823	ib_unregister_client(&cma_client);
2824	rdma_addr_unregister_client(&addr_client);
2825	ib_sa_unregister_client(&sa_client);
2826	destroy_workqueue(cma_wq);
2827	idr_destroy(&sdp_ps);
2828	idr_destroy(&tcp_ps);
2829	idr_destroy(&udp_ps);
2830	idr_destroy(&ipoib_ps);
2831}
2832
2833module_init(cma_init);
2834module_exit(cma_cleanup);
2835