cma.c revision 4dd81e895655c59bd19d7a8f03a5de1310f4aeb6
1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/completion.h>
37#include <linux/in.h>
38#include <linux/in6.h>
39#include <linux/mutex.h>
40#include <linux/random.h>
41#include <linux/idr.h>
42#include <linux/inetdevice.h>
43#include <linux/slab.h>
44#include <linux/module.h>
45#include <net/route.h>
46
47#include <net/tcp.h>
48#include <net/ipv6.h>
49
50#include <rdma/rdma_cm.h>
51#include <rdma/rdma_cm_ib.h>
52#include <rdma/rdma_netlink.h>
53#include <rdma/ib_cache.h>
54#include <rdma/ib_cm.h>
55#include <rdma/ib_sa.h>
56#include <rdma/iw_cm.h>
57
58MODULE_AUTHOR("Sean Hefty");
59MODULE_DESCRIPTION("Generic RDMA CM Agent");
60MODULE_LICENSE("Dual BSD/GPL");
61
62#define CMA_CM_RESPONSE_TIMEOUT 20
63#define CMA_MAX_CM_RETRIES 15
64#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
65#define CMA_IBOE_PACKET_LIFETIME 18
66
67static void cma_add_one(struct ib_device *device);
68static void cma_remove_one(struct ib_device *device);
69
70static struct ib_client cma_client = {
71	.name   = "cma",
72	.add    = cma_add_one,
73	.remove = cma_remove_one
74};
75
76static struct ib_sa_client sa_client;
77static struct rdma_addr_client addr_client;
78static LIST_HEAD(dev_list);
79static LIST_HEAD(listen_any_list);
80static DEFINE_MUTEX(lock);
81static struct workqueue_struct *cma_wq;
82static DEFINE_IDR(sdp_ps);
83static DEFINE_IDR(tcp_ps);
84static DEFINE_IDR(udp_ps);
85static DEFINE_IDR(ipoib_ps);
86static DEFINE_IDR(ib_ps);
87
88struct cma_device {
89	struct list_head	list;
90	struct ib_device	*device;
91	struct completion	comp;
92	atomic_t		refcount;
93	struct list_head	id_list;
94};
95
96struct rdma_bind_list {
97	struct idr		*ps;
98	struct hlist_head	owners;
99	unsigned short		port;
100};
101
102/*
103 * Device removal can occur at any time, so we need extra handling to
104 * serialize notifying the user of device removal with other callbacks.
105 * We do this by disabling removal notification while a callback is in progress,
106 * and reporting it after the callback completes.
107 */
108struct rdma_id_private {
109	struct rdma_cm_id	id;
110
111	struct rdma_bind_list	*bind_list;
112	struct hlist_node	node;
113	struct list_head	list; /* listen_any_list or cma_device.list */
114	struct list_head	listen_list; /* per device listens */
115	struct cma_device	*cma_dev;
116	struct list_head	mc_list;
117
118	int			internal_id;
119	enum rdma_cm_state	state;
120	spinlock_t		lock;
121	struct mutex		qp_mutex;
122
123	struct completion	comp;
124	atomic_t		refcount;
125	struct mutex		handler_mutex;
126
127	int			backlog;
128	int			timeout_ms;
129	struct ib_sa_query	*query;
130	int			query_id;
131	union {
132		struct ib_cm_id	*ib;
133		struct iw_cm_id	*iw;
134	} cm_id;
135
136	u32			seq_num;
137	u32			qkey;
138	u32			qp_num;
139	pid_t			owner;
140	u8			srq;
141	u8			tos;
142	u8			reuseaddr;
143};
144
145struct cma_multicast {
146	struct rdma_id_private *id_priv;
147	union {
148		struct ib_sa_multicast *ib;
149	} multicast;
150	struct list_head	list;
151	void			*context;
152	struct sockaddr_storage	addr;
153	struct kref		mcref;
154};
155
156struct cma_work {
157	struct work_struct	work;
158	struct rdma_id_private	*id;
159	enum rdma_cm_state	old_state;
160	enum rdma_cm_state	new_state;
161	struct rdma_cm_event	event;
162};
163
164struct cma_ndev_work {
165	struct work_struct	work;
166	struct rdma_id_private	*id;
167	struct rdma_cm_event	event;
168};
169
170struct iboe_mcast_work {
171	struct work_struct	 work;
172	struct rdma_id_private	*id;
173	struct cma_multicast	*mc;
174};
175
176union cma_ip_addr {
177	struct in6_addr ip6;
178	struct {
179		__be32 pad[3];
180		__be32 addr;
181	} ip4;
182};
183
184struct cma_hdr {
185	u8 cma_version;
186	u8 ip_version;	/* IP version: 7:4 */
187	__be16 port;
188	union cma_ip_addr src_addr;
189	union cma_ip_addr dst_addr;
190};
191
192struct sdp_hh {
193	u8 bsdh[16];
194	u8 sdp_version; /* Major version: 7:4 */
195	u8 ip_version;	/* IP version: 7:4 */
196	u8 sdp_specific1[10];
197	__be16 port;
198	__be16 sdp_specific2;
199	union cma_ip_addr src_addr;
200	union cma_ip_addr dst_addr;
201};
202
203struct sdp_hah {
204	u8 bsdh[16];
205	u8 sdp_version;
206};
207
208#define CMA_VERSION 0x00
209#define SDP_MAJ_VERSION 0x2
210
211static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
212{
213	unsigned long flags;
214	int ret;
215
216	spin_lock_irqsave(&id_priv->lock, flags);
217	ret = (id_priv->state == comp);
218	spin_unlock_irqrestore(&id_priv->lock, flags);
219	return ret;
220}
221
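/*
 * Atomically test the CM state under the id spinlock and, if it matches
 * 'comp', move it to 'exch'.  Returns nonzero when the exchange was
 * performed, so callers can use the result to guard state transitions.
 */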
222static int cma_comp_exch(struct rdma_id_private *id_priv,
223			 enum rdma_cm_state comp, enum rdma_cm_state exch)
224{
225	unsigned long flags;
226	int ret;
227
228	spin_lock_irqsave(&id_priv->lock, flags);
229	if ((ret = (id_priv->state == comp)))
230		id_priv->state = exch;
231	spin_unlock_irqrestore(&id_priv->lock, flags);
232	return ret;
233}
234
235static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
236				   enum rdma_cm_state exch)
237{
238	unsigned long flags;
239	enum rdma_cm_state old;
240
241	spin_lock_irqsave(&id_priv->lock, flags);
242	old = id_priv->state;
243	id_priv->state = exch;
244	spin_unlock_irqrestore(&id_priv->lock, flags);
245	return old;
246}
247
248static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
249{
250	return hdr->ip_version >> 4;
251}
252
253static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
254{
255	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
256}
257
258static inline u8 sdp_get_majv(u8 sdp_version)
259{
260	return sdp_version >> 4;
261}
262
263static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
264{
265	return hh->ip_version >> 4;
266}
267
268static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
269{
270	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
271}
272
273static void cma_attach_to_dev(struct rdma_id_private *id_priv,
274			      struct cma_device *cma_dev)
275{
276	atomic_inc(&cma_dev->refcount);
277	id_priv->cma_dev = cma_dev;
278	id_priv->id.device = cma_dev->device;
279	id_priv->id.route.addr.dev_addr.transport =
280		rdma_node_get_transport(cma_dev->device->node_type);
281	list_add_tail(&id_priv->list, &cma_dev->id_list);
282}
283
284static inline void cma_deref_dev(struct cma_device *cma_dev)
285{
286	if (atomic_dec_and_test(&cma_dev->refcount))
287		complete(&cma_dev->comp);
288}
289
290static inline void release_mc(struct kref *kref)
291{
292	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
293
294	kfree(mc->multicast.ib);
295	kfree(mc);
296}
297
298static void cma_release_dev(struct rdma_id_private *id_priv)
299{
300	mutex_lock(&lock);
301	list_del(&id_priv->list);
302	cma_deref_dev(id_priv->cma_dev);
303	id_priv->cma_dev = NULL;
304	mutex_unlock(&lock);
305}
306
307static int cma_set_qkey(struct rdma_id_private *id_priv)
308{
309	struct ib_sa_mcmember_rec rec;
310	int ret = 0;
311
312	if (id_priv->qkey)
313		return 0;
314
315	switch (id_priv->id.ps) {
316	case RDMA_PS_UDP:
317		id_priv->qkey = RDMA_UDP_QKEY;
318		break;
319	case RDMA_PS_IPOIB:
320		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
321		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
322					     id_priv->id.port_num, &rec.mgid,
323					     &rec);
324		if (!ret)
325			id_priv->qkey = be32_to_cpu(rec.qkey);
326		break;
327	default:
328		break;
329	}
330	return ret;
331}
332
333static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
334{
335	int i;
336	int err;
337	struct ib_port_attr props;
338	union ib_gid tmp;
339
340	err = ib_query_port(device, port_num, &props);
341	if (err)
342		return 1;
343
344	for (i = 0; i < props.gid_tbl_len; ++i) {
345		err = ib_query_gid(device, port_num, i, &tmp);
346		if (err)
347			return 1;
348		if (!memcmp(&tmp, gid, sizeof tmp))
349			return 0;
350	}
351
352	return -EAGAIN;
353}
354
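/*
 * Associate the id with an RDMA device by matching the resolved address
 * against the GID tables of each registered device.  Only ports whose
 * link layer matches the bound interface are considered: RoCE ports are
 * compared against the IBoE-derived SGID, IB ports against the GID taken
 * from the device address.  On success the id is attached to the matching
 * cma_device and port; otherwise an error is returned.
 */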
355static int cma_acquire_dev(struct rdma_id_private *id_priv)
356{
357	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
358	struct cma_device *cma_dev;
359	union ib_gid gid, iboe_gid;
360	int ret = -ENODEV;
361	u8 port;
362	enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
363		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
364
365	if (dev_ll != IB_LINK_LAYER_INFINIBAND &&
366	    id_priv->id.ps == RDMA_PS_IPOIB)
367		return -EINVAL;
368
369	mutex_lock(&lock);
370	iboe_addr_get_sgid(dev_addr, &iboe_gid);
371	memcpy(&gid, dev_addr->src_dev_addr +
372	       rdma_addr_gid_offset(dev_addr), sizeof gid);
373	list_for_each_entry(cma_dev, &dev_list, list) {
374		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
375			if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
376				if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
377				    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
378					ret = find_gid_port(cma_dev->device, &iboe_gid, port);
379				else
380					ret = find_gid_port(cma_dev->device, &gid, port);
381
382				if (!ret) {
383					id_priv->id.port_num = port;
384					goto out;
385				} else if (ret == 1)
386					break;
387			}
388		}
389	}
390
391out:
392	if (!ret)
393		cma_attach_to_dev(id_priv, cma_dev);
394
395	mutex_unlock(&lock);
396	return ret;
397}
398
399static void cma_deref_id(struct rdma_id_private *id_priv)
400{
401	if (atomic_dec_and_test(&id_priv->refcount))
402		complete(&id_priv->comp);
403}
404
405static int cma_disable_callback(struct rdma_id_private *id_priv,
406				enum rdma_cm_state state)
407{
408	mutex_lock(&id_priv->handler_mutex);
409	if (id_priv->state != state) {
410		mutex_unlock(&id_priv->handler_mutex);
411		return -EINVAL;
412	}
413	return 0;
414}
415
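/*
 * Allocate and initialize an rdma_cm_id.  The id starts in RDMA_CM_IDLE
 * with a reference count of one; the caller's event handler and context
 * are recorded and a random sequence number (later used as the starting
 * PSN) is generated.  Returns the embedded rdma_cm_id or an ERR_PTR on
 * allocation failure.
 */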
416struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
417				  void *context, enum rdma_port_space ps,
418				  enum ib_qp_type qp_type)
419{
420	struct rdma_id_private *id_priv;
421
422	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
423	if (!id_priv)
424		return ERR_PTR(-ENOMEM);
425
426	id_priv->owner = task_pid_nr(current);
427	id_priv->state = RDMA_CM_IDLE;
428	id_priv->id.context = context;
429	id_priv->id.event_handler = event_handler;
430	id_priv->id.ps = ps;
431	id_priv->id.qp_type = qp_type;
432	spin_lock_init(&id_priv->lock);
433	mutex_init(&id_priv->qp_mutex);
434	init_completion(&id_priv->comp);
435	atomic_set(&id_priv->refcount, 1);
436	mutex_init(&id_priv->handler_mutex);
437	INIT_LIST_HEAD(&id_priv->listen_list);
438	INIT_LIST_HEAD(&id_priv->mc_list);
439	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
440
441	return &id_priv->id;
442}
443EXPORT_SYMBOL(rdma_create_id);
444
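/*
 * UD QPs have no CM-driven connection handshake, so walk the QP through
 * INIT -> RTR -> RTS here, using the attributes reported by
 * rdma_init_qp_attr() for the INIT transition.
 */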
445static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
446{
447	struct ib_qp_attr qp_attr;
448	int qp_attr_mask, ret;
449
450	qp_attr.qp_state = IB_QPS_INIT;
451	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
452	if (ret)
453		return ret;
454
455	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
456	if (ret)
457		return ret;
458
459	qp_attr.qp_state = IB_QPS_RTR;
460	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
461	if (ret)
462		return ret;
463
464	qp_attr.qp_state = IB_QPS_RTS;
465	qp_attr.sq_psn = 0;
466	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
467
468	return ret;
469}
470
471static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
472{
473	struct ib_qp_attr qp_attr;
474	int qp_attr_mask, ret;
475
476	qp_attr.qp_state = IB_QPS_INIT;
477	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
478	if (ret)
479		return ret;
480
481	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
482}
483
484int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
485		   struct ib_qp_init_attr *qp_init_attr)
486{
487	struct rdma_id_private *id_priv;
488	struct ib_qp *qp;
489	int ret;
490
491	id_priv = container_of(id, struct rdma_id_private, id);
492	if (id->device != pd->device)
493		return -EINVAL;
494
495	qp = ib_create_qp(pd, qp_init_attr);
496	if (IS_ERR(qp))
497		return PTR_ERR(qp);
498
499	if (id->qp_type == IB_QPT_UD)
500		ret = cma_init_ud_qp(id_priv, qp);
501	else
502		ret = cma_init_conn_qp(id_priv, qp);
503	if (ret)
504		goto err;
505
506	id->qp = qp;
507	id_priv->qp_num = qp->qp_num;
508	id_priv->srq = (qp->srq != NULL);
509	return 0;
510err:
511	ib_destroy_qp(qp);
512	return ret;
513}
514EXPORT_SYMBOL(rdma_create_qp);
515
516void rdma_destroy_qp(struct rdma_cm_id *id)
517{
518	struct rdma_id_private *id_priv;
519
520	id_priv = container_of(id, struct rdma_id_private, id);
521	mutex_lock(&id_priv->qp_mutex);
522	ib_destroy_qp(id_priv->id.qp);
523	id_priv->id.qp = NULL;
524	mutex_unlock(&id_priv->qp_mutex);
525}
526EXPORT_SYMBOL(rdma_destroy_qp);
527
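/*
 * Transition an attached QP to INIT and then RTR using the attributes
 * supplied by rdma_init_qp_attr().  When connection parameters are
 * available, responder_resources caps max_dest_rd_atomic.  A missing QP
 * is not an error; the caller may be managing the QP itself.
 */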
528static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
529			     struct rdma_conn_param *conn_param)
530{
531	struct ib_qp_attr qp_attr;
532	int qp_attr_mask, ret;
533
534	mutex_lock(&id_priv->qp_mutex);
535	if (!id_priv->id.qp) {
536		ret = 0;
537		goto out;
538	}
539
540	/* Need to update QP attributes from default values. */
541	qp_attr.qp_state = IB_QPS_INIT;
542	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
543	if (ret)
544		goto out;
545
546	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
547	if (ret)
548		goto out;
549
550	qp_attr.qp_state = IB_QPS_RTR;
551	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
552	if (ret)
553		goto out;
554
555	if (conn_param)
556		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
557	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
558out:
559	mutex_unlock(&id_priv->qp_mutex);
560	return ret;
561}
562
563static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
564			     struct rdma_conn_param *conn_param)
565{
566	struct ib_qp_attr qp_attr;
567	int qp_attr_mask, ret;
568
569	mutex_lock(&id_priv->qp_mutex);
570	if (!id_priv->id.qp) {
571		ret = 0;
572		goto out;
573	}
574
575	qp_attr.qp_state = IB_QPS_RTS;
576	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
577	if (ret)
578		goto out;
579
580	if (conn_param)
581		qp_attr.max_rd_atomic = conn_param->initiator_depth;
582	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
583out:
584	mutex_unlock(&id_priv->qp_mutex);
585	return ret;
586}
587
588static int cma_modify_qp_err(struct rdma_id_private *id_priv)
589{
590	struct ib_qp_attr qp_attr;
591	int ret;
592
593	mutex_lock(&id_priv->qp_mutex);
594	if (!id_priv->id.qp) {
595		ret = 0;
596		goto out;
597	}
598
599	qp_attr.qp_state = IB_QPS_ERR;
600	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
601out:
602	mutex_unlock(&id_priv->qp_mutex);
603	return ret;
604}
605
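/*
 * Fill in QP attributes for the INIT transition.  On InfiniBand links the
 * pkey comes from the resolved device address; on Ethernet (RoCE) ports
 * the default pkey 0xffff is used.  UD ids additionally need a qkey,
 * while connected ids have their access flags cleared.
 */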
606static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
607			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
608{
609	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
610	int ret;
611	u16 pkey;
612
613	if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
614	    IB_LINK_LAYER_INFINIBAND)
615		pkey = ib_addr_get_pkey(dev_addr);
616	else
617		pkey = 0xffff;
618
619	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
620				  pkey, &qp_attr->pkey_index);
621	if (ret)
622		return ret;
623
624	qp_attr->port_num = id_priv->id.port_num;
625	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
626
627	if (id_priv->id.qp_type == IB_QPT_UD) {
628		ret = cma_set_qkey(id_priv);
629		if (ret)
630			return ret;
631
632		qp_attr->qkey = id_priv->qkey;
633		*qp_attr_mask |= IB_QP_QKEY;
634	} else {
635		qp_attr->qp_access_flags = 0;
636		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
637	}
638	return 0;
639}
640
641int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
642		       int *qp_attr_mask)
643{
644	struct rdma_id_private *id_priv;
645	int ret = 0;
646
647	id_priv = container_of(id, struct rdma_id_private, id);
648	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
649	case RDMA_TRANSPORT_IB:
650		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
651			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
652		else
653			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
654						 qp_attr_mask);
655		if (qp_attr->qp_state == IB_QPS_RTR)
656			qp_attr->rq_psn = id_priv->seq_num;
657		break;
658	case RDMA_TRANSPORT_IWARP:
659		if (!id_priv->cm_id.iw) {
660			qp_attr->qp_access_flags = 0;
661			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
662		} else
663			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
664						 qp_attr_mask);
665		break;
666	default:
667		ret = -ENOSYS;
668		break;
669	}
670
671	return ret;
672}
673EXPORT_SYMBOL(rdma_init_qp_attr);
674
675static inline int cma_zero_addr(struct sockaddr *addr)
676{
677	struct in6_addr *ip6;
678
679	if (addr->sa_family == AF_INET)
680		return ipv4_is_zeronet(
681			((struct sockaddr_in *)addr)->sin_addr.s_addr);
682	else {
683		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
684		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
685			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
686	}
687}
688
689static inline int cma_loopback_addr(struct sockaddr *addr)
690{
691	if (addr->sa_family == AF_INET)
692		return ipv4_is_loopback(
693			((struct sockaddr_in *) addr)->sin_addr.s_addr);
694	else
695		return ipv6_addr_loopback(
696			&((struct sockaddr_in6 *) addr)->sin6_addr);
697}
698
699static inline int cma_any_addr(struct sockaddr *addr)
700{
701	return cma_zero_addr(addr) || cma_loopback_addr(addr);
702}
703
704static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
705{
706	if (src->sa_family != dst->sa_family)
707		return -1;
708
709	switch (src->sa_family) {
710	case AF_INET:
711		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
712		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
713	default:
714		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
715				     &((struct sockaddr_in6 *) dst)->sin6_addr);
716	}
717}
718
719static inline __be16 cma_port(struct sockaddr *addr)
720{
721	if (addr->sa_family == AF_INET)
722		return ((struct sockaddr_in *) addr)->sin_port;
723	else
724		return ((struct sockaddr_in6 *) addr)->sin6_port;
725}
726
727static inline int cma_any_port(struct sockaddr *addr)
728{
729	return !cma_port(addr);
730}
731
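/*
 * Parse the private data header carried in a connection request.  SDP
 * requests use the sdp_hh layout, all other port spaces use struct
 * cma_hdr.  The header version must match and the IP version must be
 * 4 or 6, otherwise the request is rejected with -EINVAL.
 */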
732static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
733			    u8 *ip_ver, __be16 *port,
734			    union cma_ip_addr **src, union cma_ip_addr **dst)
735{
736	switch (ps) {
737	case RDMA_PS_SDP:
738		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
739		    SDP_MAJ_VERSION)
740			return -EINVAL;
741
742		*ip_ver	= sdp_get_ip_ver(hdr);
743		*port	= ((struct sdp_hh *) hdr)->port;
744		*src	= &((struct sdp_hh *) hdr)->src_addr;
745		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
746		break;
747	default:
748		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
749			return -EINVAL;
750
751		*ip_ver	= cma_get_ip_ver(hdr);
752		*port	= ((struct cma_hdr *) hdr)->port;
753		*src	= &((struct cma_hdr *) hdr)->src_addr;
754		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
755		break;
756	}
757
758	if (*ip_ver != 4 && *ip_ver != 6)
759		return -EINVAL;
760	return 0;
761}
762
763static void cma_save_net_info(struct rdma_addr *addr,
764			      struct rdma_addr *listen_addr,
765			      u8 ip_ver, __be16 port,
766			      union cma_ip_addr *src, union cma_ip_addr *dst)
767{
768	struct sockaddr_in *listen4, *ip4;
769	struct sockaddr_in6 *listen6, *ip6;
770
771	switch (ip_ver) {
772	case 4:
773		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
774		ip4 = (struct sockaddr_in *) &addr->src_addr;
775		ip4->sin_family = listen4->sin_family;
776		ip4->sin_addr.s_addr = dst->ip4.addr;
777		ip4->sin_port = listen4->sin_port;
778
779		ip4 = (struct sockaddr_in *) &addr->dst_addr;
780		ip4->sin_family = listen4->sin_family;
781		ip4->sin_addr.s_addr = src->ip4.addr;
782		ip4->sin_port = port;
783		break;
784	case 6:
785		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
786		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
787		ip6->sin6_family = listen6->sin6_family;
788		ip6->sin6_addr = dst->ip6;
789		ip6->sin6_port = listen6->sin6_port;
790
791		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
792		ip6->sin6_family = listen6->sin6_family;
793		ip6->sin6_addr = src->ip6;
794		ip6->sin6_port = port;
795		break;
796	default:
797		break;
798	}
799}
800
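/*
 * SDP carries no CMA header in the private data, so user data starts at
 * offset zero; every other port space reserves sizeof(struct cma_hdr) at
 * the front of the private data.
 */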
801static inline int cma_user_data_offset(enum rdma_port_space ps)
802{
803	switch (ps) {
804	case RDMA_PS_SDP:
805		return 0;
806	default:
807		return sizeof(struct cma_hdr);
808	}
809}
810
811static void cma_cancel_route(struct rdma_id_private *id_priv)
812{
813	switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
814	case IB_LINK_LAYER_INFINIBAND:
815		if (id_priv->query)
816			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
817		break;
818	default:
819		break;
820	}
821}
822
823static void cma_cancel_listens(struct rdma_id_private *id_priv)
824{
825	struct rdma_id_private *dev_id_priv;
826
827	/*
828	 * Remove from listen_any_list to prevent added devices from spawning
829	 * additional listen requests.
830	 */
831	mutex_lock(&lock);
832	list_del(&id_priv->list);
833
834	while (!list_empty(&id_priv->listen_list)) {
835		dev_id_priv = list_entry(id_priv->listen_list.next,
836					 struct rdma_id_private, listen_list);
837		/* sync with device removal to avoid duplicate destruction */
838		list_del_init(&dev_id_priv->list);
839		list_del(&dev_id_priv->listen_list);
840		mutex_unlock(&lock);
841
842		rdma_destroy_id(&dev_id_priv->id);
843		mutex_lock(&lock);
844	}
845	mutex_unlock(&lock);
846}
847
848static void cma_cancel_operation(struct rdma_id_private *id_priv,
849				 enum rdma_cm_state state)
850{
851	switch (state) {
852	case RDMA_CM_ADDR_QUERY:
853		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
854		break;
855	case RDMA_CM_ROUTE_QUERY:
856		cma_cancel_route(id_priv);
857		break;
858	case RDMA_CM_LISTEN:
859		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
860				&& !id_priv->cma_dev)
861			cma_cancel_listens(id_priv);
862		break;
863	default:
864		break;
865	}
866}
867
868static void cma_release_port(struct rdma_id_private *id_priv)
869{
870	struct rdma_bind_list *bind_list = id_priv->bind_list;
871
872	if (!bind_list)
873		return;
874
875	mutex_lock(&lock);
876	hlist_del(&id_priv->node);
877	if (hlist_empty(&bind_list->owners)) {
878		idr_remove(bind_list->ps, bind_list->port);
879		kfree(bind_list);
880	}
881	mutex_unlock(&lock);
882}
883
884static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
885{
886	struct cma_multicast *mc;
887
888	while (!list_empty(&id_priv->mc_list)) {
889		mc = container_of(id_priv->mc_list.next,
890				  struct cma_multicast, list);
891		list_del(&mc->list);
892		switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
893		case IB_LINK_LAYER_INFINIBAND:
894			ib_sa_free_multicast(mc->multicast.ib);
895			kfree(mc);
896			break;
897		case IB_LINK_LAYER_ETHERNET:
898			kref_put(&mc->mcref, release_mc);
899			break;
900		default:
901			break;
902		}
903	}
904}
905
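/*
 * Tear down an rdma_cm_id: mark it destroying, cancel any operation
 * outstanding for the old state, and synchronize with running callbacks
 * by cycling handler_mutex.  The underlying IB/iWARP CM id, multicast
 * memberships, device reference and port binding are then released, and
 * the final kfree waits until all references have been dropped.
 */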
906void rdma_destroy_id(struct rdma_cm_id *id)
907{
908	struct rdma_id_private *id_priv;
909	enum rdma_cm_state state;
910
911	id_priv = container_of(id, struct rdma_id_private, id);
912	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
913	cma_cancel_operation(id_priv, state);
914
915	/*
916	 * Wait for any active callback to finish.  New callbacks will find
917	 * the id_priv state set to destroying and abort.
918	 */
919	mutex_lock(&id_priv->handler_mutex);
920	mutex_unlock(&id_priv->handler_mutex);
921
922	if (id_priv->cma_dev) {
923		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
924		case RDMA_TRANSPORT_IB:
925			if (id_priv->cm_id.ib)
926				ib_destroy_cm_id(id_priv->cm_id.ib);
927			break;
928		case RDMA_TRANSPORT_IWARP:
929			if (id_priv->cm_id.iw)
930				iw_destroy_cm_id(id_priv->cm_id.iw);
931			break;
932		default:
933			break;
934		}
935		cma_leave_mc_groups(id_priv);
936		cma_release_dev(id_priv);
937	}
938
939	cma_release_port(id_priv);
940	cma_deref_id(id_priv);
941	wait_for_completion(&id_priv->comp);
942
943	if (id_priv->internal_id)
944		cma_deref_id(id_priv->id.context);
945
946	kfree(id_priv->id.route.path_rec);
947	kfree(id_priv);
948}
949EXPORT_SYMBOL(rdma_destroy_id);
950
951static int cma_rep_recv(struct rdma_id_private *id_priv)
952{
953	int ret;
954
955	ret = cma_modify_qp_rtr(id_priv, NULL);
956	if (ret)
957		goto reject;
958
959	ret = cma_modify_qp_rts(id_priv, NULL);
960	if (ret)
961		goto reject;
962
963	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
964	if (ret)
965		goto reject;
966
967	return 0;
968reject:
969	cma_modify_qp_err(id_priv);
970	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
971		       NULL, 0, NULL, 0);
972	return ret;
973}
974
975static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
976{
977	if (id_priv->id.ps == RDMA_PS_SDP &&
978	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
979	    SDP_MAJ_VERSION)
980		return -EINVAL;
981
982	return 0;
983}
984
985static void cma_set_rep_event_data(struct rdma_cm_event *event,
986				   struct ib_cm_rep_event_param *rep_data,
987				   void *private_data)
988{
989	event->param.conn.private_data = private_data;
990	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
991	event->param.conn.responder_resources = rep_data->responder_resources;
992	event->param.conn.initiator_depth = rep_data->initiator_depth;
993	event->param.conn.flow_control = rep_data->flow_control;
994	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
995	event->param.conn.srq = rep_data->srq;
996	event->param.conn.qp_num = rep_data->remote_qpn;
997}
998
999static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1000{
1001	struct rdma_id_private *id_priv = cm_id->context;
1002	struct rdma_cm_event event;
1003	int ret = 0;
1004
1005	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1006		cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
1007	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1008		cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
1009		return 0;
1010
1011	memset(&event, 0, sizeof event);
1012	switch (ib_event->event) {
1013	case IB_CM_REQ_ERROR:
1014	case IB_CM_REP_ERROR:
1015		event.event = RDMA_CM_EVENT_UNREACHABLE;
1016		event.status = -ETIMEDOUT;
1017		break;
1018	case IB_CM_REP_RECEIVED:
1019		event.status = cma_verify_rep(id_priv, ib_event->private_data);
1020		if (event.status)
1021			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1022		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
1023			event.status = cma_rep_recv(id_priv);
1024			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1025						     RDMA_CM_EVENT_ESTABLISHED;
1026		} else
1027			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1028		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1029				       ib_event->private_data);
1030		break;
1031	case IB_CM_RTU_RECEIVED:
1032	case IB_CM_USER_ESTABLISHED:
1033		event.event = RDMA_CM_EVENT_ESTABLISHED;
1034		break;
1035	case IB_CM_DREQ_ERROR:
1036		event.status = -ETIMEDOUT; /* fall through */
1037	case IB_CM_DREQ_RECEIVED:
1038	case IB_CM_DREP_RECEIVED:
1039		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
1040				   RDMA_CM_DISCONNECT))
1041			goto out;
1042		event.event = RDMA_CM_EVENT_DISCONNECTED;
1043		break;
1044	case IB_CM_TIMEWAIT_EXIT:
1045		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
1046		break;
1047	case IB_CM_MRA_RECEIVED:
1048		/* ignore event */
1049		goto out;
1050	case IB_CM_REJ_RECEIVED:
1051		cma_modify_qp_err(id_priv);
1052		event.status = ib_event->param.rej_rcvd.reason;
1053		event.event = RDMA_CM_EVENT_REJECTED;
1054		event.param.conn.private_data = ib_event->private_data;
1055		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1056		break;
1057	default:
1058		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
1059		       ib_event->event);
1060		goto out;
1061	}
1062
1063	ret = id_priv->id.event_handler(&id_priv->id, &event);
1064	if (ret) {
1065		/* Destroy the CM ID by returning a non-zero value. */
1066		id_priv->cm_id.ib = NULL;
1067		cma_exch(id_priv, RDMA_CM_DESTROYING);
1068		mutex_unlock(&id_priv->handler_mutex);
1069		rdma_destroy_id(&id_priv->id);
1070		return ret;
1071	}
1072out:
1073	mutex_unlock(&id_priv->handler_mutex);
1074	return ret;
1075}
1076
1077static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1078					       struct ib_cm_event *ib_event)
1079{
1080	struct rdma_id_private *id_priv;
1081	struct rdma_cm_id *id;
1082	struct rdma_route *rt;
1083	union cma_ip_addr *src, *dst;
1084	__be16 port;
1085	u8 ip_ver;
1086	int ret;
1087
1088	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1089			     &ip_ver, &port, &src, &dst))
1090		return NULL;
1091
1092	id = rdma_create_id(listen_id->event_handler, listen_id->context,
1093			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
1094	if (IS_ERR(id))
1095		return NULL;
1096
1097	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1098			  ip_ver, port, src, dst);
1099
1100	rt = &id->route;
1101	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1102	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1103			       GFP_KERNEL);
1104	if (!rt->path_rec)
1105		goto err;
1106
1107	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1108	if (rt->num_paths == 2)
1109		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1110
1111	if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
1112		rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1113		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1114		ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1115	} else {
1116		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
1117					&rt->addr.dev_addr);
1118		if (ret)
1119			goto err;
1120	}
1121	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1122
1123	id_priv = container_of(id, struct rdma_id_private, id);
1124	id_priv->state = RDMA_CM_CONNECT;
1125	return id_priv;
1126
1127err:
1128	rdma_destroy_id(id);
1129	return NULL;
1130}
1131
1132static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1133					      struct ib_cm_event *ib_event)
1134{
1135	struct rdma_id_private *id_priv;
1136	struct rdma_cm_id *id;
1137	union cma_ip_addr *src, *dst;
1138	__be16 port;
1139	u8 ip_ver;
1140	int ret;
1141
1142	id = rdma_create_id(listen_id->event_handler, listen_id->context,
1143			    listen_id->ps, IB_QPT_UD);
1144	if (IS_ERR(id))
1145		return NULL;
1146
1147
1148	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1149			     &ip_ver, &port, &src, &dst))
1150		goto err;
1151
1152	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1153			  ip_ver, port, src, dst);
1154
1155	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
1156		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
1157					&id->route.addr.dev_addr);
1158		if (ret)
1159			goto err;
1160	}
1161
1162	id_priv = container_of(id, struct rdma_id_private, id);
1163	id_priv->state = RDMA_CM_CONNECT;
1164	return id_priv;
1165err:
1166	rdma_destroy_id(id);
1167	return NULL;
1168}
1169
1170static void cma_set_req_event_data(struct rdma_cm_event *event,
1171				   struct ib_cm_req_event_param *req_data,
1172				   void *private_data, int offset)
1173{
1174	event->param.conn.private_data = private_data + offset;
1175	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1176	event->param.conn.responder_resources = req_data->responder_resources;
1177	event->param.conn.initiator_depth = req_data->initiator_depth;
1178	event->param.conn.flow_control = req_data->flow_control;
1179	event->param.conn.retry_count = req_data->retry_count;
1180	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1181	event->param.conn.srq = req_data->srq;
1182	event->param.conn.qp_num = req_data->remote_qpn;
1183}
1184
1185static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
1186{
1187	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
1188		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
1189		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
1190		 (id->qp_type == IB_QPT_UD)) ||
1191		(!id->qp_type));
1192}
1193
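/*
 * Handle an incoming IB CM REQ or SIDR REQ on a listening id: create a
 * child rdma_cm_id (cma_new_udp_id() for SIDR, cma_new_conn_id()
 * otherwise), bind it to a device and report RDMA_CM_EVENT_CONNECT_REQUEST
 * to the user.  For connected QP types an MRA is sent to extend the remote
 * CM's timeout while the request is being processed.
 */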
1194static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1195{
1196	struct rdma_id_private *listen_id, *conn_id;
1197	struct rdma_cm_event event;
1198	int offset, ret;
1199
1200	listen_id = cm_id->context;
1201	if (!cma_check_req_qp_type(&listen_id->id, ib_event))
1202		return -EINVAL;
1203
1204	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
1205		return -ECONNABORTED;
1206
1207	memset(&event, 0, sizeof event);
1208	offset = cma_user_data_offset(listen_id->id.ps);
1209	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1210	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
1211		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1212		event.param.ud.private_data = ib_event->private_data + offset;
1213		event.param.ud.private_data_len =
1214				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1215	} else {
1216		conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1217		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1218				       ib_event->private_data, offset);
1219	}
1220	if (!conn_id) {
1221		ret = -ENOMEM;
1222		goto err1;
1223	}
1224
1225	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1226	ret = cma_acquire_dev(conn_id);
1227	if (ret)
1228		goto err2;
1229
1230	conn_id->cm_id.ib = cm_id;
1231	cm_id->context = conn_id;
1232	cm_id->cm_handler = cma_ib_handler;
1233
1234	/*
1235	 * Protect against the user destroying conn_id from another thread
1236	 * until we're done accessing it.
1237	 */
1238	atomic_inc(&conn_id->refcount);
1239	ret = conn_id->id.event_handler(&conn_id->id, &event);
1240	if (ret)
1241		goto err3;
1242
1243	/*
1244	 * Acquire the mutex to prevent the user from executing rdma_destroy_id()
1245	 * while we're accessing the cm_id.
1246	 */
1247	mutex_lock(&lock);
1248	if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
1249		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1250	mutex_unlock(&lock);
1251	mutex_unlock(&conn_id->handler_mutex);
1252	mutex_unlock(&listen_id->handler_mutex);
1253	cma_deref_id(conn_id);
1254	return 0;
1255
1256err3:
1257	cma_deref_id(conn_id);
1258	/* Destroy the CM ID by returning a non-zero value. */
1259	conn_id->cm_id.ib = NULL;
1260err2:
1261	cma_exch(conn_id, RDMA_CM_DESTROYING);
1262	mutex_unlock(&conn_id->handler_mutex);
1263err1:
1264	mutex_unlock(&listen_id->handler_mutex);
1265	if (conn_id)
1266		rdma_destroy_id(&conn_id->id);
1267	return ret;
1268}
1269
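/*
 * Build the IB service ID used for listens and path queries: the RDMA
 * port space shifted left by 16 bits plus the host-order port number,
 * converted to network byte order.
 */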
1270static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1271{
1272	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
1273}
1274
1275static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1276				 struct ib_cm_compare_data *compare)
1277{
1278	struct cma_hdr *cma_data, *cma_mask;
1279	struct sdp_hh *sdp_data, *sdp_mask;
1280	__be32 ip4_addr;
1281	struct in6_addr ip6_addr;
1282
1283	memset(compare, 0, sizeof *compare);
1284	cma_data = (void *) compare->data;
1285	cma_mask = (void *) compare->mask;
1286	sdp_data = (void *) compare->data;
1287	sdp_mask = (void *) compare->mask;
1288
1289	switch (addr->sa_family) {
1290	case AF_INET:
1291		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1292		if (ps == RDMA_PS_SDP) {
1293			sdp_set_ip_ver(sdp_data, 4);
1294			sdp_set_ip_ver(sdp_mask, 0xF);
1295			sdp_data->dst_addr.ip4.addr = ip4_addr;
1296			sdp_mask->dst_addr.ip4.addr = htonl(~0);
1297		} else {
1298			cma_set_ip_ver(cma_data, 4);
1299			cma_set_ip_ver(cma_mask, 0xF);
1300			cma_data->dst_addr.ip4.addr = ip4_addr;
1301			cma_mask->dst_addr.ip4.addr = htonl(~0);
1302		}
1303		break;
1304	case AF_INET6:
1305		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1306		if (ps == RDMA_PS_SDP) {
1307			sdp_set_ip_ver(sdp_data, 6);
1308			sdp_set_ip_ver(sdp_mask, 0xF);
1309			sdp_data->dst_addr.ip6 = ip6_addr;
1310			memset(&sdp_mask->dst_addr.ip6, 0xFF,
1311			       sizeof sdp_mask->dst_addr.ip6);
1312		} else {
1313			cma_set_ip_ver(cma_data, 6);
1314			cma_set_ip_ver(cma_mask, 0xF);
1315			cma_data->dst_addr.ip6 = ip6_addr;
1316			memset(&cma_mask->dst_addr.ip6, 0xFF,
1317			       sizeof cma_mask->dst_addr.ip6);
1318		}
1319		break;
1320	default:
1321		break;
1322	}
1323}
1324
1325static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1326{
1327	struct rdma_id_private *id_priv = iw_id->context;
1328	struct rdma_cm_event event;
1329	struct sockaddr_in *sin;
1330	int ret = 0;
1331
1332	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
1333		return 0;
1334
1335	memset(&event, 0, sizeof event);
1336	switch (iw_event->event) {
1337	case IW_CM_EVENT_CLOSE:
1338		event.event = RDMA_CM_EVENT_DISCONNECTED;
1339		break;
1340	case IW_CM_EVENT_CONNECT_REPLY:
1341		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1342		*sin = iw_event->local_addr;
1343		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
1344		*sin = iw_event->remote_addr;
1345		switch (iw_event->status) {
1346		case 0:
1347			event.event = RDMA_CM_EVENT_ESTABLISHED;
1348			event.param.conn.initiator_depth = iw_event->ird;
1349			event.param.conn.responder_resources = iw_event->ord;
1350			break;
1351		case -ECONNRESET:
1352		case -ECONNREFUSED:
1353			event.event = RDMA_CM_EVENT_REJECTED;
1354			break;
1355		case -ETIMEDOUT:
1356			event.event = RDMA_CM_EVENT_UNREACHABLE;
1357			break;
1358		default:
1359			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1360			break;
1361		}
1362		break;
1363	case IW_CM_EVENT_ESTABLISHED:
1364		event.event = RDMA_CM_EVENT_ESTABLISHED;
1365		event.param.conn.initiator_depth = iw_event->ird;
1366		event.param.conn.responder_resources = iw_event->ord;
1367		break;
1368	default:
1369		BUG_ON(1);
1370	}
1371
1372	event.status = iw_event->status;
1373	event.param.conn.private_data = iw_event->private_data;
1374	event.param.conn.private_data_len = iw_event->private_data_len;
1375	ret = id_priv->id.event_handler(&id_priv->id, &event);
1376	if (ret) {
1377		/* Destroy the CM ID by returning a non-zero value. */
1378		id_priv->cm_id.iw = NULL;
1379		cma_exch(id_priv, RDMA_CM_DESTROYING);
1380		mutex_unlock(&id_priv->handler_mutex);
1381		rdma_destroy_id(&id_priv->id);
1382		return ret;
1383	}
1384
1385	mutex_unlock(&id_priv->handler_mutex);
1386	return ret;
1387}
1388
1389static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1390			       struct iw_cm_event *iw_event)
1391{
1392	struct rdma_cm_id *new_cm_id;
1393	struct rdma_id_private *listen_id, *conn_id;
1394	struct sockaddr_in *sin;
1395	struct net_device *dev = NULL;
1396	struct rdma_cm_event event;
1397	int ret;
1398	struct ib_device_attr attr;
1399
1400	listen_id = cm_id->context;
1401	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
1402		return -ECONNABORTED;
1403
1404	/* Create a new RDMA id for the new IW CM ID */
1405	new_cm_id = rdma_create_id(listen_id->id.event_handler,
1406				   listen_id->id.context,
1407				   RDMA_PS_TCP, IB_QPT_RC);
1408	if (IS_ERR(new_cm_id)) {
1409		ret = -ENOMEM;
1410		goto out;
1411	}
1412	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1413	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1414	conn_id->state = RDMA_CM_CONNECT;
1415
1416	dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
1417	if (!dev) {
1418		ret = -EADDRNOTAVAIL;
1419		mutex_unlock(&conn_id->handler_mutex);
1420		rdma_destroy_id(new_cm_id);
1421		goto out;
1422	}
1423	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1424	if (ret) {
1425		mutex_unlock(&conn_id->handler_mutex);
1426		rdma_destroy_id(new_cm_id);
1427		goto out;
1428	}
1429
1430	ret = cma_acquire_dev(conn_id);
1431	if (ret) {
1432		mutex_unlock(&conn_id->handler_mutex);
1433		rdma_destroy_id(new_cm_id);
1434		goto out;
1435	}
1436
1437	conn_id->cm_id.iw = cm_id;
1438	cm_id->context = conn_id;
1439	cm_id->cm_handler = cma_iw_handler;
1440
1441	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1442	*sin = iw_event->local_addr;
1443	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1444	*sin = iw_event->remote_addr;
1445
1446	ret = ib_query_device(conn_id->id.device, &attr);
1447	if (ret) {
1448		mutex_unlock(&conn_id->handler_mutex);
1449		rdma_destroy_id(new_cm_id);
1450		goto out;
1451	}
1452
1453	memset(&event, 0, sizeof event);
1454	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1455	event.param.conn.private_data = iw_event->private_data;
1456	event.param.conn.private_data_len = iw_event->private_data_len;
1457	event.param.conn.initiator_depth = iw_event->ird;
1458	event.param.conn.responder_resources = iw_event->ord;
1459
1460	/*
1461	 * Protect against the user destroying conn_id from another thread
1462	 * until we're done accessing it.
1463	 */
1464	atomic_inc(&conn_id->refcount);
1465	ret = conn_id->id.event_handler(&conn_id->id, &event);
1466	if (ret) {
1467		/* User wants to destroy the CM ID */
1468		conn_id->cm_id.iw = NULL;
1469		cma_exch(conn_id, RDMA_CM_DESTROYING);
1470		mutex_unlock(&conn_id->handler_mutex);
1471		cma_deref_id(conn_id);
1472		rdma_destroy_id(&conn_id->id);
1473		goto out;
1474	}
1475
1476	mutex_unlock(&conn_id->handler_mutex);
1477	cma_deref_id(conn_id);
1478
1479out:
1480	if (dev)
1481		dev_put(dev);
1482	mutex_unlock(&listen_id->handler_mutex);
1483	return ret;
1484}
1485
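/*
 * Start an IB CM listen for this id.  Wildcard addresses listen on the
 * service ID alone; a specific address additionally installs private data
 * compare masks (via cma_set_compare_data) so that only requests whose
 * destination IP matches are delivered to cma_req_handler().
 */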
1486static int cma_ib_listen(struct rdma_id_private *id_priv)
1487{
1488	struct ib_cm_compare_data compare_data;
1489	struct sockaddr *addr;
1490	struct ib_cm_id	*id;
1491	__be64 svc_id;
1492	int ret;
1493
1494	id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv);
1495	if (IS_ERR(id))
1496		return PTR_ERR(id);
1497
1498	id_priv->cm_id.ib = id;
1499
1500	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
1501	svc_id = cma_get_service_id(id_priv->id.ps, addr);
1502	if (cma_any_addr(addr))
1503		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1504	else {
1505		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1506		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1507	}
1508
1509	if (ret) {
1510		ib_destroy_cm_id(id_priv->cm_id.ib);
1511		id_priv->cm_id.ib = NULL;
1512	}
1513
1514	return ret;
1515}
1516
1517static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1518{
1519	int ret;
1520	struct sockaddr_in *sin;
1521	struct iw_cm_id	*id;
1522
1523	id = iw_create_cm_id(id_priv->id.device,
1524			     iw_conn_req_handler,
1525			     id_priv);
1526	if (IS_ERR(id))
1527		return PTR_ERR(id);
1528
1529	id_priv->cm_id.iw = id;
1530
1531	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1532	id_priv->cm_id.iw->local_addr = *sin;
1533
1534	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1535
1536	if (ret) {
1537		iw_destroy_cm_id(id_priv->cm_id.iw);
1538		id_priv->cm_id.iw = NULL;
1539	}
1540
1541	return ret;
1542}
1543
1544static int cma_listen_handler(struct rdma_cm_id *id,
1545			      struct rdma_cm_event *event)
1546{
1547	struct rdma_id_private *id_priv = id->context;
1548
1549	id->context = id_priv->id.context;
1550	id->event_handler = id_priv->id.event_handler;
1551	return id_priv->id.event_handler(id, event);
1552}
1553
1554static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1555			      struct cma_device *cma_dev)
1556{
1557	struct rdma_id_private *dev_id_priv;
1558	struct rdma_cm_id *id;
1559	int ret;
1560
1561	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
1562			    id_priv->id.qp_type);
1563	if (IS_ERR(id))
1564		return;
1565
1566	dev_id_priv = container_of(id, struct rdma_id_private, id);
1567
1568	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
1569	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1570	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
1571
1572	cma_attach_to_dev(dev_id_priv, cma_dev);
1573	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1574	atomic_inc(&id_priv->refcount);
1575	dev_id_priv->internal_id = 1;
1576
1577	ret = rdma_listen(id, id_priv->backlog);
1578	if (ret)
1579		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1580		       "listening on device %s\n", ret, cma_dev->device->name);
1581}
1582
1583static void cma_listen_on_all(struct rdma_id_private *id_priv)
1584{
1585	struct cma_device *cma_dev;
1586
1587	mutex_lock(&lock);
1588	list_add_tail(&id_priv->list, &listen_any_list);
1589	list_for_each_entry(cma_dev, &dev_list, list)
1590		cma_listen_on_dev(id_priv, cma_dev);
1591	mutex_unlock(&lock);
1592}
1593
1594void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1595{
1596	struct rdma_id_private *id_priv;
1597
1598	id_priv = container_of(id, struct rdma_id_private, id);
1599	id_priv->tos = (u8) tos;
1600}
1601EXPORT_SYMBOL(rdma_set_service_type);
1602
1603static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1604			      void *context)
1605{
1606	struct cma_work *work = context;
1607	struct rdma_route *route;
1608
1609	route = &work->id->id.route;
1610
1611	if (!status) {
1612		route->num_paths = 1;
1613		*route->path_rec = *path_rec;
1614	} else {
1615		work->old_state = RDMA_CM_ROUTE_QUERY;
1616		work->new_state = RDMA_CM_ADDR_RESOLVED;
1617		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1618		work->event.status = status;
1619	}
1620
1621	queue_work(cma_wq, &work->work);
1622}
1623
1624static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1625			      struct cma_work *work)
1626{
1627	struct rdma_addr *addr = &id_priv->id.route.addr;
1628	struct ib_sa_path_rec path_rec;
1629	ib_sa_comp_mask comp_mask;
1630	struct sockaddr_in6 *sin6;
1631
1632	memset(&path_rec, 0, sizeof path_rec);
1633	rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1634	rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1635	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1636	path_rec.numb_path = 1;
1637	path_rec.reversible = 1;
1638	path_rec.service_id = cma_get_service_id(id_priv->id.ps,
1639							(struct sockaddr *) &addr->dst_addr);
1640
1641	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1642		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1643		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1644
1645	if (addr->src_addr.ss_family == AF_INET) {
1646		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1647		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1648	} else {
1649		sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1650		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1651		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1652	}
1653
1654	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1655					       id_priv->id.port_num, &path_rec,
1656					       comp_mask, timeout_ms,
1657					       GFP_KERNEL, cma_query_handler,
1658					       work, &id_priv->query);
1659
1660	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1661}
1662
1663static void cma_work_handler(struct work_struct *_work)
1664{
1665	struct cma_work *work = container_of(_work, struct cma_work, work);
1666	struct rdma_id_private *id_priv = work->id;
1667	int destroy = 0;
1668
1669	mutex_lock(&id_priv->handler_mutex);
1670	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1671		goto out;
1672
1673	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1674		cma_exch(id_priv, RDMA_CM_DESTROYING);
1675		destroy = 1;
1676	}
1677out:
1678	mutex_unlock(&id_priv->handler_mutex);
1679	cma_deref_id(id_priv);
1680	if (destroy)
1681		rdma_destroy_id(&id_priv->id);
1682	kfree(work);
1683}
1684
1685static void cma_ndev_work_handler(struct work_struct *_work)
1686{
1687	struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
1688	struct rdma_id_private *id_priv = work->id;
1689	int destroy = 0;
1690
1691	mutex_lock(&id_priv->handler_mutex);
1692	if (id_priv->state == RDMA_CM_DESTROYING ||
1693	    id_priv->state == RDMA_CM_DEVICE_REMOVAL)
1694		goto out;
1695
1696	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1697		cma_exch(id_priv, RDMA_CM_DESTROYING);
1698		destroy = 1;
1699	}
1700
1701out:
1702	mutex_unlock(&id_priv->handler_mutex);
1703	cma_deref_id(id_priv);
1704	if (destroy)
1705		rdma_destroy_id(&id_priv->id);
1706	kfree(work);
1707}
1708
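/*
 * Kick off an asynchronous SA path record query for an InfiniBand route.
 * The completion runs in cma_query_handler(), which fills in path_rec and
 * finishes the ROUTE_RESOLVED transition on success, or reverts the id to
 * RDMA_CM_ADDR_RESOLVED and reports RDMA_CM_EVENT_ROUTE_ERROR on failure;
 * both paths are delivered through cma_work_handler().
 */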
1709static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1710{
1711	struct rdma_route *route = &id_priv->id.route;
1712	struct cma_work *work;
1713	int ret;
1714
1715	work = kzalloc(sizeof *work, GFP_KERNEL);
1716	if (!work)
1717		return -ENOMEM;
1718
1719	work->id = id_priv;
1720	INIT_WORK(&work->work, cma_work_handler);
1721	work->old_state = RDMA_CM_ROUTE_QUERY;
1722	work->new_state = RDMA_CM_ROUTE_RESOLVED;
1723	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1724
1725	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1726	if (!route->path_rec) {
1727		ret = -ENOMEM;
1728		goto err1;
1729	}
1730
1731	ret = cma_query_ib_route(id_priv, timeout_ms, work);
1732	if (ret)
1733		goto err2;
1734
1735	return 0;
1736err2:
1737	kfree(route->path_rec);
1738	route->path_rec = NULL;
1739err1:
1740	kfree(work);
1741	return ret;
1742}
1743
1744int rdma_set_ib_paths(struct rdma_cm_id *id,
1745		      struct ib_sa_path_rec *path_rec, int num_paths)
1746{
1747	struct rdma_id_private *id_priv;
1748	int ret;
1749
1750	id_priv = container_of(id, struct rdma_id_private, id);
1751	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
1752			   RDMA_CM_ROUTE_RESOLVED))
1753		return -EINVAL;
1754
1755	id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
1756				     GFP_KERNEL);
1757	if (!id->route.path_rec) {
1758		ret = -ENOMEM;
1759		goto err;
1760	}
1761
1762	id->route.num_paths = num_paths;
1763	return 0;
1764err:
1765	cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
1766	return ret;
1767}
1768EXPORT_SYMBOL(rdma_set_ib_paths);
1769
1770static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1771{
1772	struct cma_work *work;
1773
1774	work = kzalloc(sizeof *work, GFP_KERNEL);
1775	if (!work)
1776		return -ENOMEM;
1777
1778	work->id = id_priv;
1779	INIT_WORK(&work->work, cma_work_handler);
1780	work->old_state = RDMA_CM_ROUTE_QUERY;
1781	work->new_state = RDMA_CM_ROUTE_RESOLVED;
1782	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1783	queue_work(cma_wq, &work->work);
1784	return 0;
1785}
1786
1787static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
1788{
1789	struct rdma_route *route = &id_priv->id.route;
1790	struct rdma_addr *addr = &route->addr;
1791	struct cma_work *work;
1792	int ret;
1793	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
1794	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
1795	struct net_device *ndev = NULL;
1796	u16 vid;
1797
1798	if (src_addr->sin_family != dst_addr->sin_family)
1799		return -EINVAL;
1800
1801	work = kzalloc(sizeof *work, GFP_KERNEL);
1802	if (!work)
1803		return -ENOMEM;
1804
1805	work->id = id_priv;
1806	INIT_WORK(&work->work, cma_work_handler);
1807
1808	route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
1809	if (!route->path_rec) {
1810		ret = -ENOMEM;
1811		goto err1;
1812	}
1813
1814	route->num_paths = 1;
1815
1816	if (addr->dev_addr.bound_dev_if)
1817		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
1818	if (!ndev) {
1819		ret = -ENODEV;
1820		goto err2;
1821	}
1822
1823	vid = rdma_vlan_dev_vlan_id(ndev);
1824
1825	iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
1826	iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
1827
1828	route->path_rec->hop_limit = 1;
1829	route->path_rec->reversible = 1;
1830	route->path_rec->pkey = cpu_to_be16(0xffff);
1831	route->path_rec->mtu_selector = IB_SA_EQ;
1832	route->path_rec->sl = netdev_get_prio_tc_map(
1833			ndev->priv_flags & IFF_802_1Q_VLAN ?
1834				vlan_dev_real_dev(ndev) : ndev,
1835			rt_tos2priority(id_priv->tos));
1836
1837	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
1838	route->path_rec->rate_selector = IB_SA_EQ;
1839	route->path_rec->rate = iboe_get_rate(ndev);
1840	dev_put(ndev);
1841	route->path_rec->packet_life_time_selector = IB_SA_EQ;
1842	route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
1843	if (!route->path_rec->mtu) {
1844		ret = -EINVAL;
1845		goto err2;
1846	}
1847
1848	work->old_state = RDMA_CM_ROUTE_QUERY;
1849	work->new_state = RDMA_CM_ROUTE_RESOLVED;
1850	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1851	work->event.status = 0;
1852
1853	queue_work(cma_wq, &work->work);
1854
1855	return 0;
1856
1857err2:
1858	kfree(route->path_rec);
1859	route->path_rec = NULL;
1860err1:
1861	kfree(work);
1862	return ret;
1863}
1864
1865int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1866{
1867	struct rdma_id_private *id_priv;
1868	int ret;
1869
1870	id_priv = container_of(id, struct rdma_id_private, id);
1871	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
1872		return -EINVAL;
1873
1874	atomic_inc(&id_priv->refcount);
1875	switch (rdma_node_get_transport(id->device->node_type)) {
1876	case RDMA_TRANSPORT_IB:
1877		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
1878		case IB_LINK_LAYER_INFINIBAND:
1879			ret = cma_resolve_ib_route(id_priv, timeout_ms);
1880			break;
1881		case IB_LINK_LAYER_ETHERNET:
1882			ret = cma_resolve_iboe_route(id_priv);
1883			break;
1884		default:
1885			ret = -ENOSYS;
1886		}
1887		break;
1888	case RDMA_TRANSPORT_IWARP:
1889		ret = cma_resolve_iw_route(id_priv, timeout_ms);
1890		break;
1891	default:
1892		ret = -ENOSYS;
1893		break;
1894	}
1895	if (ret)
1896		goto err;
1897
1898	return 0;
1899err:
1900	cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
1901	cma_deref_id(id_priv);
1902	return ret;
1903}
1904EXPORT_SYMBOL(rdma_resolve_route);
1905
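/*
 * Bind a loopback/wildcard id to a local device: prefer the first port
 * found in the ACTIVE state across all devices, falling back to port 1 of
 * the first device, and take the SGID and pkey from index 0 of that
 * port's caches.
 */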
1906static int cma_bind_loopback(struct rdma_id_private *id_priv)
1907{
1908	struct cma_device *cma_dev;
1909	struct ib_port_attr port_attr;
1910	union ib_gid gid;
1911	u16 pkey;
1912	int ret;
1913	u8 p;
1914
1915	mutex_lock(&lock);
1916	if (list_empty(&dev_list)) {
1917		ret = -ENODEV;
1918		goto out;
1919	}
1920	list_for_each_entry(cma_dev, &dev_list, list)
1921		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1922			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1923			    port_attr.state == IB_PORT_ACTIVE)
1924				goto port_found;
1925
1926	p = 1;
1927	cma_dev = list_entry(dev_list.next, struct cma_device, list);
1928
1929port_found:
1930	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1931	if (ret)
1932		goto out;
1933
1934	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1935	if (ret)
1936		goto out;
1937
1938	id_priv->id.route.addr.dev_addr.dev_type =
1939		(rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
1940		ARPHRD_INFINIBAND : ARPHRD_ETHER;
1941
1942	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1943	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1944	id_priv->id.port_num = p;
1945	cma_attach_to_dev(id_priv, cma_dev);
1946out:
1947	mutex_unlock(&lock);
1948	return ret;
1949}
1950
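/*
 * Completion callback for rdma_resolve_ip().  Moves the id from
 * RDMA_CM_ADDR_QUERY to RDMA_CM_ADDR_RESOLVED, binds it to an RDMA device
 * if it has none yet, and reports RDMA_CM_EVENT_ADDR_RESOLVED or
 * ADDR_ERROR to the user.  A non-zero return from the user's event
 * handler destroys the id.
 */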
1951static void addr_handler(int status, struct sockaddr *src_addr,
1952			 struct rdma_dev_addr *dev_addr, void *context)
1953{
1954	struct rdma_id_private *id_priv = context;
1955	struct rdma_cm_event event;
1956
1957	memset(&event, 0, sizeof event);
1958	mutex_lock(&id_priv->handler_mutex);
1959	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
1960			   RDMA_CM_ADDR_RESOLVED))
1961		goto out;
1962
1963	if (!status && !id_priv->cma_dev)
1964		status = cma_acquire_dev(id_priv);
1965
1966	if (status) {
1967		if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
1968				   RDMA_CM_ADDR_BOUND))
1969			goto out;
1970		event.event = RDMA_CM_EVENT_ADDR_ERROR;
1971		event.status = status;
1972	} else {
1973		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1974		       ip_addr_size(src_addr));
1975		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1976	}
1977
1978	if (id_priv->id.event_handler(&id_priv->id, &event)) {
1979		cma_exch(id_priv, RDMA_CM_DESTROYING);
1980		mutex_unlock(&id_priv->handler_mutex);
1981		cma_deref_id(id_priv);
1982		rdma_destroy_id(&id_priv->id);
1983		return;
1984	}
1985out:
1986	mutex_unlock(&id_priv->handler_mutex);
1987	cma_deref_id(id_priv);
1988}
1989
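/*
 * Address resolution for a wildcard destination: bind to a local device
 * if needed, use our own SGID as the DGID, fill a zero source address
 * from the destination IP, and queue work to report ADDR_RESOLVED
 * without querying the address resolution service.
 */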
1990static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1991{
1992	struct cma_work *work;
1993	struct sockaddr *src, *dst;
1994	union ib_gid gid;
1995	int ret;
1996
1997	work = kzalloc(sizeof *work, GFP_KERNEL);
1998	if (!work)
1999		return -ENOMEM;
2000
2001	if (!id_priv->cma_dev) {
2002		ret = cma_bind_loopback(id_priv);
2003		if (ret)
2004			goto err;
2005	}
2006
2007	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2008	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2009
2010	src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
2011	if (cma_zero_addr(src)) {
2012		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
2013		if ((src->sa_family = dst->sa_family) == AF_INET) {
2014			((struct sockaddr_in *)src)->sin_addr =
2015				((struct sockaddr_in *)dst)->sin_addr;
2016		} else {
2017			((struct sockaddr_in6 *)src)->sin6_addr =
2018				((struct sockaddr_in6 *)dst)->sin6_addr;
2019		}
2020	}
2021
2022	work->id = id_priv;
2023	INIT_WORK(&work->work, cma_work_handler);
2024	work->old_state = RDMA_CM_ADDR_QUERY;
2025	work->new_state = RDMA_CM_ADDR_RESOLVED;
2026	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2027	queue_work(cma_wq, &work->work);
2028	return 0;
2029err:
2030	kfree(work);
2031	return ret;
2032}
2033
2034static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2035			 struct sockaddr *dst_addr)
2036{
2037	if (!src_addr || !src_addr->sa_family) {
2038		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2039		if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
2040			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
2041				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
2042		}
2043	}
2044	return rdma_bind_addr(id, src_addr);
2045}
2046
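/*
 * Resolve the destination IP to an RDMA address.  An idle id is first
 * bound to a source address derived from @src_addr (or from @dst_addr's
 * address family); the id then moves from RDMA_CM_ADDR_BOUND to
 * RDMA_CM_ADDR_QUERY and the result is delivered asynchronously as
 * RDMA_CM_EVENT_ADDR_RESOLVED or ADDR_ERROR.
 *
 * Sketch of the typical active-side sequence (illustrative only; the
 * event handler, conn_param and error handling are assumed to exist
 * elsewhere):
 *
 *	rdma_resolve_addr(id, NULL, dst_addr, 2000);
 *	// wait for RDMA_CM_EVENT_ADDR_RESOLVED in the event handler
 *	rdma_resolve_route(id, 2000);
 *	// wait for RDMA_CM_EVENT_ROUTE_RESOLVED
 *	rdma_connect(id, &conn_param);
 */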
2047int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2048		      struct sockaddr *dst_addr, int timeout_ms)
2049{
2050	struct rdma_id_private *id_priv;
2051	int ret;
2052
2053	id_priv = container_of(id, struct rdma_id_private, id);
2054	if (id_priv->state == RDMA_CM_IDLE) {
2055		ret = cma_bind_addr(id, src_addr, dst_addr);
2056		if (ret)
2057			return ret;
2058	}
2059
2060	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
2061		return -EINVAL;
2062
2063	atomic_inc(&id_priv->refcount);
2064	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
2065	if (cma_any_addr(dst_addr))
2066		ret = cma_resolve_loopback(id_priv);
2067	else
2068		ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
2069				      dst_addr, &id->route.addr.dev_addr,
2070				      timeout_ms, addr_handler, id_priv);
2071	if (ret)
2072		goto err;
2073
2074	return 0;
2075err:
2076	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
2077	cma_deref_id(id_priv);
2078	return ret;
2079}
2080EXPORT_SYMBOL(rdma_resolve_addr);
2081
2082int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
2083{
2084	struct rdma_id_private *id_priv;
2085	unsigned long flags;
2086	int ret;
2087
2088	id_priv = container_of(id, struct rdma_id_private, id);
2089	spin_lock_irqsave(&id_priv->lock, flags);
2090	if (id_priv->state == RDMA_CM_IDLE) {
2091		id_priv->reuseaddr = reuse;
2092		ret = 0;
2093	} else {
2094		ret = -EINVAL;
2095	}
2096	spin_unlock_irqrestore(&id_priv->lock, flags);
2097	return ret;
2098}
2099EXPORT_SYMBOL(rdma_set_reuseaddr);
2100
2101static void cma_bind_port(struct rdma_bind_list *bind_list,
2102			  struct rdma_id_private *id_priv)
2103{
2104	struct sockaddr_in *sin;
2105
2106	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2107	sin->sin_port = htons(bind_list->port);
2108	id_priv->bind_list = bind_list;
2109	hlist_add_head(&id_priv->node, &bind_list->owners);
2110}
2111
2112static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
2113			  unsigned short snum)
2114{
2115	struct rdma_bind_list *bind_list;
2116	int port, ret;
2117
2118	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2119	if (!bind_list)
2120		return -ENOMEM;
2121
2122	do {
2123		ret = idr_get_new_above(ps, bind_list, snum, &port);
2124	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
2125
2126	if (ret)
2127		goto err1;
2128
2129	if (port != snum) {
2130		ret = -EADDRNOTAVAIL;
2131		goto err2;
2132	}
2133
2134	bind_list->ps = ps;
2135	bind_list->port = (unsigned short) port;
2136	cma_bind_port(bind_list, id_priv);
2137	return 0;
2138err2:
2139	idr_remove(ps, port);
2140err1:
2141	kfree(bind_list);
2142	return ret;
2143}
2144
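/*
 * Allocate an ephemeral port: start from a random point in the local
 * port range and probe until a free port is found, skipping the most
 * recently allocated port so a just-closed port is not handed out again
 * right away.
 */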
2145static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
2146{
2147	static unsigned int last_used_port;
2148	int low, high, remaining;
2149	unsigned int rover;
2150
2151	inet_get_local_port_range(&low, &high);
2152	remaining = (high - low) + 1;
2153	rover = net_random() % remaining + low;
2154retry:
2155	if (last_used_port != rover &&
2156	    !idr_find(ps, (unsigned short) rover)) {
2157		int ret = cma_alloc_port(ps, id_priv, rover);
2158		/*
2159		 * Remember the previously used port number in order to avoid
2160		 * re-using the same port immediately after it is closed.
2161		 */
2162		if (!ret)
2163			last_used_port = rover;
2164		if (ret != -EADDRNOTAVAIL)
2165			return ret;
2166	}
2167	if (--remaining) {
2168		rover++;
2169		if ((rover < low) || (rover > high))
2170			rover = low;
2171		goto retry;
2172	}
2173	return -EADDRNOTAVAIL;
2174}
2175
2176/*
2177 * Check that the requested port is available.  This is called when trying to
2178 * bind to a specific port, or when trying to listen on a bound port.  In
2179 * the latter case, the provided id_priv may already be on the bind_list, but
2180 * we still need to check that it's okay to start listening.
2181 */
2182static int cma_check_port(struct rdma_bind_list *bind_list,
2183			  struct rdma_id_private *id_priv, uint8_t reuseaddr)
2184{
2185	struct rdma_id_private *cur_id;
2186	struct sockaddr *addr, *cur_addr;
2187	struct hlist_node *node;
2188
2189	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
2190	if (cma_any_addr(addr) && !reuseaddr)
2191		return -EADDRNOTAVAIL;
2192
2193	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
2194		if (id_priv == cur_id)
2195			continue;
2196
2197		if ((cur_id->state == RDMA_CM_LISTEN) ||
2198		    !reuseaddr || !cur_id->reuseaddr) {
2199			cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
2200			if (cma_any_addr(cur_addr))
2201				return -EADDRNOTAVAIL;
2202
2203			if (!cma_addr_cmp(addr, cur_addr))
2204				return -EADDRINUSE;
2205		}
2206	}
2207	return 0;
2208}
2209
2210static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
2211{
2212	struct rdma_bind_list *bind_list;
2213	unsigned short snum;
2214	int ret;
2215
2216	snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr));
2217	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
2218		return -EACCES;
2219
2220	bind_list = idr_find(ps, snum);
2221	if (!bind_list) {
2222		ret = cma_alloc_port(ps, id_priv, snum);
2223	} else {
2224		ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
2225		if (!ret)
2226			cma_bind_port(bind_list, id_priv);
2227	}
2228	return ret;
2229}
2230
2231static int cma_bind_listen(struct rdma_id_private *id_priv)
2232{
2233	struct rdma_bind_list *bind_list = id_priv->bind_list;
2234	int ret = 0;
2235
2236	mutex_lock(&lock);
2237	if (bind_list->owners.first->next)
2238		ret = cma_check_port(bind_list, id_priv, 0);
2239	mutex_unlock(&lock);
2240	return ret;
2241}
2242
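/*
 * Pick the idr matching the id's port space (SDP/TCP/UDP/IPoIB/IB) and
 * either claim the port the user bound to or allocate an ephemeral one.
 */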
2243static int cma_get_port(struct rdma_id_private *id_priv)
2244{
2245	struct idr *ps;
2246	int ret;
2247
2248	switch (id_priv->id.ps) {
2249	case RDMA_PS_SDP:
2250		ps = &sdp_ps;
2251		break;
2252	case RDMA_PS_TCP:
2253		ps = &tcp_ps;
2254		break;
2255	case RDMA_PS_UDP:
2256		ps = &udp_ps;
2257		break;
2258	case RDMA_PS_IPOIB:
2259		ps = &ipoib_ps;
2260		break;
2261	case RDMA_PS_IB:
2262		ps = &ib_ps;
2263		break;
2264	default:
2265		return -EPROTONOSUPPORT;
2266	}
2267
2268	mutex_lock(&lock);
2269	if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
2270		ret = cma_alloc_any_port(ps, id_priv);
2271	else
2272		ret = cma_use_port(ps, id_priv);
2273	mutex_unlock(&lock);
2274
2275	return ret;
2276}
2277
2278static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
2279			       struct sockaddr *addr)
2280{
2281#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2282	struct sockaddr_in6 *sin6;
2283
2284	if (addr->sa_family != AF_INET6)
2285		return 0;
2286
2287	sin6 = (struct sockaddr_in6 *) addr;
2288	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
2289	    !sin6->sin6_scope_id)
2290		return -EINVAL;
2291
2292	dev_addr->bound_dev_if = sin6->sin6_scope_id;
2293#endif
2294	return 0;
2295}
2296
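/*
 * Start listening for connection requests.  An idle id is first bound to
 * the IPv4 wildcard address; the id then moves from RDMA_CM_ADDR_BOUND
 * to RDMA_CM_LISTEN.  If the id is attached to a device, the listen is
 * set up on that device's IB or iWARP CM; otherwise cma_listen_on_all()
 * arranges listens on every current (and, via cma_add_one(), future)
 * device.
 */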
2297int rdma_listen(struct rdma_cm_id *id, int backlog)
2298{
2299	struct rdma_id_private *id_priv;
2300	int ret;
2301
2302	id_priv = container_of(id, struct rdma_id_private, id);
2303	if (id_priv->state == RDMA_CM_IDLE) {
2304		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
2305		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
2306		if (ret)
2307			return ret;
2308	}
2309
2310	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
2311		return -EINVAL;
2312
2313	if (id_priv->reuseaddr) {
2314		ret = cma_bind_listen(id_priv);
2315		if (ret)
2316			goto err;
2317	}
2318
2319	id_priv->backlog = backlog;
2320	if (id->device) {
2321		switch (rdma_node_get_transport(id->device->node_type)) {
2322		case RDMA_TRANSPORT_IB:
2323			ret = cma_ib_listen(id_priv);
2324			if (ret)
2325				goto err;
2326			break;
2327		case RDMA_TRANSPORT_IWARP:
2328			ret = cma_iw_listen(id_priv, backlog);
2329			if (ret)
2330				goto err;
2331			break;
2332		default:
2333			ret = -ENOSYS;
2334			goto err;
2335		}
2336	} else
2337		cma_listen_on_all(id_priv);
2338
2339	return 0;
2340err:
2341	id_priv->backlog = 0;
2342	cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
2343	return ret;
2344}
2345EXPORT_SYMBOL(rdma_listen);
2346
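/*
 * Bind the id to a local IPv4/IPv6 address.  A non-wildcard address is
 * translated to an RDMA device, which the id is attached to; in all
 * cases a port is reserved in the matching port space.  Moves the id
 * from RDMA_CM_IDLE to RDMA_CM_ADDR_BOUND.
 */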
2347int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2348{
2349	struct rdma_id_private *id_priv;
2350	int ret;
2351
2352	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
2353		return -EAFNOSUPPORT;
2354
2355	id_priv = container_of(id, struct rdma_id_private, id);
2356	if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
2357		return -EINVAL;
2358
2359	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
2360	if (ret)
2361		goto err1;
2362
2363	if (!cma_any_addr(addr)) {
2364		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2365		if (ret)
2366			goto err1;
2367
2368		ret = cma_acquire_dev(id_priv);
2369		if (ret)
2370			goto err1;
2371	}
2372
2373	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2374	ret = cma_get_port(id_priv);
2375	if (ret)
2376		goto err2;
2377
2378	return 0;
2379err2:
2380	if (id_priv->cma_dev)
2381		cma_release_dev(id_priv);
2382err1:
2383	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
2384	return ret;
2385}
2386EXPORT_SYMBOL(rdma_bind_addr);
2387
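/*
 * Build the private-data header carried in the connection request: an
 * SDP hello header for RDMA_PS_SDP, otherwise the generic cma_hdr.  Both
 * carry the IP version, the source and destination addresses and the
 * source port.
 */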
2388static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2389			  struct rdma_route *route)
2390{
2391	struct cma_hdr *cma_hdr;
2392	struct sdp_hh *sdp_hdr;
2393
2394	if (route->addr.src_addr.ss_family == AF_INET) {
2395		struct sockaddr_in *src4, *dst4;
2396
2397		src4 = (struct sockaddr_in *) &route->addr.src_addr;
2398		dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2399
2400		switch (ps) {
2401		case RDMA_PS_SDP:
2402			sdp_hdr = hdr;
2403			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2404				return -EINVAL;
2405			sdp_set_ip_ver(sdp_hdr, 4);
2406			sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2407			sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2408			sdp_hdr->port = src4->sin_port;
2409			break;
2410		default:
2411			cma_hdr = hdr;
2412			cma_hdr->cma_version = CMA_VERSION;
2413			cma_set_ip_ver(cma_hdr, 4);
2414			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2415			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2416			cma_hdr->port = src4->sin_port;
2417			break;
2418		}
2419	} else {
2420		struct sockaddr_in6 *src6, *dst6;
2421
2422		src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
2423		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
2424
2425		switch (ps) {
2426		case RDMA_PS_SDP:
2427			sdp_hdr = hdr;
2428			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2429				return -EINVAL;
2430			sdp_set_ip_ver(sdp_hdr, 6);
2431			sdp_hdr->src_addr.ip6 = src6->sin6_addr;
2432			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
2433			sdp_hdr->port = src6->sin6_port;
2434			break;
2435		default:
2436			cma_hdr = hdr;
2437			cma_hdr->cma_version = CMA_VERSION;
2438			cma_set_ip_ver(cma_hdr, 6);
2439			cma_hdr->src_addr.ip6 = src6->sin6_addr;
2440			cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2441			cma_hdr->port = src6->sin6_port;
2442			break;
2443		}
2444	}
2445	return 0;
2446}
2447
2448static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2449				struct ib_cm_event *ib_event)
2450{
2451	struct rdma_id_private *id_priv = cm_id->context;
2452	struct rdma_cm_event event;
2453	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2454	int ret = 0;
2455
2456	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
2457		return 0;
2458
2459	memset(&event, 0, sizeof event);
2460	switch (ib_event->event) {
2461	case IB_CM_SIDR_REQ_ERROR:
2462		event.event = RDMA_CM_EVENT_UNREACHABLE;
2463		event.status = -ETIMEDOUT;
2464		break;
2465	case IB_CM_SIDR_REP_RECEIVED:
2466		event.param.ud.private_data = ib_event->private_data;
2467		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2468		if (rep->status != IB_SIDR_SUCCESS) {
2469			event.event = RDMA_CM_EVENT_UNREACHABLE;
2470			event.status = ib_event->param.sidr_rep_rcvd.status;
2471			break;
2472		}
2473		ret = cma_set_qkey(id_priv);
2474		if (ret) {
2475			event.event = RDMA_CM_EVENT_ADDR_ERROR;
2476			event.status = -EINVAL;
2477			break;
2478		}
2479		if (id_priv->qkey != rep->qkey) {
2480			event.event = RDMA_CM_EVENT_UNREACHABLE;
2481			event.status = -EINVAL;
2482			break;
2483		}
2484		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2485				     id_priv->id.route.path_rec,
2486				     &event.param.ud.ah_attr);
2487		event.param.ud.qp_num = rep->qpn;
2488		event.param.ud.qkey = rep->qkey;
2489		event.event = RDMA_CM_EVENT_ESTABLISHED;
2490		event.status = 0;
2491		break;
2492	default:
2493		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2494		       ib_event->event);
2495		goto out;
2496	}
2497
2498	ret = id_priv->id.event_handler(&id_priv->id, &event);
2499	if (ret) {
2500		/* Destroy the CM ID by returning a non-zero value. */
2501		id_priv->cm_id.ib = NULL;
2502		cma_exch(id_priv, RDMA_CM_DESTROYING);
2503		mutex_unlock(&id_priv->handler_mutex);
2504		rdma_destroy_id(&id_priv->id);
2505		return ret;
2506	}
2507out:
2508	mutex_unlock(&id_priv->handler_mutex);
2509	return ret;
2510}
2511
2512static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2513			      struct rdma_conn_param *conn_param)
2514{
2515	struct ib_cm_sidr_req_param req;
2516	struct rdma_route *route;
2517	struct ib_cm_id	*id;
2518	int ret;
2519
2520	req.private_data_len = sizeof(struct cma_hdr) +
2521			       conn_param->private_data_len;
2522	if (req.private_data_len < conn_param->private_data_len)
2523		return -EINVAL;
2524
2525	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2526	if (!req.private_data)
2527		return -ENOMEM;
2528
2529	if (conn_param->private_data && conn_param->private_data_len)
2530		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2531		       conn_param->private_data, conn_param->private_data_len);
2532
2533	route = &id_priv->id.route;
2534	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2535	if (ret)
2536		goto out;
2537
2538	id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
2539			     id_priv);
2540	if (IS_ERR(id)) {
2541		ret = PTR_ERR(id);
2542		goto out;
2543	}
2544	id_priv->cm_id.ib = id;
2545
2546	req.path = route->path_rec;
2547	req.service_id = cma_get_service_id(id_priv->id.ps,
2548					    (struct sockaddr *) &route->addr.dst_addr);
2549	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2550	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2551
2552	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2553	if (ret) {
2554		ib_destroy_cm_id(id_priv->cm_id.ib);
2555		id_priv->cm_id.ib = NULL;
2556	}
2557out:
2558	kfree(req.private_data);
2559	return ret;
2560}
2561
2562static int cma_connect_ib(struct rdma_id_private *id_priv,
2563			  struct rdma_conn_param *conn_param)
2564{
2565	struct ib_cm_req_param req;
2566	struct rdma_route *route;
2567	void *private_data;
2568	struct ib_cm_id	*id;
2569	int offset, ret;
2570
2571	memset(&req, 0, sizeof req);
2572	offset = cma_user_data_offset(id_priv->id.ps);
2573	req.private_data_len = offset + conn_param->private_data_len;
2574	if (req.private_data_len < conn_param->private_data_len)
2575		return -EINVAL;
2576
2577	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2578	if (!private_data)
2579		return -ENOMEM;
2580
2581	if (conn_param->private_data && conn_param->private_data_len)
2582		memcpy(private_data + offset, conn_param->private_data,
2583		       conn_param->private_data_len);
2584
2585	id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
2586	if (IS_ERR(id)) {
2587		ret = PTR_ERR(id);
2588		goto out;
2589	}
2590	id_priv->cm_id.ib = id;
2591
2592	route = &id_priv->id.route;
2593	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2594	if (ret)
2595		goto out;
2596	req.private_data = private_data;
2597
2598	req.primary_path = &route->path_rec[0];
2599	if (route->num_paths == 2)
2600		req.alternate_path = &route->path_rec[1];
2601
2602	req.service_id = cma_get_service_id(id_priv->id.ps,
2603					    (struct sockaddr *) &route->addr.dst_addr);
2604	req.qp_num = id_priv->qp_num;
2605	req.qp_type = id_priv->id.qp_type;
2606	req.starting_psn = id_priv->seq_num;
2607	req.responder_resources = conn_param->responder_resources;
2608	req.initiator_depth = conn_param->initiator_depth;
2609	req.flow_control = conn_param->flow_control;
2610	req.retry_count = conn_param->retry_count;
2611	req.rnr_retry_count = conn_param->rnr_retry_count;
2612	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2613	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2614	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2615	req.srq = id_priv->srq ? 1 : 0;
2616
2617	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2618out:
2619	if (ret && !IS_ERR(id)) {
2620		ib_destroy_cm_id(id);
2621		id_priv->cm_id.ib = NULL;
2622	}
2623
2624	kfree(private_data);
2625	return ret;
2626}
2627
2628static int cma_connect_iw(struct rdma_id_private *id_priv,
2629			  struct rdma_conn_param *conn_param)
2630{
2631	struct iw_cm_id *cm_id;
2632	struct sockaddr_in *sin;
2633	int ret;
2634	struct iw_cm_conn_param iw_param;
2635
2636	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2637	if (IS_ERR(cm_id))
2638		return PTR_ERR(cm_id);
2639
2640	id_priv->cm_id.iw = cm_id;
2641
2642	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2643	cm_id->local_addr = *sin;
2644
2645	sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
2646	cm_id->remote_addr = *sin;
2647
2648	ret = cma_modify_qp_rtr(id_priv, conn_param);
2649	if (ret)
2650		goto out;
2651
2652	if (conn_param) {
2653		iw_param.ord = conn_param->initiator_depth;
2654		iw_param.ird = conn_param->responder_resources;
2655		iw_param.private_data = conn_param->private_data;
2656		iw_param.private_data_len = conn_param->private_data_len;
2657		iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
2658	} else {
2659		memset(&iw_param, 0, sizeof iw_param);
2660		iw_param.qpn = id_priv->qp_num;
2661	}
2662	ret = iw_cm_connect(cm_id, &iw_param);
2663out:
2664	if (ret) {
2665		iw_destroy_cm_id(cm_id);
2666		id_priv->cm_id.iw = NULL;
2667	}
2668	return ret;
2669}
2670
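/*
 * Initiate a connection on a route-resolved id (RDMA_CM_ROUTE_RESOLVED ->
 * RDMA_CM_CONNECT).  UD ids use the SIDR protocol, connected IB ids send
 * a CM REQ, and iWARP ids connect through the iw_cm.  The outcome is
 * reported through the id's event handler, e.g. as
 * RDMA_CM_EVENT_ESTABLISHED.
 */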
2671int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2672{
2673	struct rdma_id_private *id_priv;
2674	int ret;
2675
2676	id_priv = container_of(id, struct rdma_id_private, id);
2677	if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
2678		return -EINVAL;
2679
2680	if (!id->qp) {
2681		id_priv->qp_num = conn_param->qp_num;
2682		id_priv->srq = conn_param->srq;
2683	}
2684
2685	switch (rdma_node_get_transport(id->device->node_type)) {
2686	case RDMA_TRANSPORT_IB:
2687		if (id->qp_type == IB_QPT_UD)
2688			ret = cma_resolve_ib_udp(id_priv, conn_param);
2689		else
2690			ret = cma_connect_ib(id_priv, conn_param);
2691		break;
2692	case RDMA_TRANSPORT_IWARP:
2693		ret = cma_connect_iw(id_priv, conn_param);
2694		break;
2695	default:
2696		ret = -ENOSYS;
2697		break;
2698	}
2699	if (ret)
2700		goto err;
2701
2702	return 0;
2703err:
2704	cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
2705	return ret;
2706}
2707EXPORT_SYMBOL(rdma_connect);
2708
2709static int cma_accept_ib(struct rdma_id_private *id_priv,
2710			 struct rdma_conn_param *conn_param)
2711{
2712	struct ib_cm_rep_param rep;
2713	int ret;
2714
2715	ret = cma_modify_qp_rtr(id_priv, conn_param);
2716	if (ret)
2717		goto out;
2718
2719	ret = cma_modify_qp_rts(id_priv, conn_param);
2720	if (ret)
2721		goto out;
2722
2723	memset(&rep, 0, sizeof rep);
2724	rep.qp_num = id_priv->qp_num;
2725	rep.starting_psn = id_priv->seq_num;
2726	rep.private_data = conn_param->private_data;
2727	rep.private_data_len = conn_param->private_data_len;
2728	rep.responder_resources = conn_param->responder_resources;
2729	rep.initiator_depth = conn_param->initiator_depth;
2730	rep.failover_accepted = 0;
2731	rep.flow_control = conn_param->flow_control;
2732	rep.rnr_retry_count = conn_param->rnr_retry_count;
2733	rep.srq = id_priv->srq ? 1 : 0;
2734
2735	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2736out:
2737	return ret;
2738}
2739
2740static int cma_accept_iw(struct rdma_id_private *id_priv,
2741		  struct rdma_conn_param *conn_param)
2742{
2743	struct iw_cm_conn_param iw_param;
2744	int ret;
2745
2746	ret = cma_modify_qp_rtr(id_priv, conn_param);
2747	if (ret)
2748		return ret;
2749
2750	iw_param.ord = conn_param->initiator_depth;
2751	iw_param.ird = conn_param->responder_resources;
2752	iw_param.private_data = conn_param->private_data;
2753	iw_param.private_data_len = conn_param->private_data_len;
2754	if (id_priv->id.qp)
2755		iw_param.qpn = id_priv->qp_num;
2756	else
2757		iw_param.qpn = conn_param->qp_num;
2758
2759	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2760}
2761
2762static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2763			     enum ib_cm_sidr_status status,
2764			     const void *private_data, int private_data_len)
2765{
2766	struct ib_cm_sidr_rep_param rep;
2767	int ret;
2768
2769	memset(&rep, 0, sizeof rep);
2770	rep.status = status;
2771	if (status == IB_SIDR_SUCCESS) {
2772		ret = cma_set_qkey(id_priv);
2773		if (ret)
2774			return ret;
2775		rep.qp_num = id_priv->qp_num;
2776		rep.qkey = id_priv->qkey;
2777	}
2778	rep.private_data = private_data;
2779	rep.private_data_len = private_data_len;
2780
2781	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2782}
2783
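/*
 * Accept a connection request on the passive side.  UD requests are
 * answered with a SIDR REP, connected IB requests with a CM REP built
 * from @conn_param (or via cma_rep_recv() when no parameters are given),
 * and iWARP requests through iw_cm_accept().  On failure the request is
 * rejected and the QP is moved to the error state.
 */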
2784int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2785{
2786	struct rdma_id_private *id_priv;
2787	int ret;
2788
2789	id_priv = container_of(id, struct rdma_id_private, id);
2790
2791	id_priv->owner = task_pid_nr(current);
2792
2793	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
2794		return -EINVAL;
2795
2796	if (!id->qp && conn_param) {
2797		id_priv->qp_num = conn_param->qp_num;
2798		id_priv->srq = conn_param->srq;
2799	}
2800
2801	switch (rdma_node_get_transport(id->device->node_type)) {
2802	case RDMA_TRANSPORT_IB:
2803		if (id->qp_type == IB_QPT_UD) {
2804			if (conn_param)
2805				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2806							conn_param->private_data,
2807							conn_param->private_data_len);
2808			else
2809				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2810							NULL, 0);
2811		} else {
2812			if (conn_param)
2813				ret = cma_accept_ib(id_priv, conn_param);
2814			else
2815				ret = cma_rep_recv(id_priv);
2816		}
2817		break;
2818	case RDMA_TRANSPORT_IWARP:
2819		ret = cma_accept_iw(id_priv, conn_param);
2820		break;
2821	default:
2822		ret = -ENOSYS;
2823		break;
2824	}
2825
2826	if (ret)
2827		goto reject;
2828
2829	return 0;
2830reject:
2831	cma_modify_qp_err(id_priv);
2832	rdma_reject(id, NULL, 0);
2833	return ret;
2834}
2835EXPORT_SYMBOL(rdma_accept);
2836
2837int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2838{
2839	struct rdma_id_private *id_priv;
2840	int ret;
2841
2842	id_priv = container_of(id, struct rdma_id_private, id);
2843	if (!id_priv->cm_id.ib)
2844		return -EINVAL;
2845
2846	switch (id->device->node_type) {
2847	case RDMA_NODE_IB_CA:
2848		ret = ib_cm_notify(id_priv->cm_id.ib, event);
2849		break;
2850	default:
2851		ret = 0;
2852		break;
2853	}
2854	return ret;
2855}
2856EXPORT_SYMBOL(rdma_notify);
2857
2858int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2859		u8 private_data_len)
2860{
2861	struct rdma_id_private *id_priv;
2862	int ret;
2863
2864	id_priv = container_of(id, struct rdma_id_private, id);
2865	if (!id_priv->cm_id.ib)
2866		return -EINVAL;
2867
2868	switch (rdma_node_get_transport(id->device->node_type)) {
2869	case RDMA_TRANSPORT_IB:
2870		if (id->qp_type == IB_QPT_UD)
2871			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2872						private_data, private_data_len);
2873		else
2874			ret = ib_send_cm_rej(id_priv->cm_id.ib,
2875					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
2876					     0, private_data, private_data_len);
2877		break;
2878	case RDMA_TRANSPORT_IWARP:
2879		ret = iw_cm_reject(id_priv->cm_id.iw,
2880				   private_data, private_data_len);
2881		break;
2882	default:
2883		ret = -ENOSYS;
2884		break;
2885	}
2886	return ret;
2887}
2888EXPORT_SYMBOL(rdma_reject);
2889
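/*
 * Tear down an established connection.  For IB the QP is moved to the
 * error state and a DREQ is sent, falling back to a DREP when we are
 * responding to a remote disconnect; iWARP disconnects go through
 * iw_cm_disconnect().
 */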
2890int rdma_disconnect(struct rdma_cm_id *id)
2891{
2892	struct rdma_id_private *id_priv;
2893	int ret;
2894
2895	id_priv = container_of(id, struct rdma_id_private, id);
2896	if (!id_priv->cm_id.ib)
2897		return -EINVAL;
2898
2899	switch (rdma_node_get_transport(id->device->node_type)) {
2900	case RDMA_TRANSPORT_IB:
2901		ret = cma_modify_qp_err(id_priv);
2902		if (ret)
2903			goto out;
2904		/* Initiate or respond to a disconnect. */
2905		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2906			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2907		break;
2908	case RDMA_TRANSPORT_IWARP:
2909		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2910		break;
2911	default:
2912		ret = -EINVAL;
2913		break;
2914	}
2915out:
2916	return ret;
2917}
2918EXPORT_SYMBOL(rdma_disconnect);
2919
2920static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2921{
2922	struct rdma_id_private *id_priv;
2923	struct cma_multicast *mc = multicast->context;
2924	struct rdma_cm_event event;
2925	int ret;
2926
2927	id_priv = mc->id_priv;
2928	if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
2929	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
2930		return 0;
2931
2932	mutex_lock(&id_priv->qp_mutex);
2933	if (!status && id_priv->id.qp)
2934		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2935					 be16_to_cpu(multicast->rec.mlid));
2936	mutex_unlock(&id_priv->qp_mutex);
2937
2938	memset(&event, 0, sizeof event);
2939	event.status = status;
2940	event.param.ud.private_data = mc->context;
2941	if (!status) {
2942		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2943		ib_init_ah_from_mcmember(id_priv->id.device,
2944					 id_priv->id.port_num, &multicast->rec,
2945					 &event.param.ud.ah_attr);
2946		event.param.ud.qp_num = 0xFFFFFF;
2947		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2948	} else
2949		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2950
2951	ret = id_priv->id.event_handler(&id_priv->id, &event);
2952	if (ret) {
2953		cma_exch(id_priv, RDMA_CM_DESTROYING);
2954		mutex_unlock(&id_priv->handler_mutex);
2955		rdma_destroy_id(&id_priv->id);
2956		return 0;
2957	}
2958
2959	mutex_unlock(&id_priv->handler_mutex);
2960	return 0;
2961}
2962
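/*
 * Derive the IB MGID for the requested multicast address: a wildcard
 * maps to the zero MGID, an IPv6 address in the SA-assigned range is
 * used verbatim, and other IPv4/IPv6 addresses go through the standard
 * IP-over-IB multicast mapping (with the RDMA CM signature byte for
 * RDMA_PS_UDP).
 */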
2963static void cma_set_mgid(struct rdma_id_private *id_priv,
2964			 struct sockaddr *addr, union ib_gid *mgid)
2965{
2966	unsigned char mc_map[MAX_ADDR_LEN];
2967	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2968	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2969	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2970
2971	if (cma_any_addr(addr)) {
2972		memset(mgid, 0, sizeof *mgid);
2973	} else if ((addr->sa_family == AF_INET6) &&
2974		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
2975								 0xFF10A01B)) {
2976		/* IPv6 address is an SA assigned MGID. */
2977		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2978	} else if (addr->sa_family == AF_INET6) {
2979		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
2980		if (id_priv->id.ps == RDMA_PS_UDP)
2981			mc_map[7] = 0x01;	/* Use RDMA CM signature */
2982		*mgid = *(union ib_gid *) (mc_map + 4);
2983	} else {
2984		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
2985		if (id_priv->id.ps == RDMA_PS_UDP)
2986			mc_map[7] = 0x01;	/* Use RDMA CM signature */
2987		*mgid = *(union ib_gid *) (mc_map + 4);
2988	}
2989}
2990
2991static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2992				 struct cma_multicast *mc)
2993{
2994	struct ib_sa_mcmember_rec rec;
2995	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2996	ib_sa_comp_mask comp_mask;
2997	int ret;
2998
2999	ib_addr_get_mgid(dev_addr, &rec.mgid);
3000	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
3001				     &rec.mgid, &rec);
3002	if (ret)
3003		return ret;
3004
3005	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
3006	if (id_priv->id.ps == RDMA_PS_UDP)
3007		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3008	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3009	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3010	rec.join_state = 1;
3011
3012	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
3013		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
3014		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
3015		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
3016		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
3017
3018	if (id_priv->id.ps == RDMA_PS_IPOIB)
3019		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
3020			     IB_SA_MCMEMBER_REC_RATE_SELECTOR;
3021
3022	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
3023						id_priv->id.port_num, &rec,
3024						comp_mask, GFP_KERNEL,
3025						cma_ib_mc_handler, mc);
3026	if (IS_ERR(mc->multicast.ib))
3027		return PTR_ERR(mc->multicast.ib);
3028
3029	return 0;
3030}
3031
3032static void iboe_mcast_work_handler(struct work_struct *work)
3033{
3034	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
3035	struct cma_multicast *mc = mw->mc;
3036	struct ib_sa_multicast *m = mc->multicast.ib;
3037
3038	mc->multicast.ib->context = mc;
3039	cma_ib_mc_handler(0, m);
3040	kref_put(&mc->mcref, release_mc);
3041	kfree(mw);
3042}
3043
3044static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
3045{
3046	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
3047	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
3048
3049	if (cma_any_addr(addr)) {
3050		memset(mgid, 0, sizeof *mgid);
3051	} else if (addr->sa_family == AF_INET6) {
3052		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3053	} else {
3054		mgid->raw[0] = 0xff;
3055		mgid->raw[1] = 0x0e;
3056		mgid->raw[2] = 0;
3057		mgid->raw[3] = 0;
3058		mgid->raw[4] = 0;
3059		mgid->raw[5] = 0;
3060		mgid->raw[6] = 0;
3061		mgid->raw[7] = 0;
3062		mgid->raw[8] = 0;
3063		mgid->raw[9] = 0;
3064		mgid->raw[10] = 0xff;
3065		mgid->raw[11] = 0xff;
3066		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
3067	}
3068}
3069
3070static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3071				   struct cma_multicast *mc)
3072{
3073	struct iboe_mcast_work *work;
3074	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3075	int err;
3076	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
3077	struct net_device *ndev = NULL;
3078
3079	if (cma_zero_addr((struct sockaddr *)&mc->addr))
3080		return -EINVAL;
3081
3082	work = kzalloc(sizeof *work, GFP_KERNEL);
3083	if (!work)
3084		return -ENOMEM;
3085
3086	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
3087	if (!mc->multicast.ib) {
3088		err = -ENOMEM;
3089		goto out1;
3090	}
3091
3092	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
3093
3094	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
3095	if (id_priv->id.ps == RDMA_PS_UDP)
3096		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3097
3098	if (dev_addr->bound_dev_if)
3099		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
3100	if (!ndev) {
3101		err = -ENODEV;
3102		goto out2;
3103	}
3104	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
3105	mc->multicast.ib->rec.hop_limit = 1;
3106	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
3107	dev_put(ndev);
3108	if (!mc->multicast.ib->rec.mtu) {
3109		err = -EINVAL;
3110		goto out2;
3111	}
3112	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
3113	work->id = id_priv;
3114	work->mc = mc;
3115	INIT_WORK(&work->work, iboe_mcast_work_handler);
3116	kref_get(&mc->mcref);
3117	queue_work(cma_wq, &work->work);
3118
3119	return 0;
3120
3121out2:
3122	kfree(mc->multicast.ib);
3123out1:
3124	kfree(work);
3125	return err;
3126}
3127
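/*
 * Join the multicast group given by @addr.  The id must be address-bound
 * or address-resolved.  On InfiniBand ports the join goes through the
 * SA; on RoCE/IBoE ports the member record is built locally.  Completion
 * is reported as RDMA_CM_EVENT_MULTICAST_JOIN or MULTICAST_ERROR.
 */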
3128int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3129			void *context)
3130{
3131	struct rdma_id_private *id_priv;
3132	struct cma_multicast *mc;
3133	int ret;
3134
3135	id_priv = container_of(id, struct rdma_id_private, id);
3136	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
3137	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
3138		return -EINVAL;
3139
3140	mc = kmalloc(sizeof *mc, GFP_KERNEL);
3141	if (!mc)
3142		return -ENOMEM;
3143
3144	memcpy(&mc->addr, addr, ip_addr_size(addr));
3145	mc->context = context;
3146	mc->id_priv = id_priv;
3147
3148	spin_lock(&id_priv->lock);
3149	list_add(&mc->list, &id_priv->mc_list);
3150	spin_unlock(&id_priv->lock);
3151
3152	switch (rdma_node_get_transport(id->device->node_type)) {
3153	case RDMA_TRANSPORT_IB:
3154		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3155		case IB_LINK_LAYER_INFINIBAND:
3156			ret = cma_join_ib_multicast(id_priv, mc);
3157			break;
3158		case IB_LINK_LAYER_ETHERNET:
3159			kref_init(&mc->mcref);
3160			ret = cma_iboe_join_multicast(id_priv, mc);
3161			break;
3162		default:
3163			ret = -EINVAL;
3164		}
3165		break;
3166	default:
3167		ret = -ENOSYS;
3168		break;
3169	}
3170
3171	if (ret) {
3172		spin_lock_irq(&id_priv->lock);
3173		list_del(&mc->list);
3174		spin_unlock_irq(&id_priv->lock);
3175		kfree(mc);
3176	}
3177	return ret;
3178}
3179EXPORT_SYMBOL(rdma_join_multicast);
3180
3181void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3182{
3183	struct rdma_id_private *id_priv;
3184	struct cma_multicast *mc;
3185
3186	id_priv = container_of(id, struct rdma_id_private, id);
3187	spin_lock_irq(&id_priv->lock);
3188	list_for_each_entry(mc, &id_priv->mc_list, list) {
3189		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
3190			list_del(&mc->list);
3191			spin_unlock_irq(&id_priv->lock);
3192
3193			if (id->qp)
3194				ib_detach_mcast(id->qp,
3195						&mc->multicast.ib->rec.mgid,
3196						be16_to_cpu(mc->multicast.ib->rec.mlid));
3197			if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
3198				switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3199				case IB_LINK_LAYER_INFINIBAND:
3200					ib_sa_free_multicast(mc->multicast.ib);
3201					kfree(mc);
3202					break;
3203				case IB_LINK_LAYER_ETHERNET:
3204					kref_put(&mc->mcref, release_mc);
3205					break;
3206				default:
3207					break;
3208				}
3209			}
3210			return;
3211		}
3212	}
3213	spin_unlock_irq(&id_priv->lock);
3214}
3215EXPORT_SYMBOL(rdma_leave_multicast);
3216
3217static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
3218{
3219	struct rdma_dev_addr *dev_addr;
3220	struct cma_ndev_work *work;
3221
3222	dev_addr = &id_priv->id.route.addr.dev_addr;
3223
3224	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
3225	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
3226		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
3227		       ndev->name, &id_priv->id);
3228		work = kzalloc(sizeof *work, GFP_KERNEL);
3229		if (!work)
3230			return -ENOMEM;
3231
3232		INIT_WORK(&work->work, cma_ndev_work_handler);
3233		work->id = id_priv;
3234		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
3235		atomic_inc(&id_priv->refcount);
3236		queue_work(cma_wq, &work->work);
3237	}
3238
3239	return 0;
3240}
3241
3242static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
3243			       void *ctx)
3244{
3245	struct net_device *ndev = (struct net_device *)ctx;
3246	struct cma_device *cma_dev;
3247	struct rdma_id_private *id_priv;
3248	int ret = NOTIFY_DONE;
3249
3250	if (dev_net(ndev) != &init_net)
3251		return NOTIFY_DONE;
3252
3253	if (event != NETDEV_BONDING_FAILOVER)
3254		return NOTIFY_DONE;
3255
3256	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
3257		return NOTIFY_DONE;
3258
3259	mutex_lock(&lock);
3260	list_for_each_entry(cma_dev, &dev_list, list)
3261		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
3262			ret = cma_netdev_change(ndev, id_priv);
3263			if (ret)
3264				goto out;
3265		}
3266
3267out:
3268	mutex_unlock(&lock);
3269	return ret;
3270}
3271
3272static struct notifier_block cma_nb = {
3273	.notifier_call = cma_netdev_callback
3274};
3275
3276static void cma_add_one(struct ib_device *device)
3277{
3278	struct cma_device *cma_dev;
3279	struct rdma_id_private *id_priv;
3280
3281	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
3282	if (!cma_dev)
3283		return;
3284
3285	cma_dev->device = device;
3286
3287	init_completion(&cma_dev->comp);
3288	atomic_set(&cma_dev->refcount, 1);
3289	INIT_LIST_HEAD(&cma_dev->id_list);
3290	ib_set_client_data(device, &cma_client, cma_dev);
3291
3292	mutex_lock(&lock);
3293	list_add_tail(&cma_dev->list, &dev_list);
3294	list_for_each_entry(id_priv, &listen_any_list, list)
3295		cma_listen_on_dev(id_priv, cma_dev);
3296	mutex_unlock(&lock);
3297}
3298
3299static int cma_remove_id_dev(struct rdma_id_private *id_priv)
3300{
3301	struct rdma_cm_event event;
3302	enum rdma_cm_state state;
3303	int ret = 0;
3304
3305	/* Record that we want to remove the device */
3306	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
3307	if (state == RDMA_CM_DESTROYING)
3308		return 0;
3309
3310	cma_cancel_operation(id_priv, state);
3311	mutex_lock(&id_priv->handler_mutex);
3312
3313	/* Check for destruction from another callback. */
3314	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
3315		goto out;
3316
3317	memset(&event, 0, sizeof event);
3318	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
3319	ret = id_priv->id.event_handler(&id_priv->id, &event);
3320out:
3321	mutex_unlock(&id_priv->handler_mutex);
3322	return ret;
3323}
3324
3325static void cma_process_remove(struct cma_device *cma_dev)
3326{
3327	struct rdma_id_private *id_priv;
3328	int ret;
3329
3330	mutex_lock(&lock);
3331	while (!list_empty(&cma_dev->id_list)) {
3332		id_priv = list_entry(cma_dev->id_list.next,
3333				     struct rdma_id_private, list);
3334
3335		list_del(&id_priv->listen_list);
3336		list_del_init(&id_priv->list);
3337		atomic_inc(&id_priv->refcount);
3338		mutex_unlock(&lock);
3339
3340		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
3341		cma_deref_id(id_priv);
3342		if (ret)
3343			rdma_destroy_id(&id_priv->id);
3344
3345		mutex_lock(&lock);
3346	}
3347	mutex_unlock(&lock);
3348
3349	cma_deref_dev(cma_dev);
3350	wait_for_completion(&cma_dev->comp);
3351}
3352
3353static void cma_remove_one(struct ib_device *device)
3354{
3355	struct cma_device *cma_dev;
3356
3357	cma_dev = ib_get_client_data(device, &cma_client);
3358	if (!cma_dev)
3359		return;
3360
3361	mutex_lock(&lock);
3362	list_del(&cma_dev->list);
3363	mutex_unlock(&lock);
3364
3365	cma_process_remove(cma_dev);
3366	kfree(cma_dev);
3367}
3368
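/*
 * Netlink dump callback for RDMA_NL_RDMA_CM_ID_STATS: walk every cma
 * device and every id attached to it, emitting one message per id with
 * its addresses, owner pid, port space, CM state and QP number.
 * cb->args[] records where the previous dump pass stopped.
 */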
3369static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
3370{
3371	struct nlmsghdr *nlh;
3372	struct rdma_cm_id_stats *id_stats;
3373	struct rdma_id_private *id_priv;
3374	struct rdma_cm_id *id = NULL;
3375	struct cma_device *cma_dev;
3376	int i_dev = 0, i_id = 0;
3377
3378	/*
3379	 * We export all of the IDs as a sequence of messages.  Each
3380	 * ID gets its own netlink message.
3381	 */
3382	mutex_lock(&lock);
3383
3384	list_for_each_entry(cma_dev, &dev_list, list) {
3385		if (i_dev < cb->args[0]) {
3386			i_dev++;
3387			continue;
3388		}
3389
3390		i_id = 0;
3391		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
3392			if (i_id < cb->args[1]) {
3393				i_id++;
3394				continue;
3395			}
3396
3397			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
3398						sizeof *id_stats, RDMA_NL_RDMA_CM,
3399						RDMA_NL_RDMA_CM_ID_STATS);
3400			if (!id_stats)
3401				goto out;
3402
3403			memset(id_stats, 0, sizeof *id_stats);
3404			id = &id_priv->id;
3405			id_stats->node_type = id->route.addr.dev_addr.dev_type;
3406			id_stats->port_num = id->port_num;
3407			id_stats->bound_dev_if =
3408				id->route.addr.dev_addr.bound_dev_if;
3409
3410			if (id->route.addr.src_addr.ss_family == AF_INET) {
3411				if (ibnl_put_attr(skb, nlh,
3412						  sizeof(struct sockaddr_in),
3413						  &id->route.addr.src_addr,
3414						  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) {
3415					goto out;
3416				}
3417				if (ibnl_put_attr(skb, nlh,
3418						  sizeof(struct sockaddr_in),
3419						  &id->route.addr.dst_addr,
3420						  RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
3421					goto out;
3422				}
3423			} else if (id->route.addr.src_addr.ss_family == AF_INET6) {
3424				if (ibnl_put_attr(skb, nlh,
3425						  sizeof(struct sockaddr_in6),
3426						  &id->route.addr.src_addr,
3427						  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) {
3428					goto out;
3429				}
3430				if (ibnl_put_attr(skb, nlh,
3431						  sizeof(struct sockaddr_in6),
3432						  &id->route.addr.dst_addr,
3433						  RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
3434					goto out;
3435				}
3436			}
3437
3438			id_stats->pid		= id_priv->owner;
3439			id_stats->port_space	= id->ps;
3440			id_stats->cm_state	= id_priv->state;
3441			id_stats->qp_num	= id_priv->qp_num;
3442			id_stats->qp_type	= id->qp_type;
3443
3444			i_id++;
3445		}
3446
3447		cb->args[1] = 0;
3448		i_dev++;
3449	}
3450
3451out:
3452	mutex_unlock(&lock);
3453	cb->args[0] = i_dev;
3454	cb->args[1] = i_id;
3455
3456	return skb->len;
3457}
3458
3459static const struct ibnl_client_cbs cma_cb_table[] = {
3460	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats },
3461};
3462
3463static int __init cma_init(void)
3464{
3465	int ret;
3466
3467	cma_wq = create_singlethread_workqueue("rdma_cm");
3468	if (!cma_wq)
3469		return -ENOMEM;
3470
3471	ib_sa_register_client(&sa_client);
3472	rdma_addr_register_client(&addr_client);
3473	register_netdevice_notifier(&cma_nb);
3474
3475	ret = ib_register_client(&cma_client);
3476	if (ret)
3477		goto err;
3478
3479	if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
3480		printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
3481
3482	return 0;
3483
3484err:
3485	unregister_netdevice_notifier(&cma_nb);
3486	rdma_addr_unregister_client(&addr_client);
3487	ib_sa_unregister_client(&sa_client);
3488	destroy_workqueue(cma_wq);
3489	return ret;
3490}
3491
3492static void __exit cma_cleanup(void)
3493{
3494	ibnl_remove_client(RDMA_NL_RDMA_CM);
3495	ib_unregister_client(&cma_client);
3496	unregister_netdevice_notifier(&cma_nb);
3497	rdma_addr_unregister_client(&addr_client);
3498	ib_sa_unregister_client(&sa_client);
3499	destroy_workqueue(cma_wq);
3500	idr_destroy(&sdp_ps);
3501	idr_destroy(&tcp_ps);
3502	idr_destroy(&udp_ps);
3503	idr_destroy(&ipoib_ps);
3504	idr_destroy(&ib_ps);
3505}
3506
3507module_init(cma_init);
3508module_exit(cma_cleanup);
3509