cma.c revision a9bb79128aa659f97b774b97c9bb1bdc74444595
1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/completion.h>
37#include <linux/in.h>
38#include <linux/in6.h>
39#include <linux/mutex.h>
40#include <linux/random.h>
41#include <linux/idr.h>
42#include <linux/inetdevice.h>
43#include <linux/slab.h>
44
45#include <net/tcp.h>
46#include <net/ipv6.h>
47
48#include <rdma/rdma_cm.h>
49#include <rdma/rdma_cm_ib.h>
50#include <rdma/ib_cache.h>
51#include <rdma/ib_cm.h>
52#include <rdma/ib_sa.h>
53#include <rdma/iw_cm.h>
54
55MODULE_AUTHOR("Sean Hefty");
56MODULE_DESCRIPTION("Generic RDMA CM Agent");
57MODULE_LICENSE("Dual BSD/GPL");
58
59#define CMA_CM_RESPONSE_TIMEOUT 20
60#define CMA_MAX_CM_RETRIES 15
61#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
62#define CMA_IBOE_PACKET_LIFETIME 18
63
64static void cma_add_one(struct ib_device *device);
65static void cma_remove_one(struct ib_device *device);
66
67static struct ib_client cma_client = {
68	.name   = "cma",
69	.add    = cma_add_one,
70	.remove = cma_remove_one
71};
72
73static struct ib_sa_client sa_client;
74static struct rdma_addr_client addr_client;
75static LIST_HEAD(dev_list);
76static LIST_HEAD(listen_any_list);
77static DEFINE_MUTEX(lock);
78static struct workqueue_struct *cma_wq;
79static DEFINE_IDR(sdp_ps);
80static DEFINE_IDR(tcp_ps);
81static DEFINE_IDR(udp_ps);
82static DEFINE_IDR(ipoib_ps);
83
84struct cma_device {
85	struct list_head	list;
86	struct ib_device	*device;
87	struct completion	comp;
88	atomic_t		refcount;
89	struct list_head	id_list;
90};
91
92enum cma_state {
93	CMA_IDLE,
94	CMA_ADDR_QUERY,
95	CMA_ADDR_RESOLVED,
96	CMA_ROUTE_QUERY,
97	CMA_ROUTE_RESOLVED,
98	CMA_CONNECT,
99	CMA_DISCONNECT,
100	CMA_ADDR_BOUND,
101	CMA_LISTEN,
102	CMA_DEVICE_REMOVAL,
103	CMA_DESTROYING
104};
105
106struct rdma_bind_list {
107	struct idr		*ps;
108	struct hlist_head	owners;
109	unsigned short		port;
110};
111
112/*
113 * Device removal can occur at any time, so we need extra handling to
114 * serialize notifying the user of device removal with other callbacks.
115 * We do this by disabling removal notification while a callback is in progress,
116 * and reporting it after the callback completes.
117 */
118struct rdma_id_private {
119	struct rdma_cm_id	id;
120
121	struct rdma_bind_list	*bind_list;
122	struct hlist_node	node;
123	struct list_head	list; /* listen_any_list or cma_device.list */
124	struct list_head	listen_list; /* per device listens */
125	struct cma_device	*cma_dev;
126	struct list_head	mc_list;
127
128	int			internal_id;
129	enum cma_state		state;
130	spinlock_t		lock;
131	struct mutex		qp_mutex;
132
133	struct completion	comp;
134	atomic_t		refcount;
135	struct mutex		handler_mutex;
136
137	int			backlog;
138	int			timeout_ms;
139	struct ib_sa_query	*query;
140	int			query_id;
141	union {
142		struct ib_cm_id	*ib;
143		struct iw_cm_id	*iw;
144	} cm_id;
145
146	u32			seq_num;
147	u32			qkey;
148	u32			qp_num;
149	u8			srq;
150	u8			tos;
151	u8			reuseaddr;
152};
153
154struct cma_multicast {
155	struct rdma_id_private *id_priv;
156	union {
157		struct ib_sa_multicast *ib;
158	} multicast;
159	struct list_head	list;
160	void			*context;
161	struct sockaddr_storage	addr;
162	struct kref		mcref;
163};
164
165struct cma_work {
166	struct work_struct	work;
167	struct rdma_id_private	*id;
168	enum cma_state		old_state;
169	enum cma_state		new_state;
170	struct rdma_cm_event	event;
171};
172
173struct cma_ndev_work {
174	struct work_struct	work;
175	struct rdma_id_private	*id;
176	struct rdma_cm_event	event;
177};
178
179struct iboe_mcast_work {
180	struct work_struct	 work;
181	struct rdma_id_private	*id;
182	struct cma_multicast	*mc;
183};
184
185union cma_ip_addr {
186	struct in6_addr ip6;
187	struct {
188		__be32 pad[3];
189		__be32 addr;
190	} ip4;
191};
192
193struct cma_hdr {
194	u8 cma_version;
195	u8 ip_version;	/* IP version: 7:4 */
196	__be16 port;
197	union cma_ip_addr src_addr;
198	union cma_ip_addr dst_addr;
199};
200
201struct sdp_hh {
202	u8 bsdh[16];
203	u8 sdp_version; /* Major version: 7:4 */
204	u8 ip_version;	/* IP version: 7:4 */
205	u8 sdp_specific1[10];
206	__be16 port;
207	__be16 sdp_specific2;
208	union cma_ip_addr src_addr;
209	union cma_ip_addr dst_addr;
210};
211
212struct sdp_hah {
213	u8 bsdh[16];
214	u8 sdp_version;
215};
216
217#define CMA_VERSION 0x00
218#define SDP_MAJ_VERSION 0x2
219
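/*
 * State helpers: cma_comp() tests the current state, cma_comp_exch()
 * atomically moves from 'comp' to 'exch' only if the id is currently in
 * 'comp', and cma_exch() unconditionally installs a new state and returns
 * the old one.  All three take id_priv->lock.
 */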
220static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
221{
222	unsigned long flags;
223	int ret;
224
225	spin_lock_irqsave(&id_priv->lock, flags);
226	ret = (id_priv->state == comp);
227	spin_unlock_irqrestore(&id_priv->lock, flags);
228	return ret;
229}
230
231static int cma_comp_exch(struct rdma_id_private *id_priv,
232			 enum cma_state comp, enum cma_state exch)
233{
234	unsigned long flags;
235	int ret;
236
237	spin_lock_irqsave(&id_priv->lock, flags);
238	if ((ret = (id_priv->state == comp)))
239		id_priv->state = exch;
240	spin_unlock_irqrestore(&id_priv->lock, flags);
241	return ret;
242}
243
244static enum cma_state cma_exch(struct rdma_id_private *id_priv,
245			       enum cma_state exch)
246{
247	unsigned long flags;
248	enum cma_state old;
249
250	spin_lock_irqsave(&id_priv->lock, flags);
251	old = id_priv->state;
252	id_priv->state = exch;
253	spin_unlock_irqrestore(&id_priv->lock, flags);
254	return old;
255}
256
257static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
258{
259	return hdr->ip_version >> 4;
260}
261
262static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
263{
264	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
265}
266
267static inline u8 sdp_get_majv(u8 sdp_version)
268{
269	return sdp_version >> 4;
270}
271
272static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
273{
274	return hh->ip_version >> 4;
275}
276
277static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
278{
279	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
280}
281
282static inline int cma_is_ud_ps(enum rdma_port_space ps)
283{
284	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
285}
286
287static void cma_attach_to_dev(struct rdma_id_private *id_priv,
288			      struct cma_device *cma_dev)
289{
290	atomic_inc(&cma_dev->refcount);
291	id_priv->cma_dev = cma_dev;
292	id_priv->id.device = cma_dev->device;
293	id_priv->id.route.addr.dev_addr.transport =
294		rdma_node_get_transport(cma_dev->device->node_type);
295	list_add_tail(&id_priv->list, &cma_dev->id_list);
296}
297
298static inline void cma_deref_dev(struct cma_device *cma_dev)
299{
300	if (atomic_dec_and_test(&cma_dev->refcount))
301		complete(&cma_dev->comp);
302}
303
304static inline void release_mc(struct kref *kref)
305{
306	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
307
308	kfree(mc->multicast.ib);
309	kfree(mc);
310}
311
312static void cma_release_dev(struct rdma_id_private *id_priv)
313{
314	mutex_lock(&lock);
315	list_del(&id_priv->list);
316	cma_deref_dev(id_priv->cma_dev);
317	id_priv->cma_dev = NULL;
318	mutex_unlock(&lock);
319}
320
321static int cma_set_qkey(struct rdma_id_private *id_priv)
322{
323	struct ib_sa_mcmember_rec rec;
324	int ret = 0;
325
326	if (id_priv->qkey)
327		return 0;
328
329	switch (id_priv->id.ps) {
330	case RDMA_PS_UDP:
331		id_priv->qkey = RDMA_UDP_QKEY;
332		break;
333	case RDMA_PS_IPOIB:
334		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
335		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
336					     id_priv->id.port_num, &rec.mgid,
337					     &rec);
338		if (!ret)
339			id_priv->qkey = be32_to_cpu(rec.qkey);
340		break;
341	default:
342		break;
343	}
344	return ret;
345}
346
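/*
 * Scan the GID table of @port_num on @device for @gid.  Returns 0 if the
 * GID is present, -EAGAIN if it is not, and 1 if the port or GID query
 * itself fails.
 */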
347static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
348{
349	int i;
350	int err;
351	struct ib_port_attr props;
352	union ib_gid tmp;
353
354	err = ib_query_port(device, port_num, &props);
355	if (err)
356		return 1;
357
358	for (i = 0; i < props.gid_tbl_len; ++i) {
359		err = ib_query_gid(device, port_num, i, &tmp);
360		if (err)
361			return 1;
362		if (!memcmp(&tmp, gid, sizeof tmp))
363			return 0;
364	}
365
366	return -EAGAIN;
367}
368
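/*
 * Bind the id to the first registered device/port whose link layer matches
 * the resolved address and whose GID table contains the id's source GID
 * (the IBoE-format GID is used for Ethernet/RoCE ports).  Takes the global
 * lock; attaches to the matching cma_device on success.
 */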
369static int cma_acquire_dev(struct rdma_id_private *id_priv)
370{
371	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
372	struct cma_device *cma_dev;
373	union ib_gid gid, iboe_gid;
374	int ret = -ENODEV;
375	u8 port;
376	enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
377		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
378
379	mutex_lock(&lock);
380	iboe_addr_get_sgid(dev_addr, &iboe_gid);
381	memcpy(&gid, dev_addr->src_dev_addr +
382	       rdma_addr_gid_offset(dev_addr), sizeof gid);
383	list_for_each_entry(cma_dev, &dev_list, list) {
384		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
385			if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
386				if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
387				    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
388					ret = find_gid_port(cma_dev->device, &iboe_gid, port);
389				else
390					ret = find_gid_port(cma_dev->device, &gid, port);
391
392				if (!ret) {
393					id_priv->id.port_num = port;
394					goto out;
395				} else if (ret == 1)
396					break;
397			}
398		}
399	}
400
401out:
402	if (!ret)
403		cma_attach_to_dev(id_priv, cma_dev);
404
405	mutex_unlock(&lock);
406	return ret;
407}
408
409static void cma_deref_id(struct rdma_id_private *id_priv)
410{
411	if (atomic_dec_and_test(&id_priv->refcount))
412		complete(&id_priv->comp);
413}
414
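/*
 * Enter a callback: grab handler_mutex and verify the id is still in the
 * expected state.  Returns 0 with handler_mutex held on success; on a state
 * mismatch the mutex is dropped and -EINVAL is returned so the callback can
 * bail out.
 */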
415static int cma_disable_callback(struct rdma_id_private *id_priv,
416			      enum cma_state state)
417{
418	mutex_lock(&id_priv->handler_mutex);
419	if (id_priv->state != state) {
420		mutex_unlock(&id_priv->handler_mutex);
421		return -EINVAL;
422	}
423	return 0;
424}
425
426static int cma_has_cm_dev(struct rdma_id_private *id_priv)
427{
428	return (id_priv->id.device && id_priv->cm_id.ib);
429}
430
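/*
 * Allocate an rdma_cm_id in the CMA_IDLE state.  The id must be released
 * with rdma_destroy_id().  A minimal client-side sketch (identifiers,
 * timeouts, and error handling are illustrative only):
 *
 *	id = rdma_create_id(my_handler, my_ctx, RDMA_PS_TCP);
 *	rdma_resolve_addr(id, NULL, dst_addr, 2000);
 *	... handler sees RDMA_CM_EVENT_ADDR_RESOLVED ...
 *	rdma_resolve_route(id, 2000);
 *	... handler sees RDMA_CM_EVENT_ROUTE_RESOLVED ...
 *	rdma_create_qp(id, pd, &init_attr);
 *	rdma_connect(id, &conn_param);
 */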
431struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
432				  void *context, enum rdma_port_space ps)
433{
434	struct rdma_id_private *id_priv;
435
436	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
437	if (!id_priv)
438		return ERR_PTR(-ENOMEM);
439
440	id_priv->state = CMA_IDLE;
441	id_priv->id.context = context;
442	id_priv->id.event_handler = event_handler;
443	id_priv->id.ps = ps;
444	spin_lock_init(&id_priv->lock);
445	mutex_init(&id_priv->qp_mutex);
446	init_completion(&id_priv->comp);
447	atomic_set(&id_priv->refcount, 1);
448	mutex_init(&id_priv->handler_mutex);
449	INIT_LIST_HEAD(&id_priv->listen_list);
450	INIT_LIST_HEAD(&id_priv->mc_list);
451	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
452
453	return &id_priv->id;
454}
455EXPORT_SYMBOL(rdma_create_id);
456
457static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
458{
459	struct ib_qp_attr qp_attr;
460	int qp_attr_mask, ret;
461
462	qp_attr.qp_state = IB_QPS_INIT;
463	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
464	if (ret)
465		return ret;
466
467	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
468	if (ret)
469		return ret;
470
471	qp_attr.qp_state = IB_QPS_RTR;
472	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
473	if (ret)
474		return ret;
475
476	qp_attr.qp_state = IB_QPS_RTS;
477	qp_attr.sq_psn = 0;
478	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
479
480	return ret;
481}
482
483static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
484{
485	struct ib_qp_attr qp_attr;
486	int qp_attr_mask, ret;
487
488	qp_attr.qp_state = IB_QPS_INIT;
489	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
490	if (ret)
491		return ret;
492
493	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
494}
495
496int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
497		   struct ib_qp_init_attr *qp_init_attr)
498{
499	struct rdma_id_private *id_priv;
500	struct ib_qp *qp;
501	int ret;
502
503	id_priv = container_of(id, struct rdma_id_private, id);
504	if (id->device != pd->device)
505		return -EINVAL;
506
507	qp = ib_create_qp(pd, qp_init_attr);
508	if (IS_ERR(qp))
509		return PTR_ERR(qp);
510
511	if (cma_is_ud_ps(id_priv->id.ps))
512		ret = cma_init_ud_qp(id_priv, qp);
513	else
514		ret = cma_init_conn_qp(id_priv, qp);
515	if (ret)
516		goto err;
517
518	id->qp = qp;
519	id_priv->qp_num = qp->qp_num;
520	id_priv->srq = (qp->srq != NULL);
521	return 0;
522err:
523	ib_destroy_qp(qp);
524	return ret;
525}
526EXPORT_SYMBOL(rdma_create_qp);
527
528void rdma_destroy_qp(struct rdma_cm_id *id)
529{
530	struct rdma_id_private *id_priv;
531
532	id_priv = container_of(id, struct rdma_id_private, id);
533	mutex_lock(&id_priv->qp_mutex);
534	ib_destroy_qp(id_priv->id.qp);
535	id_priv->id.qp = NULL;
536	mutex_unlock(&id_priv->qp_mutex);
537}
538EXPORT_SYMBOL(rdma_destroy_qp);
539
540static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
541			     struct rdma_conn_param *conn_param)
542{
543	struct ib_qp_attr qp_attr;
544	int qp_attr_mask, ret;
545
546	mutex_lock(&id_priv->qp_mutex);
547	if (!id_priv->id.qp) {
548		ret = 0;
549		goto out;
550	}
551
552	/* Need to update QP attributes from default values. */
553	qp_attr.qp_state = IB_QPS_INIT;
554	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
555	if (ret)
556		goto out;
557
558	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
559	if (ret)
560		goto out;
561
562	qp_attr.qp_state = IB_QPS_RTR;
563	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
564	if (ret)
565		goto out;
566
567	if (conn_param)
568		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
569	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
570out:
571	mutex_unlock(&id_priv->qp_mutex);
572	return ret;
573}
574
575static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
576			     struct rdma_conn_param *conn_param)
577{
578	struct ib_qp_attr qp_attr;
579	int qp_attr_mask, ret;
580
581	mutex_lock(&id_priv->qp_mutex);
582	if (!id_priv->id.qp) {
583		ret = 0;
584		goto out;
585	}
586
587	qp_attr.qp_state = IB_QPS_RTS;
588	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
589	if (ret)
590		goto out;
591
592	if (conn_param)
593		qp_attr.max_rd_atomic = conn_param->initiator_depth;
594	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
595out:
596	mutex_unlock(&id_priv->qp_mutex);
597	return ret;
598}
599
600static int cma_modify_qp_err(struct rdma_id_private *id_priv)
601{
602	struct ib_qp_attr qp_attr;
603	int ret;
604
605	mutex_lock(&id_priv->qp_mutex);
606	if (!id_priv->id.qp) {
607		ret = 0;
608		goto out;
609	}
610
611	qp_attr.qp_state = IB_QPS_ERR;
612	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
613out:
614	mutex_unlock(&id_priv->qp_mutex);
615	return ret;
616}
617
618static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
619			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
620{
621	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
622	int ret;
623	u16 pkey;
624
625	if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
626	    IB_LINK_LAYER_INFINIBAND)
627		pkey = ib_addr_get_pkey(dev_addr);
628	else
629		pkey = 0xffff;
630
631	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
632				  pkey, &qp_attr->pkey_index);
633	if (ret)
634		return ret;
635
636	qp_attr->port_num = id_priv->id.port_num;
637	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
638
639	if (cma_is_ud_ps(id_priv->id.ps)) {
640		ret = cma_set_qkey(id_priv);
641		if (ret)
642			return ret;
643
644		qp_attr->qkey = id_priv->qkey;
645		*qp_attr_mask |= IB_QP_QKEY;
646	} else {
647		qp_attr->qp_access_flags = 0;
648		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
649	}
650	return 0;
651}
652
653int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
654		       int *qp_attr_mask)
655{
656	struct rdma_id_private *id_priv;
657	int ret = 0;
658
659	id_priv = container_of(id, struct rdma_id_private, id);
660	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
661	case RDMA_TRANSPORT_IB:
662		if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
663			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
664		else
665			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
666						 qp_attr_mask);
667		if (qp_attr->qp_state == IB_QPS_RTR)
668			qp_attr->rq_psn = id_priv->seq_num;
669		break;
670	case RDMA_TRANSPORT_IWARP:
671		if (!id_priv->cm_id.iw) {
672			qp_attr->qp_access_flags = 0;
673			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
674		} else
675			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
676						 qp_attr_mask);
677		break;
678	default:
679		ret = -ENOSYS;
680		break;
681	}
682
683	return ret;
684}
685EXPORT_SYMBOL(rdma_init_qp_attr);
686
687static inline int cma_zero_addr(struct sockaddr *addr)
688{
689	struct in6_addr *ip6;
690
691	if (addr->sa_family == AF_INET)
692		return ipv4_is_zeronet(
693			((struct sockaddr_in *)addr)->sin_addr.s_addr);
694	else {
695		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
696		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
697			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
698	}
699}
700
701static inline int cma_loopback_addr(struct sockaddr *addr)
702{
703	if (addr->sa_family == AF_INET)
704		return ipv4_is_loopback(
705			((struct sockaddr_in *) addr)->sin_addr.s_addr);
706	else
707		return ipv6_addr_loopback(
708			&((struct sockaddr_in6 *) addr)->sin6_addr);
709}
710
711static inline int cma_any_addr(struct sockaddr *addr)
712{
713	return cma_zero_addr(addr) || cma_loopback_addr(addr);
714}
715
716static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
717{
718	if (src->sa_family != dst->sa_family)
719		return -1;
720
721	switch (src->sa_family) {
722	case AF_INET:
723		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
724		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
725	default:
726		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
727				     &((struct sockaddr_in6 *) dst)->sin6_addr);
728	}
729}
730
731static inline __be16 cma_port(struct sockaddr *addr)
732{
733	if (addr->sa_family == AF_INET)
734		return ((struct sockaddr_in *) addr)->sin_port;
735	else
736		return ((struct sockaddr_in6 *) addr)->sin6_port;
737}
738
739static inline int cma_any_port(struct sockaddr *addr)
740{
741	return !cma_port(addr);
742}
743
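/*
 * Pull the IP version, port, and source/destination addresses out of the
 * private data header carried in the CM request.  SDP uses its own hello
 * header layout; every other port space uses struct cma_hdr.
 */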
744static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
745			    u8 *ip_ver, __be16 *port,
746			    union cma_ip_addr **src, union cma_ip_addr **dst)
747{
748	switch (ps) {
749	case RDMA_PS_SDP:
750		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
751		    SDP_MAJ_VERSION)
752			return -EINVAL;
753
754		*ip_ver	= sdp_get_ip_ver(hdr);
755		*port	= ((struct sdp_hh *) hdr)->port;
756		*src	= &((struct sdp_hh *) hdr)->src_addr;
757		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
758		break;
759	default:
760		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
761			return -EINVAL;
762
763		*ip_ver	= cma_get_ip_ver(hdr);
764		*port	= ((struct cma_hdr *) hdr)->port;
765		*src	= &((struct cma_hdr *) hdr)->src_addr;
766		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
767		break;
768	}
769
770	if (*ip_ver != 4 && *ip_ver != 6)
771		return -EINVAL;
772	return 0;
773}
774
775static void cma_save_net_info(struct rdma_addr *addr,
776			      struct rdma_addr *listen_addr,
777			      u8 ip_ver, __be16 port,
778			      union cma_ip_addr *src, union cma_ip_addr *dst)
779{
780	struct sockaddr_in *listen4, *ip4;
781	struct sockaddr_in6 *listen6, *ip6;
782
783	switch (ip_ver) {
784	case 4:
785		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
786		ip4 = (struct sockaddr_in *) &addr->src_addr;
787		ip4->sin_family = listen4->sin_family;
788		ip4->sin_addr.s_addr = dst->ip4.addr;
789		ip4->sin_port = listen4->sin_port;
790
791		ip4 = (struct sockaddr_in *) &addr->dst_addr;
792		ip4->sin_family = listen4->sin_family;
793		ip4->sin_addr.s_addr = src->ip4.addr;
794		ip4->sin_port = port;
795		break;
796	case 6:
797		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
798		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
799		ip6->sin6_family = listen6->sin6_family;
800		ip6->sin6_addr = dst->ip6;
801		ip6->sin6_port = listen6->sin6_port;
802
803		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
804		ip6->sin6_family = listen6->sin6_family;
805		ip6->sin6_addr = src->ip6;
806		ip6->sin6_port = port;
807		break;
808	default:
809		break;
810	}
811}
812
813static inline int cma_user_data_offset(enum rdma_port_space ps)
814{
815	switch (ps) {
816	case RDMA_PS_SDP:
817		return 0;
818	default:
819		return sizeof(struct cma_hdr);
820	}
821}
822
823static void cma_cancel_route(struct rdma_id_private *id_priv)
824{
825	switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
826	case IB_LINK_LAYER_INFINIBAND:
827		if (id_priv->query)
828			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
829		break;
830	default:
831		break;
832	}
833}
834
835static void cma_cancel_listens(struct rdma_id_private *id_priv)
836{
837	struct rdma_id_private *dev_id_priv;
838
839	/*
840	 * Remove from listen_any_list to prevent added devices from spawning
841	 * additional listen requests.
842	 */
843	mutex_lock(&lock);
844	list_del(&id_priv->list);
845
846	while (!list_empty(&id_priv->listen_list)) {
847		dev_id_priv = list_entry(id_priv->listen_list.next,
848					 struct rdma_id_private, listen_list);
849		/* sync with device removal to avoid duplicate destruction */
850		list_del_init(&dev_id_priv->list);
851		list_del(&dev_id_priv->listen_list);
852		mutex_unlock(&lock);
853
854		rdma_destroy_id(&dev_id_priv->id);
855		mutex_lock(&lock);
856	}
857	mutex_unlock(&lock);
858}
859
860static void cma_cancel_operation(struct rdma_id_private *id_priv,
861				 enum cma_state state)
862{
863	switch (state) {
864	case CMA_ADDR_QUERY:
865		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
866		break;
867	case CMA_ROUTE_QUERY:
868		cma_cancel_route(id_priv);
869		break;
870	case CMA_LISTEN:
871		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
872				&& !id_priv->cma_dev)
873			cma_cancel_listens(id_priv);
874		break;
875	default:
876		break;
877	}
878}
879
880static void cma_release_port(struct rdma_id_private *id_priv)
881{
882	struct rdma_bind_list *bind_list = id_priv->bind_list;
883
884	if (!bind_list)
885		return;
886
887	mutex_lock(&lock);
888	hlist_del(&id_priv->node);
889	if (hlist_empty(&bind_list->owners)) {
890		idr_remove(bind_list->ps, bind_list->port);
891		kfree(bind_list);
892	}
893	mutex_unlock(&lock);
894}
895
896static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
897{
898	struct cma_multicast *mc;
899
900	while (!list_empty(&id_priv->mc_list)) {
901		mc = container_of(id_priv->mc_list.next,
902				  struct cma_multicast, list);
903		list_del(&mc->list);
904		switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
905		case IB_LINK_LAYER_INFINIBAND:
906			ib_sa_free_multicast(mc->multicast.ib);
907			kfree(mc);
908			break;
909		case IB_LINK_LAYER_ETHERNET:
910			kref_put(&mc->mcref, release_mc);
911			break;
912		default:
913			break;
914		}
915	}
916}
917
918void rdma_destroy_id(struct rdma_cm_id *id)
919{
920	struct rdma_id_private *id_priv;
921	enum cma_state state;
922
923	id_priv = container_of(id, struct rdma_id_private, id);
924	state = cma_exch(id_priv, CMA_DESTROYING);
925	cma_cancel_operation(id_priv, state);
926
927	/*
928	 * Wait for any active callback to finish.  New callbacks will find
929	 * the id_priv state set to destroying and abort.
930	 */
931	mutex_lock(&id_priv->handler_mutex);
932	mutex_unlock(&id_priv->handler_mutex);
933
934	if (id_priv->cma_dev) {
935		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
936		case RDMA_TRANSPORT_IB:
937			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
938				ib_destroy_cm_id(id_priv->cm_id.ib);
939			break;
940		case RDMA_TRANSPORT_IWARP:
941			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
942				iw_destroy_cm_id(id_priv->cm_id.iw);
943			break;
944		default:
945			break;
946		}
947		cma_leave_mc_groups(id_priv);
948		cma_release_dev(id_priv);
949	}
950
951	cma_release_port(id_priv);
952	cma_deref_id(id_priv);
953	wait_for_completion(&id_priv->comp);
954
955	if (id_priv->internal_id)
956		cma_deref_id(id_priv->id.context);
957
958	kfree(id_priv->id.route.path_rec);
959	kfree(id_priv);
960}
961EXPORT_SYMBOL(rdma_destroy_id);
962
963static int cma_rep_recv(struct rdma_id_private *id_priv)
964{
965	int ret;
966
967	ret = cma_modify_qp_rtr(id_priv, NULL);
968	if (ret)
969		goto reject;
970
971	ret = cma_modify_qp_rts(id_priv, NULL);
972	if (ret)
973		goto reject;
974
975	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
976	if (ret)
977		goto reject;
978
979	return 0;
980reject:
981	cma_modify_qp_err(id_priv);
982	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
983		       NULL, 0, NULL, 0);
984	return ret;
985}
986
987static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
988{
989	if (id_priv->id.ps == RDMA_PS_SDP &&
990	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
991	    SDP_MAJ_VERSION)
992		return -EINVAL;
993
994	return 0;
995}
996
997static void cma_set_rep_event_data(struct rdma_cm_event *event,
998				   struct ib_cm_rep_event_param *rep_data,
999				   void *private_data)
1000{
1001	event->param.conn.private_data = private_data;
1002	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
1003	event->param.conn.responder_resources = rep_data->responder_resources;
1004	event->param.conn.initiator_depth = rep_data->initiator_depth;
1005	event->param.conn.flow_control = rep_data->flow_control;
1006	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
1007	event->param.conn.srq = rep_data->srq;
1008	event->param.conn.qp_num = rep_data->remote_qpn;
1009}
1010
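/*
 * IB CM callback for an active/connected id: translate ib_cm events
 * (REP, RTU, DREQ, REJ, ...) into rdma_cm events and report them to the
 * user.  A non-zero return from the user's handler destroys the id.
 */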
1011static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1012{
1013	struct rdma_id_private *id_priv = cm_id->context;
1014	struct rdma_cm_event event;
1015	int ret = 0;
1016
1017	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1018		cma_disable_callback(id_priv, CMA_CONNECT)) ||
1019	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1020		cma_disable_callback(id_priv, CMA_DISCONNECT)))
1021		return 0;
1022
1023	memset(&event, 0, sizeof event);
1024	switch (ib_event->event) {
1025	case IB_CM_REQ_ERROR:
1026	case IB_CM_REP_ERROR:
1027		event.event = RDMA_CM_EVENT_UNREACHABLE;
1028		event.status = -ETIMEDOUT;
1029		break;
1030	case IB_CM_REP_RECEIVED:
1031		event.status = cma_verify_rep(id_priv, ib_event->private_data);
1032		if (event.status)
1033			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1034		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
1035			event.status = cma_rep_recv(id_priv);
1036			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1037						     RDMA_CM_EVENT_ESTABLISHED;
1038		} else
1039			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1040		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1041				       ib_event->private_data);
1042		break;
1043	case IB_CM_RTU_RECEIVED:
1044	case IB_CM_USER_ESTABLISHED:
1045		event.event = RDMA_CM_EVENT_ESTABLISHED;
1046		break;
1047	case IB_CM_DREQ_ERROR:
1048		event.status = -ETIMEDOUT; /* fall through */
1049	case IB_CM_DREQ_RECEIVED:
1050	case IB_CM_DREP_RECEIVED:
1051		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
1052			goto out;
1053		event.event = RDMA_CM_EVENT_DISCONNECTED;
1054		break;
1055	case IB_CM_TIMEWAIT_EXIT:
1056		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
1057		break;
1058	case IB_CM_MRA_RECEIVED:
1059		/* ignore event */
1060		goto out;
1061	case IB_CM_REJ_RECEIVED:
1062		cma_modify_qp_err(id_priv);
1063		event.status = ib_event->param.rej_rcvd.reason;
1064		event.event = RDMA_CM_EVENT_REJECTED;
1065		event.param.conn.private_data = ib_event->private_data;
1066		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1067		break;
1068	default:
1069		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
1070		       ib_event->event);
1071		goto out;
1072	}
1073
1074	ret = id_priv->id.event_handler(&id_priv->id, &event);
1075	if (ret) {
1076		/* Destroy the CM ID by returning a non-zero value. */
1077		id_priv->cm_id.ib = NULL;
1078		cma_exch(id_priv, CMA_DESTROYING);
1079		mutex_unlock(&id_priv->handler_mutex);
1080		rdma_destroy_id(&id_priv->id);
1081		return ret;
1082	}
1083out:
1084	mutex_unlock(&id_priv->handler_mutex);
1085	return ret;
1086}
1087
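/*
 * Build a new rdma_cm_id for an incoming connection request, copying the
 * network info from the request's private data and the path record(s) from
 * the CM event.  Returns NULL on failure.
 */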
1088static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1089					       struct ib_cm_event *ib_event)
1090{
1091	struct rdma_id_private *id_priv;
1092	struct rdma_cm_id *id;
1093	struct rdma_route *rt;
1094	union cma_ip_addr *src, *dst;
1095	__be16 port;
1096	u8 ip_ver;
1097	int ret;
1098
1099	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1100			     &ip_ver, &port, &src, &dst))
1101		goto err;
1102
1103	id = rdma_create_id(listen_id->event_handler, listen_id->context,
1104			    listen_id->ps);
1105	if (IS_ERR(id))
1106		goto err;
1107
1108	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1109			  ip_ver, port, src, dst);
1110
1111	rt = &id->route;
1112	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1113	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1114			       GFP_KERNEL);
1115	if (!rt->path_rec)
1116		goto destroy_id;
1117
1118	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1119	if (rt->num_paths == 2)
1120		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1121
1122	if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
1123		rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1124		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1125		ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
1126	} else {
1127		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
1128					&rt->addr.dev_addr);
1129		if (ret)
1130			goto destroy_id;
1131	}
1132	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1133
1134	id_priv = container_of(id, struct rdma_id_private, id);
1135	id_priv->state = CMA_CONNECT;
1136	return id_priv;
1137
1138destroy_id:
1139	rdma_destroy_id(id);
1140err:
1141	return NULL;
1142}
1143
1144static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1145					      struct ib_cm_event *ib_event)
1146{
1147	struct rdma_id_private *id_priv;
1148	struct rdma_cm_id *id;
1149	union cma_ip_addr *src, *dst;
1150	__be16 port;
1151	u8 ip_ver;
1152	int ret;
1153
1154	id = rdma_create_id(listen_id->event_handler, listen_id->context,
1155			    listen_id->ps);
1156	if (IS_ERR(id))
1157		return NULL;
1158
1159
1160	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1161			     &ip_ver, &port, &src, &dst))
1162		goto err;
1163
1164	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1165			  ip_ver, port, src, dst);
1166
1167	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
1168		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
1169					&id->route.addr.dev_addr);
1170		if (ret)
1171			goto err;
1172	}
1173
1174	id_priv = container_of(id, struct rdma_id_private, id);
1175	id_priv->state = CMA_CONNECT;
1176	return id_priv;
1177err:
1178	rdma_destroy_id(id);
1179	return NULL;
1180}
1181
1182static void cma_set_req_event_data(struct rdma_cm_event *event,
1183				   struct ib_cm_req_event_param *req_data,
1184				   void *private_data, int offset)
1185{
1186	event->param.conn.private_data = private_data + offset;
1187	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1188	event->param.conn.responder_resources = req_data->responder_resources;
1189	event->param.conn.initiator_depth = req_data->initiator_depth;
1190	event->param.conn.flow_control = req_data->flow_control;
1191	event->param.conn.retry_count = req_data->retry_count;
1192	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1193	event->param.conn.srq = req_data->srq;
1194	event->param.conn.qp_num = req_data->remote_qpn;
1195}
1196
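/*
 * IB CM callback for a listening id: create a child id (UD or connected),
 * bind it to a device, and deliver RDMA_CM_EVENT_CONNECT_REQUEST to the
 * user.  For connected services an MRA is sent so the remote CM extends
 * its timeout while the user decides whether to accept.
 */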
1197static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1198{
1199	struct rdma_id_private *listen_id, *conn_id;
1200	struct rdma_cm_event event;
1201	int offset, ret;
1202
1203	listen_id = cm_id->context;
1204	if (cma_disable_callback(listen_id, CMA_LISTEN))
1205		return -ECONNABORTED;
1206
1207	memset(&event, 0, sizeof event);
1208	offset = cma_user_data_offset(listen_id->id.ps);
1209	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1210	if (cma_is_ud_ps(listen_id->id.ps)) {
1211		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1212		event.param.ud.private_data = ib_event->private_data + offset;
1213		event.param.ud.private_data_len =
1214				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1215	} else {
1216		conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1217		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1218				       ib_event->private_data, offset);
1219	}
1220	if (!conn_id) {
1221		ret = -ENOMEM;
1222		goto out;
1223	}
1224
1225	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1226	ret = cma_acquire_dev(conn_id);
1227	if (ret)
1228		goto release_conn_id;
1229
1230	conn_id->cm_id.ib = cm_id;
1231	cm_id->context = conn_id;
1232	cm_id->cm_handler = cma_ib_handler;
1233
1234	/*
1235	 * Protect against the user destroying conn_id from another thread
1236	 * until we're done accessing it.
1237	 */
1238	atomic_inc(&conn_id->refcount);
1239	ret = conn_id->id.event_handler(&conn_id->id, &event);
1240	if (!ret) {
1241		/*
1242		 * Acquire mutex to prevent user executing rdma_destroy_id()
1243		 * while we're accessing the cm_id.
1244		 */
1245		mutex_lock(&lock);
1246		if (cma_comp(conn_id, CMA_CONNECT) &&
1247		    !cma_is_ud_ps(conn_id->id.ps))
1248			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1249		mutex_unlock(&lock);
1250		mutex_unlock(&conn_id->handler_mutex);
1251		cma_deref_id(conn_id);
1252		goto out;
1253	}
1254	cma_deref_id(conn_id);
1255
1256	/* Destroy the CM ID by returning a non-zero value. */
1257	conn_id->cm_id.ib = NULL;
1258
1259release_conn_id:
1260	cma_exch(conn_id, CMA_DESTROYING);
1261	mutex_unlock(&conn_id->handler_mutex);
1262	rdma_destroy_id(&conn_id->id);
1263
1264out:
1265	mutex_unlock(&listen_id->handler_mutex);
1266	return ret;
1267}
1268
1269static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1270{
1271	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
1272}
1273
1274static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1275				 struct ib_cm_compare_data *compare)
1276{
1277	struct cma_hdr *cma_data, *cma_mask;
1278	struct sdp_hh *sdp_data, *sdp_mask;
1279	__be32 ip4_addr;
1280	struct in6_addr ip6_addr;
1281
1282	memset(compare, 0, sizeof *compare);
1283	cma_data = (void *) compare->data;
1284	cma_mask = (void *) compare->mask;
1285	sdp_data = (void *) compare->data;
1286	sdp_mask = (void *) compare->mask;
1287
1288	switch (addr->sa_family) {
1289	case AF_INET:
1290		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1291		if (ps == RDMA_PS_SDP) {
1292			sdp_set_ip_ver(sdp_data, 4);
1293			sdp_set_ip_ver(sdp_mask, 0xF);
1294			sdp_data->dst_addr.ip4.addr = ip4_addr;
1295			sdp_mask->dst_addr.ip4.addr = htonl(~0);
1296		} else {
1297			cma_set_ip_ver(cma_data, 4);
1298			cma_set_ip_ver(cma_mask, 0xF);
1299			cma_data->dst_addr.ip4.addr = ip4_addr;
1300			cma_mask->dst_addr.ip4.addr = htonl(~0);
1301		}
1302		break;
1303	case AF_INET6:
1304		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1305		if (ps == RDMA_PS_SDP) {
1306			sdp_set_ip_ver(sdp_data, 6);
1307			sdp_set_ip_ver(sdp_mask, 0xF);
1308			sdp_data->dst_addr.ip6 = ip6_addr;
1309			memset(&sdp_mask->dst_addr.ip6, 0xFF,
1310			       sizeof sdp_mask->dst_addr.ip6);
1311		} else {
1312			cma_set_ip_ver(cma_data, 6);
1313			cma_set_ip_ver(cma_mask, 0xF);
1314			cma_data->dst_addr.ip6 = ip6_addr;
1315			memset(&cma_mask->dst_addr.ip6, 0xFF,
1316			       sizeof cma_mask->dst_addr.ip6);
1317		}
1318		break;
1319	default:
1320		break;
1321	}
1322}
1323
1324static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1325{
1326	struct rdma_id_private *id_priv = iw_id->context;
1327	struct rdma_cm_event event;
1328	struct sockaddr_in *sin;
1329	int ret = 0;
1330
1331	if (cma_disable_callback(id_priv, CMA_CONNECT))
1332		return 0;
1333
1334	memset(&event, 0, sizeof event);
1335	switch (iw_event->event) {
1336	case IW_CM_EVENT_CLOSE:
1337		event.event = RDMA_CM_EVENT_DISCONNECTED;
1338		break;
1339	case IW_CM_EVENT_CONNECT_REPLY:
1340		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1341		*sin = iw_event->local_addr;
1342		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
1343		*sin = iw_event->remote_addr;
1344		switch (iw_event->status) {
1345		case 0:
1346			event.event = RDMA_CM_EVENT_ESTABLISHED;
1347			break;
1348		case -ECONNRESET:
1349		case -ECONNREFUSED:
1350			event.event = RDMA_CM_EVENT_REJECTED;
1351			break;
1352		case -ETIMEDOUT:
1353			event.event = RDMA_CM_EVENT_UNREACHABLE;
1354			break;
1355		default:
1356			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1357			break;
1358		}
1359		break;
1360	case IW_CM_EVENT_ESTABLISHED:
1361		event.event = RDMA_CM_EVENT_ESTABLISHED;
1362		break;
1363	default:
1364		BUG_ON(1);
1365	}
1366
1367	event.status = iw_event->status;
1368	event.param.conn.private_data = iw_event->private_data;
1369	event.param.conn.private_data_len = iw_event->private_data_len;
1370	ret = id_priv->id.event_handler(&id_priv->id, &event);
1371	if (ret) {
1372		/* Destroy the CM ID by returning a non-zero value. */
1373		id_priv->cm_id.iw = NULL;
1374		cma_exch(id_priv, CMA_DESTROYING);
1375		mutex_unlock(&id_priv->handler_mutex);
1376		rdma_destroy_id(&id_priv->id);
1377		return ret;
1378	}
1379
1380	mutex_unlock(&id_priv->handler_mutex);
1381	return ret;
1382}
1383
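/*
 * iWARP connect request handler: create a child id for the new iw_cm_id,
 * look up the local netdevice, attach to the RDMA device, and deliver
 * RDMA_CM_EVENT_CONNECT_REQUEST to the listener's handler.
 */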
1384static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1385			       struct iw_cm_event *iw_event)
1386{
1387	struct rdma_cm_id *new_cm_id;
1388	struct rdma_id_private *listen_id, *conn_id;
1389	struct sockaddr_in *sin;
1390	struct net_device *dev = NULL;
1391	struct rdma_cm_event event;
1392	int ret;
1393	struct ib_device_attr attr;
1394
1395	listen_id = cm_id->context;
1396	if (cma_disable_callback(listen_id, CMA_LISTEN))
1397		return -ECONNABORTED;
1398
1399	/* Create a new RDMA id for the new IW CM ID */
1400	new_cm_id = rdma_create_id(listen_id->id.event_handler,
1401				   listen_id->id.context,
1402				   RDMA_PS_TCP);
1403	if (IS_ERR(new_cm_id)) {
1404		ret = -ENOMEM;
1405		goto out;
1406	}
1407	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1408	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1409	conn_id->state = CMA_CONNECT;
1410
1411	dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
1412	if (!dev) {
1413		ret = -EADDRNOTAVAIL;
1414		mutex_unlock(&conn_id->handler_mutex);
1415		rdma_destroy_id(new_cm_id);
1416		goto out;
1417	}
1418	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1419	if (ret) {
1420		mutex_unlock(&conn_id->handler_mutex);
1421		rdma_destroy_id(new_cm_id);
1422		goto out;
1423	}
1424
1425	ret = cma_acquire_dev(conn_id);
1426	if (ret) {
1427		mutex_unlock(&conn_id->handler_mutex);
1428		rdma_destroy_id(new_cm_id);
1429		goto out;
1430	}
1431
1432	conn_id->cm_id.iw = cm_id;
1433	cm_id->context = conn_id;
1434	cm_id->cm_handler = cma_iw_handler;
1435
1436	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1437	*sin = iw_event->local_addr;
1438	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1439	*sin = iw_event->remote_addr;
1440
1441	ret = ib_query_device(conn_id->id.device, &attr);
1442	if (ret) {
1443		mutex_unlock(&conn_id->handler_mutex);
1444		rdma_destroy_id(new_cm_id);
1445		goto out;
1446	}
1447
1448	memset(&event, 0, sizeof event);
1449	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1450	event.param.conn.private_data = iw_event->private_data;
1451	event.param.conn.private_data_len = iw_event->private_data_len;
1452	event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1453	event.param.conn.responder_resources = attr.max_qp_rd_atom;
1454
1455	/*
1456	 * Protect against the user destroying conn_id from another thread
1457	 * until we're done accessing it.
1458	 */
1459	atomic_inc(&conn_id->refcount);
1460	ret = conn_id->id.event_handler(&conn_id->id, &event);
1461	if (ret) {
1462		/* User wants to destroy the CM ID */
1463		conn_id->cm_id.iw = NULL;
1464		cma_exch(conn_id, CMA_DESTROYING);
1465		mutex_unlock(&conn_id->handler_mutex);
1466		cma_deref_id(conn_id);
1467		rdma_destroy_id(&conn_id->id);
1468		goto out;
1469	}
1470
1471	mutex_unlock(&conn_id->handler_mutex);
1472	cma_deref_id(conn_id);
1473
1474out:
1475	if (dev)
1476		dev_put(dev);
1477	mutex_unlock(&listen_id->handler_mutex);
1478	return ret;
1479}
1480
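/*
 * Start listening over the IB CM.  Wildcard addresses listen on the service
 * ID alone; specific addresses add a private-data compare mask so only
 * requests destined to that address match.
 */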
1481static int cma_ib_listen(struct rdma_id_private *id_priv)
1482{
1483	struct ib_cm_compare_data compare_data;
1484	struct sockaddr *addr;
1485	__be64 svc_id;
1486	int ret;
1487
1488	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
1489					    id_priv);
1490	if (IS_ERR(id_priv->cm_id.ib))
1491		return PTR_ERR(id_priv->cm_id.ib);
1492
1493	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
1494	svc_id = cma_get_service_id(id_priv->id.ps, addr);
1495	if (cma_any_addr(addr))
1496		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1497	else {
1498		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1499		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1500	}
1501
1502	if (ret) {
1503		ib_destroy_cm_id(id_priv->cm_id.ib);
1504		id_priv->cm_id.ib = NULL;
1505	}
1506
1507	return ret;
1508}
1509
1510static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1511{
1512	int ret;
1513	struct sockaddr_in *sin;
1514
1515	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
1516					    iw_conn_req_handler,
1517					    id_priv);
1518	if (IS_ERR(id_priv->cm_id.iw))
1519		return PTR_ERR(id_priv->cm_id.iw);
1520
1521	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1522	id_priv->cm_id.iw->local_addr = *sin;
1523
1524	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1525
1526	if (ret) {
1527		iw_destroy_cm_id(id_priv->cm_id.iw);
1528		id_priv->cm_id.iw = NULL;
1529	}
1530
1531	return ret;
1532}
1533
1534static int cma_listen_handler(struct rdma_cm_id *id,
1535			      struct rdma_cm_event *event)
1536{
1537	struct rdma_id_private *id_priv = id->context;
1538
1539	id->context = id_priv->id.context;
1540	id->event_handler = id_priv->id.event_handler;
1541	return id_priv->id.event_handler(id, event);
1542}
1543
1544static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1545			      struct cma_device *cma_dev)
1546{
1547	struct rdma_id_private *dev_id_priv;
1548	struct rdma_cm_id *id;
1549	int ret;
1550
1551	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1552	if (IS_ERR(id))
1553		return;
1554
1555	dev_id_priv = container_of(id, struct rdma_id_private, id);
1556
1557	dev_id_priv->state = CMA_ADDR_BOUND;
1558	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1559	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
1560
1561	cma_attach_to_dev(dev_id_priv, cma_dev);
1562	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1563	atomic_inc(&id_priv->refcount);
1564	dev_id_priv->internal_id = 1;
1565
1566	ret = rdma_listen(id, id_priv->backlog);
1567	if (ret)
1568		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1569		       "listening on device %s\n", ret, cma_dev->device->name);
1570}
1571
1572static void cma_listen_on_all(struct rdma_id_private *id_priv)
1573{
1574	struct cma_device *cma_dev;
1575
1576	mutex_lock(&lock);
1577	list_add_tail(&id_priv->list, &listen_any_list);
1578	list_for_each_entry(cma_dev, &dev_list, list)
1579		cma_listen_on_dev(id_priv, cma_dev);
1580	mutex_unlock(&lock);
1581}
1582
1583void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1584{
1585	struct rdma_id_private *id_priv;
1586
1587	id_priv = container_of(id, struct rdma_id_private, id);
1588	id_priv->tos = (u8) tos;
1589}
1590EXPORT_SYMBOL(rdma_set_service_type);
1591
1592static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1593			      void *context)
1594{
1595	struct cma_work *work = context;
1596	struct rdma_route *route;
1597
1598	route = &work->id->id.route;
1599
1600	if (!status) {
1601		route->num_paths = 1;
1602		*route->path_rec = *path_rec;
1603	} else {
1604		work->old_state = CMA_ROUTE_QUERY;
1605		work->new_state = CMA_ADDR_RESOLVED;
1606		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1607		work->event.status = status;
1608	}
1609
1610	queue_work(cma_wq, &work->work);
1611}
1612
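/*
 * Issue an SA path record query for the resolved SGID/DGID pair.  The id's
 * TOS maps to the QoS class for IPv4; for IPv6 the traffic class is taken
 * from sin6_flowinfo.  Completion is reported via cma_query_handler().
 */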
1613static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1614			      struct cma_work *work)
1615{
1616	struct rdma_addr *addr = &id_priv->id.route.addr;
1617	struct ib_sa_path_rec path_rec;
1618	ib_sa_comp_mask comp_mask;
1619	struct sockaddr_in6 *sin6;
1620
1621	memset(&path_rec, 0, sizeof path_rec);
1622	rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1623	rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1624	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1625	path_rec.numb_path = 1;
1626	path_rec.reversible = 1;
1627	path_rec.service_id = cma_get_service_id(id_priv->id.ps,
1628							(struct sockaddr *) &addr->dst_addr);
1629
1630	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1631		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1632		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1633
1634	if (addr->src_addr.ss_family == AF_INET) {
1635		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1636		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1637	} else {
1638		sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1639		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1640		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1641	}
1642
1643	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1644					       id_priv->id.port_num, &path_rec,
1645					       comp_mask, timeout_ms,
1646					       GFP_KERNEL, cma_query_handler,
1647					       work, &id_priv->query);
1648
1649	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1650}
1651
1652static void cma_work_handler(struct work_struct *_work)
1653{
1654	struct cma_work *work = container_of(_work, struct cma_work, work);
1655	struct rdma_id_private *id_priv = work->id;
1656	int destroy = 0;
1657
1658	mutex_lock(&id_priv->handler_mutex);
1659	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1660		goto out;
1661
1662	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1663		cma_exch(id_priv, CMA_DESTROYING);
1664		destroy = 1;
1665	}
1666out:
1667	mutex_unlock(&id_priv->handler_mutex);
1668	cma_deref_id(id_priv);
1669	if (destroy)
1670		rdma_destroy_id(&id_priv->id);
1671	kfree(work);
1672}
1673
1674static void cma_ndev_work_handler(struct work_struct *_work)
1675{
1676	struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
1677	struct rdma_id_private *id_priv = work->id;
1678	int destroy = 0;
1679
1680	mutex_lock(&id_priv->handler_mutex);
1681	if (id_priv->state == CMA_DESTROYING ||
1682	    id_priv->state == CMA_DEVICE_REMOVAL)
1683		goto out;
1684
1685	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1686		cma_exch(id_priv, CMA_DESTROYING);
1687		destroy = 1;
1688	}
1689
1690out:
1691	mutex_unlock(&id_priv->handler_mutex);
1692	cma_deref_id(id_priv);
1693	if (destroy)
1694		rdma_destroy_id(&id_priv->id);
1695	kfree(work);
1696}
1697
1698static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1699{
1700	struct rdma_route *route = &id_priv->id.route;
1701	struct cma_work *work;
1702	int ret;
1703
1704	work = kzalloc(sizeof *work, GFP_KERNEL);
1705	if (!work)
1706		return -ENOMEM;
1707
1708	work->id = id_priv;
1709	INIT_WORK(&work->work, cma_work_handler);
1710	work->old_state = CMA_ROUTE_QUERY;
1711	work->new_state = CMA_ROUTE_RESOLVED;
1712	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1713
1714	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1715	if (!route->path_rec) {
1716		ret = -ENOMEM;
1717		goto err1;
1718	}
1719
1720	ret = cma_query_ib_route(id_priv, timeout_ms, work);
1721	if (ret)
1722		goto err2;
1723
1724	return 0;
1725err2:
1726	kfree(route->path_rec);
1727	route->path_rec = NULL;
1728err1:
1729	kfree(work);
1730	return ret;
1731}
1732
1733int rdma_set_ib_paths(struct rdma_cm_id *id,
1734		      struct ib_sa_path_rec *path_rec, int num_paths)
1735{
1736	struct rdma_id_private *id_priv;
1737	int ret;
1738
1739	id_priv = container_of(id, struct rdma_id_private, id);
1740	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1741		return -EINVAL;
1742
1743	id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
1744				     GFP_KERNEL);
1745	if (!id->route.path_rec) {
1746		ret = -ENOMEM;
1747		goto err;
1748	}
1749
1750	id->route.num_paths = num_paths;
1751	return 0;
1752err:
1753	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1754	return ret;
1755}
1756EXPORT_SYMBOL(rdma_set_ib_paths);
1757
1758static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1759{
1760	struct cma_work *work;
1761
1762	work = kzalloc(sizeof *work, GFP_KERNEL);
1763	if (!work)
1764		return -ENOMEM;
1765
1766	work->id = id_priv;
1767	INIT_WORK(&work->work, cma_work_handler);
1768	work->old_state = CMA_ROUTE_QUERY;
1769	work->new_state = CMA_ROUTE_RESOLVED;
1770	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1771	queue_work(cma_wq, &work->work);
1772	return 0;
1773}
1774
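/*
 * RoCE (IBoE) route resolution: no SA query is needed, so build a single
 * path record locally from the bound netdevice (MAC/VLAN-derived GIDs,
 * MTU, rate) and complete the transition to CMA_ROUTE_RESOLVED from a
 * work item.
 */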
1775static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
1776{
1777	struct rdma_route *route = &id_priv->id.route;
1778	struct rdma_addr *addr = &route->addr;
1779	struct cma_work *work;
1780	int ret;
1781	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
1782	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
1783	struct net_device *ndev = NULL;
1784	u16 vid;
1785
1786	if (src_addr->sin_family != dst_addr->sin_family)
1787		return -EINVAL;
1788
1789	work = kzalloc(sizeof *work, GFP_KERNEL);
1790	if (!work)
1791		return -ENOMEM;
1792
1793	work->id = id_priv;
1794	INIT_WORK(&work->work, cma_work_handler);
1795
1796	route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
1797	if (!route->path_rec) {
1798		ret = -ENOMEM;
1799		goto err1;
1800	}
1801
1802	route->num_paths = 1;
1803
1804	if (addr->dev_addr.bound_dev_if)
1805		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
1806	if (!ndev) {
1807		ret = -ENODEV;
1808		goto err2;
1809	}
1810
1811	vid = rdma_vlan_dev_vlan_id(ndev);
1812
1813	iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
1814	iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
1815
1816	route->path_rec->hop_limit = 1;
1817	route->path_rec->reversible = 1;
1818	route->path_rec->pkey = cpu_to_be16(0xffff);
1819	route->path_rec->mtu_selector = IB_SA_EQ;
1820	route->path_rec->sl = id_priv->tos >> 5;
1821
1822	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
1823	route->path_rec->rate_selector = IB_SA_EQ;
1824	route->path_rec->rate = iboe_get_rate(ndev);
1825	dev_put(ndev);
1826	route->path_rec->packet_life_time_selector = IB_SA_EQ;
1827	route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
1828	if (!route->path_rec->mtu) {
1829		ret = -EINVAL;
1830		goto err2;
1831	}
1832
1833	work->old_state = CMA_ROUTE_QUERY;
1834	work->new_state = CMA_ROUTE_RESOLVED;
1835	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1836	work->event.status = 0;
1837
1838	queue_work(cma_wq, &work->work);
1839
1840	return 0;
1841
1842err2:
1843	kfree(route->path_rec);
1844	route->path_rec = NULL;
1845err1:
1846	kfree(work);
1847	return ret;
1848}
1849
1850int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1851{
1852	struct rdma_id_private *id_priv;
1853	int ret;
1854
1855	id_priv = container_of(id, struct rdma_id_private, id);
1856	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1857		return -EINVAL;
1858
1859	atomic_inc(&id_priv->refcount);
1860	switch (rdma_node_get_transport(id->device->node_type)) {
1861	case RDMA_TRANSPORT_IB:
1862		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
1863		case IB_LINK_LAYER_INFINIBAND:
1864			ret = cma_resolve_ib_route(id_priv, timeout_ms);
1865			break;
1866		case IB_LINK_LAYER_ETHERNET:
1867			ret = cma_resolve_iboe_route(id_priv);
1868			break;
1869		default:
1870			ret = -ENOSYS;
1871		}
1872		break;
1873	case RDMA_TRANSPORT_IWARP:
1874		ret = cma_resolve_iw_route(id_priv, timeout_ms);
1875		break;
1876	default:
1877		ret = -ENOSYS;
1878		break;
1879	}
1880	if (ret)
1881		goto err;
1882
1883	return 0;
1884err:
1885	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1886	cma_deref_id(id_priv);
1887	return ret;
1888}
1889EXPORT_SYMBOL(rdma_resolve_route);
1890
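/*
 * Bind to a local device for loopback/wildcard destinations: prefer the
 * first active port found, fall back to port 1 of the first device, and
 * seed the dev_addr with that port's GID and default P_Key.
 */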
1891static int cma_bind_loopback(struct rdma_id_private *id_priv)
1892{
1893	struct cma_device *cma_dev;
1894	struct ib_port_attr port_attr;
1895	union ib_gid gid;
1896	u16 pkey;
1897	int ret;
1898	u8 p;
1899
1900	mutex_lock(&lock);
1901	if (list_empty(&dev_list)) {
1902		ret = -ENODEV;
1903		goto out;
1904	}
1905	list_for_each_entry(cma_dev, &dev_list, list)
1906		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1907			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1908			    port_attr.state == IB_PORT_ACTIVE)
1909				goto port_found;
1910
1911	p = 1;
1912	cma_dev = list_entry(dev_list.next, struct cma_device, list);
1913
1914port_found:
1915	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1916	if (ret)
1917		goto out;
1918
1919	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1920	if (ret)
1921		goto out;
1922
1923	id_priv->id.route.addr.dev_addr.dev_type =
1924		(rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
1925		ARPHRD_INFINIBAND : ARPHRD_ETHER;
1926
1927	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1928	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1929	id_priv->id.port_num = p;
1930	cma_attach_to_dev(id_priv, cma_dev);
1931out:
1932	mutex_unlock(&lock);
1933	return ret;
1934}
1935
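/*
 * Completion callback for rdma_resolve_ip(): on success attach the id to a
 * device and report RDMA_CM_EVENT_ADDR_RESOLVED; otherwise fall back to
 * CMA_ADDR_BOUND and report RDMA_CM_EVENT_ADDR_ERROR.
 */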
1936static void addr_handler(int status, struct sockaddr *src_addr,
1937			 struct rdma_dev_addr *dev_addr, void *context)
1938{
1939	struct rdma_id_private *id_priv = context;
1940	struct rdma_cm_event event;
1941
1942	memset(&event, 0, sizeof event);
1943	mutex_lock(&id_priv->handler_mutex);
1944	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
1945		goto out;
1946
1947	if (!status && !id_priv->cma_dev)
1948		status = cma_acquire_dev(id_priv);
1949
1950	if (status) {
1951		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1952			goto out;
1953		event.event = RDMA_CM_EVENT_ADDR_ERROR;
1954		event.status = status;
1955	} else {
1956		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1957		       ip_addr_size(src_addr));
1958		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1959	}
1960
1961	if (id_priv->id.event_handler(&id_priv->id, &event)) {
1962		cma_exch(id_priv, CMA_DESTROYING);
1963		mutex_unlock(&id_priv->handler_mutex);
1964		cma_deref_id(id_priv);
1965		rdma_destroy_id(&id_priv->id);
1966		return;
1967	}
1968out:
1969	mutex_unlock(&id_priv->handler_mutex);
1970	cma_deref_id(id_priv);
1971}
1972
1973static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1974{
1975	struct cma_work *work;
1976	struct sockaddr *src, *dst;
1977	union ib_gid gid;
1978	int ret;
1979
1980	work = kzalloc(sizeof *work, GFP_KERNEL);
1981	if (!work)
1982		return -ENOMEM;
1983
1984	if (!id_priv->cma_dev) {
1985		ret = cma_bind_loopback(id_priv);
1986		if (ret)
1987			goto err;
1988	}
1989
1990	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1991	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1992
1993	src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
1994	if (cma_zero_addr(src)) {
1995		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
1996		if ((src->sa_family = dst->sa_family) == AF_INET) {
1997			((struct sockaddr_in *) src)->sin_addr.s_addr =
1998				((struct sockaddr_in *) dst)->sin_addr.s_addr;
1999		} else {
2000			ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
2001				       &((struct sockaddr_in6 *) dst)->sin6_addr);
2002		}
2003	}
2004
2005	work->id = id_priv;
2006	INIT_WORK(&work->work, cma_work_handler);
2007	work->old_state = CMA_ADDR_QUERY;
2008	work->new_state = CMA_ADDR_RESOLVED;
2009	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2010	queue_work(cma_wq, &work->work);
2011	return 0;
2012err:
2013	kfree(work);
2014	return ret;
2015}
2016
2017static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2018			 struct sockaddr *dst_addr)
2019{
2020	if (!src_addr || !src_addr->sa_family) {
2021		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2022		if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
2023			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
2024				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
2025		}
2026	}
2027	return rdma_bind_addr(id, src_addr);
2028}
2029
2030int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2031		      struct sockaddr *dst_addr, int timeout_ms)
2032{
2033	struct rdma_id_private *id_priv;
2034	int ret;
2035
2036	id_priv = container_of(id, struct rdma_id_private, id);
2037	if (id_priv->state == CMA_IDLE) {
2038		ret = cma_bind_addr(id, src_addr, dst_addr);
2039		if (ret)
2040			return ret;
2041	}
2042
2043	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
2044		return -EINVAL;
2045
2046	atomic_inc(&id_priv->refcount);
2047	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
2048	if (cma_any_addr(dst_addr))
2049		ret = cma_resolve_loopback(id_priv);
2050	else
2051		ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
2052				      dst_addr, &id->route.addr.dev_addr,
2053				      timeout_ms, addr_handler, id_priv);
2054	if (ret)
2055		goto err;
2056
2057	return 0;
2058err:
2059	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
2060	cma_deref_id(id_priv);
2061	return ret;
2062}
2063EXPORT_SYMBOL(rdma_resolve_addr);
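
/*
 * Usage sketch: the active side normally starts with rdma_resolve_addr().
 * A minimal example, assuming an id created against a handler such as the
 * one sketched after rdma_resolve_route() above; the destination address,
 * port and timeout are illustrative.
 *
 *	struct sockaddr_in dst = {
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
 *		.sin_port = htons(18515),
 *	};
 *	int ret;
 *
 *	ret = rdma_resolve_addr(id, NULL, (struct sockaddr *) &dst, 2000);
 *
 * Passing a NULL source address lets cma_bind_addr() above pick one based
 * on the destination; RDMA_CM_EVENT_ADDR_RESOLVED (or
 * RDMA_CM_EVENT_ADDR_ERROR) is then delivered to the handler asynchronously.
 */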
2064
2065int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
2066{
2067	struct rdma_id_private *id_priv;
2068	unsigned long flags;
2069	int ret;
2070
2071	id_priv = container_of(id, struct rdma_id_private, id);
2072	spin_lock_irqsave(&id_priv->lock, flags);
2073	if (id_priv->state == CMA_IDLE) {
2074		id_priv->reuseaddr = reuse;
2075		ret = 0;
2076	} else {
2077		ret = -EINVAL;
2078	}
2079	spin_unlock_irqrestore(&id_priv->lock, flags);
2080	return ret;
2081}
2082EXPORT_SYMBOL(rdma_set_reuseaddr);
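
/*
 * Usage sketch: rdma_set_reuseaddr() only succeeds while the id is still
 * idle, so a caller that wants SO_REUSEADDR-like sharing of a listening
 * port has to set it before binding.  Error handling is omitted and the
 * backlog value is illustrative:
 *
 *	rdma_set_reuseaddr(id, 1);
 *	rdma_bind_addr(id, (struct sockaddr *) &src);
 *	rdma_listen(id, 16);
 */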
2083
2084static void cma_bind_port(struct rdma_bind_list *bind_list,
2085			  struct rdma_id_private *id_priv)
2086{
2087	struct sockaddr_in *sin;
2088
2089	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2090	sin->sin_port = htons(bind_list->port);
2091	id_priv->bind_list = bind_list;
2092	hlist_add_head(&id_priv->node, &bind_list->owners);
2093}
2094
2095static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
2096			  unsigned short snum)
2097{
2098	struct rdma_bind_list *bind_list;
2099	int port, ret;
2100
2101	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2102	if (!bind_list)
2103		return -ENOMEM;
2104
2105	do {
2106		ret = idr_get_new_above(ps, bind_list, snum, &port);
2107	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
2108
2109	if (ret)
2110		goto err1;
2111
2112	if (port != snum) {
2113		ret = -EADDRNOTAVAIL;
2114		goto err2;
2115	}
2116
2117	bind_list->ps = ps;
2118	bind_list->port = (unsigned short) port;
2119	cma_bind_port(bind_list, id_priv);
2120	return 0;
2121err2:
2122	idr_remove(ps, port);
2123err1:
2124	kfree(bind_list);
2125	return ret;
2126}
2127
2128static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
2129{
2130	static unsigned int last_used_port;
2131	int low, high, remaining;
2132	unsigned int rover;
2133
2134	inet_get_local_port_range(&low, &high);
2135	remaining = (high - low) + 1;
2136	rover = net_random() % remaining + low;
2137retry:
2138	if (last_used_port != rover &&
2139	    !idr_find(ps, (unsigned short) rover)) {
2140		int ret = cma_alloc_port(ps, id_priv, rover);
2141		/*
2142		 * Remember the previously used port number in order to avoid
2143		 * re-using the same port immediately after it is closed.
2144		 */
2145		if (!ret)
2146			last_used_port = rover;
2147		if (ret != -EADDRNOTAVAIL)
2148			return ret;
2149	}
2150	if (--remaining) {
2151		rover++;
2152		if ((rover < low) || (rover > high))
2153			rover = low;
2154		goto retry;
2155	}
2156	return -EADDRNOTAVAIL;
2157}
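
/*
 * Note on the search above: cma_alloc_any_port() walks the local ephemeral
 * port range starting from a random rover and skips the most recently used
 * port, so a port freed by a just-closed id is not handed straight back
 * out.  A successful allocation, or any error other than -EADDRNOTAVAIL,
 * ends the scan immediately; -EADDRNOTAVAIL (port already taken in this
 * port space) advances the rover and wraps it at the end of the range.
 */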
2158
2159/*
2160 * Check that the requested port is available.  This is called when trying to
2161 * bind to a specific port, or when trying to listen on a bound port.  In
2162 * the latter case, the provided id_priv may already be on the bind_list, but
2163 * we still need to check that it's okay to start listening.
2164 */
2165static int cma_check_port(struct rdma_bind_list *bind_list,
2166			  struct rdma_id_private *id_priv, uint8_t reuseaddr)
2167{
2168	struct rdma_id_private *cur_id;
2169	struct sockaddr *addr, *cur_addr;
2170	struct hlist_node *node;
2171
2172	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
2173	if (cma_any_addr(addr) && !reuseaddr)
2174		return -EADDRNOTAVAIL;
2175
2176	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
2177		if (id_priv == cur_id)
2178			continue;
2179
2180		if ((cur_id->state == CMA_LISTEN) ||
2181		    !reuseaddr || !cur_id->reuseaddr) {
2182			cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
2183			if (cma_any_addr(cur_addr))
2184				return -EADDRNOTAVAIL;
2185
2186			if (!cma_addr_cmp(addr, cur_addr))
2187				return -EADDRINUSE;
2188		}
2189	}
2190	return 0;
2191}
2192
2193static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
2194{
2195	struct rdma_bind_list *bind_list;
2196	unsigned short snum;
2197	int ret;
2198
2199	snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr));
2200	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
2201		return -EACCES;
2202
2203	bind_list = idr_find(ps, snum);
2204	if (!bind_list) {
2205		ret = cma_alloc_port(ps, id_priv, snum);
2206	} else {
2207		ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
2208		if (!ret)
2209			cma_bind_port(bind_list, id_priv);
2210	}
2211	return ret;
2212}
2213
2214static int cma_bind_listen(struct rdma_id_private *id_priv)
2215{
2216	struct rdma_bind_list *bind_list = id_priv->bind_list;
2217	int ret = 0;
2218
2219	mutex_lock(&lock);
2220	if (bind_list->owners.first->next)
2221		ret = cma_check_port(bind_list, id_priv, 0);
2222	mutex_unlock(&lock);
2223	return ret;
2224}
2225
2226static int cma_get_port(struct rdma_id_private *id_priv)
2227{
2228	struct idr *ps;
2229	int ret;
2230
2231	switch (id_priv->id.ps) {
2232	case RDMA_PS_SDP:
2233		ps = &sdp_ps;
2234		break;
2235	case RDMA_PS_TCP:
2236		ps = &tcp_ps;
2237		break;
2238	case RDMA_PS_UDP:
2239		ps = &udp_ps;
2240		break;
2241	case RDMA_PS_IPOIB:
2242		ps = &ipoib_ps;
2243		break;
2244	default:
2245		return -EPROTONOSUPPORT;
2246	}
2247
2248	mutex_lock(&lock);
2249	if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
2250		ret = cma_alloc_any_port(ps, id_priv);
2251	else
2252		ret = cma_use_port(ps, id_priv);
2253	mutex_unlock(&lock);
2254
2255	return ret;
2256}
2257
2258static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
2259			       struct sockaddr *addr)
2260{
2261#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2262	struct sockaddr_in6 *sin6;
2263
2264	if (addr->sa_family != AF_INET6)
2265		return 0;
2266
2267	sin6 = (struct sockaddr_in6 *) addr;
2268	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
2269	    !sin6->sin6_scope_id)
2270		return -EINVAL;
2271
2272	dev_addr->bound_dev_if = sin6->sin6_scope_id;
2273#endif
2274	return 0;
2275}
2276
2277int rdma_listen(struct rdma_cm_id *id, int backlog)
2278{
2279	struct rdma_id_private *id_priv;
2280	int ret;
2281
2282	id_priv = container_of(id, struct rdma_id_private, id);
2283	if (id_priv->state == CMA_IDLE) {
2284		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
2285		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
2286		if (ret)
2287			return ret;
2288	}
2289
2290	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
2291		return -EINVAL;
2292
2293	if (id_priv->reuseaddr) {
2294		ret = cma_bind_listen(id_priv);
2295		if (ret)
2296			goto err;
2297	}
2298
2299	id_priv->backlog = backlog;
2300	if (id->device) {
2301		switch (rdma_node_get_transport(id->device->node_type)) {
2302		case RDMA_TRANSPORT_IB:
2303			ret = cma_ib_listen(id_priv);
2304			if (ret)
2305				goto err;
2306			break;
2307		case RDMA_TRANSPORT_IWARP:
2308			ret = cma_iw_listen(id_priv, backlog);
2309			if (ret)
2310				goto err;
2311			break;
2312		default:
2313			ret = -ENOSYS;
2314			goto err;
2315		}
2316	} else
2317		cma_listen_on_all(id_priv);
2318
2319	return 0;
2320err:
2321	id_priv->backlog = 0;
2322	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
2323	return ret;
2324}
2325EXPORT_SYMBOL(rdma_listen);
2326
2327int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2328{
2329	struct rdma_id_private *id_priv;
2330	int ret;
2331
2332	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
2333		return -EAFNOSUPPORT;
2334
2335	id_priv = container_of(id, struct rdma_id_private, id);
2336	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2337		return -EINVAL;
2338
2339	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
2340	if (ret)
2341		goto err1;
2342
2343	if (!cma_any_addr(addr)) {
2344		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2345		if (ret)
2346			goto err1;
2347
2348		ret = cma_acquire_dev(id_priv);
2349		if (ret)
2350			goto err1;
2351	}
2352
2353	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2354	ret = cma_get_port(id_priv);
2355	if (ret)
2356		goto err2;
2357
2358	return 0;
2359err2:
2360	if (id_priv->cma_dev)
2361		cma_release_dev(id_priv);
2362err1:
2363	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2364	return ret;
2365}
2366EXPORT_SYMBOL(rdma_bind_addr);
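
/*
 * Usage sketch: a minimal passive side, assuming an already created id
 * whose handler deals with RDMA_CM_EVENT_CONNECT_REQUEST.  The wildcard
 * address, port and backlog are illustrative.
 *
 *	struct sockaddr_in src = {
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(INADDR_ANY),
 *		.sin_port = htons(18515),
 *	};
 *	int ret;
 *
 *	ret = rdma_bind_addr(id, (struct sockaddr *) &src);
 *	if (!ret)
 *		ret = rdma_listen(id, 16);
 *
 * Binding to the wildcard address defers device selection, so rdma_listen()
 * ends up in cma_listen_on_all() and each incoming request is reported on a
 * new, automatically created child id passed to the handler.
 */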
2367
2368static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2369			  struct rdma_route *route)
2370{
2371	struct cma_hdr *cma_hdr;
2372	struct sdp_hh *sdp_hdr;
2373
2374	if (route->addr.src_addr.ss_family == AF_INET) {
2375		struct sockaddr_in *src4, *dst4;
2376
2377		src4 = (struct sockaddr_in *) &route->addr.src_addr;
2378		dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2379
2380		switch (ps) {
2381		case RDMA_PS_SDP:
2382			sdp_hdr = hdr;
2383			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2384				return -EINVAL;
2385			sdp_set_ip_ver(sdp_hdr, 4);
2386			sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2387			sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2388			sdp_hdr->port = src4->sin_port;
2389			break;
2390		default:
2391			cma_hdr = hdr;
2392			cma_hdr->cma_version = CMA_VERSION;
2393			cma_set_ip_ver(cma_hdr, 4);
2394			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2395			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2396			cma_hdr->port = src4->sin_port;
2397			break;
2398		}
2399	} else {
2400		struct sockaddr_in6 *src6, *dst6;
2401
2402		src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
2403		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
2404
2405		switch (ps) {
2406		case RDMA_PS_SDP:
2407			sdp_hdr = hdr;
2408			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2409				return -EINVAL;
2410			sdp_set_ip_ver(sdp_hdr, 6);
2411			sdp_hdr->src_addr.ip6 = src6->sin6_addr;
2412			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
2413			sdp_hdr->port = src6->sin6_port;
2414			break;
2415		default:
2416			cma_hdr = hdr;
2417			cma_hdr->cma_version = CMA_VERSION;
2418			cma_set_ip_ver(cma_hdr, 6);
2419			cma_hdr->src_addr.ip6 = src6->sin6_addr;
2420			cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2421			cma_hdr->port = src6->sin6_port;
2422			break;
2423		}
2424	}
2425	return 0;
2426}
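
/*
 * Note: cma_format_hdr() writes the RDMA CM private-data header that is
 * carried in front of the caller's private data in the CM REQ or SIDR REQ.
 * For RDMA_PS_SDP the SDP hello header layout is reused (and its version is
 * checked); for the other port spaces a struct cma_hdr holding the CMA
 * version, IP version, source/destination addresses and source port is
 * emitted.  The passive side parses the same header when it builds the
 * connection id for an incoming request, which is how it recovers the IP
 * addressing that the transport itself does not carry.
 */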
2427
2428static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2429				struct ib_cm_event *ib_event)
2430{
2431	struct rdma_id_private *id_priv = cm_id->context;
2432	struct rdma_cm_event event;
2433	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2434	int ret = 0;
2435
2436	if (cma_disable_callback(id_priv, CMA_CONNECT))
2437		return 0;
2438
2439	memset(&event, 0, sizeof event);
2440	switch (ib_event->event) {
2441	case IB_CM_SIDR_REQ_ERROR:
2442		event.event = RDMA_CM_EVENT_UNREACHABLE;
2443		event.status = -ETIMEDOUT;
2444		break;
2445	case IB_CM_SIDR_REP_RECEIVED:
2446		event.param.ud.private_data = ib_event->private_data;
2447		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2448		if (rep->status != IB_SIDR_SUCCESS) {
2449			event.event = RDMA_CM_EVENT_UNREACHABLE;
2450			event.status = rep->status;
2451			break;
2452		}
2453		ret = cma_set_qkey(id_priv);
2454		if (ret) {
2455			event.event = RDMA_CM_EVENT_ADDR_ERROR;
2456			event.status = -EINVAL;
2457			break;
2458		}
2459		if (id_priv->qkey != rep->qkey) {
2460			event.event = RDMA_CM_EVENT_UNREACHABLE;
2461			event.status = -EINVAL;
2462			break;
2463		}
2464		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2465				     id_priv->id.route.path_rec,
2466				     &event.param.ud.ah_attr);
2467		event.param.ud.qp_num = rep->qpn;
2468		event.param.ud.qkey = rep->qkey;
2469		event.event = RDMA_CM_EVENT_ESTABLISHED;
2470		event.status = 0;
2471		break;
2472	default:
2473		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2474		       ib_event->event);
2475		goto out;
2476	}
2477
2478	ret = id_priv->id.event_handler(&id_priv->id, &event);
2479	if (ret) {
2480		/* Destroy the CM ID by returning a non-zero value. */
2481		id_priv->cm_id.ib = NULL;
2482		cma_exch(id_priv, CMA_DESTROYING);
2483		mutex_unlock(&id_priv->handler_mutex);
2484		rdma_destroy_id(&id_priv->id);
2485		return ret;
2486	}
2487out:
2488	mutex_unlock(&id_priv->handler_mutex);
2489	return ret;
2490}
2491
2492static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2493			      struct rdma_conn_param *conn_param)
2494{
2495	struct ib_cm_sidr_req_param req;
2496	struct rdma_route *route;
2497	int ret;
2498
2499	req.private_data_len = sizeof(struct cma_hdr) +
2500			       conn_param->private_data_len;
2501	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2502	if (!req.private_data)
2503		return -ENOMEM;
2504
2505	if (conn_param->private_data && conn_param->private_data_len)
2506		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2507		       conn_param->private_data, conn_param->private_data_len);
2508
2509	route = &id_priv->id.route;
2510	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2511	if (ret)
2512		goto out;
2513
2514	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2515					    cma_sidr_rep_handler, id_priv);
2516	if (IS_ERR(id_priv->cm_id.ib)) {
2517		ret = PTR_ERR(id_priv->cm_id.ib);
2518		goto out;
2519	}
2520
2521	req.path = route->path_rec;
2522	req.service_id = cma_get_service_id(id_priv->id.ps,
2523					    (struct sockaddr *) &route->addr.dst_addr);
2524	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2525	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2526
2527	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2528	if (ret) {
2529		ib_destroy_cm_id(id_priv->cm_id.ib);
2530		id_priv->cm_id.ib = NULL;
2531	}
2532out:
2533	kfree(req.private_data);
2534	return ret;
2535}
2536
2537static int cma_connect_ib(struct rdma_id_private *id_priv,
2538			  struct rdma_conn_param *conn_param)
2539{
2540	struct ib_cm_req_param req;
2541	struct rdma_route *route;
2542	void *private_data;
2543	int offset, ret;
2544
2545	memset(&req, 0, sizeof req);
2546	offset = cma_user_data_offset(id_priv->id.ps);
2547	req.private_data_len = offset + conn_param->private_data_len;
2548	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2549	if (!private_data)
2550		return -ENOMEM;
2551
2552	if (conn_param->private_data && conn_param->private_data_len)
2553		memcpy(private_data + offset, conn_param->private_data,
2554		       conn_param->private_data_len);
2555
2556	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2557					    id_priv);
2558	if (IS_ERR(id_priv->cm_id.ib)) {
2559		ret = PTR_ERR(id_priv->cm_id.ib);
2560		goto out;
2561	}
2562
2563	route = &id_priv->id.route;
2564	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2565	if (ret)
2566		goto out;
2567	req.private_data = private_data;
2568
2569	req.primary_path = &route->path_rec[0];
2570	if (route->num_paths == 2)
2571		req.alternate_path = &route->path_rec[1];
2572
2573	req.service_id = cma_get_service_id(id_priv->id.ps,
2574					    (struct sockaddr *) &route->addr.dst_addr);
2575	req.qp_num = id_priv->qp_num;
2576	req.qp_type = IB_QPT_RC;
2577	req.starting_psn = id_priv->seq_num;
2578	req.responder_resources = conn_param->responder_resources;
2579	req.initiator_depth = conn_param->initiator_depth;
2580	req.flow_control = conn_param->flow_control;
2581	req.retry_count = conn_param->retry_count;
2582	req.rnr_retry_count = conn_param->rnr_retry_count;
2583	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2584	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2585	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2586	req.srq = id_priv->srq ? 1 : 0;
2587
2588	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2589out:
2590	if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2591		ib_destroy_cm_id(id_priv->cm_id.ib);
2592		id_priv->cm_id.ib = NULL;
2593	}
2594
2595	kfree(private_data);
2596	return ret;
2597}
2598
2599static int cma_connect_iw(struct rdma_id_private *id_priv,
2600			  struct rdma_conn_param *conn_param)
2601{
2602	struct iw_cm_id *cm_id;
2603	struct sockaddr_in *sin;
2604	int ret;
2605	struct iw_cm_conn_param iw_param;
2606
2607	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2608	if (IS_ERR(cm_id)) {
2609		ret = PTR_ERR(cm_id);
2610		goto out;
2611	}
2612
2613	id_priv->cm_id.iw = cm_id;
2614
2615	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2616	cm_id->local_addr = *sin;
2617
2618	sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
2619	cm_id->remote_addr = *sin;
2620
2621	ret = cma_modify_qp_rtr(id_priv, conn_param);
2622	if (ret)
2623		goto out;
2624
2625	iw_param.ord = conn_param->initiator_depth;
2626	iw_param.ird = conn_param->responder_resources;
2627	iw_param.private_data = conn_param->private_data;
2628	iw_param.private_data_len = conn_param->private_data_len;
2629	if (id_priv->id.qp)
2630		iw_param.qpn = id_priv->qp_num;
2631	else
2632		iw_param.qpn = conn_param->qp_num;
2633	ret = iw_cm_connect(cm_id, &iw_param);
2634out:
2635	if (ret && !IS_ERR(cm_id)) {
2636		iw_destroy_cm_id(cm_id);
2637		id_priv->cm_id.iw = NULL;
2638	}
2639	return ret;
2640}
2641
2642int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2643{
2644	struct rdma_id_private *id_priv;
2645	int ret;
2646
2647	id_priv = container_of(id, struct rdma_id_private, id);
2648	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2649		return -EINVAL;
2650
2651	if (!id->qp) {
2652		id_priv->qp_num = conn_param->qp_num;
2653		id_priv->srq = conn_param->srq;
2654	}
2655
2656	switch (rdma_node_get_transport(id->device->node_type)) {
2657	case RDMA_TRANSPORT_IB:
2658		if (cma_is_ud_ps(id->ps))
2659			ret = cma_resolve_ib_udp(id_priv, conn_param);
2660		else
2661			ret = cma_connect_ib(id_priv, conn_param);
2662		break;
2663	case RDMA_TRANSPORT_IWARP:
2664		ret = cma_connect_iw(id_priv, conn_param);
2665		break;
2666	default:
2667		ret = -ENOSYS;
2668		break;
2669	}
2670	if (ret)
2671		goto err;
2672
2673	return 0;
2674err:
2675	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2676	return ret;
2677}
2678EXPORT_SYMBOL(rdma_connect);
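
/*
 * Usage sketch: rdma_connect() is normally issued from the
 * RDMA_CM_EVENT_ROUTE_RESOLVED handler after the QP has been created.  The
 * parameter values below are common illustrative choices, not requirements.
 *
 *	struct rdma_conn_param param = {
 *		.responder_resources	= 1,
 *		.initiator_depth	= 1,
 *		.retry_count		= 7,
 *		.rnr_retry_count	= 7,
 *	};
 *	int ret;
 *
 *	ret = rdma_connect(id, &param);
 *
 * For the UD port spaces this becomes a SIDR REQ (cma_resolve_ib_udp());
 * otherwise it builds an IB CM REQ (cma_connect_ib()) or an iWARP connect
 * (cma_connect_iw()), and the outcome is reported asynchronously as
 * RDMA_CM_EVENT_ESTABLISHED or one of the reject/error events.
 */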
2679
2680static int cma_accept_ib(struct rdma_id_private *id_priv,
2681			 struct rdma_conn_param *conn_param)
2682{
2683	struct ib_cm_rep_param rep;
2684	int ret;
2685
2686	ret = cma_modify_qp_rtr(id_priv, conn_param);
2687	if (ret)
2688		goto out;
2689
2690	ret = cma_modify_qp_rts(id_priv, conn_param);
2691	if (ret)
2692		goto out;
2693
2694	memset(&rep, 0, sizeof rep);
2695	rep.qp_num = id_priv->qp_num;
2696	rep.starting_psn = id_priv->seq_num;
2697	rep.private_data = conn_param->private_data;
2698	rep.private_data_len = conn_param->private_data_len;
2699	rep.responder_resources = conn_param->responder_resources;
2700	rep.initiator_depth = conn_param->initiator_depth;
2701	rep.failover_accepted = 0;
2702	rep.flow_control = conn_param->flow_control;
2703	rep.rnr_retry_count = conn_param->rnr_retry_count;
2704	rep.srq = id_priv->srq ? 1 : 0;
2705
2706	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2707out:
2708	return ret;
2709}
2710
2711static int cma_accept_iw(struct rdma_id_private *id_priv,
2712		  struct rdma_conn_param *conn_param)
2713{
2714	struct iw_cm_conn_param iw_param;
2715	int ret;
2716
2717	ret = cma_modify_qp_rtr(id_priv, conn_param);
2718	if (ret)
2719		return ret;
2720
2721	iw_param.ord = conn_param->initiator_depth;
2722	iw_param.ird = conn_param->responder_resources;
2723	iw_param.private_data = conn_param->private_data;
2724	iw_param.private_data_len = conn_param->private_data_len;
2725	if (id_priv->id.qp)
2726		iw_param.qpn = id_priv->qp_num;
2727	else
2728		iw_param.qpn = conn_param->qp_num;
2729
2730	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2731}
2732
2733static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2734			     enum ib_cm_sidr_status status,
2735			     const void *private_data, int private_data_len)
2736{
2737	struct ib_cm_sidr_rep_param rep;
2738	int ret;
2739
2740	memset(&rep, 0, sizeof rep);
2741	rep.status = status;
2742	if (status == IB_SIDR_SUCCESS) {
2743		ret = cma_set_qkey(id_priv);
2744		if (ret)
2745			return ret;
2746		rep.qp_num = id_priv->qp_num;
2747		rep.qkey = id_priv->qkey;
2748	}
2749	rep.private_data = private_data;
2750	rep.private_data_len = private_data_len;
2751
2752	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2753}
2754
2755int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2756{
2757	struct rdma_id_private *id_priv;
2758	int ret;
2759
2760	id_priv = container_of(id, struct rdma_id_private, id);
2761	if (!cma_comp(id_priv, CMA_CONNECT))
2762		return -EINVAL;
2763
2764	if (!id->qp && conn_param) {
2765		id_priv->qp_num = conn_param->qp_num;
2766		id_priv->srq = conn_param->srq;
2767	}
2768
2769	switch (rdma_node_get_transport(id->device->node_type)) {
2770	case RDMA_TRANSPORT_IB:
2771		if (cma_is_ud_ps(id->ps))
2772			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2773				conn_param ? conn_param->private_data : NULL,
2774				conn_param ? conn_param->private_data_len : 0);
2775		else if (conn_param)
2776			ret = cma_accept_ib(id_priv, conn_param);
2777		else
2778			ret = cma_rep_recv(id_priv);
2779		break;
2780	case RDMA_TRANSPORT_IWARP:
2781		ret = cma_accept_iw(id_priv, conn_param);
2782		break;
2783	default:
2784		ret = -ENOSYS;
2785		break;
2786	}
2787
2788	if (ret)
2789		goto reject;
2790
2791	return 0;
2792reject:
2793	cma_modify_qp_err(id_priv);
2794	rdma_reject(id, NULL, 0);
2795	return ret;
2796}
2797EXPORT_SYMBOL(rdma_accept);
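
/*
 * Usage sketch: accepting from the passive side's connect-request handling,
 * assuming the QP for the new child id has already been created.
 * "example_on_connect_request" and the parameter values are illustrative.
 *
 *	static int example_on_connect_request(struct rdma_cm_id *new_id)
 *	{
 *		struct rdma_conn_param param = {
 *			.responder_resources	= 1,
 *			.initiator_depth	= 1,
 *			.rnr_retry_count	= 7,
 *		};
 *
 *		return rdma_accept(new_id, &param);
 *	}
 *
 * Propagating the error as the handler's return value is reasonable here:
 * rdma_accept() already rejects and moves the QP to error on failure, and a
 * non-zero handler return then destroys the child id.
 */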
2798
2799int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2800{
2801	struct rdma_id_private *id_priv;
2802	int ret;
2803
2804	id_priv = container_of(id, struct rdma_id_private, id);
2805	if (!cma_has_cm_dev(id_priv))
2806		return -EINVAL;
2807
2808	switch (id->device->node_type) {
2809	case RDMA_NODE_IB_CA:
2810		ret = ib_cm_notify(id_priv->cm_id.ib, event);
2811		break;
2812	default:
2813		ret = 0;
2814		break;
2815	}
2816	return ret;
2817}
2818EXPORT_SYMBOL(rdma_notify);
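
/*
 * Note: the main consumer-visible use of rdma_notify() is
 * IB_EVENT_COMM_EST.  If a ULP sees receive completions on its QP before
 * RDMA_CM_EVENT_ESTABLISHED has been delivered (data can race with the
 * RTU), it may call
 *
 *	rdma_notify(id, IB_EVENT_COMM_EST);
 *
 * to move the IB CM into the established state instead of waiting for a
 * retransmitted RTU.  For non-IB devices the call is a no-op, as the switch
 * above shows.
 */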
2819
2820int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2821		u8 private_data_len)
2822{
2823	struct rdma_id_private *id_priv;
2824	int ret;
2825
2826	id_priv = container_of(id, struct rdma_id_private, id);
2827	if (!cma_has_cm_dev(id_priv))
2828		return -EINVAL;
2829
2830	switch (rdma_node_get_transport(id->device->node_type)) {
2831	case RDMA_TRANSPORT_IB:
2832		if (cma_is_ud_ps(id->ps))
2833			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2834						private_data, private_data_len);
2835		else
2836			ret = ib_send_cm_rej(id_priv->cm_id.ib,
2837					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
2838					     0, private_data, private_data_len);
2839		break;
2840	case RDMA_TRANSPORT_IWARP:
2841		ret = iw_cm_reject(id_priv->cm_id.iw,
2842				   private_data, private_data_len);
2843		break;
2844	default:
2845		ret = -ENOSYS;
2846		break;
2847	}
2848	return ret;
2849}
2850EXPORT_SYMBOL(rdma_reject);
2851
2852int rdma_disconnect(struct rdma_cm_id *id)
2853{
2854	struct rdma_id_private *id_priv;
2855	int ret;
2856
2857	id_priv = container_of(id, struct rdma_id_private, id);
2858	if (!cma_has_cm_dev(id_priv))
2859		return -EINVAL;
2860
2861	switch (rdma_node_get_transport(id->device->node_type)) {
2862	case RDMA_TRANSPORT_IB:
2863		ret = cma_modify_qp_err(id_priv);
2864		if (ret)
2865			goto out;
2866		/* Initiate or respond to a disconnect. */
2867		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2868			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2869		break;
2870	case RDMA_TRANSPORT_IWARP:
2871		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2872		break;
2873	default:
2874		ret = -EINVAL;
2875		break;
2876	}
2877out:
2878	return ret;
2879}
2880EXPORT_SYMBOL(rdma_disconnect);
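
/*
 * Note: rdma_disconnect() both starts a teardown and answers one, so a
 * typical handler simply calls it again on the disconnect event:
 *
 *	if (event->event == RDMA_CM_EVENT_DISCONNECTED)
 *		rdma_disconnect(id);
 *
 * On IB this moves the QP to the error state and then sends the DREQ or,
 * if a DREQ was already received, the DREP (the ib_send_cm_dreq()/
 * ib_send_cm_drep() pair above).
 */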
2881
2882static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2883{
2884	struct rdma_id_private *id_priv;
2885	struct cma_multicast *mc = multicast->context;
2886	struct rdma_cm_event event;
2887	int ret;
2888
2889	id_priv = mc->id_priv;
2890	if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
2891	    cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
2892		return 0;
2893
2894	mutex_lock(&id_priv->qp_mutex);
2895	if (!status && id_priv->id.qp)
2896		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2897					 multicast->rec.mlid);
2898	mutex_unlock(&id_priv->qp_mutex);
2899
2900	memset(&event, 0, sizeof event);
2901	event.status = status;
2902	event.param.ud.private_data = mc->context;
2903	if (!status) {
2904		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2905		ib_init_ah_from_mcmember(id_priv->id.device,
2906					 id_priv->id.port_num, &multicast->rec,
2907					 &event.param.ud.ah_attr);
2908		event.param.ud.qp_num = 0xFFFFFF;
2909		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2910	} else
2911		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2912
2913	ret = id_priv->id.event_handler(&id_priv->id, &event);
2914	if (ret) {
2915		cma_exch(id_priv, CMA_DESTROYING);
2916		mutex_unlock(&id_priv->handler_mutex);
2917		rdma_destroy_id(&id_priv->id);
2918		return 0;
2919	}
2920
2921	mutex_unlock(&id_priv->handler_mutex);
2922	return 0;
2923}
2924
2925static void cma_set_mgid(struct rdma_id_private *id_priv,
2926			 struct sockaddr *addr, union ib_gid *mgid)
2927{
2928	unsigned char mc_map[MAX_ADDR_LEN];
2929	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2930	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2931	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2932
2933	if (cma_any_addr(addr)) {
2934		memset(mgid, 0, sizeof *mgid);
2935	} else if ((addr->sa_family == AF_INET6) &&
2936		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
2937								 0xFF10A01B)) {
2938		/* IPv6 address is an SA-assigned MGID. */
2939		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2940	} else if ((addr->sa_family == AF_INET6)) {
2941		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
2942		if (id_priv->id.ps == RDMA_PS_UDP)
2943			mc_map[7] = 0x01;	/* Use RDMA CM signature */
2944		*mgid = *(union ib_gid *) (mc_map + 4);
2945	} else {
2946		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
2947		if (id_priv->id.ps == RDMA_PS_UDP)
2948			mc_map[7] = 0x01;	/* Use RDMA CM signature */
2949		*mgid = *(union ib_gid *) (mc_map + 4);
2950	}
2951}
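
/*
 * Note: cma_set_mgid() derives the IB multicast GID from the group address
 * supplied by the caller.  An IPv6 address whose top 32 bits already match
 * the SA-assigned MGID signature checked above is used verbatim; any other
 * IPv4 or IPv6 group address goes through the same ip_ib_mc_map()/
 * ipv6_ib_mc_map() mapping that IPoIB uses, with one byte of the mapped
 * address overridden by the RDMA CM signature for RDMA_PS_UDP so that
 * RDMA CM groups do not collide with plain IPoIB groups.
 */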
2952
2953static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2954				 struct cma_multicast *mc)
2955{
2956	struct ib_sa_mcmember_rec rec;
2957	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2958	ib_sa_comp_mask comp_mask;
2959	int ret;
2960
2961	ib_addr_get_mgid(dev_addr, &rec.mgid);
2962	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2963				     &rec.mgid, &rec);
2964	if (ret)
2965		return ret;
2966
2967	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
2968	if (id_priv->id.ps == RDMA_PS_UDP)
2969		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2970	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
2971	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2972	rec.join_state = 1;
2973
2974	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2975		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2976		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2977		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
2978		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2979
2980	if (id_priv->id.ps == RDMA_PS_IPOIB)
2981		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
2982			     IB_SA_MCMEMBER_REC_RATE_SELECTOR;
2983
2984	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2985						id_priv->id.port_num, &rec,
2986						comp_mask, GFP_KERNEL,
2987						cma_ib_mc_handler, mc);
2988	if (IS_ERR(mc->multicast.ib))
2989		return PTR_ERR(mc->multicast.ib);
2990
2991	return 0;
2992}
2993
2994static void iboe_mcast_work_handler(struct work_struct *work)
2995{
2996	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
2997	struct cma_multicast *mc = mw->mc;
2998	struct ib_sa_multicast *m = mc->multicast.ib;
2999
3000	mc->multicast.ib->context = mc;
3001	cma_ib_mc_handler(0, m);
3002	kref_put(&mc->mcref, release_mc);
3003	kfree(mw);
3004}
3005
3006static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
3007{
3008	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
3009	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
3010
3011	if (cma_any_addr(addr)) {
3012		memset(mgid, 0, sizeof *mgid);
3013	} else if (addr->sa_family == AF_INET6) {
3014		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3015	} else {
3016		mgid->raw[0] = 0xff;
3017		mgid->raw[1] = 0x0e;
3018		mgid->raw[2] = 0;
3019		mgid->raw[3] = 0;
3020		mgid->raw[4] = 0;
3021		mgid->raw[5] = 0;
3022		mgid->raw[6] = 0;
3023		mgid->raw[7] = 0;
3024		mgid->raw[8] = 0;
3025		mgid->raw[9] = 0;
3026		mgid->raw[10] = 0xff;
3027		mgid->raw[11] = 0xff;
3028		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
3029	}
3030}
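
/*
 * Note: for IBoE (RoCE) there is no SA to hand out MGIDs, so
 * cma_iboe_set_mgid() builds one directly: an IPv6 group address is used
 * as the MGID unchanged, while an IPv4 group address is embedded in an
 * ff0e::ffff:<ipv4> style GID assembled byte by byte above.
 */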
3031
3032static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3033				   struct cma_multicast *mc)
3034{
3035	struct iboe_mcast_work *work;
3036	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3037	int err;
3038	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
3039	struct net_device *ndev = NULL;
3040
3041	if (cma_zero_addr(addr))
3042		return -EINVAL;
3043
3044	work = kzalloc(sizeof *work, GFP_KERNEL);
3045	if (!work)
3046		return -ENOMEM;
3047
3048	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
3049	if (!mc->multicast.ib) {
3050		err = -ENOMEM;
3051		goto out1;
3052	}
3053
3054	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
3055
3056	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
3057	if (id_priv->id.ps == RDMA_PS_UDP)
3058		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3059
3060	if (dev_addr->bound_dev_if)
3061		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
3062	if (!ndev) {
3063		err = -ENODEV;
3064		goto out2;
3065	}
3066	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
3067	mc->multicast.ib->rec.hop_limit = 1;
3068	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
3069	dev_put(ndev);
3070	if (!mc->multicast.ib->rec.mtu) {
3071		err = -EINVAL;
3072		goto out2;
3073	}
3074	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
3075	work->id = id_priv;
3076	work->mc = mc;
3077	INIT_WORK(&work->work, iboe_mcast_work_handler);
3078	kref_get(&mc->mcref);
3079	queue_work(cma_wq, &work->work);
3080
3081	return 0;
3082
3083out2:
3084	kfree(mc->multicast.ib);
3085out1:
3086	kfree(work);
3087	return err;
3088}
3089
3090int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3091			void *context)
3092{
3093	struct rdma_id_private *id_priv;
3094	struct cma_multicast *mc;
3095	int ret;
3096
3097	id_priv = container_of(id, struct rdma_id_private, id);
3098	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
3099	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
3100		return -EINVAL;
3101
3102	mc = kmalloc(sizeof *mc, GFP_KERNEL);
3103	if (!mc)
3104		return -ENOMEM;
3105
3106	memcpy(&mc->addr, addr, ip_addr_size(addr));
3107	mc->context = context;
3108	mc->id_priv = id_priv;
3109
3110	spin_lock(&id_priv->lock);
3111	list_add(&mc->list, &id_priv->mc_list);
3112	spin_unlock(&id_priv->lock);
3113
3114	switch (rdma_node_get_transport(id->device->node_type)) {
3115	case RDMA_TRANSPORT_IB:
3116		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3117		case IB_LINK_LAYER_INFINIBAND:
3118			ret = cma_join_ib_multicast(id_priv, mc);
3119			break;
3120		case IB_LINK_LAYER_ETHERNET:
3121			kref_init(&mc->mcref);
3122			ret = cma_iboe_join_multicast(id_priv, mc);
3123			break;
3124		default:
3125			ret = -EINVAL;
3126		}
3127		break;
3128	default:
3129		ret = -ENOSYS;
3130		break;
3131	}
3132
3133	if (ret) {
3134		spin_lock_irq(&id_priv->lock);
3135		list_del(&mc->list);
3136		spin_unlock_irq(&id_priv->lock);
3137		kfree(mc);
3138	}
3139	return ret;
3140}
3141EXPORT_SYMBOL(rdma_join_multicast);
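
/*
 * Usage sketch: a UD multicast join, assuming the id has already been
 * attached to a device via rdma_bind_addr() or rdma_resolve_addr().  The
 * group address 224.1.2.3 (0xe0010203) and the context pointer are
 * illustrative.
 *
 *	struct sockaddr_in grp = {
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(0xe0010203),
 *	};
 *	int ret;
 *
 *	ret = rdma_join_multicast(id, (struct sockaddr *) &grp, my_context);
 *
 * The join completes asynchronously: RDMA_CM_EVENT_MULTICAST_JOIN delivers
 * the address handle attributes, the multicast QPN (0xFFFFFF) and the qkey
 * needed to post sends to the group, with the context pointer handed back
 * in param.ud.private_data.
 */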
3142
3143void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3144{
3145	struct rdma_id_private *id_priv;
3146	struct cma_multicast *mc;
3147
3148	id_priv = container_of(id, struct rdma_id_private, id);
3149	spin_lock_irq(&id_priv->lock);
3150	list_for_each_entry(mc, &id_priv->mc_list, list) {
3151		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
3152			list_del(&mc->list);
3153			spin_unlock_irq(&id_priv->lock);
3154
3155			if (id->qp)
3156				ib_detach_mcast(id->qp,
3157						&mc->multicast.ib->rec.mgid,
3158						mc->multicast.ib->rec.mlid);
3159			if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
3160				switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3161				case IB_LINK_LAYER_INFINIBAND:
3162					ib_sa_free_multicast(mc->multicast.ib);
3163					kfree(mc);
3164					break;
3165				case IB_LINK_LAYER_ETHERNET:
3166					kref_put(&mc->mcref, release_mc);
3167					break;
3168				default:
3169					break;
3170				}
3171			}
3172			return;
3173		}
3174	}
3175	spin_unlock_irq(&id_priv->lock);
3176}
3177EXPORT_SYMBOL(rdma_leave_multicast);
3178
3179static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
3180{
3181	struct rdma_dev_addr *dev_addr;
3182	struct cma_ndev_work *work;
3183
3184	dev_addr = &id_priv->id.route.addr.dev_addr;
3185
3186	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
3187	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
3188		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
3189		       ndev->name, &id_priv->id);
3190		work = kzalloc(sizeof *work, GFP_KERNEL);
3191		if (!work)
3192			return -ENOMEM;
3193
3194		INIT_WORK(&work->work, cma_ndev_work_handler);
3195		work->id = id_priv;
3196		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
3197		atomic_inc(&id_priv->refcount);
3198		queue_work(cma_wq, &work->work);
3199	}
3200
3201	return 0;
3202}
3203
3204static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
3205			       void *ctx)
3206{
3207	struct net_device *ndev = (struct net_device *)ctx;
3208	struct cma_device *cma_dev;
3209	struct rdma_id_private *id_priv;
3210	int ret = NOTIFY_DONE;
3211
3212	if (dev_net(ndev) != &init_net)
3213		return NOTIFY_DONE;
3214
3215	if (event != NETDEV_BONDING_FAILOVER)
3216		return NOTIFY_DONE;
3217
3218	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
3219		return NOTIFY_DONE;
3220
3221	mutex_lock(&lock);
3222	list_for_each_entry(cma_dev, &dev_list, list)
3223		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
3224			ret = cma_netdev_change(ndev, id_priv);
3225			if (ret)
3226				goto out;
3227		}
3228
3229out:
3230	mutex_unlock(&lock);
3231	return ret;
3232}
3233
3234static struct notifier_block cma_nb = {
3235	.notifier_call = cma_netdev_callback
3236};
3237
3238static void cma_add_one(struct ib_device *device)
3239{
3240	struct cma_device *cma_dev;
3241	struct rdma_id_private *id_priv;
3242
3243	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
3244	if (!cma_dev)
3245		return;
3246
3247	cma_dev->device = device;
3248
3249	init_completion(&cma_dev->comp);
3250	atomic_set(&cma_dev->refcount, 1);
3251	INIT_LIST_HEAD(&cma_dev->id_list);
3252	ib_set_client_data(device, &cma_client, cma_dev);
3253
3254	mutex_lock(&lock);
3255	list_add_tail(&cma_dev->list, &dev_list);
3256	list_for_each_entry(id_priv, &listen_any_list, list)
3257		cma_listen_on_dev(id_priv, cma_dev);
3258	mutex_unlock(&lock);
3259}
3260
3261static int cma_remove_id_dev(struct rdma_id_private *id_priv)
3262{
3263	struct rdma_cm_event event;
3264	enum cma_state state;
3265	int ret = 0;
3266
3267	/* Record that we want to remove the device */
3268	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
3269	if (state == CMA_DESTROYING)
3270		return 0;
3271
3272	cma_cancel_operation(id_priv, state);
3273	mutex_lock(&id_priv->handler_mutex);
3274
3275	/* Check for destruction from another callback. */
3276	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
3277		goto out;
3278
3279	memset(&event, 0, sizeof event);
3280	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
3281	ret = id_priv->id.event_handler(&id_priv->id, &event);
3282out:
3283	mutex_unlock(&id_priv->handler_mutex);
3284	return ret;
3285}
3286
3287static void cma_process_remove(struct cma_device *cma_dev)
3288{
3289	struct rdma_id_private *id_priv;
3290	int ret;
3291
3292	mutex_lock(&lock);
3293	while (!list_empty(&cma_dev->id_list)) {
3294		id_priv = list_entry(cma_dev->id_list.next,
3295				     struct rdma_id_private, list);
3296
3297		list_del(&id_priv->listen_list);
3298		list_del_init(&id_priv->list);
3299		atomic_inc(&id_priv->refcount);
3300		mutex_unlock(&lock);
3301
3302		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
3303		cma_deref_id(id_priv);
3304		if (ret)
3305			rdma_destroy_id(&id_priv->id);
3306
3307		mutex_lock(&lock);
3308	}
3309	mutex_unlock(&lock);
3310
3311	cma_deref_dev(cma_dev);
3312	wait_for_completion(&cma_dev->comp);
3313}
3314
3315static void cma_remove_one(struct ib_device *device)
3316{
3317	struct cma_device *cma_dev;
3318
3319	cma_dev = ib_get_client_data(device, &cma_client);
3320	if (!cma_dev)
3321		return;
3322
3323	mutex_lock(&lock);
3324	list_del(&cma_dev->list);
3325	mutex_unlock(&lock);
3326
3327	cma_process_remove(cma_dev);
3328	kfree(cma_dev);
3329}
3330
3331static int __init cma_init(void)
3332{
3333	int ret;
3334
3335	cma_wq = create_singlethread_workqueue("rdma_cm");
3336	if (!cma_wq)
3337		return -ENOMEM;
3338
3339	ib_sa_register_client(&sa_client);
3340	rdma_addr_register_client(&addr_client);
3341	register_netdevice_notifier(&cma_nb);
3342
3343	ret = ib_register_client(&cma_client);
3344	if (ret)
3345		goto err;
3346	return 0;
3347
3348err:
3349	unregister_netdevice_notifier(&cma_nb);
3350	rdma_addr_unregister_client(&addr_client);
3351	ib_sa_unregister_client(&sa_client);
3352	destroy_workqueue(cma_wq);
3353	return ret;
3354}
3355
3356static void __exit cma_cleanup(void)
3357{
3358	ib_unregister_client(&cma_client);
3359	unregister_netdevice_notifier(&cma_nb);
3360	rdma_addr_unregister_client(&addr_client);
3361	ib_sa_unregister_client(&sa_client);
3362	destroy_workqueue(cma_wq);
3363	idr_destroy(&sdp_ps);
3364	idr_destroy(&tcp_ps);
3365	idr_destroy(&udp_ps);
3366	idr_destroy(&ipoib_ps);
3367}
3368
3369module_init(cma_init);
3370module_exit(cma_cleanup);
3371