cm.c revision d4c4196f24ade5f336882587480652efde2c739c
/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);

static struct ib_client cm_client = {
	.name   = "cm",
	.add    = cm_add_one,
	.remove = cm_remove_one
};

static struct ib_cm {
	spinlock_t lock;
	struct list_head device_list;
	rwlock_t device_lock;
	struct rb_root listen_service_table;
	u64 listen_service_id;
	/* struct rb_root peer_service_table; todo: fix peer to peer */
	struct rb_root remote_qp_table;
	struct rb_root remote_id_table;
	struct rb_root remote_sidr_table;
	struct idr local_id_table;
	__be32 random_id_operand;
	struct list_head timewait_list;
	struct workqueue_struct *wq;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
	CM_REQ_COUNTER,
	CM_MRA_COUNTER,
	CM_REJ_COUNTER,
	CM_REP_COUNTER,
	CM_RTU_COUNTER,
	CM_DREQ_COUNTER,
	CM_DREP_COUNTER,
	CM_SIDR_REQ_COUNTER,
	CM_SIDR_REP_COUNTER,
	CM_LAP_COUNTER,
	CM_APR_COUNTER,
	CM_ATTR_COUNT,
	CM_ATTR_ID_OFFSET = 0x0010,
};
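
/*
 * Note: CM MAD attribute IDs are assigned consecutively starting at
 * CM_ATTR_ID_OFFSET (0x0010, the REQ), which is what lets the receive
 * path recover a counter index from a MAD header with something like
 * (a sketch, assuming a struct ib_mad_hdr *hdr is in hand):
 *
 *	idx = be16_to_cpu(hdr->attr_id) - CM_ATTR_ID_OFFSET;
 */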

enum {
	CM_XMIT,
	CM_XMIT_RETRIES,
	CM_RECV,
	CM_RECV_DUPLICATES,
	CM_COUNTER_GROUPS
};

static char const counter_group_names[CM_COUNTER_GROUPS]
				     [sizeof("cm_rx_duplicates")] = {
	"cm_tx_msgs", "cm_tx_retries",
	"cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
	struct kobject obj;
	atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
	struct attribute attr;
	int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
	.attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
	.index = _index \
}

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
	&cm_req_counter_attr.attr,
	&cm_mra_counter_attr.attr,
	&cm_rej_counter_attr.attr,
	&cm_rep_counter_attr.attr,
	&cm_rtu_counter_attr.attr,
	&cm_dreq_counter_attr.attr,
	&cm_drep_counter_attr.attr,
	&cm_sidr_req_counter_attr.attr,
	&cm_sidr_rep_counter_attr.attr,
	&cm_lap_counter_attr.attr,
	&cm_apr_counter_attr.attr,
	NULL
};

struct cm_port {
	struct cm_device *cm_dev;
	struct ib_mad_agent *mad_agent;
	struct kobject port_obj;
	u8 port_num;
	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
	struct list_head list;
	struct ib_device *ib_device;
	struct kobject dev_obj;
	u8 ack_delay;
	struct cm_port *port[0];
};

struct cm_av {
	struct cm_port *port;
	union ib_gid dgid;
	struct ib_ah_attr ah_attr;
	u16 pkey_index;
	u8 timeout;
};

struct cm_work {
	struct delayed_work work;
	struct list_head list;
	struct cm_port *port;
	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
	__be32 local_id;			/* Established / timewait */
	__be32 remote_id;
	struct ib_cm_event cm_event;
	struct ib_sa_path_rec path[0];
};

struct cm_timewait_info {
	struct cm_work work;			/* Must be first. */
	struct list_head list;
	struct rb_node remote_qp_node;
	struct rb_node remote_id_node;
	__be64 remote_ca_guid;
	__be32 remote_qpn;
	u8 inserted_remote_qp;
	u8 inserted_remote_id;
};

struct cm_id_private {
	struct ib_cm_id	id;

	struct rb_node service_node;
	struct rb_node sidr_id_node;
	spinlock_t lock;	/* Do not acquire inside cm.lock */
	struct completion comp;
	atomic_t refcount;

	struct ib_mad_send_buf *msg;
	struct cm_timewait_info *timewait_info;
	/* todo: use alternate port on send failure */
	struct cm_av av;
	struct cm_av alt_av;
	struct ib_cm_compare_data *compare_data;

	void *private_data;
	__be64 tid;
	__be32 local_qpn;
	__be32 remote_qpn;
	enum ib_qp_type qp_type;
	__be32 sq_psn;
	__be32 rq_psn;
	int timeout_ms;
	enum ib_mtu path_mtu;
	__be16 pkey;
	u8 private_data_len;
	u8 max_cm_retries;
	u8 peer_to_peer;
	u8 responder_resources;
	u8 initiator_depth;
	u8 retry_count;
	u8 rnr_retry_count;
	u8 service_timeout;
	u8 target_ack_delay;

	struct list_head work_list;
	atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
	if (atomic_dec_and_test(&cm_id_priv->refcount))
		complete(&cm_id_priv->comp);
}
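
/*
 * Lifetime sketch: every cm_id_private is created with a refcount of 1.
 * Each in-flight MAD and each queued work item holds an additional
 * reference.  cm_destroy_id() drops the creation reference and then
 * blocks on ->comp; the final cm_deref_id() fires the completion, after
 * which the structure can be freed safely.
 */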

static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
			struct ib_mad_send_buf **msg)
{
	struct ib_mad_agent *mad_agent;
	struct ib_mad_send_buf *m;
	struct ib_ah *ah;

	mad_agent = cm_id_priv->av.port->mad_agent;
	ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
			       cm_id_priv->av.pkey_index,
			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
			       GFP_ATOMIC);
	if (IS_ERR(m)) {
		ib_destroy_ah(ah);
		return PTR_ERR(m);
	}

	/* Timeout set by caller if response is expected. */
	m->ah = ah;
	m->retries = cm_id_priv->max_cm_retries;

	atomic_inc(&cm_id_priv->refcount);
	m->context[0] = cm_id_priv;
	*msg = m;
	return 0;
}
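
/*
 * cm_alloc_msg() pins the cm_id via context[0] (released again in
 * cm_free_msg()) and allocates with GFP_ATOMIC because some callers,
 * e.g. ib_send_cm_rep(), invoke it with cm_id_priv->lock held.  The
 * send timeout is left unset; callers expecting a response fill in
 * msg->timeout_ms before posting.
 */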

static int cm_alloc_response_msg(struct cm_port *port,
				 struct ib_mad_recv_wc *mad_recv_wc,
				 struct ib_mad_send_buf **msg)
{
	struct ib_mad_send_buf *m;
	struct ib_ah *ah;

	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
				  mad_recv_wc->recv_buf.grh, port->port_num);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
			       GFP_ATOMIC);
	if (IS_ERR(m)) {
		ib_destroy_ah(ah);
		return PTR_ERR(m);
	}
	m->ah = ah;
	*msg = m;
	return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
	ib_destroy_ah(msg->ah);
	if (msg->context[0])
		cm_deref_id(msg->context[0]);
	ib_free_send_mad(msg);
}

static void * cm_copy_private_data(const void *private_data,
				   u8 private_data_len)
{
	void *data;

	if (!private_data || !private_data_len)
		return NULL;

	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
	if (!data)
		return ERR_PTR(-ENOMEM);

	return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
				 void *private_data, u8 private_data_len)
{
	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
		kfree(cm_id_priv->private_data);

	cm_id_priv->private_data = private_data;
	cm_id_priv->private_data_len = private_data_len;
}

static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
				    struct ib_grh *grh, struct cm_av *av)
{
	av->port = port;
	av->pkey_index = wc->pkey_index;
	ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
			   grh, &av->ah_attr);
}

static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
	struct cm_device *cm_dev;
	struct cm_port *port = NULL;
	unsigned long flags;
	int ret;
	u8 p;

	read_lock_irqsave(&cm.device_lock, flags);
	list_for_each_entry(cm_dev, &cm.device_list, list) {
		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
					&p, NULL)) {
			port = cm_dev->port[p-1];
			break;
		}
	}
	read_unlock_irqrestore(&cm.device_lock, flags);

	if (!port)
		return -EINVAL;

	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
				  be16_to_cpu(path->pkey), &av->pkey_index);
	if (ret)
		return ret;

	av->port = port;
	ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
			     &av->ah_attr);
	av->timeout = path->packet_life_time + 1;
	return 0;
}

static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;
	int ret, id;
	static int next_id;

	do {
		spin_lock_irqsave(&cm.lock, flags);
		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
					next_id, &id);
		if (!ret)
			next_id = ((unsigned) id + 1) & MAX_ID_MASK;
		spin_unlock_irqrestore(&cm.lock, flags);
	} while ((ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL));

	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
	return ret;
}
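
/*
 * ID allocation sketch: idr_get_new_above() returns the first free slot
 * at or above next_id, and the loop retries after idr_pre_get() refills
 * the idr's preallocation cache whenever the locked allocation failed
 * with -EAGAIN.  The slot number is XORed with random_id_operand so the
 * local IDs seen on the wire are not trivially guessable; for example,
 * slot 5 with operand 0x12345678 yields local_id 0x1234567d.  cm_free_id()
 * and cm_get_id() below apply the same XOR to get back to the idr slot.
 */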

static void cm_free_id(__be32 local_id)
{
	spin_lock_irq(&cm.lock);
	idr_remove(&cm.local_id_table,
		   (__force int) (local_id ^ cm.random_id_operand));
	spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;

	cm_id_priv = idr_find(&cm.local_id_table,
			      (__force int) (local_id ^ cm.random_id_operand));
	if (cm_id_priv) {
		if (cm_id_priv->id.remote_id == remote_id)
			atomic_inc(&cm_id_priv->refcount);
		else
			cm_id_priv = NULL;
	}

	return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;

	spin_lock_irq(&cm.lock);
	cm_id_priv = cm_get_id(local_id, remote_id);
	spin_unlock_irq(&cm.lock);

	return cm_id_priv;
}

static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
{
	int i;

	for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
		((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
					     ((unsigned long *) mask)[i];
}
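
/*
 * Note that cm_mask_copy() works in unsigned long chunks, which assumes
 * IB_CM_COMPARE_SIZE is a multiple of sizeof(unsigned long) and that
 * the buffers are suitably aligned for word-sized accesses.
 */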

static int cm_compare_data(struct ib_cm_compare_data *src_data,
			   struct ib_cm_compare_data *dst_data)
{
	u8 src[IB_CM_COMPARE_SIZE];
	u8 dst[IB_CM_COMPARE_SIZE];

	if (!src_data || !dst_data)
		return 0;

	cm_mask_copy(src, src_data->data, dst_data->mask);
	cm_mask_copy(dst, dst_data->data, src_data->mask);
	return memcmp(src, dst, IB_CM_COMPARE_SIZE);
}

static int cm_compare_private_data(u8 *private_data,
				   struct ib_cm_compare_data *dst_data)
{
	u8 src[IB_CM_COMPARE_SIZE];

	if (!dst_data)
		return 0;

	cm_mask_copy(src, private_data, dst_data->mask);
	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
	return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
	return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
	return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
	return (__force u64) a > (__force u64) b;
}

static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
	struct rb_node **link = &cm.listen_service_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_id_private *cur_cm_id_priv;
	__be64 service_id = cm_id_priv->id.service_id;
	__be64 service_mask = cm_id_priv->id.service_mask;
	int data_cmp;

	while (*link) {
		parent = *link;
		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
					  service_node);
		data_cmp = cm_compare_data(cm_id_priv->compare_data,
					   cur_cm_id_priv->compare_data);
		if ((cur_cm_id_priv->id.service_mask & service_id) ==
		    (service_mask & cur_cm_id_priv->id.service_id) &&
		    (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
		    !data_cmp)
			return cur_cm_id_priv;

		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
			link = &(*link)->rb_left;
		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
			link = &(*link)->rb_right;
		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
			link = &(*link)->rb_left;
		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
			link = &(*link)->rb_right;
		else if (data_cmp < 0)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}
	rb_link_node(&cm_id_priv->service_node, parent, link);
	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
	return NULL;
}
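
/*
 * Listen tree sketch: entries are ordered by (device, service ID,
 * compare data), while the match test at the top of the loop applies
 * both sides' service masks, so a wildcarded listen is treated as
 * colliding with any existing listen whose masked service ID range
 * overlaps on the same device.  Insertion returns the conflicting
 * entry, or NULL once the new node has been linked and recolored.
 */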

static struct cm_id_private * cm_find_listen(struct ib_device *device,
					     __be64 service_id,
					     u8 *private_data)
{
	struct rb_node *node = cm.listen_service_table.rb_node;
	struct cm_id_private *cm_id_priv;
	int data_cmp;

	while (node) {
		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
		data_cmp = cm_compare_private_data(private_data,
						   cm_id_priv->compare_data);
		if ((cm_id_priv->id.service_mask & service_id) ==
		     cm_id_priv->id.service_id &&
		    (cm_id_priv->id.device == device) && !data_cmp)
			return cm_id_priv;

		if (device < cm_id_priv->id.device)
			node = node->rb_left;
		else if (device > cm_id_priv->id.device)
			node = node->rb_right;
		else if (be64_lt(service_id, cm_id_priv->id.service_id))
			node = node->rb_left;
		else if (be64_gt(service_id, cm_id_priv->id.service_id))
			node = node->rb_right;
		else if (data_cmp < 0)
			node = node->rb_left;
		else
			node = node->rb_right;
	}
	return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
						     *timewait_info)
{
	struct rb_node **link = &cm.remote_id_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_timewait_info *cur_timewait_info;
	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
	__be32 remote_id = timewait_info->work.remote_id;

	while (*link) {
		parent = *link;
		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
					     remote_id_node);
		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
			link = &(*link)->rb_right;
		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_left;
		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_right;
		else
			return cur_timewait_info;
	}
	timewait_info->inserted_remote_id = 1;
	rb_link_node(&timewait_info->remote_id_node, parent, link);
	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
	return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
						   __be32 remote_id)
{
	struct rb_node *node = cm.remote_id_table.rb_node;
	struct cm_timewait_info *timewait_info;

	while (node) {
		timewait_info = rb_entry(node, struct cm_timewait_info,
					 remote_id_node);
		if (be32_lt(remote_id, timewait_info->work.remote_id))
			node = node->rb_left;
		else if (be32_gt(remote_id, timewait_info->work.remote_id))
			node = node->rb_right;
		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
			node = node->rb_left;
		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
			node = node->rb_right;
		else
			return timewait_info;
	}
	return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
						      *timewait_info)
{
	struct rb_node **link = &cm.remote_qp_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_timewait_info *cur_timewait_info;
	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
	__be32 remote_qpn = timewait_info->remote_qpn;

	while (*link) {
		parent = *link;
		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
					     remote_qp_node);
		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
			link = &(*link)->rb_right;
		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_left;
		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_right;
		else
			return cur_timewait_info;
	}
	timewait_info->inserted_remote_qp = 1;
	rb_link_node(&timewait_info->remote_qp_node, parent, link);
	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
	return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
						    *cm_id_priv)
{
	struct rb_node **link = &cm.remote_sidr_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_id_private *cur_cm_id_priv;
	union ib_gid *port_gid = &cm_id_priv->av.dgid;
	__be32 remote_id = cm_id_priv->id.remote_id;

	while (*link) {
		parent = *link;
		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
					  sidr_id_node);
		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
			link = &(*link)->rb_right;
		else {
			int cmp;
			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
				     sizeof *port_gid);
			if (cmp < 0)
				link = &(*link)->rb_left;
			else if (cmp > 0)
				link = &(*link)->rb_right;
			else
				return cur_cm_id_priv;
		}
	}
	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
	return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
			       enum ib_cm_sidr_status status)
{
	struct ib_cm_sidr_rep_param param;

	memset(&param, 0, sizeof param);
	param.status = status;
	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
				 ib_cm_handler cm_handler,
				 void *context)
{
	struct cm_id_private *cm_id_priv;
	int ret;

	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
	if (!cm_id_priv)
		return ERR_PTR(-ENOMEM);

	cm_id_priv->id.state = IB_CM_IDLE;
	cm_id_priv->id.device = device;
	cm_id_priv->id.cm_handler = cm_handler;
	cm_id_priv->id.context = context;
	cm_id_priv->id.remote_cm_qpn = 1;
	ret = cm_alloc_id(cm_id_priv);
	if (ret)
		goto error;

	spin_lock_init(&cm_id_priv->lock);
	init_completion(&cm_id_priv->comp);
	INIT_LIST_HEAD(&cm_id_priv->work_list);
	atomic_set(&cm_id_priv->work_count, -1);
	atomic_set(&cm_id_priv->refcount, 1);
	return &cm_id_priv->id;

error:
	kfree(cm_id_priv);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(ib_create_cm_id);

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
	struct cm_work *work;

	if (list_empty(&cm_id_priv->work_list))
		return NULL;

	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
	list_del(&work->list);
	return work;
}

static void cm_free_work(struct cm_work *work)
{
	if (work->mad_recv_wc)
		ib_free_recv_mad(work->mad_recv_wc);
	kfree(work);
}

static inline int cm_convert_to_ms(int iba_time)
{
	/* approximate conversion to ms from 4.096us x 2^iba_time */
	return 1 << max(iba_time - 8, 0);
}
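
/*
 * Worked example: an IBA timeout field of 16 encodes
 * 4.096us * 2^16 ~= 268ms; since 4.096us is roughly 2^-8 ms, the shift
 * 1 << (16 - 8) gives 256ms, close enough for scheduling retries and
 * timewait work.
 */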

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
	int ack_timeout = packet_life_time + 1;

	if (ack_timeout >= ca_ack_delay)
		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
	else
		ack_timeout = ca_ack_delay +
			      (ack_timeout >= (ca_ack_delay - 1));

	return min(31, ack_timeout);
}
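
/*
 * Worked example of the rounding rule: with ca_ack_delay = 4 and
 * packet_life_time = 3, ack_timeout starts at 4 and ca_ack_delay is
 * within one step of it, so the result is bumped to 5; that happens to
 * be exact, since 2^4 + 2 * 2^3 = 2^5.
 */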

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
	if (timewait_info->inserted_remote_id) {
		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
		timewait_info->inserted_remote_id = 0;
	}

	if (timewait_info->inserted_remote_qp) {
		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
		timewait_info->inserted_remote_qp = 0;
	}
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
	struct cm_timewait_info *timewait_info;

	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
	if (!timewait_info)
		return ERR_PTR(-ENOMEM);

	timewait_info->work.local_id = local_id;
	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
	return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
	int wait_time;
	unsigned long flags;

	spin_lock_irqsave(&cm.lock, flags);
	cm_cleanup_timewait(cm_id_priv->timewait_info);
	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
	spin_unlock_irqrestore(&cm.lock, flags);

	/*
	 * The cm_id could be destroyed by the user before we exit timewait.
	 * To protect against this, we search for the cm_id after exiting
	 * timewait before notifying the user that we've exited timewait.
	 */
	cm_id_priv->id.state = IB_CM_TIMEWAIT;
	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
			   msecs_to_jiffies(wait_time));
	cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;

	cm_id_priv->id.state = IB_CM_IDLE;
	if (cm_id_priv->timewait_info) {
		spin_lock_irqsave(&cm.lock, flags);
		cm_cleanup_timewait(cm_id_priv->timewait_info);
		spin_unlock_irqrestore(&cm.lock, flags);
		kfree(cm_id_priv->timewait_info);
		cm_id_priv->timewait_info = NULL;
	}
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
	struct cm_id_private *cm_id_priv;
	struct cm_work *work;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
	spin_lock_irq(&cm_id_priv->lock);
	switch (cm_id->state) {
	case IB_CM_LISTEN:
		cm_id->state = IB_CM_IDLE;
		spin_unlock_irq(&cm_id_priv->lock);
		spin_lock_irq(&cm.lock);
		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
		spin_unlock_irq(&cm.lock);
		break;
	case IB_CM_SIDR_REQ_SENT:
		cm_id->state = IB_CM_IDLE;
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	case IB_CM_SIDR_REQ_RCVD:
		spin_unlock_irq(&cm_id_priv->lock);
		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
		break;
	case IB_CM_REQ_SENT:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
			       &cm_id_priv->id.device->node_guid,
			       sizeof cm_id_priv->id.device->node_guid,
			       NULL, 0);
		break;
	case IB_CM_REQ_RCVD:
		if (err == -ENOMEM) {
			/* Do not reject to allow future retries. */
			cm_reset_to_idle(cm_id_priv);
			spin_unlock_irq(&cm_id_priv->lock);
		} else {
			spin_unlock_irq(&cm_id_priv->lock);
			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
				       NULL, 0, NULL, 0);
		}
		break;
	case IB_CM_MRA_REQ_RCVD:
	case IB_CM_REP_SENT:
	case IB_CM_MRA_REP_RCVD:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		/* Fall through */
	case IB_CM_MRA_REQ_SENT:
	case IB_CM_REP_RCVD:
	case IB_CM_MRA_REP_SENT:
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
			       NULL, 0, NULL, 0);
		break;
	case IB_CM_ESTABLISHED:
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_dreq(cm_id, NULL, 0);
		goto retest;
	case IB_CM_DREQ_SENT:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		cm_enter_timewait(cm_id_priv);
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	case IB_CM_DREQ_RCVD:
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_drep(cm_id, NULL, 0);
		break;
	default:
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	}

	cm_free_id(cm_id->local_id);
	cm_deref_id(cm_id_priv);
	wait_for_completion(&cm_id_priv->comp);
	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
		cm_free_work(work);
	kfree(cm_id_priv->compare_data);
	kfree(cm_id_priv->private_data);
	kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
	cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
		 struct ib_cm_compare_data *compare_data)
{
	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
	unsigned long flags;
	int ret = 0;

	service_mask = service_mask ? service_mask :
		       __constant_cpu_to_be64(~0ULL);
	service_id &= service_mask;
	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
		return -EINVAL;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	if (cm_id->state != IB_CM_IDLE)
		return -EINVAL;

	if (compare_data) {
		cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
						   GFP_KERNEL);
		if (!cm_id_priv->compare_data)
			return -ENOMEM;
		cm_mask_copy(cm_id_priv->compare_data->data,
			     compare_data->data, compare_data->mask);
		memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
		       IB_CM_COMPARE_SIZE);
	}

	cm_id->state = IB_CM_LISTEN;

	spin_lock_irqsave(&cm.lock, flags);
	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
		cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
	} else {
		cm_id->service_id = service_id;
		cm_id->service_mask = service_mask;
	}
	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
	spin_unlock_irqrestore(&cm.lock, flags);

	if (cur_cm_id_priv) {
		cm_id->state = IB_CM_IDLE;
		kfree(cm_id_priv->compare_data);
		cm_id_priv->compare_data = NULL;
		ret = -EBUSY;
	}
	return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
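
/*
 * Typical listen-side usage (a sketch; MY_SERVICE_ID, my_req_handler and
 * my_ctx are made-up names, and error handling is omitted):
 *
 *	struct ib_cm_id *id;
 *
 *	id = ib_create_cm_id(device, my_req_handler, my_ctx);
 *	ib_cm_listen(id, cpu_to_be64(MY_SERVICE_ID), 0, NULL);
 *
 * A zero service_mask is promoted to ~0ULL above, i.e. an exact-match
 * listen on MY_SERVICE_ID.
 */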

static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
			  enum cm_msg_sequence msg_seq)
{
	u64 hi_tid, low_tid;

	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
	low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
			  (msg_seq << 30));
	return cpu_to_be64(hi_tid | low_tid);
}
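
/*
 * TID layout sketch: the upper 32 bits carry the MAD agent's hi_tid so
 * responses route back to this agent; the low 32 bits are the local
 * comm ID with the message sequence (REQ/LAP/DREQ/SIDR) OR'd into bits
 * 30-31, giving each class of message its own transaction ID space.
 */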

static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
			      __be16 attr_id, __be64 tid)
{
	hdr->base_version  = IB_MGMT_BASE_VERSION;
	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
	hdr->class_version = IB_CM_CLASS_VERSION;
	hdr->method	   = IB_MGMT_METHOD_SEND;
	hdr->attr_id	   = attr_id;
	hdr->tid	   = tid;
}

static void cm_format_req(struct cm_req_msg *req_msg,
			  struct cm_id_private *cm_id_priv,
			  struct ib_cm_req_param *param)
{
	struct ib_sa_path_rec *pri_path = param->primary_path;
	struct ib_sa_path_rec *alt_path = param->alternate_path;

	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));

	req_msg->local_comm_id = cm_id_priv->id.local_id;
	req_msg->service_id = param->service_id;
	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
	cm_req_set_resp_res(req_msg, param->responder_resources);
	cm_req_set_init_depth(req_msg, param->initiator_depth);
	cm_req_set_remote_resp_timeout(req_msg,
				       param->remote_cm_response_timeout);
	cm_req_set_qp_type(req_msg, param->qp_type);
	cm_req_set_flow_ctrl(req_msg, param->flow_control);
	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
	cm_req_set_local_resp_timeout(req_msg,
				      param->local_cm_response_timeout);
	cm_req_set_retry_count(req_msg, param->retry_count);
	req_msg->pkey = param->primary_path->pkey;
	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
	cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
	cm_req_set_srq(req_msg, param->srq);

	if (pri_path->hop_limit <= 1) {
		req_msg->primary_local_lid = pri_path->slid;
		req_msg->primary_remote_lid = pri_path->dlid;
	} else {
		/* Work-around until there's a way to obtain remote LID info */
		req_msg->primary_local_lid = IB_LID_PERMISSIVE;
		req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
	}
	req_msg->primary_local_gid = pri_path->sgid;
	req_msg->primary_remote_gid = pri_path->dgid;
	cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
	cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
	req_msg->primary_traffic_class = pri_path->traffic_class;
	req_msg->primary_hop_limit = pri_path->hop_limit;
	cm_req_set_primary_sl(req_msg, pri_path->sl);
	cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
	cm_req_set_primary_local_ack_timeout(req_msg,
		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
			       pri_path->packet_life_time));

	if (alt_path) {
		if (alt_path->hop_limit <= 1) {
			req_msg->alt_local_lid = alt_path->slid;
			req_msg->alt_remote_lid = alt_path->dlid;
		} else {
			req_msg->alt_local_lid = IB_LID_PERMISSIVE;
			req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
		}
		req_msg->alt_local_gid = alt_path->sgid;
		req_msg->alt_remote_gid = alt_path->dgid;
		cm_req_set_alt_flow_label(req_msg,
					  alt_path->flow_label);
		cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
		req_msg->alt_traffic_class = alt_path->traffic_class;
		req_msg->alt_hop_limit = alt_path->hop_limit;
		cm_req_set_alt_sl(req_msg, alt_path->sl);
		cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
		cm_req_set_alt_local_ack_timeout(req_msg,
			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
				       alt_path->packet_life_time));
	}

	if (param->private_data && param->private_data_len)
		memcpy(req_msg->private_data, param->private_data,
		       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
	/* peer-to-peer not supported */
	if (param->peer_to_peer)
		return -EINVAL;

	if (!param->primary_path)
		return -EINVAL;

	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
		return -EINVAL;

	if (param->private_data &&
	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
		return -EINVAL;

	if (param->alternate_path &&
	    (param->alternate_path->pkey != param->primary_path->pkey ||
	     param->alternate_path->mtu != param->primary_path->mtu))
		return -EINVAL;

	return 0;
}

int ib_send_cm_req(struct ib_cm_id *cm_id,
		   struct ib_cm_req_param *param)
{
	struct cm_id_private *cm_id_priv;
	struct cm_req_msg *req_msg;
	unsigned long flags;
	int ret;

	ret = cm_validate_req_param(param);
	if (ret)
		return ret;

	/* Verify that we're not in timewait. */
	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id->state != IB_CM_IDLE) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = -EINVAL;
		goto out;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
							    id.local_id);
	if (IS_ERR(cm_id_priv->timewait_info)) {
		ret = PTR_ERR(cm_id_priv->timewait_info);
		goto out;
	}

	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
	if (ret)
		goto error1;
	if (param->alternate_path) {
		ret = cm_init_av_by_path(param->alternate_path,
					 &cm_id_priv->alt_av);
		if (ret)
			goto error1;
	}
	cm_id->service_id = param->service_id;
	cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
	cm_id_priv->timeout_ms = cm_convert_to_ms(
				    param->primary_path->packet_life_time) * 2 +
				 cm_convert_to_ms(
				    param->remote_cm_response_timeout);
	cm_id_priv->max_cm_retries = param->max_cm_retries;
	cm_id_priv->initiator_depth = param->initiator_depth;
	cm_id_priv->responder_resources = param->responder_resources;
	cm_id_priv->retry_count = param->retry_count;
	cm_id_priv->path_mtu = param->primary_path->mtu;
	cm_id_priv->pkey = param->primary_path->pkey;
	cm_id_priv->qp_type = param->qp_type;

	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
	if (ret)
		goto error1;

	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
	cm_format_req(req_msg, cm_id_priv, param);
	cm_id_priv->tid = req_msg->hdr.tid;
	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;

	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
	if (ret) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		goto error2;
	}
	BUG_ON(cm_id->state != IB_CM_IDLE);
	cm_id->state = IB_CM_REQ_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return 0;

error2:	cm_free_msg(cm_id_priv->msg);
error1:	kfree(cm_id_priv->timewait_info);
out:	return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);

static int cm_issue_rej(struct cm_port *port,
			struct ib_mad_recv_wc *mad_recv_wc,
			enum ib_cm_rej_reason reason,
			enum cm_msg_response msg_rejected,
			void *ari, u8 ari_length)
{
	struct ib_mad_send_buf *msg = NULL;
	struct cm_rej_msg *rej_msg, *rcv_msg;
	int ret;

	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
	if (ret)
		return ret;

	/* We just need common CM header information.  Cast to any message. */
	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
	rej_msg = (struct cm_rej_msg *) msg->mad;

	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
	rej_msg->reason = cpu_to_be16(reason);

	if (ari && ari_length) {
		cm_rej_set_reject_info_len(rej_msg, ari_length);
		memcpy(rej_msg->ari, ari, ari_length);
	}

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		cm_free_msg(msg);

	return ret;
}

static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
				    __be32 local_qpn, __be32 remote_qpn)
{
	return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
		((local_ca_guid == remote_ca_guid) &&
		 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
}
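
/*
 * cm_is_active_peer() is the tie-breaker for simultaneous (peer-to-peer)
 * connection attempts: the side with the numerically larger CA GUID, or
 * on a GUID tie the larger QPN, is treated as the active peer.
 */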

static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
					    struct ib_sa_path_rec *primary_path,
					    struct ib_sa_path_rec *alt_path)
{
	memset(primary_path, 0, sizeof *primary_path);
	primary_path->dgid = req_msg->primary_local_gid;
	primary_path->sgid = req_msg->primary_remote_gid;
	primary_path->dlid = req_msg->primary_local_lid;
	primary_path->slid = req_msg->primary_remote_lid;
	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
	primary_path->hop_limit = req_msg->primary_hop_limit;
	primary_path->traffic_class = req_msg->primary_traffic_class;
	primary_path->reversible = 1;
	primary_path->pkey = req_msg->pkey;
	primary_path->sl = cm_req_get_primary_sl(req_msg);
	primary_path->mtu_selector = IB_SA_EQ;
	primary_path->mtu = cm_req_get_path_mtu(req_msg);
	primary_path->rate_selector = IB_SA_EQ;
	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
	primary_path->packet_life_time_selector = IB_SA_EQ;
	primary_path->packet_life_time =
		cm_req_get_primary_local_ack_timeout(req_msg);
	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);

	if (req_msg->alt_local_lid) {
		memset(alt_path, 0, sizeof *alt_path);
		alt_path->dgid = req_msg->alt_local_gid;
		alt_path->sgid = req_msg->alt_remote_gid;
		alt_path->dlid = req_msg->alt_local_lid;
		alt_path->slid = req_msg->alt_remote_lid;
		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
		alt_path->hop_limit = req_msg->alt_hop_limit;
		alt_path->traffic_class = req_msg->alt_traffic_class;
		alt_path->reversible = 1;
		alt_path->pkey = req_msg->pkey;
		alt_path->sl = cm_req_get_alt_sl(req_msg);
		alt_path->mtu_selector = IB_SA_EQ;
		alt_path->mtu = cm_req_get_path_mtu(req_msg);
		alt_path->rate_selector = IB_SA_EQ;
		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
		alt_path->packet_life_time_selector = IB_SA_EQ;
		alt_path->packet_life_time =
			cm_req_get_alt_local_ack_timeout(req_msg);
		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
	}
}
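
/*
 * Note the deliberate swap above: the REQ describes the path from the
 * sender's perspective, so the sender's local LID/GID become our
 * destination and vice versa.  The stored ack timeout is roughly
 * packet life time + 1, so one is subtracted (clamped at zero) to
 * approximate the path's life time.
 */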

static void cm_format_req_event(struct cm_work *work,
				struct cm_id_private *cm_id_priv,
				struct ib_cm_id *listen_id)
{
	struct cm_req_msg *req_msg;
	struct ib_cm_req_event_param *param;

	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
	param = &work->cm_event.param.req_rcvd;
	param->listen_id = listen_id;
	param->port = cm_id_priv->av.port->port_num;
	param->primary_path = &work->path[0];
	if (req_msg->alt_local_lid)
		param->alternate_path = &work->path[1];
	else
		param->alternate_path = NULL;
	param->remote_ca_guid = req_msg->local_ca_guid;
	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
	param->qp_type = cm_req_get_qp_type(req_msg);
	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
	param->responder_resources = cm_req_get_init_depth(req_msg);
	param->initiator_depth = cm_req_get_resp_res(req_msg);
	param->local_cm_response_timeout =
					cm_req_get_remote_resp_timeout(req_msg);
	param->flow_control = cm_req_get_flow_ctrl(req_msg);
	param->remote_cm_response_timeout =
					cm_req_get_local_resp_timeout(req_msg);
	param->retry_count = cm_req_get_retry_count(req_msg);
	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
	param->srq = cm_req_get_srq(req_msg);
	work->cm_event.private_data = &req_msg->private_data;
}

static void cm_process_work(struct cm_id_private *cm_id_priv,
			    struct cm_work *work)
{
	int ret;

	/* We will typically only have the current event to report. */
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
	cm_free_work(work);

	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
		spin_lock_irq(&cm_id_priv->lock);
		work = cm_dequeue_work(cm_id_priv);
		spin_unlock_irq(&cm_id_priv->lock);
		BUG_ON(!work);
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
						&work->cm_event);
		cm_free_work(work);
	}
	cm_deref_id(cm_id_priv);
	if (ret)
		cm_destroy_id(&cm_id_priv->id, ret);
}
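
/*
 * Dispatch sketch: work_count starts at -1, so the first event on a
 * cm_id is delivered directly.  Handlers that find work already in
 * progress queue additional events on work_list and bump work_count;
 * the loop above drains that backlog until atomic_add_negative() brings
 * the count back below zero.  A non-zero handler return destroys the id.
 */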

static void cm_format_mra(struct cm_mra_msg *mra_msg,
			  struct cm_id_private *cm_id_priv,
			  enum cm_msg_response msg_mraed, u8 service_timeout,
			  const void *private_data, u8 private_data_len)
{
	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
	mra_msg->local_comm_id = cm_id_priv->id.local_id;
	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
	cm_mra_set_service_timeout(mra_msg, service_timeout);

	if (private_data && private_data_len)
		memcpy(mra_msg->private_data, private_data, private_data_len);
}

static void cm_format_rej(struct cm_rej_msg *rej_msg,
			  struct cm_id_private *cm_id_priv,
			  enum ib_cm_rej_reason reason,
			  void *ari,
			  u8 ari_length,
			  const void *private_data,
			  u8 private_data_len)
{
	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;

	switch(cm_id_priv->id.state) {
	case IB_CM_REQ_RCVD:
		rej_msg->local_comm_id = 0;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
		break;
	case IB_CM_MRA_REQ_SENT:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
		break;
	case IB_CM_REP_RCVD:
	case IB_CM_MRA_REP_SENT:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
		break;
	default:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
		break;
	}

	rej_msg->reason = cpu_to_be16(reason);
	if (ari && ari_length) {
		cm_rej_set_reject_info_len(rej_msg, ari_length);
		memcpy(rej_msg->ari, ari, ari_length);
	}

	if (private_data && private_data_len)
		memcpy(rej_msg->private_data, private_data, private_data_len);
}

static void cm_dup_req_handler(struct cm_work *work,
			       struct cm_id_private *cm_id_priv)
{
	struct ib_mad_send_buf *msg = NULL;
	int ret;

	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
			counter[CM_REQ_COUNTER]);

	/* Quick state check to discard duplicate REQs. */
	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
		return;

	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
	if (ret)
		return;

	spin_lock_irq(&cm_id_priv->lock);
	switch (cm_id_priv->id.state) {
	case IB_CM_MRA_REQ_SENT:
		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
			      cm_id_priv->private_data,
			      cm_id_priv->private_data_len);
		break;
	case IB_CM_TIMEWAIT:
		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
		break;
	default:
		goto unlock;
	}
	spin_unlock_irq(&cm_id_priv->lock);

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		goto free;
	return;

unlock:	spin_unlock_irq(&cm_id_priv->lock);
free:	cm_free_msg(msg);
}

static struct cm_id_private * cm_match_req(struct cm_work *work,
					   struct cm_id_private *cm_id_priv)
{
	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
	struct cm_timewait_info *timewait_info;
	struct cm_req_msg *req_msg;

	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;

	/* Check for possible duplicate REQ. */
	spin_lock_irq(&cm.lock);
	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
	if (timewait_info) {
		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
					   timewait_info->work.remote_id);
		spin_unlock_irq(&cm.lock);
		if (cur_cm_id_priv) {
			cm_dup_req_handler(work, cur_cm_id_priv);
			cm_deref_id(cur_cm_id_priv);
		}
		return NULL;
	}

	/* Check for stale connections. */
	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
	if (timewait_info) {
		cm_cleanup_timewait(cm_id_priv->timewait_info);
		spin_unlock_irq(&cm.lock);
		cm_issue_rej(work->port, work->mad_recv_wc,
			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
			     NULL, 0);
		return NULL;
	}

	/* Find matching listen request. */
	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
					   req_msg->service_id,
					   req_msg->private_data);
	if (!listen_cm_id_priv) {
		cm_cleanup_timewait(cm_id_priv->timewait_info);
		spin_unlock_irq(&cm.lock);
		cm_issue_rej(work->port, work->mad_recv_wc,
			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
			     NULL, 0);
		goto out;
	}
	atomic_inc(&listen_cm_id_priv->refcount);
	atomic_inc(&cm_id_priv->refcount);
	cm_id_priv->id.state = IB_CM_REQ_RCVD;
	atomic_inc(&cm_id_priv->work_count);
	spin_unlock_irq(&cm.lock);
out:
	return listen_cm_id_priv;
}
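
/*
 * cm_match_req() resolves an incoming REQ in three steps under cm.lock:
 * a remote comm ID collision means a duplicate REQ (answered out of the
 * existing cm_id's state), a remote QPN collision means the remote QP
 * is still tied to a previous connection (rejected as stale), and only
 * then is the REQ matched against the listen tree.
 */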
1480
1481/*
1482 * Work-around for inter-subnet connections.  If the LIDs are permissive,
1483 * we need to override the LID/SL data in the REQ with the LID information
1484 * in the work completion.
1485 */
1486static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1487{
1488	if (!cm_req_get_primary_subnet_local(req_msg)) {
1489		if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1490			req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1491			cm_req_set_primary_sl(req_msg, wc->sl);
1492		}
1493
1494		if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1495			req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1496	}
1497
1498	if (!cm_req_get_alt_subnet_local(req_msg)) {
1499		if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1500			req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1501			cm_req_set_alt_sl(req_msg, wc->sl);
1502		}
1503
1504		if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1505			req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1506	}
1507}
1508
1509static int cm_req_handler(struct cm_work *work)
1510{
1511	struct ib_cm_id *cm_id;
1512	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1513	struct cm_req_msg *req_msg;
1514	int ret;
1515
1516	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1517
1518	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1519	if (IS_ERR(cm_id))
1520		return PTR_ERR(cm_id);
1521
1522	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1523	cm_id_priv->id.remote_id = req_msg->local_comm_id;
1524	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1525				work->mad_recv_wc->recv_buf.grh,
1526				&cm_id_priv->av);
1527	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1528							    id.local_id);
1529	if (IS_ERR(cm_id_priv->timewait_info)) {
1530		ret = PTR_ERR(cm_id_priv->timewait_info);
1531		goto destroy;
1532	}
1533	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1534	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1535	cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1536
1537	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1538	if (!listen_cm_id_priv) {
1539		ret = -EINVAL;
1540		kfree(cm_id_priv->timewait_info);
1541		goto destroy;
1542	}
1543
1544	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1545	cm_id_priv->id.context = listen_cm_id_priv->id.context;
1546	cm_id_priv->id.service_id = req_msg->service_id;
1547	cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);
1548
1549	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1550	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1551	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1552	if (ret) {
1553		ib_get_cached_gid(work->port->cm_dev->ib_device,
1554				  work->port->port_num, 0, &work->path[0].sgid);
1555		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1556			       &work->path[0].sgid, sizeof work->path[0].sgid,
1557			       NULL, 0);
1558		goto rejected;
1559	}
1560	if (req_msg->alt_local_lid) {
1561		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
1562		if (ret) {
1563			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1564				       &work->path[0].sgid,
1565				       sizeof work->path[0].sgid, NULL, 0);
1566			goto rejected;
1567		}
1568	}
1569	cm_id_priv->tid = req_msg->hdr.tid;
1570	cm_id_priv->timeout_ms = cm_convert_to_ms(
1571					cm_req_get_local_resp_timeout(req_msg));
1572	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1573	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1574	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1575	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1576	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1577	cm_id_priv->pkey = req_msg->pkey;
1578	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1579	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1580	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1581	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1582
1583	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1584	cm_process_work(cm_id_priv, work);
1585	cm_deref_id(listen_cm_id_priv);
1586	return 0;
1587
1588rejected:
1589	atomic_dec(&cm_id_priv->refcount);
1590	cm_deref_id(listen_cm_id_priv);
1591destroy:
1592	ib_destroy_cm_id(cm_id);
1593	return ret;
1594}
1595
1596static void cm_format_rep(struct cm_rep_msg *rep_msg,
1597			  struct cm_id_private *cm_id_priv,
1598			  struct ib_cm_rep_param *param)
1599{
1600	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1601	rep_msg->local_comm_id = cm_id_priv->id.local_id;
1602	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1603	cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1604	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1605	rep_msg->resp_resources = param->responder_resources;
1606	rep_msg->initiator_depth = param->initiator_depth;
1607	cm_rep_set_target_ack_delay(rep_msg,
1608				    cm_id_priv->av.port->cm_dev->ack_delay);
1609	cm_rep_set_failover(rep_msg, param->failover_accepted);
1610	cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1611	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1612	cm_rep_set_srq(rep_msg, param->srq);
1613	rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1614
1615	if (param->private_data && param->private_data_len)
1616		memcpy(rep_msg->private_data, param->private_data,
1617		       param->private_data_len);
1618}
1619
1620int ib_send_cm_rep(struct ib_cm_id *cm_id,
1621		   struct ib_cm_rep_param *param)
1622{
1623	struct cm_id_private *cm_id_priv;
1624	struct ib_mad_send_buf *msg;
1625	struct cm_rep_msg *rep_msg;
1626	unsigned long flags;
1627	int ret;
1628
1629	if (param->private_data &&
1630	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1631		return -EINVAL;
1632
1633	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1634	spin_lock_irqsave(&cm_id_priv->lock, flags);
1635	if (cm_id->state != IB_CM_REQ_RCVD &&
1636	    cm_id->state != IB_CM_MRA_REQ_SENT) {
1637		ret = -EINVAL;
1638		goto out;
1639	}
1640
1641	ret = cm_alloc_msg(cm_id_priv, &msg);
1642	if (ret)
1643		goto out;
1644
1645	rep_msg = (struct cm_rep_msg *) msg->mad;
1646	cm_format_rep(rep_msg, cm_id_priv, param);
1647	msg->timeout_ms = cm_id_priv->timeout_ms;
1648	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1649
1650	ret = ib_post_send_mad(msg, NULL);
1651	if (ret) {
1652		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1653		cm_free_msg(msg);
1654		return ret;
1655	}
1656
1657	cm_id->state = IB_CM_REP_SENT;
1658	cm_id_priv->msg = msg;
1659	cm_id_priv->initiator_depth = param->initiator_depth;
1660	cm_id_priv->responder_resources = param->responder_resources;
1661	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1662	cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
1663
1664out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1665	return ret;
1666}
1667EXPORT_SYMBOL(ib_send_cm_rep);
1668
1669static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1670			  struct cm_id_private *cm_id_priv,
1671			  const void *private_data,
1672			  u8 private_data_len)
1673{
1674	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1675	rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1676	rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1677
1678	if (private_data && private_data_len)
1679		memcpy(rtu_msg->private_data, private_data, private_data_len);
1680}
1681
1682int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1683		   const void *private_data,
1684		   u8 private_data_len)
1685{
1686	struct cm_id_private *cm_id_priv;
1687	struct ib_mad_send_buf *msg;
1688	unsigned long flags;
1689	void *data;
1690	int ret;
1691
1692	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1693		return -EINVAL;
1694
1695	data = cm_copy_private_data(private_data, private_data_len);
1696	if (IS_ERR(data))
1697		return PTR_ERR(data);
1698
1699	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1700	spin_lock_irqsave(&cm_id_priv->lock, flags);
1701	if (cm_id->state != IB_CM_REP_RCVD &&
1702	    cm_id->state != IB_CM_MRA_REP_SENT) {
1703		ret = -EINVAL;
1704		goto error;
1705	}
1706
1707	ret = cm_alloc_msg(cm_id_priv, &msg);
1708	if (ret)
1709		goto error;
1710
1711	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1712		      private_data, private_data_len);
1713
1714	ret = ib_post_send_mad(msg, NULL);
1715	if (ret) {
1716		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1717		cm_free_msg(msg);
1718		kfree(data);
1719		return ret;
1720	}
1721
1722	cm_id->state = IB_CM_ESTABLISHED;
1723	cm_set_private_data(cm_id_priv, data, private_data_len);
1724	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1725	return 0;
1726
1727error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1728	kfree(data);
1729	return ret;
1730}
1731EXPORT_SYMBOL(ib_send_cm_rtu);
1732
1733static void cm_format_rep_event(struct cm_work *work)
1734{
1735	struct cm_rep_msg *rep_msg;
1736	struct ib_cm_rep_event_param *param;
1737
1738	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1739	param = &work->cm_event.param.rep_rcvd;
1740	param->remote_ca_guid = rep_msg->local_ca_guid;
1741	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1742	param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
1743	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
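	/*
	 * Note the intentional swap: the peer's initiator depth is the
	 * number of RDMA read/atomic resources we must respond with, and
	 * vice versa.
	 */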
1744	param->responder_resources = rep_msg->initiator_depth;
1745	param->initiator_depth = rep_msg->resp_resources;
1746	param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1747	param->failover_accepted = cm_rep_get_failover(rep_msg);
1748	param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1749	param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1750	param->srq = cm_rep_get_srq(rep_msg);
1751	work->cm_event.private_data = &rep_msg->private_data;
1752}
1753
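/*
 * A REP that no longer matches a cm_id awaiting one is most likely a
 * duplicate whose original has already been processed.  Re-send the RTU
 * (if the connection is established) or an MRA (if one was pending) so
 * the peer stops retrying, since our earlier response may have been lost.
 */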
1754static void cm_dup_rep_handler(struct cm_work *work)
1755{
1756	struct cm_id_private *cm_id_priv;
1757	struct cm_rep_msg *rep_msg;
1758	struct ib_mad_send_buf *msg = NULL;
1759	int ret;
1760
1761	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1762	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1763				   rep_msg->local_comm_id);
1764	if (!cm_id_priv)
1765		return;
1766
1767	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1768			counter[CM_REP_COUNTER]);
1769	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1770	if (ret)
1771		goto deref;
1772
1773	spin_lock_irq(&cm_id_priv->lock);
1774	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1775		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1776			      cm_id_priv->private_data,
1777			      cm_id_priv->private_data_len);
1778	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1779		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1780			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1781			      cm_id_priv->private_data,
1782			      cm_id_priv->private_data_len);
1783	else
1784		goto unlock;
1785	spin_unlock_irq(&cm_id_priv->lock);
1786
1787	ret = ib_post_send_mad(msg, NULL);
1788	if (ret)
1789		goto free;
1790	goto deref;
1791
1792unlock:	spin_unlock_irq(&cm_id_priv->lock);
1793free:	cm_free_msg(msg);
1794deref:	cm_deref_id(cm_id_priv);
1795}
1796
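/*
 * Handle a REP matching an outstanding REQ.  The timewait_info is
 * inserted into the remote-id and remote-qpn tables: a collision in the
 * former means this REP is a duplicate, while a collision in the latter
 * means the remote QPN is still tied to an earlier connection and the
 * REP is rejected as stale.
 */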
1797static int cm_rep_handler(struct cm_work *work)
1798{
1799	struct cm_id_private *cm_id_priv;
1800	struct cm_rep_msg *rep_msg;
1801	int ret;
1802
1803	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1804	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1805	if (!cm_id_priv) {
1806		cm_dup_rep_handler(work);
1807		return -EINVAL;
1808	}
1809
1810	cm_format_rep_event(work);
1811
1812	spin_lock_irq(&cm_id_priv->lock);
1813	switch (cm_id_priv->id.state) {
1814	case IB_CM_REQ_SENT:
1815	case IB_CM_MRA_REQ_RCVD:
1816		break;
1817	default:
1818		spin_unlock_irq(&cm_id_priv->lock);
1819		ret = -EINVAL;
1820		goto error;
1821	}
1822
1823	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1824	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1825	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1826
1827	spin_lock(&cm.lock);
1828	/* Check for duplicate REP. */
1829	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1830		spin_unlock(&cm.lock);
1831		spin_unlock_irq(&cm_id_priv->lock);
1832		ret = -EINVAL;
1833		goto error;
1834	}
1835	/* Check for a stale connection. */
1836	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1837		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1838			 &cm.remote_id_table);
1839		cm_id_priv->timewait_info->inserted_remote_id = 0;
1840		spin_unlock(&cm.lock);
1841		spin_unlock_irq(&cm_id_priv->lock);
1842		cm_issue_rej(work->port, work->mad_recv_wc,
1843			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1844			     NULL, 0);
1845		ret = -EINVAL;
1846		goto error;
1847	}
1848	spin_unlock(&cm.lock);
1849
1850	cm_id_priv->id.state = IB_CM_REP_RCVD;
1851	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1852	cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1853	cm_id_priv->initiator_depth = rep_msg->resp_resources;
1854	cm_id_priv->responder_resources = rep_msg->initiator_depth;
1855	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1856	cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1857	cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1858	cm_id_priv->av.timeout =
1859			cm_ack_timeout(cm_id_priv->target_ack_delay,
1860				       cm_id_priv->av.timeout - 1);
1861	cm_id_priv->alt_av.timeout =
1862			cm_ack_timeout(cm_id_priv->target_ack_delay,
1863				       cm_id_priv->alt_av.timeout - 1);
1864
1865	/* todo: handle peer_to_peer */
1866
1867	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1868	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1869	if (!ret)
1870		list_add_tail(&work->list, &cm_id_priv->work_list);
1871	spin_unlock_irq(&cm_id_priv->lock);
1872
1873	if (ret)
1874		cm_process_work(cm_id_priv, work);
1875	else
1876		cm_deref_id(cm_id_priv);
1877	return 0;
1878
1879error:
1880	cm_deref_id(cm_id_priv);
1881	return ret;
1882}
1883
1884static int cm_establish_handler(struct cm_work *work)
1885{
1886	struct cm_id_private *cm_id_priv;
1887	int ret;
1888
1889	/* See comment in cm_establish about lookup. */
1890	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1891	if (!cm_id_priv)
1892		return -EINVAL;
1893
1894	spin_lock_irq(&cm_id_priv->lock);
1895	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1896		spin_unlock_irq(&cm_id_priv->lock);
1897		goto out;
1898	}
1899
1900	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1901	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1902	if (!ret)
1903		list_add_tail(&work->list, &cm_id_priv->work_list);
1904	spin_unlock_irq(&cm_id_priv->lock);
1905
1906	if (ret)
1907		cm_process_work(cm_id_priv, work);
1908	else
1909		cm_deref_id(cm_id_priv);
1910	return 0;
1911out:
1912	cm_deref_id(cm_id_priv);
1913	return -EINVAL;
1914}
1915
1916static int cm_rtu_handler(struct cm_work *work)
1917{
1918	struct cm_id_private *cm_id_priv;
1919	struct cm_rtu_msg *rtu_msg;
1920	int ret;
1921
1922	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
1923	cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
1924				   rtu_msg->local_comm_id);
1925	if (!cm_id_priv)
1926		return -EINVAL;
1927
1928	work->cm_event.private_data = &rtu_msg->private_data;
1929
1930	spin_lock_irq(&cm_id_priv->lock);
1931	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1932	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1933		spin_unlock_irq(&cm_id_priv->lock);
1934		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1935				counter[CM_RTU_COUNTER]);
1936		goto out;
1937	}
1938	cm_id_priv->id.state = IB_CM_ESTABLISHED;
1939
1940	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1941	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1942	if (!ret)
1943		list_add_tail(&work->list, &cm_id_priv->work_list);
1944	spin_unlock_irq(&cm_id_priv->lock);
1945
1946	if (ret)
1947		cm_process_work(cm_id_priv, work);
1948	else
1949		cm_deref_id(cm_id_priv);
1950	return 0;
1951out:
1952	cm_deref_id(cm_id_priv);
1953	return -EINVAL;
1954}
1955
1956static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
1957			   struct cm_id_private *cm_id_priv,
1958			   const void *private_data,
1959			   u8 private_data_len)
1960{
1961	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
1962			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
1963	dreq_msg->local_comm_id = cm_id_priv->id.local_id;
1964	dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
1965	cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
1966
1967	if (private_data && private_data_len)
1968		memcpy(dreq_msg->private_data, private_data, private_data_len);
1969}
1970
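/*
 * ib_send_cm_dreq() starts disconnection from the ESTABLISHED state.
 * Note that failure to allocate or post the DREQ still moves the
 * connection into timewait, so teardown completes even when the message
 * cannot be sent.
 */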
1971int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1972		    const void *private_data,
1973		    u8 private_data_len)
1974{
1975	struct cm_id_private *cm_id_priv;
1976	struct ib_mad_send_buf *msg;
1977	unsigned long flags;
1978	int ret;
1979
1980	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
1981		return -EINVAL;
1982
1983	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1984	spin_lock_irqsave(&cm_id_priv->lock, flags);
1985	if (cm_id->state != IB_CM_ESTABLISHED) {
1986		ret = -EINVAL;
1987		goto out;
1988	}
1989
1990	ret = cm_alloc_msg(cm_id_priv, &msg);
1991	if (ret) {
1992		cm_enter_timewait(cm_id_priv);
1993		goto out;
1994	}
1995
1996	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
1997		       private_data, private_data_len);
1998	msg->timeout_ms = cm_id_priv->timeout_ms;
1999	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2000
2001	ret = ib_post_send_mad(msg, NULL);
2002	if (ret) {
2003		cm_enter_timewait(cm_id_priv);
2004		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2005		cm_free_msg(msg);
2006		return ret;
2007	}
2008
2009	cm_id->state = IB_CM_DREQ_SENT;
2010	cm_id_priv->msg = msg;
2011out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2012	return ret;
2013}
2014EXPORT_SYMBOL(ib_send_cm_dreq);
2015
2016static void cm_format_drep(struct cm_drep_msg *drep_msg,
2017			   struct cm_id_private *cm_id_priv,
2018			   const void *private_data,
2019			   u8 private_data_len)
2020{
2021	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2022	drep_msg->local_comm_id = cm_id_priv->id.local_id;
2023	drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2024
2025	if (private_data && private_data_len)
2026		memcpy(drep_msg->private_data, private_data, private_data_len);
2027}
2028
2029int ib_send_cm_drep(struct ib_cm_id *cm_id,
2030		    const void *private_data,
2031		    u8 private_data_len)
2032{
2033	struct cm_id_private *cm_id_priv;
2034	struct ib_mad_send_buf *msg;
2035	unsigned long flags;
2036	void *data;
2037	int ret;
2038
2039	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2040		return -EINVAL;
2041
2042	data = cm_copy_private_data(private_data, private_data_len);
2043	if (IS_ERR(data))
2044		return PTR_ERR(data);
2045
2046	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2047	spin_lock_irqsave(&cm_id_priv->lock, flags);
2048	if (cm_id->state != IB_CM_DREQ_RCVD) {
2049		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2050		kfree(data);
2051		return -EINVAL;
2052	}
2053
2054	cm_set_private_data(cm_id_priv, data, private_data_len);
2055	cm_enter_timewait(cm_id_priv);
2056
2057	ret = cm_alloc_msg(cm_id_priv, &msg);
2058	if (ret)
2059		goto out;
2060
2061	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2062		       private_data, private_data_len);
2063
2064	ret = ib_post_send_mad(msg, NULL);
2065	if (ret) {
2066		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2067		cm_free_msg(msg);
2068		return ret;
2069	}
2070
2071out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2072	return ret;
2073}
2074EXPORT_SYMBOL(ib_send_cm_drep);
2075
2076static int cm_issue_drep(struct cm_port *port,
2077			 struct ib_mad_recv_wc *mad_recv_wc)
2078{
2079	struct ib_mad_send_buf *msg = NULL;
2080	struct cm_dreq_msg *dreq_msg;
2081	struct cm_drep_msg *drep_msg;
2082	int ret;
2083
2084	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2085	if (ret)
2086		return ret;
2087
2088	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2089	drep_msg = (struct cm_drep_msg *) msg->mad;
2090
2091	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2092	drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2093	drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2094
2095	ret = ib_post_send_mad(msg, NULL);
2096	if (ret)
2097		cm_free_msg(msg);
2098
2099	return ret;
2100}
2101
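/*
 * An unmatched DREQ is answered directly with a DREP built from the
 * incoming MAD (cm_issue_drep() above), since the local cm_id may
 * already have been destroyed.  A DREQ arriving while this side is in
 * TIMEWAIT is treated as a duplicate and re-answered with a DREP.
 */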
2102static int cm_dreq_handler(struct cm_work *work)
2103{
2104	struct cm_id_private *cm_id_priv;
2105	struct cm_dreq_msg *dreq_msg;
2106	struct ib_mad_send_buf *msg = NULL;
2107	int ret;
2108
2109	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2110	cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2111				   dreq_msg->local_comm_id);
2112	if (!cm_id_priv) {
2113		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2114				counter[CM_DREQ_COUNTER]);
2115		cm_issue_drep(work->port, work->mad_recv_wc);
2116		return -EINVAL;
2117	}
2118
2119	work->cm_event.private_data = &dreq_msg->private_data;
2120
2121	spin_lock_irq(&cm_id_priv->lock);
2122	if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2123		goto unlock;
2124
2125	switch (cm_id_priv->id.state) {
2126	case IB_CM_REP_SENT:
2127	case IB_CM_DREQ_SENT:
2128		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2129		break;
2130	case IB_CM_ESTABLISHED:
2131	case IB_CM_MRA_REP_RCVD:
2132		break;
2133	case IB_CM_TIMEWAIT:
2134		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2135				counter[CM_DREQ_COUNTER]);
2136		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2137			goto unlock;
2138
2139		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2140			       cm_id_priv->private_data,
2141			       cm_id_priv->private_data_len);
2142		spin_unlock_irq(&cm_id_priv->lock);
2143
2144		if (ib_post_send_mad(msg, NULL))
2145			cm_free_msg(msg);
2146		goto deref;
2147	case IB_CM_DREQ_RCVD:
2148		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2149				counter[CM_DREQ_COUNTER]);
2150		goto unlock;
2151	default:
2152		goto unlock;
2153	}
2154	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2155	cm_id_priv->tid = dreq_msg->hdr.tid;
2156	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2157	if (!ret)
2158		list_add_tail(&work->list, &cm_id_priv->work_list);
2159	spin_unlock_irq(&cm_id_priv->lock);
2160
2161	if (ret)
2162		cm_process_work(cm_id_priv, work);
2163	else
2164		cm_deref_id(cm_id_priv);
2165	return 0;
2166
2167unlock:	spin_unlock_irq(&cm_id_priv->lock);
2168deref:	cm_deref_id(cm_id_priv);
2169	return -EINVAL;
2170}
2171
2172static int cm_drep_handler(struct cm_work *work)
2173{
2174	struct cm_id_private *cm_id_priv;
2175	struct cm_drep_msg *drep_msg;
2176	int ret;
2177
2178	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2179	cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2180				   drep_msg->local_comm_id);
2181	if (!cm_id_priv)
2182		return -EINVAL;
2183
2184	work->cm_event.private_data = &drep_msg->private_data;
2185
2186	spin_lock_irq(&cm_id_priv->lock);
2187	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2188	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2189		spin_unlock_irq(&cm_id_priv->lock);
2190		goto out;
2191	}
2192	cm_enter_timewait(cm_id_priv);
2193
2194	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2195	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2196	if (!ret)
2197		list_add_tail(&work->list, &cm_id_priv->work_list);
2198	spin_unlock_irq(&cm_id_priv->lock);
2199
2200	if (ret)
2201		cm_process_work(cm_id_priv, work);
2202	else
2203		cm_deref_id(cm_id_priv);
2204	return 0;
2205out:
2206	cm_deref_id(cm_id_priv);
2207	return -EINVAL;
2208}
2209
2210int ib_send_cm_rej(struct ib_cm_id *cm_id,
2211		   enum ib_cm_rej_reason reason,
2212		   void *ari,
2213		   u8 ari_length,
2214		   const void *private_data,
2215		   u8 private_data_len)
2216{
2217	struct cm_id_private *cm_id_priv;
2218	struct ib_mad_send_buf *msg;
2219	unsigned long flags;
2220	int ret;
2221
2222	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2223	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2224		return -EINVAL;
2225
2226	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2227
2228	spin_lock_irqsave(&cm_id_priv->lock, flags);
2229	switch (cm_id->state) {
2230	case IB_CM_REQ_SENT:
2231	case IB_CM_MRA_REQ_RCVD:
2232	case IB_CM_REQ_RCVD:
2233	case IB_CM_MRA_REQ_SENT:
2234	case IB_CM_REP_RCVD:
2235	case IB_CM_MRA_REP_SENT:
2236		ret = cm_alloc_msg(cm_id_priv, &msg);
2237		if (!ret)
2238			cm_format_rej((struct cm_rej_msg *) msg->mad,
2239				      cm_id_priv, reason, ari, ari_length,
2240				      private_data, private_data_len);
2241
2242		cm_reset_to_idle(cm_id_priv);
2243		break;
2244	case IB_CM_REP_SENT:
2245	case IB_CM_MRA_REP_RCVD:
2246		ret = cm_alloc_msg(cm_id_priv, &msg);
2247		if (!ret)
2248			cm_format_rej((struct cm_rej_msg *) msg->mad,
2249				      cm_id_priv, reason, ari, ari_length,
2250				      private_data, private_data_len);
2251
2252		cm_enter_timewait(cm_id_priv);
2253		break;
2254	default:
2255		ret = -EINVAL;
2256		goto out;
2257	}
2258
2259	if (ret)
2260		goto out;
2261
2262	ret = ib_post_send_mad(msg, NULL);
2263	if (ret)
2264		cm_free_msg(msg);
2265
2266out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2267	return ret;
2268}
2269EXPORT_SYMBOL(ib_send_cm_rej);
2270
2271static void cm_format_rej_event(struct cm_work *work)
2272{
2273	struct cm_rej_msg *rej_msg;
2274	struct ib_cm_rej_event_param *param;
2275
2276	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2277	param = &work->cm_event.param.rej_rcvd;
2278	param->ari = rej_msg->ari;
2279	param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2280	param->reason = __be16_to_cpu(rej_msg->reason);
2281	work->cm_event.private_data = &rej_msg->private_data;
2282}
2283
2284static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2285{
2286	struct cm_timewait_info *timewait_info;
2287	struct cm_id_private *cm_id_priv;
2288	__be32 remote_id;
2289
2290	remote_id = rej_msg->local_comm_id;
2291
2292	if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2293		spin_lock_irq(&cm.lock);
2294		timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
2295						  remote_id);
2296		if (!timewait_info) {
2297			spin_unlock_irq(&cm.lock);
2298			return NULL;
2299		}
2300		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2301				      (timewait_info->work.local_id ^
2302				       cm.random_id_operand));
2303		if (cm_id_priv) {
2304			if (cm_id_priv->id.remote_id == remote_id)
2305				atomic_inc(&cm_id_priv->refcount);
2306			else
2307				cm_id_priv = NULL;
2308		}
2309		spin_unlock_irq(&cm.lock);
2310	} else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2311		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2312	else
2313		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2314
2315	return cm_id_priv;
2316}
2317
2318static int cm_rej_handler(struct cm_work *work)
2319{
2320	struct cm_id_private *cm_id_priv;
2321	struct cm_rej_msg *rej_msg;
2322	int ret;
2323
2324	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2325	cm_id_priv = cm_acquire_rejected_id(rej_msg);
2326	if (!cm_id_priv)
2327		return -EINVAL;
2328
2329	cm_format_rej_event(work);
2330
2331	spin_lock_irq(&cm_id_priv->lock);
2332	switch (cm_id_priv->id.state) {
2333	case IB_CM_REQ_SENT:
2334	case IB_CM_MRA_REQ_RCVD:
2335	case IB_CM_REP_SENT:
2336	case IB_CM_MRA_REP_RCVD:
2337		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2338		/* fall through */
2339	case IB_CM_REQ_RCVD:
2340	case IB_CM_MRA_REQ_SENT:
2341		if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2342			cm_enter_timewait(cm_id_priv);
2343		else
2344			cm_reset_to_idle(cm_id_priv);
2345		break;
2346	case IB_CM_DREQ_SENT:
2347		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2348		/* fall through */
2349	case IB_CM_REP_RCVD:
2350	case IB_CM_MRA_REP_SENT:
2351	case IB_CM_ESTABLISHED:
2352		cm_enter_timewait(cm_id_priv);
2353		break;
2354	default:
2355		spin_unlock_irq(&cm_id_priv->lock);
2356		ret = -EINVAL;
2357		goto out;
2358	}
2359
2360	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2361	if (!ret)
2362		list_add_tail(&work->list, &cm_id_priv->work_list);
2363	spin_unlock_irq(&cm_id_priv->lock);
2364
2365	if (ret)
2366		cm_process_work(cm_id_priv, work);
2367	else
2368		cm_deref_id(cm_id_priv);
2369	return 0;
2370out:
2371	cm_deref_id(cm_id_priv);
2372	return -EINVAL;
2373}
2374
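/*
 * ib_send_cm_mra() asks the peer to extend its timeout while a REQ, REP,
 * or LAP is being processed.  If IB_CM_MRA_FLAG_DELAY is set in
 * service_timeout, nothing is sent immediately; the timeout and private
 * data are only recorded so an MRA can be generated later if the peer
 * retries (see cm_dup_rep_handler() and cm_lap_handler()).
 */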
2375int ib_send_cm_mra(struct ib_cm_id *cm_id,
2376		   u8 service_timeout,
2377		   const void *private_data,
2378		   u8 private_data_len)
2379{
2380	struct cm_id_private *cm_id_priv;
2381	struct ib_mad_send_buf *msg;
2382	enum ib_cm_state cm_state;
2383	enum ib_cm_lap_state lap_state;
2384	enum cm_msg_response msg_response;
2385	void *data;
2386	unsigned long flags;
2387	int ret;
2388
2389	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2390		return -EINVAL;
2391
2392	data = cm_copy_private_data(private_data, private_data_len);
2393	if (IS_ERR(data))
2394		return PTR_ERR(data);
2395
2396	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2397
2398	spin_lock_irqsave(&cm_id_priv->lock, flags);
2399	switch(cm_id_priv->id.state) {
2400	case IB_CM_REQ_RCVD:
2401		cm_state = IB_CM_MRA_REQ_SENT;
2402		lap_state = cm_id->lap_state;
2403		msg_response = CM_MSG_RESPONSE_REQ;
2404		break;
2405	case IB_CM_REP_RCVD:
2406		cm_state = IB_CM_MRA_REP_SENT;
2407		lap_state = cm_id->lap_state;
2408		msg_response = CM_MSG_RESPONSE_REP;
2409		break;
2410	case IB_CM_ESTABLISHED:
2411		cm_state = cm_id->state;
2412		lap_state = IB_CM_MRA_LAP_SENT;
2413		msg_response = CM_MSG_RESPONSE_OTHER;
2414		break;
2415	default:
2416		ret = -EINVAL;
2417		goto error1;
2418	}
2419
2420	if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2421		ret = cm_alloc_msg(cm_id_priv, &msg);
2422		if (ret)
2423			goto error1;
2424
2425		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2426			      msg_response, service_timeout,
2427			      private_data, private_data_len);
2428		ret = ib_post_send_mad(msg, NULL);
2429		if (ret)
2430			goto error2;
2431	}
2432
2433	cm_id->state = cm_state;
2434	cm_id->lap_state = lap_state;
2435	cm_id_priv->service_timeout = service_timeout;
2436	cm_set_private_data(cm_id_priv, data, private_data_len);
2437	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2438	return 0;
2439
2440error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2441	kfree(data);
2442	return ret;
2443
2444error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2445	kfree(data);
2446	cm_free_msg(msg);
2447	return ret;
2448}
2449EXPORT_SYMBOL(ib_send_cm_mra);
2450
2451static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2452{
2453	switch (cm_mra_get_msg_mraed(mra_msg)) {
2454	case CM_MSG_RESPONSE_REQ:
2455		return cm_acquire_id(mra_msg->remote_comm_id, 0);
2456	case CM_MSG_RESPONSE_REP:
2457	case CM_MSG_RESPONSE_OTHER:
2458		return cm_acquire_id(mra_msg->remote_comm_id,
2459				     mra_msg->local_comm_id);
2460	default:
2461		return NULL;
2462	}
2463}
2464
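/*
 * On receipt of an MRA, the retransmission timeout of the outstanding
 * MAD is stretched, via ib_modify_mad(), to the peer's advertised
 * service timeout plus our own address-vector timeout.
 */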
2465static int cm_mra_handler(struct cm_work *work)
2466{
2467	struct cm_id_private *cm_id_priv;
2468	struct cm_mra_msg *mra_msg;
2469	int timeout, ret;
2470
2471	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2472	cm_id_priv = cm_acquire_mraed_id(mra_msg);
2473	if (!cm_id_priv)
2474		return -EINVAL;
2475
2476	work->cm_event.private_data = &mra_msg->private_data;
2477	work->cm_event.param.mra_rcvd.service_timeout =
2478					cm_mra_get_service_timeout(mra_msg);
2479	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2480		  cm_convert_to_ms(cm_id_priv->av.timeout);
2481
2482	spin_lock_irq(&cm_id_priv->lock);
2483	switch (cm_id_priv->id.state) {
2484	case IB_CM_REQ_SENT:
2485		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2486		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2487				  cm_id_priv->msg, timeout))
2488			goto out;
2489		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2490		break;
2491	case IB_CM_REP_SENT:
2492		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2493		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2494				  cm_id_priv->msg, timeout))
2495			goto out;
2496		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2497		break;
2498	case IB_CM_ESTABLISHED:
2499		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2500		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2501		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2502				  cm_id_priv->msg, timeout)) {
2503			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2504				atomic_long_inc(&work->port->
2505						counter_group[CM_RECV_DUPLICATES].
2506						counter[CM_MRA_COUNTER]);
2507			goto out;
2508		}
2509		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2510		break;
2511	case IB_CM_MRA_REQ_RCVD:
2512	case IB_CM_MRA_REP_RCVD:
2513		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2514				counter[CM_MRA_COUNTER]);
2515		/* fall through */
2516	default:
2517		goto out;
2518	}
2519
2520	cm_id_priv->msg->context[1] = (void *) (unsigned long)
2521				      cm_id_priv->id.state;
2522	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2523	if (!ret)
2524		list_add_tail(&work->list, &cm_id_priv->work_list);
2525	spin_unlock_irq(&cm_id_priv->lock);
2526
2527	if (ret)
2528		cm_process_work(cm_id_priv, work);
2529	else
2530		cm_deref_id(cm_id_priv);
2531	return 0;
2532out:
2533	spin_unlock_irq(&cm_id_priv->lock);
2534	cm_deref_id(cm_id_priv);
2535	return -EINVAL;
2536}
2537
2538static void cm_format_lap(struct cm_lap_msg *lap_msg,
2539			  struct cm_id_private *cm_id_priv,
2540			  struct ib_sa_path_rec *alternate_path,
2541			  const void *private_data,
2542			  u8 private_data_len)
2543{
2544	cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2545			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2546	lap_msg->local_comm_id = cm_id_priv->id.local_id;
2547	lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2548	cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2549	/* todo: need remote CM response timeout */
2550	cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2551	lap_msg->alt_local_lid = alternate_path->slid;
2552	lap_msg->alt_remote_lid = alternate_path->dlid;
2553	lap_msg->alt_local_gid = alternate_path->sgid;
2554	lap_msg->alt_remote_gid = alternate_path->dgid;
2555	cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2556	cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2557	lap_msg->alt_hop_limit = alternate_path->hop_limit;
2558	cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2559	cm_lap_set_sl(lap_msg, alternate_path->sl);
2560	cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2561	cm_lap_set_local_ack_timeout(lap_msg,
2562		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2563			       alternate_path->packet_life_time));
2564
2565	if (private_data && private_data_len)
2566		memcpy(lap_msg->private_data, private_data, private_data_len);
2567}
2568
2569int ib_send_cm_lap(struct ib_cm_id *cm_id,
2570		   struct ib_sa_path_rec *alternate_path,
2571		   const void *private_data,
2572		   u8 private_data_len)
2573{
2574	struct cm_id_private *cm_id_priv;
2575	struct ib_mad_send_buf *msg;
2576	unsigned long flags;
2577	int ret;
2578
2579	if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2580		return -EINVAL;
2581
2582	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2583	spin_lock_irqsave(&cm_id_priv->lock, flags);
2584	if (cm_id->state != IB_CM_ESTABLISHED ||
2585	    (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2586	     cm_id->lap_state != IB_CM_LAP_IDLE)) {
2587		ret = -EINVAL;
2588		goto out;
2589	}
2590
2591	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2592	if (ret)
2593		goto out;
2594	cm_id_priv->alt_av.timeout =
2595			cm_ack_timeout(cm_id_priv->target_ack_delay,
2596				       cm_id_priv->alt_av.timeout - 1);
2597
2598	ret = cm_alloc_msg(cm_id_priv, &msg);
2599	if (ret)
2600		goto out;
2601
2602	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2603		      alternate_path, private_data, private_data_len);
2604	msg->timeout_ms = cm_id_priv->timeout_ms;
2605	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2606
2607	ret = ib_post_send_mad(msg, NULL);
2608	if (ret) {
2609		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2610		cm_free_msg(msg);
2611		return ret;
2612	}
2613
2614	cm_id->lap_state = IB_CM_LAP_SENT;
2615	cm_id_priv->msg = msg;
2616
2617out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2618	return ret;
2619}
2620EXPORT_SYMBOL(ib_send_cm_lap);
2621
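/*
 * Build a path record from a received LAP.  The LAP describes the path
 * from the sender's point of view, so the local/remote fields swap: the
 * sender's "local" LID and GID become our destination.
 */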
2622static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2623				    struct ib_sa_path_rec *path,
2624				    struct cm_lap_msg *lap_msg)
2625{
2626	memset(path, 0, sizeof *path);
2627	path->dgid = lap_msg->alt_local_gid;
2628	path->sgid = lap_msg->alt_remote_gid;
2629	path->dlid = lap_msg->alt_local_lid;
2630	path->slid = lap_msg->alt_remote_lid;
2631	path->flow_label = cm_lap_get_flow_label(lap_msg);
2632	path->hop_limit = lap_msg->alt_hop_limit;
2633	path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2634	path->reversible = 1;
2635	path->pkey = cm_id_priv->pkey;
2636	path->sl = cm_lap_get_sl(lap_msg);
2637	path->mtu_selector = IB_SA_EQ;
2638	path->mtu = cm_id_priv->path_mtu;
2639	path->rate_selector = IB_SA_EQ;
2640	path->rate = cm_lap_get_packet_rate(lap_msg);
2641	path->packet_life_time_selector = IB_SA_EQ;
2642	path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2643	path->packet_life_time -= (path->packet_life_time > 0);
2644}
2645
2646static int cm_lap_handler(struct cm_work *work)
2647{
2648	struct cm_id_private *cm_id_priv;
2649	struct cm_lap_msg *lap_msg;
2650	struct ib_cm_lap_event_param *param;
2651	struct ib_mad_send_buf *msg = NULL;
2652	int ret;
2653
2654	/* todo: verify LAP request and send reject APR if invalid. */
2655	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2656	cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2657				   lap_msg->local_comm_id);
2658	if (!cm_id_priv)
2659		return -EINVAL;
2660
2661	param = &work->cm_event.param.lap_rcvd;
2662	param->alternate_path = &work->path[0];
2663	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2664	work->cm_event.private_data = &lap_msg->private_data;
2665
2666	spin_lock_irq(&cm_id_priv->lock);
2667	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2668		goto unlock;
2669
2670	switch (cm_id_priv->id.lap_state) {
2671	case IB_CM_LAP_UNINIT:
2672	case IB_CM_LAP_IDLE:
2673		break;
2674	case IB_CM_MRA_LAP_SENT:
2675		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2676				counter[CM_LAP_COUNTER]);
2677		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2678			goto unlock;
2679
2680		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2681			      CM_MSG_RESPONSE_OTHER,
2682			      cm_id_priv->service_timeout,
2683			      cm_id_priv->private_data,
2684			      cm_id_priv->private_data_len);
2685		spin_unlock_irq(&cm_id_priv->lock);
2686
2687		if (ib_post_send_mad(msg, NULL))
2688			cm_free_msg(msg);
2689		goto deref;
2690	case IB_CM_LAP_RCVD:
2691		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2692				counter[CM_LAP_COUNTER]);
2693		goto unlock;
2694	default:
2695		goto unlock;
2696	}
2697
2698	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2699	cm_id_priv->tid = lap_msg->hdr.tid;
2700	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2701				work->mad_recv_wc->recv_buf.grh,
2702				&cm_id_priv->av);
2703	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
2704	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2705	if (!ret)
2706		list_add_tail(&work->list, &cm_id_priv->work_list);
2707	spin_unlock_irq(&cm_id_priv->lock);
2708
2709	if (ret)
2710		cm_process_work(cm_id_priv, work);
2711	else
2712		cm_deref_id(cm_id_priv);
2713	return 0;
2714
2715unlock:	spin_unlock_irq(&cm_id_priv->lock);
2716deref:	cm_deref_id(cm_id_priv);
2717	return -EINVAL;
2718}
2719
2720static void cm_format_apr(struct cm_apr_msg *apr_msg,
2721			  struct cm_id_private *cm_id_priv,
2722			  enum ib_cm_apr_status status,
2723			  void *info,
2724			  u8 info_length,
2725			  const void *private_data,
2726			  u8 private_data_len)
2727{
2728	cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2729	apr_msg->local_comm_id = cm_id_priv->id.local_id;
2730	apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2731	apr_msg->ap_status = (u8) status;
2732
2733	if (info && info_length) {
2734		apr_msg->info_length = info_length;
2735		memcpy(apr_msg->info, info, info_length);
2736	}
2737
2738	if (private_data && private_data_len)
2739		memcpy(apr_msg->private_data, private_data, private_data_len);
2740}
2741
2742int ib_send_cm_apr(struct ib_cm_id *cm_id,
2743		   enum ib_cm_apr_status status,
2744		   void *info,
2745		   u8 info_length,
2746		   const void *private_data,
2747		   u8 private_data_len)
2748{
2749	struct cm_id_private *cm_id_priv;
2750	struct ib_mad_send_buf *msg;
2751	unsigned long flags;
2752	int ret;
2753
2754	if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2755	    (info && info_length > IB_CM_APR_INFO_LENGTH))
2756		return -EINVAL;
2757
2758	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2759	spin_lock_irqsave(&cm_id_priv->lock, flags);
2760	if (cm_id->state != IB_CM_ESTABLISHED ||
2761	    (cm_id->lap_state != IB_CM_LAP_RCVD &&
2762	     cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2763		ret = -EINVAL;
2764		goto out;
2765	}
2766
2767	ret = cm_alloc_msg(cm_id_priv, &msg);
2768	if (ret)
2769		goto out;
2770
2771	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2772		      info, info_length, private_data, private_data_len);
2773	ret = ib_post_send_mad(msg, NULL);
2774	if (ret) {
2775		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2776		cm_free_msg(msg);
2777		return ret;
2778	}
2779
2780	cm_id->lap_state = IB_CM_LAP_IDLE;
2781out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2782	return ret;
2783}
2784EXPORT_SYMBOL(ib_send_cm_apr);
2785
2786static int cm_apr_handler(struct cm_work *work)
2787{
2788	struct cm_id_private *cm_id_priv;
2789	struct cm_apr_msg *apr_msg;
2790	int ret;
2791
2792	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2793	cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2794				   apr_msg->local_comm_id);
2795	if (!cm_id_priv)
2796		return -EINVAL; /* Unmatched reply. */
2797
2798	work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2799	work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2800	work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2801	work->cm_event.private_data = &apr_msg->private_data;
2802
2803	spin_lock_irq(&cm_id_priv->lock);
2804	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2805	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2806	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2807		spin_unlock_irq(&cm_id_priv->lock);
2808		goto out;
2809	}
2810	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2811	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2812	cm_id_priv->msg = NULL;
2813
2814	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2815	if (!ret)
2816		list_add_tail(&work->list, &cm_id_priv->work_list);
2817	spin_unlock_irq(&cm_id_priv->lock);
2818
2819	if (ret)
2820		cm_process_work(cm_id_priv, work);
2821	else
2822		cm_deref_id(cm_id_priv);
2823	return 0;
2824out:
2825	cm_deref_id(cm_id_priv);
2826	return -EINVAL;
2827}
2828
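/*
 * Runs when the timewait period for a connection expires: the cm_id
 * returns to IDLE, unless it has since been reused for a different
 * remote QPN.
 */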
2829static int cm_timewait_handler(struct cm_work *work)
2830{
2831	struct cm_timewait_info *timewait_info;
2832	struct cm_id_private *cm_id_priv;
2833	int ret;
2834
2835	timewait_info = (struct cm_timewait_info *)work;
2836	spin_lock_irq(&cm.lock);
2837	list_del(&timewait_info->list);
2838	spin_unlock_irq(&cm.lock);
2839
2840	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2841				   timewait_info->work.remote_id);
2842	if (!cm_id_priv)
2843		return -EINVAL;
2844
2845	spin_lock_irq(&cm_id_priv->lock);
2846	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2847	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2848		spin_unlock_irq(&cm_id_priv->lock);
2849		goto out;
2850	}
2851	cm_id_priv->id.state = IB_CM_IDLE;
2852	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2853	if (!ret)
2854		list_add_tail(&work->list, &cm_id_priv->work_list);
2855	spin_unlock_irq(&cm_id_priv->lock);
2856
2857	if (ret)
2858		cm_process_work(cm_id_priv, work);
2859	else
2860		cm_deref_id(cm_id_priv);
2861	return 0;
2862out:
2863	cm_deref_id(cm_id_priv);
2864	return -EINVAL;
2865}
2866
2867static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2868			       struct cm_id_private *cm_id_priv,
2869			       struct ib_cm_sidr_req_param *param)
2870{
2871	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2872			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2873	sidr_req_msg->request_id = cm_id_priv->id.local_id;
2874	sidr_req_msg->pkey = param->path->pkey;
2875	sidr_req_msg->service_id = param->service_id;
2876
2877	if (param->private_data && param->private_data_len)
2878		memcpy(sidr_req_msg->private_data, param->private_data,
2879		       param->private_data_len);
2880}
2881
2882int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2883			struct ib_cm_sidr_req_param *param)
2884{
2885	struct cm_id_private *cm_id_priv;
2886	struct ib_mad_send_buf *msg;
2887	unsigned long flags;
2888	int ret;
2889
2890	if (!param->path || (param->private_data &&
2891	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
2892		return -EINVAL;
2893
2894	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2895	ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
2896	if (ret)
2897		goto out;
2898
2899	cm_id->service_id = param->service_id;
2900	cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
2901	cm_id_priv->timeout_ms = param->timeout_ms;
2902	cm_id_priv->max_cm_retries = param->max_cm_retries;
2903	ret = cm_alloc_msg(cm_id_priv, &msg);
2904	if (ret)
2905		goto out;
2906
2907	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
2908			   param);
2909	msg->timeout_ms = cm_id_priv->timeout_ms;
2910	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
2911
2912	spin_lock_irqsave(&cm_id_priv->lock, flags);
2913	if (cm_id->state == IB_CM_IDLE)
2914		ret = ib_post_send_mad(msg, NULL);
2915	else
2916		ret = -EINVAL;
2917
2918	if (ret) {
2919		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2920		cm_free_msg(msg);
2921		goto out;
2922	}
2923	cm_id->state = IB_CM_SIDR_REQ_SENT;
2924	cm_id_priv->msg = msg;
2925	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2926out:
2927	return ret;
2928}
2929EXPORT_SYMBOL(ib_send_cm_sidr_req);
2930
2931static void cm_format_sidr_req_event(struct cm_work *work,
2932				     struct ib_cm_id *listen_id)
2933{
2934	struct cm_sidr_req_msg *sidr_req_msg;
2935	struct ib_cm_sidr_req_event_param *param;
2936
2937	sidr_req_msg = (struct cm_sidr_req_msg *)
2938				work->mad_recv_wc->recv_buf.mad;
2939	param = &work->cm_event.param.sidr_req_rcvd;
2940	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
2941	param->listen_id = listen_id;
2942	param->port = work->port->port_num;
2943	work->cm_event.private_data = &sidr_req_msg->private_data;
2944}
2945
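/*
 * A SIDR REQ creates a fresh cm_id.  The sender's LID is stashed in the
 * subnet prefix of the destination GID so that duplicate requests can be
 * recognized in the remote SIDR table; if no listener matches the
 * service ID, the request is rejected with IB_SIDR_UNSUPPORTED.
 */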
2946static int cm_sidr_req_handler(struct cm_work *work)
2947{
2948	struct ib_cm_id *cm_id;
2949	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
2950	struct cm_sidr_req_msg *sidr_req_msg;
2951	struct ib_wc *wc;
2952
2953	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
2954	if (IS_ERR(cm_id))
2955		return PTR_ERR(cm_id);
2956	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2957
2958	/* Record SGID/SLID and request ID for lookup. */
2959	sidr_req_msg = (struct cm_sidr_req_msg *)
2960				work->mad_recv_wc->recv_buf.mad;
2961	wc = work->mad_recv_wc->wc;
2962	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
2963	cm_id_priv->av.dgid.global.interface_id = 0;
2964	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2965				work->mad_recv_wc->recv_buf.grh,
2966				&cm_id_priv->av);
2967	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
2968	cm_id_priv->tid = sidr_req_msg->hdr.tid;
2969	atomic_inc(&cm_id_priv->work_count);
2970
2971	spin_lock_irq(&cm.lock);
2972	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
2973	if (cur_cm_id_priv) {
2974		spin_unlock_irq(&cm.lock);
2975		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2976				counter[CM_SIDR_REQ_COUNTER]);
2977		goto out; /* Duplicate message. */
2978	}
2979	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
2980	cur_cm_id_priv = cm_find_listen(cm_id->device,
2981					sidr_req_msg->service_id,
2982					sidr_req_msg->private_data);
2983	if (!cur_cm_id_priv) {
2984		spin_unlock_irq(&cm.lock);
2985		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
2986		goto out; /* No match. */
2987	}
2988	atomic_inc(&cur_cm_id_priv->refcount);
2989	spin_unlock_irq(&cm.lock);
2990
2991	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
2992	cm_id_priv->id.context = cur_cm_id_priv->id.context;
2993	cm_id_priv->id.service_id = sidr_req_msg->service_id;
2994	cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);
2995
2996	cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
2997	cm_process_work(cm_id_priv, work);
2998	cm_deref_id(cur_cm_id_priv);
2999	return 0;
3000out:
3001	ib_destroy_cm_id(&cm_id_priv->id);
3002	return -EINVAL;
3003}
3004
3005static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3006			       struct cm_id_private *cm_id_priv,
3007			       struct ib_cm_sidr_rep_param *param)
3008{
3009	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3010			  cm_id_priv->tid);
3011	sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3012	sidr_rep_msg->status = param->status;
3013	cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3014	sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3015	sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3016
3017	if (param->info && param->info_length)
3018		memcpy(sidr_rep_msg->info, param->info, param->info_length);
3019
3020	if (param->private_data && param->private_data_len)
3021		memcpy(sidr_rep_msg->private_data, param->private_data,
3022		       param->private_data_len);
3023}
3024
3025int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3026			struct ib_cm_sidr_rep_param *param)
3027{
3028	struct cm_id_private *cm_id_priv;
3029	struct ib_mad_send_buf *msg;
3030	unsigned long flags;
3031	int ret;
3032
3033	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3034	    (param->private_data &&
3035	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3036		return -EINVAL;
3037
3038	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3039	spin_lock_irqsave(&cm_id_priv->lock, flags);
3040	if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3041		ret = -EINVAL;
3042		goto error;
3043	}
3044
3045	ret = cm_alloc_msg(cm_id_priv, &msg);
3046	if (ret)
3047		goto error;
3048
3049	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3050			   param);
3051	ret = ib_post_send_mad(msg, NULL);
3052	if (ret) {
3053		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3054		cm_free_msg(msg);
3055		return ret;
3056	}
3057	cm_id->state = IB_CM_IDLE;
3058	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3059
3060	spin_lock_irqsave(&cm.lock, flags);
3061	rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3062	spin_unlock_irqrestore(&cm.lock, flags);
3063	return 0;
3064
3065error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3066	return ret;
3067}
3068EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3069
3070static void cm_format_sidr_rep_event(struct cm_work *work)
3071{
3072	struct cm_sidr_rep_msg *sidr_rep_msg;
3073	struct ib_cm_sidr_rep_event_param *param;
3074
3075	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3076				work->mad_recv_wc->recv_buf.mad;
3077	param = &work->cm_event.param.sidr_rep_rcvd;
3078	param->status = sidr_rep_msg->status;
3079	param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3080	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3081	param->info = &sidr_rep_msg->info;
3082	param->info_len = sidr_rep_msg->info_length;
3083	work->cm_event.private_data = &sidr_rep_msg->private_data;
3084}
3085
3086static int cm_sidr_rep_handler(struct cm_work *work)
3087{
3088	struct cm_sidr_rep_msg *sidr_rep_msg;
3089	struct cm_id_private *cm_id_priv;
3090
3091	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3092				work->mad_recv_wc->recv_buf.mad;
3093	cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3094	if (!cm_id_priv)
3095		return -EINVAL; /* Unmatched reply. */
3096
3097	spin_lock_irq(&cm_id_priv->lock);
3098	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3099		spin_unlock_irq(&cm_id_priv->lock);
3100		goto out;
3101	}
3102	cm_id_priv->id.state = IB_CM_IDLE;
3103	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3104	spin_unlock_irq(&cm_id_priv->lock);
3105
3106	cm_format_sidr_rep_event(work);
3107	cm_process_work(cm_id_priv, work);
3108	return 0;
3109out:
3110	cm_deref_id(cm_id_priv);
3111	return -EINVAL;
3112}
3113
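/*
 * Report a send failure to the consumer.  Only the most recent send on
 * the cm_id, in a state that still expects a response, is reported;
 * completions for stale sends are silently discarded.
 */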
3114static void cm_process_send_error(struct ib_mad_send_buf *msg,
3115				  enum ib_wc_status wc_status)
3116{
3117	struct cm_id_private *cm_id_priv;
3118	struct ib_cm_event cm_event;
3119	enum ib_cm_state state;
3120	int ret;
3121
3122	memset(&cm_event, 0, sizeof cm_event);
3123	cm_id_priv = msg->context[0];
3124
3125	/* Discard old sends or ones without a response. */
3126	spin_lock_irq(&cm_id_priv->lock);
3127	state = (enum ib_cm_state) (unsigned long) msg->context[1];
3128	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3129		goto discard;
3130
3131	switch (state) {
3132	case IB_CM_REQ_SENT:
3133	case IB_CM_MRA_REQ_RCVD:
3134		cm_reset_to_idle(cm_id_priv);
3135		cm_event.event = IB_CM_REQ_ERROR;
3136		break;
3137	case IB_CM_REP_SENT:
3138	case IB_CM_MRA_REP_RCVD:
3139		cm_reset_to_idle(cm_id_priv);
3140		cm_event.event = IB_CM_REP_ERROR;
3141		break;
3142	case IB_CM_DREQ_SENT:
3143		cm_enter_timewait(cm_id_priv);
3144		cm_event.event = IB_CM_DREQ_ERROR;
3145		break;
3146	case IB_CM_SIDR_REQ_SENT:
3147		cm_id_priv->id.state = IB_CM_IDLE;
3148		cm_event.event = IB_CM_SIDR_REQ_ERROR;
3149		break;
3150	default:
3151		goto discard;
3152	}
3153	spin_unlock_irq(&cm_id_priv->lock);
3154	cm_event.param.send_status = wc_status;
3155
3156	/* No other events can occur on the cm_id at this point. */
3157	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3158	cm_free_msg(msg);
3159	if (ret)
3160		ib_destroy_cm_id(&cm_id_priv->id);
3161	return;
3162discard:
3163	spin_unlock_irq(&cm_id_priv->lock);
3164	cm_free_msg(msg);
3165}
3166
3167static void cm_send_handler(struct ib_mad_agent *mad_agent,
3168			    struct ib_mad_send_wc *mad_send_wc)
3169{
3170	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3171	struct cm_port *port;
3172	u16 attr_index;
3173
3174	port = mad_agent->context;
3175	attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3176				  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3177
3178	/*
3179	 * If the send was in response to a received message (context[0] is not
3180	 * set to a cm_id), and is not a REJ, then it is a send that was
3181	 * manually retried.
3182	 */
3183	if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3184		msg->retries = 1;
3185
3186	atomic_long_add(1 + msg->retries,
3187			&port->counter_group[CM_XMIT].counter[attr_index]);
3188	if (msg->retries)
3189		atomic_long_add(msg->retries,
3190				&port->counter_group[CM_XMIT_RETRIES].
3191				counter[attr_index]);
3192
3193	switch (mad_send_wc->status) {
3194	case IB_WC_SUCCESS:
3195	case IB_WC_WR_FLUSH_ERR:
3196		cm_free_msg(msg);
3197		break;
3198	default:
3199		if (msg->context[0] && msg->context[1])
3200			cm_process_send_error(msg, mad_send_wc->status);
3201		else
3202			cm_free_msg(msg);
3203		break;
3204	}
3205}
3206
3207static void cm_work_handler(struct work_struct *_work)
3208{
3209	struct cm_work *work = container_of(_work, struct cm_work, work.work);
3210	int ret;
3211
3212	switch (work->cm_event.event) {
3213	case IB_CM_REQ_RECEIVED:
3214		ret = cm_req_handler(work);
3215		break;
3216	case IB_CM_MRA_RECEIVED:
3217		ret = cm_mra_handler(work);
3218		break;
3219	case IB_CM_REJ_RECEIVED:
3220		ret = cm_rej_handler(work);
3221		break;
3222	case IB_CM_REP_RECEIVED:
3223		ret = cm_rep_handler(work);
3224		break;
3225	case IB_CM_RTU_RECEIVED:
3226		ret = cm_rtu_handler(work);
3227		break;
3228	case IB_CM_USER_ESTABLISHED:
3229		ret = cm_establish_handler(work);
3230		break;
3231	case IB_CM_DREQ_RECEIVED:
3232		ret = cm_dreq_handler(work);
3233		break;
3234	case IB_CM_DREP_RECEIVED:
3235		ret = cm_drep_handler(work);
3236		break;
3237	case IB_CM_SIDR_REQ_RECEIVED:
3238		ret = cm_sidr_req_handler(work);
3239		break;
3240	case IB_CM_SIDR_REP_RECEIVED:
3241		ret = cm_sidr_rep_handler(work);
3242		break;
3243	case IB_CM_LAP_RECEIVED:
3244		ret = cm_lap_handler(work);
3245		break;
3246	case IB_CM_APR_RECEIVED:
3247		ret = cm_apr_handler(work);
3248		break;
3249	case IB_CM_TIMEWAIT_EXIT:
3250		ret = cm_timewait_handler(work);
3251		break;
3252	default:
3253		ret = -EINVAL;
3254		break;
3255	}
3256	if (ret)
3257		cm_free_work(work);
3258}
3259
3260static int cm_establish(struct ib_cm_id *cm_id)
3261{
3262	struct cm_id_private *cm_id_priv;
3263	struct cm_work *work;
3264	unsigned long flags;
3265	int ret = 0;
3266
3267	work = kmalloc(sizeof *work, GFP_ATOMIC);
3268	if (!work)
3269		return -ENOMEM;
3270
3271	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3272	spin_lock_irqsave(&cm_id_priv->lock, flags);
3273	switch (cm_id->state) {
3275	case IB_CM_REP_SENT:
3276	case IB_CM_MRA_REP_RCVD:
3277		cm_id->state = IB_CM_ESTABLISHED;
3278		break;
3279	case IB_CM_ESTABLISHED:
3280		ret = -EISCONN;
3281		break;
3282	default:
3283		ret = -EINVAL;
3284		break;
3285	}
3286	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3287
3288	if (ret) {
3289		kfree(work);
3290		goto out;
3291	}
3292
3293	/*
3294	 * The CM worker thread may try to destroy the cm_id before it
3295	 * can execute this work item.  To prevent potential deadlock,
3296	 * we need to find the cm_id once we're in the context of the
3297	 * worker thread, rather than holding a reference on it.
3298	 */
3299	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3300	work->local_id = cm_id->local_id;
3301	work->remote_id = cm_id->remote_id;
3302	work->mad_recv_wc = NULL;
3303	work->cm_event.event = IB_CM_USER_ESTABLISHED;
3304	queue_delayed_work(cm.wq, &work->work, 0);
3305out:
3306	return ret;
3307}
3308
3309static int cm_migrate(struct ib_cm_id *cm_id)
3310{
3311	struct cm_id_private *cm_id_priv;
3312	unsigned long flags;
3313	int ret = 0;
3314
3315	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3316	spin_lock_irqsave(&cm_id_priv->lock, flags);
3317	if (cm_id->state == IB_CM_ESTABLISHED &&
3318	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3319	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
3320		cm_id->lap_state = IB_CM_LAP_IDLE;
3321		cm_id_priv->av = cm_id_priv->alt_av;
3322	} else
3323		ret = -EINVAL;
3324	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3325
3326	return ret;
3327}
3328
3329int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3330{
3331	int ret;
3332
3333	switch (event) {
3334	case IB_EVENT_COMM_EST:
3335		ret = cm_establish(cm_id);
3336		break;
3337	case IB_EVENT_PATH_MIG:
3338		ret = cm_migrate(cm_id);
3339		break;
3340	default:
3341		ret = -EINVAL;
3342	}
3343	return ret;
3344}
3345EXPORT_SYMBOL(ib_cm_notify);
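/*
 * ib_cm_notify() is meant to be driven from a QP event handler, so that
 * hardware-generated communication-established and path-migration events
 * reach the CM state machine.  A hypothetical handler sketch:
 *
 *	static void qp_event_handler(struct ib_event *event, void *context)
 *	{
 *		struct ib_cm_id *cm_id = context;
 *
 *		switch (event->event) {
 *		case IB_EVENT_COMM_EST:
 *		case IB_EVENT_PATH_MIG:
 *			ib_cm_notify(cm_id, event->event);
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */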
3346
3347static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3348			    struct ib_mad_recv_wc *mad_recv_wc)
3349{
3350	struct cm_port *port = mad_agent->context;
3351	struct cm_work *work;
3352	enum ib_cm_event_type event;
3353	u16 attr_id;
3354	int paths = 0;
3355
3356	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3357	case CM_REQ_ATTR_ID:
3358		paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3359						    alt_local_lid != 0);
3360		event = IB_CM_REQ_RECEIVED;
3361		break;
3362	case CM_MRA_ATTR_ID:
3363		event = IB_CM_MRA_RECEIVED;
3364		break;
3365	case CM_REJ_ATTR_ID:
3366		event = IB_CM_REJ_RECEIVED;
3367		break;
3368	case CM_REP_ATTR_ID:
3369		event = IB_CM_REP_RECEIVED;
3370		break;
3371	case CM_RTU_ATTR_ID:
3372		event = IB_CM_RTU_RECEIVED;
3373		break;
3374	case CM_DREQ_ATTR_ID:
3375		event = IB_CM_DREQ_RECEIVED;
3376		break;
3377	case CM_DREP_ATTR_ID:
3378		event = IB_CM_DREP_RECEIVED;
3379		break;
3380	case CM_SIDR_REQ_ATTR_ID:
3381		event = IB_CM_SIDR_REQ_RECEIVED;
3382		break;
3383	case CM_SIDR_REP_ATTR_ID:
3384		event = IB_CM_SIDR_REP_RECEIVED;
3385		break;
3386	case CM_LAP_ATTR_ID:
3387		paths = 1;
3388		event = IB_CM_LAP_RECEIVED;
3389		break;
3390	case CM_APR_ATTR_ID:
3391		event = IB_CM_APR_RECEIVED;
3392		break;
3393	default:
3394		ib_free_recv_mad(mad_recv_wc);
3395		return;
3396	}
3397
3398	attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3399	atomic_long_inc(&port->counter_group[CM_RECV].
3400			counter[attr_id - CM_ATTR_ID_OFFSET]);
3401
3402	work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3403		       GFP_KERNEL);
3404	if (!work) {
3405		ib_free_recv_mad(mad_recv_wc);
3406		return;
3407	}
3408
3409	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3410	work->cm_event.event = event;
3411	work->mad_recv_wc = mad_recv_wc;
3412	work->port = port;
3413	queue_delayed_work(cm.wq, &work->work, 0);
3414}
3415
3416static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3417				struct ib_qp_attr *qp_attr,
3418				int *qp_attr_mask)
3419{
3420	unsigned long flags;
3421	int ret;
3422
3423	spin_lock_irqsave(&cm_id_priv->lock, flags);
3424	switch (cm_id_priv->id.state) {
3425	case IB_CM_REQ_SENT:
3426	case IB_CM_MRA_REQ_RCVD:
3427	case IB_CM_REQ_RCVD:
3428	case IB_CM_MRA_REQ_SENT:
3429	case IB_CM_REP_RCVD:
3430	case IB_CM_MRA_REP_SENT:
3431	case IB_CM_REP_SENT:
3432	case IB_CM_MRA_REP_RCVD:
3433	case IB_CM_ESTABLISHED:
3434		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3435				IB_QP_PKEY_INDEX | IB_QP_PORT;
3436		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3437		if (cm_id_priv->responder_resources)
3438			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3439						    IB_ACCESS_REMOTE_ATOMIC;
3440		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3441		qp_attr->port_num = cm_id_priv->av.port->port_num;
3442		ret = 0;
3443		break;
3444	default:
3445		ret = -EINVAL;
3446		break;
3447	}
3448	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3449	return ret;
3450}
3451
3452static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3453			       struct ib_qp_attr *qp_attr,
3454			       int *qp_attr_mask)
3455{
3456	unsigned long flags;
3457	int ret;
3458
3459	spin_lock_irqsave(&cm_id_priv->lock, flags);
3460	switch (cm_id_priv->id.state) {
3461	case IB_CM_REQ_RCVD:
3462	case IB_CM_MRA_REQ_SENT:
3463	case IB_CM_REP_RCVD:
3464	case IB_CM_MRA_REP_SENT:
3465	case IB_CM_REP_SENT:
3466	case IB_CM_MRA_REP_RCVD:
3467	case IB_CM_ESTABLISHED:
3468		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3469				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3470		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3471		qp_attr->path_mtu = cm_id_priv->path_mtu;
3472		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3473		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3474		if (cm_id_priv->qp_type == IB_QPT_RC) {
3475			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3476					 IB_QP_MIN_RNR_TIMER;
3477			qp_attr->max_dest_rd_atomic =
3478					cm_id_priv->responder_resources;
3479			qp_attr->min_rnr_timer = 0;
3480		}
3481		if (cm_id_priv->alt_av.ah_attr.dlid) {
3482			*qp_attr_mask |= IB_QP_ALT_PATH;
3483			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3484			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3485			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3486			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3487		}
3488		ret = 0;
3489		break;
3490	default:
3491		ret = -EINVAL;
3492		break;
3493	}
3494	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3495	return ret;
3496}
3497
3498static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3499			       struct ib_qp_attr *qp_attr,
3500			       int *qp_attr_mask)
3501{
3502	unsigned long flags;
3503	int ret;
3504
3505	spin_lock_irqsave(&cm_id_priv->lock, flags);
3506	switch (cm_id_priv->id.state) {
3507	/* Allow transition to RTS before sending REP */
3508	case IB_CM_REQ_RCVD:
3509	case IB_CM_MRA_REQ_SENT:
3510
3511	case IB_CM_REP_RCVD:
3512	case IB_CM_MRA_REP_SENT:
3513	case IB_CM_REP_SENT:
3514	case IB_CM_MRA_REP_RCVD:
3515	case IB_CM_ESTABLISHED:
3516		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3517			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3518			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3519			if (cm_id_priv->qp_type == IB_QPT_RC) {
3520				*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3521						 IB_QP_RNR_RETRY |
3522						 IB_QP_MAX_QP_RD_ATOMIC;
3523				qp_attr->timeout = cm_id_priv->av.timeout;
3524				qp_attr->retry_cnt = cm_id_priv->retry_count;
3525				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3526				qp_attr->max_rd_atomic =
3527					cm_id_priv->initiator_depth;
3528			}
3529			if (cm_id_priv->alt_av.ah_attr.dlid) {
3530				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3531				qp_attr->path_mig_state = IB_MIG_REARM;
3532			}
3533		} else {
3534			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3535			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3536			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3537			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3538			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3539			qp_attr->path_mig_state = IB_MIG_REARM;
3540		}
3541		ret = 0;
3542		break;
3543	default:
3544		ret = -EINVAL;
3545		break;
3546	}
3547	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3548	return ret;
3549}
3550
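/*
 * ib_cm_init_qp_attr - fill QP attributes for the next state transition
 *
 * The caller sets qp_attr->qp_state to the target state; this routine
 * fills in the remaining attributes and the mask to pass to
 * ib_modify_qp().  A minimal sketch of a typical consumer (the cm_id
 * and qp variables are the caller's, not taken from this file):
 *
 *	struct ib_qp_attr qp_attr;
 *	int qp_attr_mask, ret;
 *
 *	qp_attr.qp_state = IB_QPS_INIT;
 *	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
 *	if (!ret)
 *		ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 *
 * and likewise with IB_QPS_RTR and IB_QPS_RTS at the appropriate
 * points in connection establishment.
 */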
3551int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3552		       struct ib_qp_attr *qp_attr,
3553		       int *qp_attr_mask)
3554{
3555	struct cm_id_private *cm_id_priv;
3556	int ret;
3557
3558	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3559	switch (qp_attr->qp_state) {
3560	case IB_QPS_INIT:
3561		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3562		break;
3563	case IB_QPS_RTR:
3564		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3565		break;
3566	case IB_QPS_RTS:
3567		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3568		break;
3569	default:
3570		ret = -EINVAL;
3571		break;
3572	}
3573	return ret;
3574}
3575EXPORT_SYMBOL(ib_cm_init_qp_attr);
3576
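/*
 * Cache the local CA ACK delay for the device.  If the device query
 * fails, the delay is set to zero so that ACK timeouts fall back on
 * the packet lifetime alone.
 */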
3577static void cm_get_ack_delay(struct cm_device *cm_dev)
3578{
3579	struct ib_device_attr attr;
3580
3581	if (ib_query_device(cm_dev->ib_device, &attr))
3582		cm_dev->ack_delay = 0; /* ACKs will rely on packet lifetime */
3583	else
3584		cm_dev->ack_delay = attr.local_ca_ack_delay;
3585}
3586
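/* sysfs read handler for a single CM counter within a counter group */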
3587static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3588			       char *buf)
3589{
3590	struct cm_counter_group *group;
3591	struct cm_counter_attribute *cm_attr;
3592
3593	group = container_of(obj, struct cm_counter_group, obj);
3594	cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3595
3596	return sprintf(buf, "%ld\n",
3597		       atomic_long_read(&group->counter[cm_attr->index]));
3598}
3599
3600static struct sysfs_ops cm_counter_ops = {
3601	.show = cm_show_counter
3602};
3603
3604static struct kobj_type cm_counter_obj_type = {
3605	.sysfs_ops = &cm_counter_ops,
3606	.default_attrs = cm_counter_default_attrs
3607};
3608
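/*
 * kobject release callbacks: the port and device structures embed their
 * kobjects, so they are freed only once the last sysfs reference drops.
 */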
3609static void cm_release_port_obj(struct kobject *obj)
3610{
3611	struct cm_port *cm_port;
3612
3613	cm_port = container_of(obj, struct cm_port, port_obj);
3614	kfree(cm_port);
3615}
3616
3617static struct kobj_type cm_port_obj_type = {
3618	.release = cm_release_port_obj
3619};
3620
3621static void cm_release_dev_obj(struct kobject *obj)
3622{
3623	struct cm_device *cm_dev;
3624
3625	cm_dev = container_of(obj, struct cm_device, dev_obj);
3626	kfree(cm_dev);
3627}
3628
3629static struct kobj_type cm_dev_obj_type = {
3630	.release = cm_release_dev_obj
3631};
3632
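/* Exported so related modules (e.g. ib_ucm) can create devices in this class */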
3633struct class cm_class = {
3634	.name    = "infiniband_cm",
3635};
3636EXPORT_SYMBOL(cm_class);
3637
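/*
 * Create the per-port sysfs hierarchy.  Each port gets a kobject under
 * its device's kobject, with one child kobject per counter group,
 * giving paths of the form (illustrative):
 *
 *	/sys/class/infiniband_cm/<device>/<port>/<counter group>/<counter>
 */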
3638static int cm_create_port_fs(struct cm_port *port)
3639{
3640	int i, ret;
3641
3642	ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3643				   &port->cm_dev->dev_obj,
3644				   "%d", port->port_num);
3645	if (ret) {
3646		kfree(port);
3647		return ret;
3648	}
3649
3650	for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3651		ret = kobject_init_and_add(&port->counter_group[i].obj,
3652					   &cm_counter_obj_type,
3653					   &port->port_obj,
3654					   "%s", counter_group_names[i]);
3655		if (ret)
3656			goto error;
3657	}
3658
3659	return 0;
3660
3661error:
3662	while (i--)
3663		kobject_put(&port->counter_group[i].obj);
3664	kobject_put(&port->port_obj);
3665	return ret;
3667}
3668
3669static void cm_remove_port_fs(struct cm_port *port)
3670{
3671	int i;
3672
3673	for (i = 0; i < CM_COUNTER_GROUPS; i++)
3674		kobject_put(&port->counter_group[i].obj);
3675
3676	kobject_put(&port->port_obj);
3677}
3678
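/*
 * Called for each IB device at client registration or hot-plug.  Sets
 * up per-device and per-port state: sysfs objects, a MAD agent for CM
 * class messages on each port's GSI QP, and the CM-supported port
 * capability bit.  On any failure, all previously set up ports are
 * torn down in reverse order.
 */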
3679static void cm_add_one(struct ib_device *ib_device)
3680{
3681	struct cm_device *cm_dev;
3682	struct cm_port *port;
3683	struct ib_mad_reg_req reg_req = {
3684		.mgmt_class = IB_MGMT_CLASS_CM,
3685		.mgmt_class_version = IB_CM_CLASS_VERSION
3686	};
3687	struct ib_port_modify port_modify = {
3688		.set_port_cap_mask = IB_PORT_CM_SUP
3689	};
3690	unsigned long flags;
3691	int ret;
3692	u8 i;
3693
3694	if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB)
3695		return;
3696
3697	cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3698			 ib_device->phys_port_cnt, GFP_KERNEL);
3699	if (!cm_dev)
3700		return;
3701
3702	cm_dev->ib_device = ib_device;
3703	cm_get_ack_delay(cm_dev);
3704
3705	ret = kobject_init_and_add(&cm_dev->dev_obj, &cm_dev_obj_type,
3706				   &cm_class.subsys.kobj, "%s",
3707				   ib_device->name);
3708	if (ret) {
3709		kfree(cm_dev);
3710		return;
3711	}
3712
3713	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3714	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3715		port = kzalloc(sizeof *port, GFP_KERNEL);
3716		if (!port)
3717			goto error1;
3718
3719		cm_dev->port[i-1] = port;
3720		port->cm_dev = cm_dev;
3721		port->port_num = i;
3722
3723		ret = cm_create_port_fs(port);
3724		if (ret)
3725			goto error1;
3726
3727		port->mad_agent = ib_register_mad_agent(ib_device, i,
3728							IB_QPT_GSI,
3729							&reg_req,
3730							0,
3731							cm_send_handler,
3732							cm_recv_handler,
3733							port);
3734		if (IS_ERR(port->mad_agent))
3735			goto error2;
3736
3737		ret = ib_modify_port(ib_device, i, 0, &port_modify);
3738		if (ret)
3739			goto error3;
3740	}
3741	ib_set_client_data(ib_device, &cm_client, cm_dev);
3742
3743	write_lock_irqsave(&cm.device_lock, flags);
3744	list_add_tail(&cm_dev->list, &cm.device_list);
3745	write_unlock_irqrestore(&cm.device_lock, flags);
3746	return;
3747
3748error3:
3749	ib_unregister_mad_agent(port->mad_agent);
3750error2:
3751	cm_remove_port_fs(port);
3752error1:
3753	port_modify.set_port_cap_mask = 0;
3754	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3755	while (--i) {
3756		port = cm_dev->port[i-1];
3757		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3758		ib_unregister_mad_agent(port->mad_agent);
3759		cm_remove_port_fs(port);
3760	}
3761	kobject_put(&cm_dev->dev_obj);
3762}
3763
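/*
 * Mirror of cm_add_one(): clears the CM capability bit, unregisters the
 * MAD agent, and flushes pending work for each port before dropping the
 * device's sysfs objects.
 */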
3764static void cm_remove_one(struct ib_device *ib_device)
3765{
3766	struct cm_device *cm_dev;
3767	struct cm_port *port;
3768	struct ib_port_modify port_modify = {
3769		.clr_port_cap_mask = IB_PORT_CM_SUP
3770	};
3771	unsigned long flags;
3772	int i;
3773
3774	cm_dev = ib_get_client_data(ib_device, &cm_client);
3775	if (!cm_dev)
3776		return;
3777
3778	write_lock_irqsave(&cm.device_lock, flags);
3779	list_del(&cm_dev->list);
3780	write_unlock_irqrestore(&cm.device_lock, flags);
3781
3782	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3783		port = cm_dev->port[i-1];
3784		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3785		ib_unregister_mad_agent(port->mad_agent);
3786		flush_workqueue(cm.wq);
3787		cm_remove_port_fs(port);
3788	}
3789	kobject_put(&cm_dev->dev_obj);
3790}
3791
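/*
 * Module init: set up the global tables, the local ID allocator, the
 * sysfs class and the work queue before registering with the IB core
 * as a client (which triggers cm_add_one() for existing devices).
 */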
3792static int __init ib_cm_init(void)
3793{
3794	int ret;
3795
3796	memset(&cm, 0, sizeof cm);
3797	INIT_LIST_HEAD(&cm.device_list);
3798	rwlock_init(&cm.device_lock);
3799	spin_lock_init(&cm.lock);
3800	cm.listen_service_table = RB_ROOT;
3801	cm.listen_service_id = __constant_be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
3802	cm.remote_id_table = RB_ROOT;
3803	cm.remote_qp_table = RB_ROOT;
3804	cm.remote_sidr_table = RB_ROOT;
3805	idr_init(&cm.local_id_table);
3806	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
3807	idr_pre_get(&cm.local_id_table, GFP_KERNEL);
3808	INIT_LIST_HEAD(&cm.timewait_list);
3809
3810	ret = class_register(&cm_class);
3811	if (ret)
3812		return ret;
3813
3814	cm.wq = create_workqueue("ib_cm");
3815	if (!cm.wq) {
3816		ret = -ENOMEM;
3817		goto error1;
3818	}
3819
3820	ret = ib_register_client(&cm_client);
3821	if (ret)
3822		goto error2;
3823
3824	return 0;
3825error2:
3826	destroy_workqueue(cm.wq);
3827error1:
3828	class_unregister(&cm_class);
3829	return ret;
3830}
3831
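/*
 * Module exit: cancel pending timewait work before unregistering, since
 * the work queue is destroyed next; any timewait entries still on the
 * list afterwards are freed directly.
 */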
3832static void __exit ib_cm_cleanup(void)
3833{
3834	struct cm_timewait_info *timewait_info, *tmp;
3835
3836	spin_lock_irq(&cm.lock);
3837	list_for_each_entry(timewait_info, &cm.timewait_list, list)
3838		cancel_delayed_work(&timewait_info->work.work);
3839	spin_unlock_irq(&cm.lock);
3840
3841	ib_unregister_client(&cm_client);
3842	destroy_workqueue(cm.wq);
3843
3844	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
3845		list_del(&timewait_info->list);
3846		kfree(timewait_info);
3847	}
3848
3849	class_unregister(&cm_class);
3850	idr_destroy(&cm.local_id_table);
3851}
3852
3853module_init(ib_cm_init);
3854module_exit(ib_cm_cleanup);
3855