cm.c revision c4028958b6ecad064b1a6303a6a5906d4fe48d73
/*
 * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);

static struct ib_client cm_client = {
	.name   = "cm",
	.add    = cm_add_one,
	.remove = cm_remove_one
};

static struct ib_cm {
	spinlock_t lock;
	struct list_head device_list;
	rwlock_t device_lock;
	struct rb_root listen_service_table;
	u64 listen_service_id;
	/* struct rb_root peer_service_table; todo: fix peer to peer */
	struct rb_root remote_qp_table;
	struct rb_root remote_id_table;
	struct rb_root remote_sidr_table;
	struct idr local_id_table;
	__be32 random_id_operand;
	struct list_head timewait_list;
	struct workqueue_struct *wq;
} cm;

struct cm_port {
	struct cm_device *cm_dev;
	struct ib_mad_agent *mad_agent;
	u8 port_num;
};

struct cm_device {
	struct list_head list;
	struct ib_device *device;
	__be64 ca_guid;
	struct cm_port port[0];
};

struct cm_av {
	struct cm_port *port;
	union ib_gid dgid;
	struct ib_ah_attr ah_attr;
	u16 pkey_index;
	u8 packet_life_time;
};

struct cm_work {
	struct delayed_work work;
	struct list_head list;
	struct cm_port *port;
	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
	__be32 local_id;			/* Established / timewait */
	__be32 remote_id;
	struct ib_cm_event cm_event;
	struct ib_sa_path_rec path[0];
};

struct cm_timewait_info {
	struct cm_work work;			/* Must be first. */
	struct list_head list;
	struct rb_node remote_qp_node;
	struct rb_node remote_id_node;
	__be64 remote_ca_guid;
	__be32 remote_qpn;
	u8 inserted_remote_qp;
	u8 inserted_remote_id;
};

struct cm_id_private {
	struct ib_cm_id	id;

	struct rb_node service_node;
	struct rb_node sidr_id_node;
	spinlock_t lock;	/* Do not acquire inside cm.lock */
	struct completion comp;
	atomic_t refcount;

	struct ib_mad_send_buf *msg;
	struct cm_timewait_info *timewait_info;
	/* todo: use alternate port on send failure */
	struct cm_av av;
	struct cm_av alt_av;
	struct ib_cm_compare_data *compare_data;

	void *private_data;
	__be64 tid;
	__be32 local_qpn;
	__be32 remote_qpn;
	enum ib_qp_type qp_type;
	__be32 sq_psn;
	__be32 rq_psn;
	int timeout_ms;
	enum ib_mtu path_mtu;
	u8 private_data_len;
	u8 max_cm_retries;
	u8 peer_to_peer;
	u8 responder_resources;
	u8 initiator_depth;
	u8 local_ack_timeout;
	u8 retry_count;
	u8 rnr_retry_count;
	u8 service_timeout;

	struct list_head work_list;
	atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
	if (atomic_dec_and_test(&cm_id_priv->refcount))
		complete(&cm_id_priv->comp);
}
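
/*
 * Reference counting, in brief: each cm_id_private starts with a refcount
 * of one (ib_create_cm_id()), every outstanding MAD and queued work item
 * takes an extra reference, and cm_destroy_id() drops its reference and
 * then blocks in wait_for_completion() until the final cm_deref_id()
 * above fires the completion.  The pattern is:
 *
 *	atomic_inc(&cm_id_priv->refcount);	(take a reference)
 *	... hand cm_id_priv to asynchronous work ...
 *	cm_deref_id(cm_id_priv);		(drop it when done)
 */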

static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
			struct ib_mad_send_buf **msg)
{
	struct ib_mad_agent *mad_agent;
	struct ib_mad_send_buf *m;
	struct ib_ah *ah;

	mad_agent = cm_id_priv->av.port->mad_agent;
	ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
			       cm_id_priv->av.pkey_index,
			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
			       GFP_ATOMIC);
	if (IS_ERR(m)) {
		ib_destroy_ah(ah);
		return PTR_ERR(m);
	}

	/* Timeout set by caller if response is expected. */
	m->ah = ah;
	m->retries = cm_id_priv->max_cm_retries;

	atomic_inc(&cm_id_priv->refcount);
	m->context[0] = cm_id_priv;
	*msg = m;
	return 0;
}

static int cm_alloc_response_msg(struct cm_port *port,
				 struct ib_mad_recv_wc *mad_recv_wc,
				 struct ib_mad_send_buf **msg)
{
	struct ib_mad_send_buf *m;
	struct ib_ah *ah;

	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
				  mad_recv_wc->recv_buf.grh, port->port_num);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
			       GFP_ATOMIC);
	if (IS_ERR(m)) {
		ib_destroy_ah(ah);
		return PTR_ERR(m);
	}
	m->ah = ah;
	*msg = m;
	return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
	ib_destroy_ah(msg->ah);
	if (msg->context[0])
		cm_deref_id(msg->context[0]);
	ib_free_send_mad(msg);
}

static void * cm_copy_private_data(const void *private_data,
				   u8 private_data_len)
{
	void *data;

	if (!private_data || !private_data_len)
		return NULL;

	data = kmalloc(private_data_len, GFP_KERNEL);
	if (!data)
		return ERR_PTR(-ENOMEM);

	memcpy(data, private_data, private_data_len);
	return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
				 void *private_data, u8 private_data_len)
{
	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
		kfree(cm_id_priv->private_data);

	cm_id_priv->private_data = private_data;
	cm_id_priv->private_data_len = private_data_len;
}

static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
				    struct ib_grh *grh, struct cm_av *av)
{
	av->port = port;
	av->pkey_index = wc->pkey_index;
	ib_init_ah_from_wc(port->cm_dev->device, port->port_num, wc,
			   grh, &av->ah_attr);
}

static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
	struct cm_device *cm_dev;
	struct cm_port *port = NULL;
	unsigned long flags;
	int ret;
	u8 p;

	read_lock_irqsave(&cm.device_lock, flags);
	list_for_each_entry(cm_dev, &cm.device_list, list) {
		if (!ib_find_cached_gid(cm_dev->device, &path->sgid,
					&p, NULL)) {
			port = &cm_dev->port[p-1];
			break;
		}
	}
	read_unlock_irqrestore(&cm.device_lock, flags);

	if (!port)
		return -EINVAL;

	ret = ib_find_cached_pkey(cm_dev->device, port->port_num,
				  be16_to_cpu(path->pkey), &av->pkey_index);
	if (ret)
		return ret;

	av->port = port;
	ib_init_ah_from_path(cm_dev->device, port->port_num, path,
			     &av->ah_attr);
	av->packet_life_time = path->packet_life_time;
	return 0;
}

static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;
	int ret, id;
	static int next_id;

	do {
		spin_lock_irqsave(&cm.lock, flags);
		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
					next_id++, &id);
		spin_unlock_irqrestore(&cm.lock, flags);
	} while ((ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL));

	cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
	return ret;
}
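
/*
 * The idr above hands out small sequential integers; XORing them with the
 * random_id_operand (randomized at init) obfuscates the local id that
 * goes out on the wire.  XOR is its own inverse, so lookup simply
 * reapplies it.  Worked example:
 *
 *	idr id = 5, random_id_operand = 0x12345678
 *	local_id  = 5 ^ 0x12345678 = 0x1234567d	(carried in MADs)
 *	idr index = 0x1234567d ^ 0x12345678 = 5	(see cm_free_id/cm_get_id)
 */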

static void cm_free_id(__be32 local_id)
{
	unsigned long flags;

	spin_lock_irqsave(&cm.lock, flags);
	idr_remove(&cm.local_id_table,
		   (__force int) (local_id ^ cm.random_id_operand));
	spin_unlock_irqrestore(&cm.lock, flags);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;

	cm_id_priv = idr_find(&cm.local_id_table,
			      (__force int) (local_id ^ cm.random_id_operand));
	if (cm_id_priv) {
		if (cm_id_priv->id.remote_id == remote_id)
			atomic_inc(&cm_id_priv->refcount);
		else
			cm_id_priv = NULL;
	}

	return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;
	unsigned long flags;

	spin_lock_irqsave(&cm.lock, flags);
	cm_id_priv = cm_get_id(local_id, remote_id);
	spin_unlock_irqrestore(&cm.lock, flags);

	return cm_id_priv;
}

static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
{
	int i;

	for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
		((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
					     ((unsigned long *) mask)[i];
}

static int cm_compare_data(struct ib_cm_compare_data *src_data,
			   struct ib_cm_compare_data *dst_data)
{
	u8 src[IB_CM_COMPARE_SIZE];
	u8 dst[IB_CM_COMPARE_SIZE];

	if (!src_data || !dst_data)
		return 0;

	cm_mask_copy(src, src_data->data, dst_data->mask);
	cm_mask_copy(dst, dst_data->data, src_data->mask);
	return memcmp(src, dst, IB_CM_COMPARE_SIZE);
}

static int cm_compare_private_data(u8 *private_data,
				   struct ib_cm_compare_data *dst_data)
{
	u8 src[IB_CM_COMPARE_SIZE];

	if (!dst_data)
		return 0;

	cm_mask_copy(src, private_data, dst_data->mask);
	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
}
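
/*
 * Masked matching example: ib_cm_listen() stores the listener's compare
 * data pre-masked, so a REQ's private data matches when
 * (private_data & listener->mask) == listener->data.  A listener with
 * mask = { 0xff, 0x00, ... } and data = { 0x07, 0x00, ... } therefore
 * accepts any REQ whose first private data byte is 0x07, whatever the
 * remaining bytes hold.
 */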

static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
	struct rb_node **link = &cm.listen_service_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_id_private *cur_cm_id_priv;
	__be64 service_id = cm_id_priv->id.service_id;
	__be64 service_mask = cm_id_priv->id.service_mask;
	int data_cmp;

	while (*link) {
		parent = *link;
		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
					  service_node);
		data_cmp = cm_compare_data(cm_id_priv->compare_data,
					   cur_cm_id_priv->compare_data);
		if ((cur_cm_id_priv->id.service_mask & service_id) ==
		    (service_mask & cur_cm_id_priv->id.service_id) &&
		    (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
		    !data_cmp)
			return cur_cm_id_priv;

		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
			link = &(*link)->rb_left;
		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
			link = &(*link)->rb_right;
		else if (service_id < cur_cm_id_priv->id.service_id)
			link = &(*link)->rb_left;
		else if (service_id > cur_cm_id_priv->id.service_id)
			link = &(*link)->rb_right;
		else if (data_cmp < 0)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}
	rb_link_node(&cm_id_priv->service_node, parent, link);
	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
	return NULL;
}

static struct cm_id_private * cm_find_listen(struct ib_device *device,
					     __be64 service_id,
					     u8 *private_data)
{
	struct rb_node *node = cm.listen_service_table.rb_node;
	struct cm_id_private *cm_id_priv;
	int data_cmp;

	while (node) {
		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
		data_cmp = cm_compare_private_data(private_data,
						   cm_id_priv->compare_data);
		if ((cm_id_priv->id.service_mask & service_id) ==
		     cm_id_priv->id.service_id &&
		    (cm_id_priv->id.device == device) && !data_cmp)
			return cm_id_priv;

		if (device < cm_id_priv->id.device)
			node = node->rb_left;
		else if (device > cm_id_priv->id.device)
			node = node->rb_right;
		else if (service_id < cm_id_priv->id.service_id)
			node = node->rb_left;
		else if (service_id > cm_id_priv->id.service_id)
			node = node->rb_right;
		else if (data_cmp < 0)
			node = node->rb_left;
		else
			node = node->rb_right;
	}
	return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
						     *timewait_info)
{
	struct rb_node **link = &cm.remote_id_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_timewait_info *cur_timewait_info;
	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
	__be32 remote_id = timewait_info->work.remote_id;

	while (*link) {
		parent = *link;
		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
					     remote_id_node);
		if (remote_id < cur_timewait_info->work.remote_id)
			link = &(*link)->rb_left;
		else if (remote_id > cur_timewait_info->work.remote_id)
			link = &(*link)->rb_right;
		else if (remote_ca_guid < cur_timewait_info->remote_ca_guid)
			link = &(*link)->rb_left;
		else if (remote_ca_guid > cur_timewait_info->remote_ca_guid)
			link = &(*link)->rb_right;
		else
			return cur_timewait_info;
	}
	timewait_info->inserted_remote_id = 1;
	rb_link_node(&timewait_info->remote_id_node, parent, link);
	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
	return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
						   __be32 remote_id)
{
	struct rb_node *node = cm.remote_id_table.rb_node;
	struct cm_timewait_info *timewait_info;

	while (node) {
		timewait_info = rb_entry(node, struct cm_timewait_info,
					 remote_id_node);
		if (remote_id < timewait_info->work.remote_id)
			node = node->rb_left;
		else if (remote_id > timewait_info->work.remote_id)
			node = node->rb_right;
		else if (remote_ca_guid < timewait_info->remote_ca_guid)
			node = node->rb_left;
		else if (remote_ca_guid > timewait_info->remote_ca_guid)
			node = node->rb_right;
		else
			return timewait_info;
	}
	return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
						      *timewait_info)
{
	struct rb_node **link = &cm.remote_qp_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_timewait_info *cur_timewait_info;
	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
	__be32 remote_qpn = timewait_info->remote_qpn;

	while (*link) {
		parent = *link;
		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
					     remote_qp_node);
		if (remote_qpn < cur_timewait_info->remote_qpn)
			link = &(*link)->rb_left;
		else if (remote_qpn > cur_timewait_info->remote_qpn)
			link = &(*link)->rb_right;
		else if (remote_ca_guid < cur_timewait_info->remote_ca_guid)
			link = &(*link)->rb_left;
		else if (remote_ca_guid > cur_timewait_info->remote_ca_guid)
			link = &(*link)->rb_right;
		else
			return cur_timewait_info;
	}
	timewait_info->inserted_remote_qp = 1;
	rb_link_node(&timewait_info->remote_qp_node, parent, link);
	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
	return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
						    *cm_id_priv)
{
	struct rb_node **link = &cm.remote_sidr_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_id_private *cur_cm_id_priv;
	union ib_gid *port_gid = &cm_id_priv->av.dgid;
	__be32 remote_id = cm_id_priv->id.remote_id;

	while (*link) {
		parent = *link;
		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
					  sidr_id_node);
		if (remote_id < cur_cm_id_priv->id.remote_id)
			link = &(*link)->rb_left;
		else if (remote_id > cur_cm_id_priv->id.remote_id)
			link = &(*link)->rb_right;
		else {
			int cmp;
			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
				     sizeof *port_gid);
			if (cmp < 0)
				link = &(*link)->rb_left;
			else if (cmp > 0)
				link = &(*link)->rb_right;
			else
				return cur_cm_id_priv;
		}
	}
	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
	return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
			       enum ib_cm_sidr_status status)
{
	struct ib_cm_sidr_rep_param param;

	memset(&param, 0, sizeof param);
	param.status = status;
	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
				 ib_cm_handler cm_handler,
				 void *context)
{
	struct cm_id_private *cm_id_priv;
	int ret;

	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
	if (!cm_id_priv)
		return ERR_PTR(-ENOMEM);

	cm_id_priv->id.state = IB_CM_IDLE;
	cm_id_priv->id.device = device;
	cm_id_priv->id.cm_handler = cm_handler;
	cm_id_priv->id.context = context;
	cm_id_priv->id.remote_cm_qpn = 1;
	ret = cm_alloc_id(cm_id_priv);
	if (ret)
		goto error;

	spin_lock_init(&cm_id_priv->lock);
	init_completion(&cm_id_priv->comp);
	INIT_LIST_HEAD(&cm_id_priv->work_list);
	atomic_set(&cm_id_priv->work_count, -1);
	atomic_set(&cm_id_priv->refcount, 1);
	return &cm_id_priv->id;

error:
	kfree(cm_id_priv);
	return ERR_PTR(ret);	/* propagate cm_alloc_id()'s error, not -ENOMEM */
}
EXPORT_SYMBOL(ib_create_cm_id);
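
/*
 * Minimal usage sketch (hypothetical consumer code; error handling and
 * locking elided):
 *
 *	static int my_handler(struct ib_cm_id *id, struct ib_cm_event *ev)
 *	{
 *		return 0;	(a nonzero return destroys the id)
 *	}
 *
 *	cm_id = ib_create_cm_id(device, my_handler, my_context);
 *	if (IS_ERR(cm_id))
 *		return PTR_ERR(cm_id);
 *	...
 *	ib_destroy_cm_id(cm_id);
 */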

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
	struct cm_work *work;

	if (list_empty(&cm_id_priv->work_list))
		return NULL;

	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
	list_del(&work->list);
	return work;
}

static void cm_free_work(struct cm_work *work)
{
	if (work->mad_recv_wc)
		ib_free_recv_mad(work->mad_recv_wc);
	kfree(work);
}

static inline int cm_convert_to_ms(int iba_time)
{
	/* approximate conversion to ms from 4.096us x 2^iba_time */
	return 1 << max(iba_time - 8, 0);
}
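
/*
 * Worked example: an IBA time of 14 encodes 4.096us * 2^14 ~= 67ms; the
 * shift above approximates that as 1 << (14 - 8) = 64ms, i.e. it treats
 * 4.096us * 2^8 (~1.05ms) as one millisecond.
 */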

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
	if (timewait_info->inserted_remote_id) {
		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
		timewait_info->inserted_remote_id = 0;
	}

	if (timewait_info->inserted_remote_qp) {
		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
		timewait_info->inserted_remote_qp = 0;
	}
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
	struct cm_timewait_info *timewait_info;

	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
	if (!timewait_info)
		return ERR_PTR(-ENOMEM);

	timewait_info->work.local_id = local_id;
	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
	return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
	int wait_time;
	unsigned long flags;

	spin_lock_irqsave(&cm.lock, flags);
	cm_cleanup_timewait(cm_id_priv->timewait_info);
	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
	spin_unlock_irqrestore(&cm.lock, flags);

	/*
	 * The cm_id could be destroyed by the user before we exit timewait.
	 * To protect against this, we search for the cm_id after exiting
	 * timewait before notifying the user that we've exited timewait.
	 */
	cm_id_priv->id.state = IB_CM_TIMEWAIT;
	wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout);
	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
			   msecs_to_jiffies(wait_time));
	cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;

	cm_id_priv->id.state = IB_CM_IDLE;
	if (cm_id_priv->timewait_info) {
		spin_lock_irqsave(&cm.lock, flags);
		cm_cleanup_timewait(cm_id_priv->timewait_info);
		spin_unlock_irqrestore(&cm.lock, flags);
		kfree(cm_id_priv->timewait_info);
		cm_id_priv->timewait_info = NULL;
	}
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
	struct cm_id_private *cm_id_priv;
	struct cm_work *work;
	unsigned long flags;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id->state) {
	case IB_CM_LISTEN:
		cm_id->state = IB_CM_IDLE;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		spin_lock_irqsave(&cm.lock, flags);
		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
		spin_unlock_irqrestore(&cm.lock, flags);
		break;
	case IB_CM_SIDR_REQ_SENT:
		cm_id->state = IB_CM_IDLE;
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		break;
	case IB_CM_SIDR_REQ_RCVD:
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
		break;
	case IB_CM_REQ_SENT:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
			       &cm_id_priv->av.port->cm_dev->ca_guid,
			       sizeof cm_id_priv->av.port->cm_dev->ca_guid,
			       NULL, 0);
		break;
	case IB_CM_REQ_RCVD:
		if (err == -ENOMEM) {
			/* Do not reject to allow future retries. */
			cm_reset_to_idle(cm_id_priv);
			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		} else {
			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
				       NULL, 0, NULL, 0);
		}
		break;
	case IB_CM_MRA_REQ_RCVD:
	case IB_CM_REP_SENT:
	case IB_CM_MRA_REP_RCVD:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		/* Fall through */
	case IB_CM_MRA_REQ_SENT:
	case IB_CM_REP_RCVD:
	case IB_CM_MRA_REP_SENT:
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
			       NULL, 0, NULL, 0);
		break;
	case IB_CM_ESTABLISHED:
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ib_send_cm_dreq(cm_id, NULL, 0);
		goto retest;
	case IB_CM_DREQ_SENT:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		cm_enter_timewait(cm_id_priv);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		break;
	case IB_CM_DREQ_RCVD:
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ib_send_cm_drep(cm_id, NULL, 0);
		break;
	default:
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		break;
	}

	cm_free_id(cm_id->local_id);
	cm_deref_id(cm_id_priv);
	wait_for_completion(&cm_id_priv->comp);
	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
		cm_free_work(work);
	kfree(cm_id_priv->compare_data);
	kfree(cm_id_priv->private_data);
	kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
	cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
		 struct ib_cm_compare_data *compare_data)
{
	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
	unsigned long flags;
	int ret = 0;

	service_mask = service_mask ? service_mask :
		       __constant_cpu_to_be64(~0ULL);
	service_id &= service_mask;
	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
		return -EINVAL;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	if (cm_id->state != IB_CM_IDLE)
		return -EINVAL;

	if (compare_data) {
		cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
						   GFP_KERNEL);
		if (!cm_id_priv->compare_data)
			return -ENOMEM;
		cm_mask_copy(cm_id_priv->compare_data->data,
			     compare_data->data, compare_data->mask);
		memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
		       IB_CM_COMPARE_SIZE);
	}

	cm_id->state = IB_CM_LISTEN;

	spin_lock_irqsave(&cm.lock, flags);
	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
		cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
	} else {
		cm_id->service_id = service_id;
		cm_id->service_mask = service_mask;
	}
	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
	spin_unlock_irqrestore(&cm.lock, flags);

	if (cur_cm_id_priv) {
		cm_id->state = IB_CM_IDLE;
		kfree(cm_id_priv->compare_data);
		cm_id_priv->compare_data = NULL;
		ret = -EBUSY;
	}
	return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
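
/*
 * Matching examples for the mask handling above:
 *
 *	ib_cm_listen(cm_id, svc_id, 0, NULL)
 *		matches svc_id exactly (a zero mask widens to ~0ULL);
 *	ib_cm_listen(cm_id, IB_CM_ASSIGN_SERVICE_ID, 0, NULL)
 *		asks the CM to assign an unused service id.
 *
 * An incoming REQ matches a listener when
 * (listener service_mask & REQ service_id) == listener service_id;
 * see cm_find_listen().
 */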

static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
			  enum cm_msg_sequence msg_seq)
{
	u64 hi_tid, low_tid;

	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
	low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
			  (msg_seq << 30));
	return cpu_to_be64(hi_tid | low_tid);
}
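
/*
 * TID layout (sketch): the high 32 bits carry the sending MAD agent's
 * hi_tid; the low 32 bits are the local communication id with the CM
 * message sequence OR'd into bits 31:30, so the separate exchanges
 * (REQ, DREQ, ...) on one cm_id use distinct transaction ids.
 */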

static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
			      __be16 attr_id, __be64 tid)
{
	hdr->base_version  = IB_MGMT_BASE_VERSION;
	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
	hdr->class_version = IB_CM_CLASS_VERSION;
	hdr->method	   = IB_MGMT_METHOD_SEND;
	hdr->attr_id	   = attr_id;
	hdr->tid	   = tid;
}

static void cm_format_req(struct cm_req_msg *req_msg,
			  struct cm_id_private *cm_id_priv,
			  struct ib_cm_req_param *param)
{
	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));

	req_msg->local_comm_id = cm_id_priv->id.local_id;
	req_msg->service_id = param->service_id;
	req_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid;
	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
	cm_req_set_resp_res(req_msg, param->responder_resources);
	cm_req_set_init_depth(req_msg, param->initiator_depth);
	cm_req_set_remote_resp_timeout(req_msg,
				       param->remote_cm_response_timeout);
	cm_req_set_qp_type(req_msg, param->qp_type);
	cm_req_set_flow_ctrl(req_msg, param->flow_control);
	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
	cm_req_set_local_resp_timeout(req_msg,
				      param->local_cm_response_timeout);
	cm_req_set_retry_count(req_msg, param->retry_count);
	req_msg->pkey = param->primary_path->pkey;
	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
	cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
	cm_req_set_srq(req_msg, param->srq);

	req_msg->primary_local_lid = param->primary_path->slid;
	req_msg->primary_remote_lid = param->primary_path->dlid;
	req_msg->primary_local_gid = param->primary_path->sgid;
	req_msg->primary_remote_gid = param->primary_path->dgid;
	cm_req_set_primary_flow_label(req_msg, param->primary_path->flow_label);
	cm_req_set_primary_packet_rate(req_msg, param->primary_path->rate);
	req_msg->primary_traffic_class = param->primary_path->traffic_class;
	req_msg->primary_hop_limit = param->primary_path->hop_limit;
	cm_req_set_primary_sl(req_msg, param->primary_path->sl);
	cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */
	cm_req_set_primary_local_ack_timeout(req_msg,
		min(31, param->primary_path->packet_life_time + 1));

	if (param->alternate_path) {
		req_msg->alt_local_lid = param->alternate_path->slid;
		req_msg->alt_remote_lid = param->alternate_path->dlid;
		req_msg->alt_local_gid = param->alternate_path->sgid;
		req_msg->alt_remote_gid = param->alternate_path->dgid;
		cm_req_set_alt_flow_label(req_msg,
					  param->alternate_path->flow_label);
		cm_req_set_alt_packet_rate(req_msg, param->alternate_path->rate);
		req_msg->alt_traffic_class = param->alternate_path->traffic_class;
		req_msg->alt_hop_limit = param->alternate_path->hop_limit;
		cm_req_set_alt_sl(req_msg, param->alternate_path->sl);
		cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */
		cm_req_set_alt_local_ack_timeout(req_msg,
			min(31, param->alternate_path->packet_life_time + 1));
	}

	if (param->private_data && param->private_data_len)
		memcpy(req_msg->private_data, param->private_data,
		       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
	/* peer-to-peer not supported */
	if (param->peer_to_peer)
		return -EINVAL;

	if (!param->primary_path)
		return -EINVAL;

	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
		return -EINVAL;

	if (param->private_data &&
	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
		return -EINVAL;

	if (param->alternate_path &&
	    (param->alternate_path->pkey != param->primary_path->pkey ||
	     param->alternate_path->mtu != param->primary_path->mtu))
		return -EINVAL;

	return 0;
}

int ib_send_cm_req(struct ib_cm_id *cm_id,
		   struct ib_cm_req_param *param)
{
	struct cm_id_private *cm_id_priv;
	struct cm_req_msg *req_msg;
	unsigned long flags;
	int ret;

	ret = cm_validate_req_param(param);
	if (ret)
		return ret;

	/* Verify that we're not in timewait. */
	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id->state != IB_CM_IDLE) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = -EINVAL;
		goto out;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
							    id.local_id);
	if (IS_ERR(cm_id_priv->timewait_info)) {
		ret = PTR_ERR(cm_id_priv->timewait_info);
		goto out;
	}

	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
	if (ret)
		goto error1;
	if (param->alternate_path) {
		ret = cm_init_av_by_path(param->alternate_path,
					 &cm_id_priv->alt_av);
		if (ret)
			goto error1;
	}
	cm_id->service_id = param->service_id;
	cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
	cm_id_priv->timeout_ms = cm_convert_to_ms(
				    param->primary_path->packet_life_time) * 2 +
				 cm_convert_to_ms(
				    param->remote_cm_response_timeout);
	cm_id_priv->max_cm_retries = param->max_cm_retries;
	cm_id_priv->initiator_depth = param->initiator_depth;
	cm_id_priv->responder_resources = param->responder_resources;
	cm_id_priv->retry_count = param->retry_count;
	cm_id_priv->path_mtu = param->primary_path->mtu;
	cm_id_priv->qp_type = param->qp_type;

	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
	if (ret)
		goto error1;

	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
	cm_format_req(req_msg, cm_id_priv, param);
	cm_id_priv->tid = req_msg->hdr.tid;
	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;

	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
	cm_id_priv->local_ack_timeout =
				cm_req_get_primary_local_ack_timeout(req_msg);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
	if (ret) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		goto error2;
	}
	BUG_ON(cm_id->state != IB_CM_IDLE);
	cm_id->state = IB_CM_REQ_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return 0;

error2:	cm_free_msg(cm_id_priv->msg);
error1:	kfree(cm_id_priv->timewait_info);
out:	return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);
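
/*
 * Active-side sketch (hypothetical values; the path record would normally
 * come from an SA path query):
 *
 *	struct ib_cm_req_param param = {
 *		.primary_path		= &path_rec,
 *		.service_id		= cpu_to_be64(0x1000ULL),
 *		.qp_num			= my_qp->qp_num,
 *		.qp_type		= IB_QPT_RC,
 *		.responder_resources	= 4,
 *		.initiator_depth	= 4,
 *		.remote_cm_response_timeout = 20,
 *		.local_cm_response_timeout  = 20,
 *		.retry_count		= 7,
 *		.rnr_retry_count	= 7,
 *		.max_cm_retries		= 15,
 *	};
 *	ret = ib_send_cm_req(cm_id, &param);
 */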

static int cm_issue_rej(struct cm_port *port,
			struct ib_mad_recv_wc *mad_recv_wc,
			enum ib_cm_rej_reason reason,
			enum cm_msg_response msg_rejected,
			void *ari, u8 ari_length)
{
	struct ib_mad_send_buf *msg = NULL;
	struct cm_rej_msg *rej_msg, *rcv_msg;
	int ret;

	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
	if (ret)
		return ret;

	/* We just need common CM header information.  Cast to any message. */
	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
	rej_msg = (struct cm_rej_msg *) msg->mad;

	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
	rej_msg->reason = cpu_to_be16(reason);

	if (ari && ari_length) {
		cm_rej_set_reject_info_len(rej_msg, ari_length);
		memcpy(rej_msg->ari, ari, ari_length);
	}

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		cm_free_msg(msg);

	return ret;
}

static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
				    __be32 local_qpn, __be32 remote_qpn)
{
	return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
		((local_ca_guid == remote_ca_guid) &&
		 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
}
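
/*
 * Tie-break example: if both ends of a peer-to-peer connection issue a
 * REQ, the side with the numerically larger CA GUID is considered the
 * active peer; on equal GUIDs (same HCA) the larger QPN wins, so exactly
 * one side ever takes each role.
 */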

static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
					    struct ib_sa_path_rec *primary_path,
					    struct ib_sa_path_rec *alt_path)
{
	memset(primary_path, 0, sizeof *primary_path);
	primary_path->dgid = req_msg->primary_local_gid;
	primary_path->sgid = req_msg->primary_remote_gid;
	primary_path->dlid = req_msg->primary_local_lid;
	primary_path->slid = req_msg->primary_remote_lid;
	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
	primary_path->hop_limit = req_msg->primary_hop_limit;
	primary_path->traffic_class = req_msg->primary_traffic_class;
	primary_path->reversible = 1;
	primary_path->pkey = req_msg->pkey;
	primary_path->sl = cm_req_get_primary_sl(req_msg);
	primary_path->mtu_selector = IB_SA_EQ;
	primary_path->mtu = cm_req_get_path_mtu(req_msg);
	primary_path->rate_selector = IB_SA_EQ;
	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
	primary_path->packet_life_time_selector = IB_SA_EQ;
	primary_path->packet_life_time =
		cm_req_get_primary_local_ack_timeout(req_msg);
	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);

	if (req_msg->alt_local_lid) {
		memset(alt_path, 0, sizeof *alt_path);
		alt_path->dgid = req_msg->alt_local_gid;
		alt_path->sgid = req_msg->alt_remote_gid;
		alt_path->dlid = req_msg->alt_local_lid;
		alt_path->slid = req_msg->alt_remote_lid;
		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
		alt_path->hop_limit = req_msg->alt_hop_limit;
		alt_path->traffic_class = req_msg->alt_traffic_class;
		alt_path->reversible = 1;
		alt_path->pkey = req_msg->pkey;
		alt_path->sl = cm_req_get_alt_sl(req_msg);
		alt_path->mtu_selector = IB_SA_EQ;
		alt_path->mtu = cm_req_get_path_mtu(req_msg);
		alt_path->rate_selector = IB_SA_EQ;
		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
		alt_path->packet_life_time_selector = IB_SA_EQ;
		alt_path->packet_life_time =
			cm_req_get_alt_local_ack_timeout(req_msg);
		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
	}
}
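
/*
 * Note the deliberate swap above: the REQ describes the path from the
 * active side's point of view, so the passive side records the sender's
 * local LID/GID as its own destination (dlid/dgid) and vice versa.  The
 * ack timeout is also backed out of packet_life_time, undoing the
 * "life time + 1" applied in cm_format_req().
 */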

static void cm_format_req_event(struct cm_work *work,
				struct cm_id_private *cm_id_priv,
				struct ib_cm_id *listen_id)
{
	struct cm_req_msg *req_msg;
	struct ib_cm_req_event_param *param;

	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
	param = &work->cm_event.param.req_rcvd;
	param->listen_id = listen_id;
	param->port = cm_id_priv->av.port->port_num;
	param->primary_path = &work->path[0];
	if (req_msg->alt_local_lid)
		param->alternate_path = &work->path[1];
	else
		param->alternate_path = NULL;
	param->remote_ca_guid = req_msg->local_ca_guid;
	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
	param->qp_type = cm_req_get_qp_type(req_msg);
	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
	param->responder_resources = cm_req_get_init_depth(req_msg);
	param->initiator_depth = cm_req_get_resp_res(req_msg);
	param->local_cm_response_timeout =
					cm_req_get_remote_resp_timeout(req_msg);
	param->flow_control = cm_req_get_flow_ctrl(req_msg);
	param->remote_cm_response_timeout =
					cm_req_get_local_resp_timeout(req_msg);
	param->retry_count = cm_req_get_retry_count(req_msg);
	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
	param->srq = cm_req_get_srq(req_msg);
	work->cm_event.private_data = &req_msg->private_data;
}

static void cm_process_work(struct cm_id_private *cm_id_priv,
			    struct cm_work *work)
{
	unsigned long flags;
	int ret;

	/* We will typically only have the current event to report. */
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
	cm_free_work(work);

	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		work = cm_dequeue_work(cm_id_priv);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		BUG_ON(!work);
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
						&work->cm_event);
		cm_free_work(work);
	}
	cm_deref_id(cm_id_priv);
	if (ret)
		cm_destroy_id(&cm_id_priv->id, ret);
}
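
/*
 * Event delivery sketch: work_count starts at -1, so the first handler
 * for a cm_id to win atomic_inc_and_test() calls cm_process_work()
 * directly, while later handlers queue their work on work_list.  The
 * loop above then drains that queue, and any nonzero return from the
 * consumer's callback tears the cm_id down via cm_destroy_id().
 */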

static void cm_format_mra(struct cm_mra_msg *mra_msg,
			  struct cm_id_private *cm_id_priv,
			  enum cm_msg_response msg_mraed, u8 service_timeout,
			  const void *private_data, u8 private_data_len)
{
	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
	mra_msg->local_comm_id = cm_id_priv->id.local_id;
	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
	cm_mra_set_service_timeout(mra_msg, service_timeout);

	if (private_data && private_data_len)
		memcpy(mra_msg->private_data, private_data, private_data_len);
}

static void cm_format_rej(struct cm_rej_msg *rej_msg,
			  struct cm_id_private *cm_id_priv,
			  enum ib_cm_rej_reason reason,
			  void *ari,
			  u8 ari_length,
			  const void *private_data,
			  u8 private_data_len)
{
	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;

	switch(cm_id_priv->id.state) {
	case IB_CM_REQ_RCVD:
		rej_msg->local_comm_id = 0;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
		break;
	case IB_CM_MRA_REQ_SENT:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
		break;
	case IB_CM_REP_RCVD:
	case IB_CM_MRA_REP_SENT:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
		break;
	default:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
		break;
	}

	rej_msg->reason = cpu_to_be16(reason);
	if (ari && ari_length) {
		cm_rej_set_reject_info_len(rej_msg, ari_length);
		memcpy(rej_msg->ari, ari, ari_length);
	}

	if (private_data && private_data_len)
		memcpy(rej_msg->private_data, private_data, private_data_len);
}

static void cm_dup_req_handler(struct cm_work *work,
			       struct cm_id_private *cm_id_priv)
{
	struct ib_mad_send_buf *msg = NULL;
	unsigned long flags;
	int ret;

	/* Quick state check to discard duplicate REQs. */
	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
		return;

	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
	if (ret)
		return;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->id.state) {
	case IB_CM_MRA_REQ_SENT:
		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
			      cm_id_priv->private_data,
			      cm_id_priv->private_data_len);
		break;
	case IB_CM_TIMEWAIT:
		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
		break;
	default:
		goto unlock;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		goto free;
	return;

unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
free:	cm_free_msg(msg);
}

static struct cm_id_private * cm_match_req(struct cm_work *work,
					   struct cm_id_private *cm_id_priv)
{
	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
	struct cm_timewait_info *timewait_info;
	struct cm_req_msg *req_msg;
	unsigned long flags;

	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;

	/* Check for duplicate REQ and stale connections. */
	spin_lock_irqsave(&cm.lock, flags);
	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
	if (!timewait_info)
		timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);

	if (timewait_info) {
		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
					   timewait_info->work.remote_id);
		cm_cleanup_timewait(cm_id_priv->timewait_info);
		spin_unlock_irqrestore(&cm.lock, flags);
		if (cur_cm_id_priv) {
			cm_dup_req_handler(work, cur_cm_id_priv);
			cm_deref_id(cur_cm_id_priv);
		} else
			cm_issue_rej(work->port, work->mad_recv_wc,
				     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
				     NULL, 0);
		listen_cm_id_priv = NULL;
		goto out;
	}

	/* Find matching listen request. */
	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
					   req_msg->service_id,
					   req_msg->private_data);
	if (!listen_cm_id_priv) {
		cm_cleanup_timewait(cm_id_priv->timewait_info);
		spin_unlock_irqrestore(&cm.lock, flags);
		cm_issue_rej(work->port, work->mad_recv_wc,
			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
			     NULL, 0);
		goto out;
	}
	atomic_inc(&listen_cm_id_priv->refcount);
	atomic_inc(&cm_id_priv->refcount);
	cm_id_priv->id.state = IB_CM_REQ_RCVD;
	atomic_inc(&cm_id_priv->work_count);
	spin_unlock_irqrestore(&cm.lock, flags);
out:
	return listen_cm_id_priv;
}

static int cm_req_handler(struct cm_work *work)
{
	struct ib_cm_id *cm_id;
	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
	struct cm_req_msg *req_msg;
	int ret;

	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;

	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
	if (IS_ERR(cm_id))
		return PTR_ERR(cm_id);

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	cm_id_priv->id.remote_id = req_msg->local_comm_id;
	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
				work->mad_recv_wc->recv_buf.grh,
				&cm_id_priv->av);
	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
							    id.local_id);
	if (IS_ERR(cm_id_priv->timewait_info)) {
		ret = PTR_ERR(cm_id_priv->timewait_info);
		goto destroy;
	}
	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
	cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);

	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
	if (!listen_cm_id_priv) {
		ret = -EINVAL;
		kfree(cm_id_priv->timewait_info);
		goto destroy;
	}

	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
	cm_id_priv->id.context = listen_cm_id_priv->id.context;
	cm_id_priv->id.service_id = req_msg->service_id;
	cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);

	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
	if (ret) {
		ib_get_cached_gid(work->port->cm_dev->device,
				  work->port->port_num, 0, &work->path[0].sgid);
		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
			       &work->path[0].sgid, sizeof work->path[0].sgid,
			       NULL, 0);
		goto rejected;
	}
	if (req_msg->alt_local_lid) {
		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
		if (ret) {
			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
				       &work->path[0].sgid,
				       sizeof work->path[0].sgid, NULL, 0);
			goto rejected;
		}
	}
	cm_id_priv->tid = req_msg->hdr.tid;
	cm_id_priv->timeout_ms = cm_convert_to_ms(
					cm_req_get_local_resp_timeout(req_msg));
	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
	cm_id_priv->local_ack_timeout =
				cm_req_get_primary_local_ack_timeout(req_msg);
	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);

	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
	cm_process_work(cm_id_priv, work);
	cm_deref_id(listen_cm_id_priv);
	return 0;

rejected:
	atomic_dec(&cm_id_priv->refcount);
	cm_deref_id(listen_cm_id_priv);
destroy:
	ib_destroy_cm_id(cm_id);
	return ret;
}

static void cm_format_rep(struct cm_rep_msg *rep_msg,
			  struct cm_id_private *cm_id_priv,
			  struct ib_cm_rep_param *param)
{
	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
	rep_msg->local_comm_id = cm_id_priv->id.local_id;
	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
	cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
	rep_msg->resp_resources = param->responder_resources;
	rep_msg->initiator_depth = param->initiator_depth;
	cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay);
	cm_rep_set_failover(rep_msg, param->failover_accepted);
	cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
	cm_rep_set_srq(rep_msg, param->srq);
	rep_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid;

	if (param->private_data && param->private_data_len)
		memcpy(rep_msg->private_data, param->private_data,
		       param->private_data_len);
}

int ib_send_cm_rep(struct ib_cm_id *cm_id,
		   struct ib_cm_rep_param *param)
{
	struct cm_id_private *cm_id_priv;
	struct ib_mad_send_buf *msg;
	struct cm_rep_msg *rep_msg;
	unsigned long flags;
	int ret;

	if (param->private_data &&
	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
		return -EINVAL;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id->state != IB_CM_REQ_RCVD &&
	    cm_id->state != IB_CM_MRA_REQ_SENT) {
		ret = -EINVAL;
		goto out;
	}

	ret = cm_alloc_msg(cm_id_priv, &msg);
	if (ret)
		goto out;

	rep_msg = (struct cm_rep_msg *) msg->mad;
	cm_format_rep(rep_msg, cm_id_priv, param);
	msg->timeout_ms = cm_id_priv->timeout_ms;
	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;

	ret = ib_post_send_mad(msg, NULL);
	if (ret) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		cm_free_msg(msg);
		return ret;
	}

	cm_id->state = IB_CM_REP_SENT;
	cm_id_priv->msg = msg;
	cm_id_priv->initiator_depth = param->initiator_depth;
	cm_id_priv->responder_resources = param->responder_resources;
	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
	cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);

out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}
EXPORT_SYMBOL(ib_send_cm_rep);
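
/*
 * Passive-side sketch: from the IB_CM_REQ_RECEIVED callback, a consumer
 * might accept with (hypothetical values):
 *
 *	struct ib_cm_rep_param rep = {
 *		.qp_num			= my_qp->qp_num,
 *		.responder_resources	= 4,
 *		.initiator_depth	= 4,
 *		.rnr_retry_count	= 7,
 *	};
 *	ret = ib_send_cm_rep(cm_id, &rep);
 *
 * The connection reaches IB_CM_ESTABLISHED when the peer's RTU arrives
 * (see cm_rtu_handler()).
 */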
1503
1504static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1505			  struct cm_id_private *cm_id_priv,
1506			  const void *private_data,
1507			  u8 private_data_len)
1508{
1509	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1510	rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1511	rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1512
1513	if (private_data && private_data_len)
1514		memcpy(rtu_msg->private_data, private_data, private_data_len);
1515}
1516
1517int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1518		   const void *private_data,
1519		   u8 private_data_len)
1520{
1521	struct cm_id_private *cm_id_priv;
1522	struct ib_mad_send_buf *msg;
1523	unsigned long flags;
1524	void *data;
1525	int ret;
1526
1527	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1528		return -EINVAL;
1529
1530	data = cm_copy_private_data(private_data, private_data_len);
1531	if (IS_ERR(data))
1532		return PTR_ERR(data);
1533
1534	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1535	spin_lock_irqsave(&cm_id_priv->lock, flags);
1536	if (cm_id->state != IB_CM_REP_RCVD &&
1537	    cm_id->state != IB_CM_MRA_REP_SENT) {
1538		ret = -EINVAL;
1539		goto error;
1540	}
1541
1542	ret = cm_alloc_msg(cm_id_priv, &msg);
1543	if (ret)
1544		goto error;
1545
1546	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1547		      private_data, private_data_len);
1548
1549	ret = ib_post_send_mad(msg, NULL);
1550	if (ret) {
1551		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1552		cm_free_msg(msg);
1553		kfree(data);
1554		return ret;
1555	}
1556
1557	cm_id->state = IB_CM_ESTABLISHED;
1558	cm_set_private_data(cm_id_priv, data, private_data_len);
1559	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1560	return 0;
1561
1562error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1563	kfree(data);
1564	return ret;
1565}
1566EXPORT_SYMBOL(ib_send_cm_rtu);
1567
1568static void cm_format_rep_event(struct cm_work *work)
1569{
1570	struct cm_rep_msg *rep_msg;
1571	struct ib_cm_rep_event_param *param;
1572
1573	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1574	param = &work->cm_event.param.rep_rcvd;
1575	param->remote_ca_guid = rep_msg->local_ca_guid;
1576	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1577	param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
1578	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1579	param->responder_resources = rep_msg->initiator_depth;
1580	param->initiator_depth = rep_msg->resp_resources;
1581	param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1582	param->failover_accepted = cm_rep_get_failover(rep_msg);
1583	param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1584	param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1585	param->srq = cm_rep_get_srq(rep_msg);
1586	work->cm_event.private_data = &rep_msg->private_data;
1587}
1588
1589static void cm_dup_rep_handler(struct cm_work *work)
1590{
1591	struct cm_id_private *cm_id_priv;
1592	struct cm_rep_msg *rep_msg;
1593	struct ib_mad_send_buf *msg = NULL;
1594	unsigned long flags;
1595	int ret;
1596
1597	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1598	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1599				   rep_msg->local_comm_id);
1600	if (!cm_id_priv)
1601		return;
1602
1603	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1604	if (ret)
1605		goto deref;
1606
1607	spin_lock_irqsave(&cm_id_priv->lock, flags);
1608	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1609		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1610			      cm_id_priv->private_data,
1611			      cm_id_priv->private_data_len);
1612	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1613		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1614			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1615			      cm_id_priv->private_data,
1616			      cm_id_priv->private_data_len);
1617	else
1618		goto unlock;
1619	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1620
1621	ret = ib_post_send_mad(msg, NULL);
1622	if (ret)
1623		goto free;
1624	goto deref;
1625
1626unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1627free:	cm_free_msg(msg);
1628deref:	cm_deref_id(cm_id_priv);
1629}
1630
1631static int cm_rep_handler(struct cm_work *work)
1632{
1633	struct cm_id_private *cm_id_priv;
1634	struct cm_rep_msg *rep_msg;
1635	unsigned long flags;
1636	int ret;
1637
1638	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1639	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1640	if (!cm_id_priv) {
1641		cm_dup_rep_handler(work);
1642		return -EINVAL;
1643	}
1644
1645	cm_format_rep_event(work);
1646
1647	spin_lock_irqsave(&cm_id_priv->lock, flags);
1648	switch (cm_id_priv->id.state) {
1649	case IB_CM_REQ_SENT:
1650	case IB_CM_MRA_REQ_RCVD:
1651		break;
1652	default:
1653		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1654		ret = -EINVAL;
1655		goto error;
1656	}
1657
1658	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1659	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1660	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1661
1662	spin_lock(&cm.lock);
1663	/* Check for duplicate REP. */
1664	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1665		spin_unlock(&cm.lock);
1666		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1667		ret = -EINVAL;
1668		goto error;
1669	}
1670	/* Check for a stale connection. */
1671	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1672		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1673			 &cm.remote_id_table);
1674		cm_id_priv->timewait_info->inserted_remote_id = 0;
1675		spin_unlock(&cm.lock);
1676		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1677		cm_issue_rej(work->port, work->mad_recv_wc,
1678			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1679			     NULL, 0);
1680		ret = -EINVAL;
1681		goto error;
1682	}
1683	spin_unlock(&cm.lock);
1684
1685	cm_id_priv->id.state = IB_CM_REP_RCVD;
1686	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1687	cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1688	cm_id_priv->initiator_depth = rep_msg->resp_resources;
1689	cm_id_priv->responder_resources = rep_msg->initiator_depth;
1690	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1691	cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1692
1693	/* todo: handle peer_to_peer */
1694
1695	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1696	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1697	if (!ret)
1698		list_add_tail(&work->list, &cm_id_priv->work_list);
1699	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1700
1701	if (ret)
1702		cm_process_work(cm_id_priv, work);
1703	else
1704		cm_deref_id(cm_id_priv);
1705	return 0;
1706
1707error:
1708	cm_deref_id(cm_id_priv);
1709	return ret;
1710}
1711
1712static int cm_establish_handler(struct cm_work *work)
1713{
1714	struct cm_id_private *cm_id_priv;
1715	unsigned long flags;
1716	int ret;
1717
1718	/* See comment in ib_cm_establish about lookup. */
1719	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1720	if (!cm_id_priv)
1721		return -EINVAL;
1722
1723	spin_lock_irqsave(&cm_id_priv->lock, flags);
1724	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1725		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1726		goto out;
1727	}
1728
1729	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1730	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1731	if (!ret)
1732		list_add_tail(&work->list, &cm_id_priv->work_list);
1733	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1734
1735	if (ret)
1736		cm_process_work(cm_id_priv, work);
1737	else
1738		cm_deref_id(cm_id_priv);
1739	return 0;
1740out:
1741	cm_deref_id(cm_id_priv);
1742	return -EINVAL;
1743}
1744
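/*
 * Handle a received RTU, which confirms that the remote side has
 * accepted our REP; the connection transitions to established.
 */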
1745static int cm_rtu_handler(struct cm_work *work)
1746{
1747	struct cm_id_private *cm_id_priv;
1748	struct cm_rtu_msg *rtu_msg;
1749	unsigned long flags;
1750	int ret;
1751
1752	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
1753	cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
1754				   rtu_msg->local_comm_id);
1755	if (!cm_id_priv)
1756		return -EINVAL;
1757
1758	work->cm_event.private_data = &rtu_msg->private_data;
1759
1760	spin_lock_irqsave(&cm_id_priv->lock, flags);
1761	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1762	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1763		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1764		goto out;
1765	}
1766	cm_id_priv->id.state = IB_CM_ESTABLISHED;
1767
1768	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1769	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1770	if (!ret)
1771		list_add_tail(&work->list, &cm_id_priv->work_list);
1772	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1773
1774	if (ret)
1775		cm_process_work(cm_id_priv, work);
1776	else
1777		cm_deref_id(cm_id_priv);
1778	return 0;
1779out:
1780	cm_deref_id(cm_id_priv);
1781	return -EINVAL;
1782}
1783
1784static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
1785			  struct cm_id_private *cm_id_priv,
1786			  const void *private_data,
1787			  u8 private_data_len)
1788{
1789	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
1790			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
1791	dreq_msg->local_comm_id = cm_id_priv->id.local_id;
1792	dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
1793	cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
1794
1795	if (private_data && private_data_len)
1796		memcpy(dreq_msg->private_data, private_data, private_data_len);
1797}
1798
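/**
 * ib_send_cm_dreq - Sends a disconnection request (DREQ) for an
 *   established connection.
 * @cm_id: Connection identifier associated with the connection.
 * @private_data: Optional user-defined private data sent with the DREQ.
 * @private_data_len: Size of the private data buffer, in bytes.
 *
 * On success the cm_id moves to IB_CM_DREQ_SENT.  If the DREQ cannot
 * be allocated or posted, the connection drops straight into timewait
 * rather than remaining established.
 */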
1799int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1800		    const void *private_data,
1801		    u8 private_data_len)
1802{
1803	struct cm_id_private *cm_id_priv;
1804	struct ib_mad_send_buf *msg;
1805	unsigned long flags;
1806	int ret;
1807
1808	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
1809		return -EINVAL;
1810
1811	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1812	spin_lock_irqsave(&cm_id_priv->lock, flags);
1813	if (cm_id->state != IB_CM_ESTABLISHED) {
1814		ret = -EINVAL;
1815		goto out;
1816	}
1817
1818	ret = cm_alloc_msg(cm_id_priv, &msg);
1819	if (ret) {
1820		cm_enter_timewait(cm_id_priv);
1821		goto out;
1822	}
1823
1824	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
1825		       private_data, private_data_len);
1826	msg->timeout_ms = cm_id_priv->timeout_ms;
1827	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
1828
1829	ret = ib_post_send_mad(msg, NULL);
1830	if (ret) {
1831		cm_enter_timewait(cm_id_priv);
1832		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1833		cm_free_msg(msg);
1834		return ret;
1835	}
1836
1837	cm_id->state = IB_CM_DREQ_SENT;
1838	cm_id_priv->msg = msg;
1839out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1840	return ret;
1841}
1842EXPORT_SYMBOL(ib_send_cm_dreq);
1843
1844static void cm_format_drep(struct cm_drep_msg *drep_msg,
1845			  struct cm_id_private *cm_id_priv,
1846			  const void *private_data,
1847			  u8 private_data_len)
1848{
1849	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
1850	drep_msg->local_comm_id = cm_id_priv->id.local_id;
1851	drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1852
1853	if (private_data && private_data_len)
1854		memcpy(drep_msg->private_data, private_data, private_data_len);
1855}
1856
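/**
 * ib_send_cm_drep - Sends a disconnection reply (DREP) to a received
 *   DREQ and moves the connection into timewait.
 * @cm_id: Connection identifier associated with the connection.
 * @private_data: Optional user-defined private data sent with the DREP.
 * @private_data_len: Size of the private data buffer, in bytes.
 *
 * The private data is retained so the DREP can be re-sent if a
 * duplicate DREQ arrives during timewait (see cm_dreq_handler()).
 */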
1857int ib_send_cm_drep(struct ib_cm_id *cm_id,
1858		    const void *private_data,
1859		    u8 private_data_len)
1860{
1861	struct cm_id_private *cm_id_priv;
1862	struct ib_mad_send_buf *msg;
1863	unsigned long flags;
1864	void *data;
1865	int ret;
1866
1867	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
1868		return -EINVAL;
1869
1870	data = cm_copy_private_data(private_data, private_data_len);
1871	if (IS_ERR(data))
1872		return PTR_ERR(data);
1873
1874	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1875	spin_lock_irqsave(&cm_id_priv->lock, flags);
1876	if (cm_id->state != IB_CM_DREQ_RCVD) {
1877		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1878		kfree(data);
1879		return -EINVAL;
1880	}
1881
1882	cm_set_private_data(cm_id_priv, data, private_data_len);
1883	cm_enter_timewait(cm_id_priv);
1884
1885	ret = cm_alloc_msg(cm_id_priv, &msg);
1886	if (ret)
1887		goto out;
1888
1889	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
1890		       private_data, private_data_len);
1891
1892	ret = ib_post_send_mad(msg, NULL);
1893	if (ret) {
1894		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1895		cm_free_msg(msg);
1896		return ret;
1897	}
1898
1899out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1900	return ret;
1901}
1902EXPORT_SYMBOL(ib_send_cm_drep);
1903
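/*
 * Reply to a DREQ that does not match any known connection (for
 * example, one whose cm_id has already been destroyed) so that the
 * remote CM can complete its disconnect.
 */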
1904static int cm_issue_drep(struct cm_port *port,
1905			 struct ib_mad_recv_wc *mad_recv_wc)
1906{
1907	struct ib_mad_send_buf *msg = NULL;
1908	struct cm_dreq_msg *dreq_msg;
1909	struct cm_drep_msg *drep_msg;
1910	int ret;
1911
1912	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1913	if (ret)
1914		return ret;
1915
1916	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
1917	drep_msg = (struct cm_drep_msg *) msg->mad;
1918
1919	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
1920	drep_msg->remote_comm_id = dreq_msg->local_comm_id;
1921	drep_msg->local_comm_id = dreq_msg->remote_comm_id;
1922
1923	ret = ib_post_send_mad(msg, NULL);
1924	if (ret)
1925		cm_free_msg(msg);
1926
1927	return ret;
1928}
1929
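/*
 * Handle a received DREQ.  Any outstanding REP or DREQ send is
 * canceled.  If the connection is already in timewait, our DREP may
 * have been lost, so it is simply re-sent.
 */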
1930static int cm_dreq_handler(struct cm_work *work)
1931{
1932	struct cm_id_private *cm_id_priv;
1933	struct cm_dreq_msg *dreq_msg;
1934	struct ib_mad_send_buf *msg = NULL;
1935	unsigned long flags;
1936	int ret;
1937
1938	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
1939	cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
1940				   dreq_msg->local_comm_id);
1941	if (!cm_id_priv) {
1942		cm_issue_drep(work->port, work->mad_recv_wc);
1943		return -EINVAL;
1944	}
1945
1946	work->cm_event.private_data = &dreq_msg->private_data;
1947
1948	spin_lock_irqsave(&cm_id_priv->lock, flags);
1949	if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
1950		goto unlock;
1951
1952	switch (cm_id_priv->id.state) {
1953	case IB_CM_REP_SENT:
1954	case IB_CM_DREQ_SENT:
1955		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1956		break;
1957	case IB_CM_ESTABLISHED:
1958	case IB_CM_MRA_REP_RCVD:
1959		break;
1960	case IB_CM_TIMEWAIT:
1961		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
1962			goto unlock;
1963
1964		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
1965			       cm_id_priv->private_data,
1966			       cm_id_priv->private_data_len);
1967		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1968
1969		if (ib_post_send_mad(msg, NULL))
1970			cm_free_msg(msg);
1971		goto deref;
1972	default:
1973		goto unlock;
1974	}
1975	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
1976	cm_id_priv->tid = dreq_msg->hdr.tid;
1977	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1978	if (!ret)
1979		list_add_tail(&work->list, &cm_id_priv->work_list);
1980	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1981
1982	if (ret)
1983		cm_process_work(cm_id_priv, work);
1984	else
1985		cm_deref_id(cm_id_priv);
1986	return 0;
1987
1988unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1989deref:	cm_deref_id(cm_id_priv);
1990	return -EINVAL;
1991}
1992
1993static int cm_drep_handler(struct cm_work *work)
1994{
1995	struct cm_id_private *cm_id_priv;
1996	struct cm_drep_msg *drep_msg;
1997	unsigned long flags;
1998	int ret;
1999
2000	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2001	cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2002				   drep_msg->local_comm_id);
2003	if (!cm_id_priv)
2004		return -EINVAL;
2005
2006	work->cm_event.private_data = &drep_msg->private_data;
2007
2008	spin_lock_irqsave(&cm_id_priv->lock, flags);
2009	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2010	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2011		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2012		goto out;
2013	}
2014	cm_enter_timewait(cm_id_priv);
2015
2016	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2017	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2018	if (!ret)
2019		list_add_tail(&work->list, &cm_id_priv->work_list);
2020	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2021
2022	if (ret)
2023		cm_process_work(cm_id_priv, work);
2024	else
2025		cm_deref_id(cm_id_priv);
2026	return 0;
2027out:
2028	cm_deref_id(cm_id_priv);
2029	return -EINVAL;
2030}
2031
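/**
 * ib_send_cm_rej - Sends a connection rejection (REJ) for the REQ or
 *   REP associated with a connection identifier.
 * @cm_id: Connection identifier to reject.
 * @reason: Reason for the rejection.
 * @ari: Optional additional rejection information.
 * @ari_length: Size of the additional rejection information, in bytes.
 * @private_data: Optional user-defined private data sent with the REJ.
 * @private_data_len: Size of the private data buffer, in bytes.
 *
 * Rejecting before a REP has been sent resets the cm_id to idle;
 * rejecting after a REP has been sent moves it into timewait.
 */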
2032int ib_send_cm_rej(struct ib_cm_id *cm_id,
2033		   enum ib_cm_rej_reason reason,
2034		   void *ari,
2035		   u8 ari_length,
2036		   const void *private_data,
2037		   u8 private_data_len)
2038{
2039	struct cm_id_private *cm_id_priv;
2040	struct ib_mad_send_buf *msg;
2041	unsigned long flags;
2042	int ret;
2043
2044	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2045	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2046		return -EINVAL;
2047
2048	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2049
2050	spin_lock_irqsave(&cm_id_priv->lock, flags);
2051	switch (cm_id->state) {
2052	case IB_CM_REQ_SENT:
2053	case IB_CM_MRA_REQ_RCVD:
2054	case IB_CM_REQ_RCVD:
2055	case IB_CM_MRA_REQ_SENT:
2056	case IB_CM_REP_RCVD:
2057	case IB_CM_MRA_REP_SENT:
2058		ret = cm_alloc_msg(cm_id_priv, &msg);
2059		if (!ret)
2060			cm_format_rej((struct cm_rej_msg *) msg->mad,
2061				      cm_id_priv, reason, ari, ari_length,
2062				      private_data, private_data_len);
2063
2064		cm_reset_to_idle(cm_id_priv);
2065		break;
2066	case IB_CM_REP_SENT:
2067	case IB_CM_MRA_REP_RCVD:
2068		ret = cm_alloc_msg(cm_id_priv, &msg);
2069		if (!ret)
2070			cm_format_rej((struct cm_rej_msg *) msg->mad,
2071				      cm_id_priv, reason, ari, ari_length,
2072				      private_data, private_data_len);
2073
2074		cm_enter_timewait(cm_id_priv);
2075		break;
2076	default:
2077		ret = -EINVAL;
2078		goto out;
2079	}
2080
2081	if (ret)
2082		goto out;
2083
2084	ret = ib_post_send_mad(msg, NULL);
2085	if (ret)
2086		cm_free_msg(msg);
2087
2088out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2089	return ret;
2090}
2091EXPORT_SYMBOL(ib_send_cm_rej);
2092
2093static void cm_format_rej_event(struct cm_work *work)
2094{
2095	struct cm_rej_msg *rej_msg;
2096	struct ib_cm_rej_event_param *param;
2097
2098	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2099	param = &work->cm_event.param.rej_rcvd;
2100	param->ari = rej_msg->ari;
2101	param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2102	param->reason = __be16_to_cpu(rej_msg->reason);
2103	work->cm_event.private_data = &rej_msg->private_data;
2104}
2105
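/*
 * Locate the cm_id targeted by a REJ.  A timeout rejection identifies
 * the connection through its timewait entry, keyed by the CA GUID
 * carried in the ARI and the remote communication ID; other rejections
 * carry the communication IDs directly.
 */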
2106	static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2107{
2108	struct cm_timewait_info *timewait_info;
2109	struct cm_id_private *cm_id_priv;
2110	unsigned long flags;
2111	__be32 remote_id;
2112
2113	remote_id = rej_msg->local_comm_id;
2114
2115	if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2116		spin_lock_irqsave(&cm.lock, flags);
2117		timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2118						  remote_id);
2119		if (!timewait_info) {
2120			spin_unlock_irqrestore(&cm.lock, flags);
2121			return NULL;
2122		}
2123		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2124				      (timewait_info->work.local_id ^
2125				       cm.random_id_operand));
2126		if (cm_id_priv) {
2127			if (cm_id_priv->id.remote_id == remote_id)
2128				atomic_inc(&cm_id_priv->refcount);
2129			else
2130				cm_id_priv = NULL;
2131		}
2132		spin_unlock_irqrestore(&cm.lock, flags);
2133	} else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2134		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2135	else
2136		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2137
2138	return cm_id_priv;
2139}
2140
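/*
 * Handle a received REJ.  Any outstanding send is canceled, and,
 * depending on the state and rejection reason, the connection either
 * resets to idle or enters timewait.
 */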
2141static int cm_rej_handler(struct cm_work *work)
2142{
2143	struct cm_id_private *cm_id_priv;
2144	struct cm_rej_msg *rej_msg;
2145	unsigned long flags;
2146	int ret;
2147
2148	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2149	cm_id_priv = cm_acquire_rejected_id(rej_msg);
2150	if (!cm_id_priv)
2151		return -EINVAL;
2152
2153	cm_format_rej_event(work);
2154
2155	spin_lock_irqsave(&cm_id_priv->lock, flags);
2156	switch (cm_id_priv->id.state) {
2157	case IB_CM_REQ_SENT:
2158	case IB_CM_MRA_REQ_RCVD:
2159	case IB_CM_REP_SENT:
2160	case IB_CM_MRA_REP_RCVD:
2161		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2162		/* fall through */
2163	case IB_CM_REQ_RCVD:
2164	case IB_CM_MRA_REQ_SENT:
2165		if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2166			cm_enter_timewait(cm_id_priv);
2167		else
2168			cm_reset_to_idle(cm_id_priv);
2169		break;
2170	case IB_CM_DREQ_SENT:
2171		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2172		/* fall through */
2173	case IB_CM_REP_RCVD:
2174	case IB_CM_MRA_REP_SENT:
2175	case IB_CM_ESTABLISHED:
2176		cm_enter_timewait(cm_id_priv);
2177		break;
2178	default:
2179		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2180		ret = -EINVAL;
2181		goto out;
2182	}
2183
2184	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2185	if (!ret)
2186		list_add_tail(&work->list, &cm_id_priv->work_list);
2187	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2188
2189	if (ret)
2190		cm_process_work(cm_id_priv, work);
2191	else
2192		cm_deref_id(cm_id_priv);
2193	return 0;
2194out:
2195	cm_deref_id(cm_id_priv);
2196	return -EINVAL;
2197}
2198
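/**
 * ib_send_cm_mra - Sends a message receipt acknowledgement (MRA) for a
 *   received REQ, REP, or LAP, extending the sender's retry timeout.
 * @cm_id: Connection identifier associated with the received message.
 * @service_timeout: Maximum time needed to respond to the message,
 *   encoded as 4.096 * 2^service_timeout microseconds.
 * @private_data: Optional user-defined private data sent with the MRA.
 * @private_data_len: Size of the private data buffer, in bytes.
 */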
2199int ib_send_cm_mra(struct ib_cm_id *cm_id,
2200		   u8 service_timeout,
2201		   const void *private_data,
2202		   u8 private_data_len)
2203{
2204	struct cm_id_private *cm_id_priv;
2205	struct ib_mad_send_buf *msg;
2206	void *data;
2207	unsigned long flags;
2208	int ret;
2209
2210	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2211		return -EINVAL;
2212
2213	data = cm_copy_private_data(private_data, private_data_len);
2214	if (IS_ERR(data))
2215		return PTR_ERR(data);
2216
2217	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2218
2219	spin_lock_irqsave(&cm_id_priv->lock, flags);
2220	switch (cm_id_priv->id.state) {
2221	case IB_CM_REQ_RCVD:
2222		ret = cm_alloc_msg(cm_id_priv, &msg);
2223		if (ret)
2224			goto error1;
2225
2226		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2227			      CM_MSG_RESPONSE_REQ, service_timeout,
2228			      private_data, private_data_len);
2229		ret = ib_post_send_mad(msg, NULL);
2230		if (ret)
2231			goto error2;
2232		cm_id->state = IB_CM_MRA_REQ_SENT;
2233		break;
2234	case IB_CM_REP_RCVD:
2235		ret = cm_alloc_msg(cm_id_priv, &msg);
2236		if (ret)
2237			goto error1;
2238
2239		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2240			      CM_MSG_RESPONSE_REP, service_timeout,
2241			      private_data, private_data_len);
2242		ret = ib_post_send_mad(msg, NULL);
2243		if (ret)
2244			goto error2;
2245		cm_id->state = IB_CM_MRA_REP_SENT;
2246		break;
2247	case IB_CM_ESTABLISHED:
2248		ret = cm_alloc_msg(cm_id_priv, &msg);
2249		if (ret)
2250			goto error1;
2251
2252		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2253			      CM_MSG_RESPONSE_OTHER, service_timeout,
2254			      private_data, private_data_len);
2255		ret = ib_post_send_mad(msg, NULL);
2256		if (ret)
2257			goto error2;
2258		cm_id->lap_state = IB_CM_MRA_LAP_SENT;
2259		break;
2260	default:
2261		ret = -EINVAL;
2262		goto error1;
2263	}
2264	cm_id_priv->service_timeout = service_timeout;
2265	cm_set_private_data(cm_id_priv, data, private_data_len);
2266	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2267	return 0;
2268
2269error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2270	kfree(data);
2271	return ret;
2272
2273error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2274	kfree(data);
2275	cm_free_msg(msg);
2276	return ret;
2277}
2278EXPORT_SYMBOL(ib_send_cm_mra);
2279
2280	static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2281{
2282	switch (cm_mra_get_msg_mraed(mra_msg)) {
2283	case CM_MSG_RESPONSE_REQ:
2284		return cm_acquire_id(mra_msg->remote_comm_id, 0);
2285	case CM_MSG_RESPONSE_REP:
2286	case CM_MSG_RESPONSE_OTHER:
2287		return cm_acquire_id(mra_msg->remote_comm_id,
2288				     mra_msg->local_comm_id);
2289	default:
2290		return NULL;
2291	}
2292}
2293
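/*
 * Handle a received MRA: the remote CM needs more time to respond, so
 * extend the timeout of our outstanding REQ, REP, or LAP by the
 * advertised service timeout plus the packet lifetime of the path.
 */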
2294static int cm_mra_handler(struct cm_work *work)
2295{
2296	struct cm_id_private *cm_id_priv;
2297	struct cm_mra_msg *mra_msg;
2298	unsigned long flags;
2299	int timeout, ret;
2300
2301	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2302	cm_id_priv = cm_acquire_mraed_id(mra_msg);
2303	if (!cm_id_priv)
2304		return -EINVAL;
2305
2306	work->cm_event.private_data = &mra_msg->private_data;
2307	work->cm_event.param.mra_rcvd.service_timeout =
2308					cm_mra_get_service_timeout(mra_msg);
2309	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2310		  cm_convert_to_ms(cm_id_priv->av.packet_life_time);
2311
2312	spin_lock_irqsave(&cm_id_priv->lock, flags);
2313	switch (cm_id_priv->id.state) {
2314	case IB_CM_REQ_SENT:
2315		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2316		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2317				  cm_id_priv->msg, timeout))
2318			goto out;
2319		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2320		break;
2321	case IB_CM_REP_SENT:
2322		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2323		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2324				  cm_id_priv->msg, timeout))
2325			goto out;
2326		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2327		break;
2328	case IB_CM_ESTABLISHED:
2329		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2330		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2331		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2332				  cm_id_priv->msg, timeout))
2333			goto out;
2334		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2335		break;
2336	default:
2337		goto out;
2338	}
2339
2340	cm_id_priv->msg->context[1] = (void *) (unsigned long)
2341				      cm_id_priv->id.state;
2342	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2343	if (!ret)
2344		list_add_tail(&work->list, &cm_id_priv->work_list);
2345	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2346
2347	if (ret)
2348		cm_process_work(cm_id_priv, work);
2349	else
2350		cm_deref_id(cm_id_priv);
2351	return 0;
2352out:
2353	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2354	cm_deref_id(cm_id_priv);
2355	return -EINVAL;
2356}
2357
2358static void cm_format_lap(struct cm_lap_msg *lap_msg,
2359			  struct cm_id_private *cm_id_priv,
2360			  struct ib_sa_path_rec *alternate_path,
2361			  const void *private_data,
2362			  u8 private_data_len)
2363{
2364	cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2365			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2366	lap_msg->local_comm_id = cm_id_priv->id.local_id;
2367	lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2368	cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2369	/* todo: need remote CM response timeout */
2370	cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2371	lap_msg->alt_local_lid = alternate_path->slid;
2372	lap_msg->alt_remote_lid = alternate_path->dlid;
2373	lap_msg->alt_local_gid = alternate_path->sgid;
2374	lap_msg->alt_remote_gid = alternate_path->dgid;
2375	cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2376	cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2377	lap_msg->alt_hop_limit = alternate_path->hop_limit;
2378	cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2379	cm_lap_set_sl(lap_msg, alternate_path->sl);
2380	cm_lap_set_subnet_local(lap_msg, 1); /* local subnet only */
2381	cm_lap_set_local_ack_timeout(lap_msg,
2382		min(31, alternate_path->packet_life_time + 1));
2383
2384	if (private_data && private_data_len)
2385		memcpy(lap_msg->private_data, private_data, private_data_len);
2386}
2387
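/**
 * ib_send_cm_lap - Sends a load alternate path (LAP) request to
 *   propose a new alternate path for an established connection.
 * @cm_id: Connection identifier associated with the connection.
 * @alternate_path: Path record describing the proposed alternate path.
 * @private_data: Optional user-defined private data sent with the LAP.
 * @private_data_len: Size of the private data buffer, in bytes.
 *
 * The connection must be established with an idle LAP state; on
 * success the LAP state moves to IB_CM_LAP_SENT.
 */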
2388int ib_send_cm_lap(struct ib_cm_id *cm_id,
2389		   struct ib_sa_path_rec *alternate_path,
2390		   const void *private_data,
2391		   u8 private_data_len)
2392{
2393	struct cm_id_private *cm_id_priv;
2394	struct ib_mad_send_buf *msg;
2395	unsigned long flags;
2396	int ret;
2397
2398	if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2399		return -EINVAL;
2400
2401	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2402	spin_lock_irqsave(&cm_id_priv->lock, flags);
2403	if (cm_id->state != IB_CM_ESTABLISHED ||
2404	    cm_id->lap_state != IB_CM_LAP_IDLE) {
2405		ret = -EINVAL;
2406		goto out;
2407	}
2408
2409	ret = cm_alloc_msg(cm_id_priv, &msg);
2410	if (ret)
2411		goto out;
2412
2413	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2414		      alternate_path, private_data, private_data_len);
2415	msg->timeout_ms = cm_id_priv->timeout_ms;
2416	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2417
2418	ret = ib_post_send_mad(msg, NULL);
2419	if (ret) {
2420		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2421		cm_free_msg(msg);
2422		return ret;
2423	}
2424
2425	cm_id->lap_state = IB_CM_LAP_SENT;
2426	cm_id_priv->msg = msg;
2427
2428out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2429	return ret;
2430}
2431EXPORT_SYMBOL(ib_send_cm_lap);
2432
2433static void cm_format_path_from_lap(struct ib_sa_path_rec *path,
2434				    struct cm_lap_msg *lap_msg)
2435{
2436	memset(path, 0, sizeof *path);
2437	path->dgid = lap_msg->alt_local_gid;
2438	path->sgid = lap_msg->alt_remote_gid;
2439	path->dlid = lap_msg->alt_local_lid;
2440	path->slid = lap_msg->alt_remote_lid;
2441	path->flow_label = cm_lap_get_flow_label(lap_msg);
2442	path->hop_limit = lap_msg->alt_hop_limit;
2443	path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2444	path->reversible = 1;
2445	/* pkey is same as in REQ */
2446	path->sl = cm_lap_get_sl(lap_msg);
2447	path->mtu_selector = IB_SA_EQ;
2448	/* mtu is same as in REQ */
2449	path->rate_selector = IB_SA_EQ;
2450	path->rate = cm_lap_get_packet_rate(lap_msg);
2451	path->packet_life_time_selector = IB_SA_EQ;
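	/*
	 * The LAP carries a local ACK timeout derived as
	 * packet_life_time + 1 (see cm_format_lap()); undo that
	 * increment here, clamping at zero.
	 */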
2452	path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2453	path->packet_life_time -= (path->packet_life_time > 0);
2454}
2455
2456static int cm_lap_handler(struct cm_work *work)
2457{
2458	struct cm_id_private *cm_id_priv;
2459	struct cm_lap_msg *lap_msg;
2460	struct ib_cm_lap_event_param *param;
2461	struct ib_mad_send_buf *msg = NULL;
2462	unsigned long flags;
2463	int ret;
2464
2465	/* todo: verify LAP request and send reject APR if invalid. */
2466	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2467	cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2468				   lap_msg->local_comm_id);
2469	if (!cm_id_priv)
2470		return -EINVAL;
2471
2472	param = &work->cm_event.param.lap_rcvd;
2473	param->alternate_path = &work->path[0];
2474	cm_format_path_from_lap(param->alternate_path, lap_msg);
2475	work->cm_event.private_data = &lap_msg->private_data;
2476
2477	spin_lock_irqsave(&cm_id_priv->lock, flags);
2478	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2479		goto unlock;
2480
2481	switch (cm_id_priv->id.lap_state) {
2482	case IB_CM_LAP_IDLE:
2483		break;
2484	case IB_CM_MRA_LAP_SENT:
2485		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2486			goto unlock;
2487
2488		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2489			      CM_MSG_RESPONSE_OTHER,
2490			      cm_id_priv->service_timeout,
2491			      cm_id_priv->private_data,
2492			      cm_id_priv->private_data_len);
2493		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2494
2495		if (ib_post_send_mad(msg, NULL))
2496			cm_free_msg(msg);
2497		goto deref;
2498	default:
2499		goto unlock;
2500	}
2501
2502	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2503	cm_id_priv->tid = lap_msg->hdr.tid;
2504	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2505	if (!ret)
2506		list_add_tail(&work->list, &cm_id_priv->work_list);
2507	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2508
2509	if (ret)
2510		cm_process_work(cm_id_priv, work);
2511	else
2512		cm_deref_id(cm_id_priv);
2513	return 0;
2514
2515unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2516deref:	cm_deref_id(cm_id_priv);
2517	return -EINVAL;
2518}
2519
2520static void cm_format_apr(struct cm_apr_msg *apr_msg,
2521			  struct cm_id_private *cm_id_priv,
2522			  enum ib_cm_apr_status status,
2523			  void *info,
2524			  u8 info_length,
2525			  const void *private_data,
2526			  u8 private_data_len)
2527{
2528	cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2529	apr_msg->local_comm_id = cm_id_priv->id.local_id;
2530	apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2531	apr_msg->ap_status = (u8) status;
2532
2533	if (info && info_length) {
2534		apr_msg->info_length = info_length;
2535		memcpy(apr_msg->info, info, info_length);
2536	}
2537
2538	if (private_data && private_data_len)
2539		memcpy(apr_msg->private_data, private_data, private_data_len);
2540}
2541
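/**
 * ib_send_cm_apr - Sends an alternate path response (APR) to a
 *   received LAP, accepting or rejecting the proposed path.
 * @cm_id: Connection identifier associated with the connection.
 * @status: Result of the alternate path evaluation.
 * @info: Optional additional information returned to the initiator.
 * @info_length: Size of the additional information, in bytes.
 * @private_data: Optional user-defined private data sent with the APR.
 * @private_data_len: Size of the private data buffer, in bytes.
 */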
2542int ib_send_cm_apr(struct ib_cm_id *cm_id,
2543		   enum ib_cm_apr_status status,
2544		   void *info,
2545		   u8 info_length,
2546		   const void *private_data,
2547		   u8 private_data_len)
2548{
2549	struct cm_id_private *cm_id_priv;
2550	struct ib_mad_send_buf *msg;
2551	unsigned long flags;
2552	int ret;
2553
2554	if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2555	    (info && info_length > IB_CM_APR_INFO_LENGTH))
2556		return -EINVAL;
2557
2558	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2559	spin_lock_irqsave(&cm_id_priv->lock, flags);
2560	if (cm_id->state != IB_CM_ESTABLISHED ||
2561	    (cm_id->lap_state != IB_CM_LAP_RCVD &&
2562	     cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2563		ret = -EINVAL;
2564		goto out;
2565	}
2566
2567	ret = cm_alloc_msg(cm_id_priv, &msg);
2568	if (ret)
2569		goto out;
2570
2571	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2572		      info, info_length, private_data, private_data_len);
2573	ret = ib_post_send_mad(msg, NULL);
2574	if (ret) {
2575		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2576		cm_free_msg(msg);
2577		return ret;
2578	}
2579
2580	cm_id->lap_state = IB_CM_LAP_IDLE;
2581out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2582	return ret;
2583}
2584EXPORT_SYMBOL(ib_send_cm_apr);
2585
2586static int cm_apr_handler(struct cm_work *work)
2587{
2588	struct cm_id_private *cm_id_priv;
2589	struct cm_apr_msg *apr_msg;
2590	unsigned long flags;
2591	int ret;
2592
2593	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2594	cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2595				   apr_msg->local_comm_id);
2596	if (!cm_id_priv)
2597		return -EINVAL; /* Unmatched reply. */
2598
2599	work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2600	work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2601	work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2602	work->cm_event.private_data = &apr_msg->private_data;
2603
2604	spin_lock_irqsave(&cm_id_priv->lock, flags);
2605	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2606	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2607	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2608		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2609		goto out;
2610	}
2611	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2612	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2613	cm_id_priv->msg = NULL;
2614
2615	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2616	if (!ret)
2617		list_add_tail(&work->list, &cm_id_priv->work_list);
2618	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2619
2620	if (ret)
2621		cm_process_work(cm_id_priv, work);
2622	else
2623		cm_deref_id(cm_id_priv);
2624	return 0;
2625out:
2626	cm_deref_id(cm_id_priv);
2627	return -EINVAL;
2628}
2629
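/*
 * Handle expiration of a connection's timewait period: drop the entry
 * from the timewait list and, if the cm_id is still in timewait for
 * the same remote QPN, return it to idle and report
 * IB_CM_TIMEWAIT_EXIT to the user.
 */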
2630static int cm_timewait_handler(struct cm_work *work)
2631{
2632	struct cm_timewait_info *timewait_info;
2633	struct cm_id_private *cm_id_priv;
2634	int ret;
2635
2636	timewait_info = (struct cm_timewait_info *)work;
2637	spin_lock_irq(&cm.lock);
2638	list_del(&timewait_info->list);
2639	spin_unlock_irq(&cm.lock);
2640
2641	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2642				   timewait_info->work.remote_id);
2643	if (!cm_id_priv)
2644		return -EINVAL;
2645
2646	spin_lock_irq(&cm_id_priv->lock);
2647	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2648	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2649		spin_unlock_irq(&cm_id_priv->lock);
2650		goto out;
2651	}
2652	cm_id_priv->id.state = IB_CM_IDLE;
2653	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2654	if (!ret)
2655		list_add_tail(&work->list, &cm_id_priv->work_list);
2656	spin_unlock_irq(&cm_id_priv->lock);
2657
2658	if (ret)
2659		cm_process_work(cm_id_priv, work);
2660	else
2661		cm_deref_id(cm_id_priv);
2662	return 0;
2663out:
2664	cm_deref_id(cm_id_priv);
2665	return -EINVAL;
2666}
2667
2668static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2669			       struct cm_id_private *cm_id_priv,
2670			       struct ib_cm_sidr_req_param *param)
2671{
2672	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2673			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2674	sidr_req_msg->request_id = cm_id_priv->id.local_id;
2675	sidr_req_msg->pkey = cpu_to_be16(param->path->pkey);
2676	sidr_req_msg->service_id = param->service_id;
2677
2678	if (param->private_data && param->private_data_len)
2679		memcpy(sidr_req_msg->private_data, param->private_data,
2680		       param->private_data_len);
2681}
2682
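/**
 * ib_send_cm_sidr_req - Sends a service ID resolution (SIDR) request,
 *   which resolves a service ID to the QP number and Q_Key of the
 *   service.
 * @cm_id: Connection identifier used for the request; must be idle.
 * @param: Service ID resolution request information, including the
 *   path to the service.
 */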
2683int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2684			struct ib_cm_sidr_req_param *param)
2685{
2686	struct cm_id_private *cm_id_priv;
2687	struct ib_mad_send_buf *msg;
2688	unsigned long flags;
2689	int ret;
2690
2691	if (!param->path || (param->private_data &&
2692	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
2693		return -EINVAL;
2694
2695	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2696	ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
2697	if (ret)
2698		goto out;
2699
2700	cm_id->service_id = param->service_id;
2701	cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
2702	cm_id_priv->timeout_ms = param->timeout_ms;
2703	cm_id_priv->max_cm_retries = param->max_cm_retries;
2704	ret = cm_alloc_msg(cm_id_priv, &msg);
2705	if (ret)
2706		goto out;
2707
2708	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
2709			   param);
2710	msg->timeout_ms = cm_id_priv->timeout_ms;
2711	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
2712
2713	spin_lock_irqsave(&cm_id_priv->lock, flags);
2714	if (cm_id->state == IB_CM_IDLE)
2715		ret = ib_post_send_mad(msg, NULL);
2716	else
2717		ret = -EINVAL;
2718
2719	if (ret) {
2720		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2721		cm_free_msg(msg);
2722		goto out;
2723	}
2724	cm_id->state = IB_CM_SIDR_REQ_SENT;
2725	cm_id_priv->msg = msg;
2726	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2727out:
2728	return ret;
2729}
2730EXPORT_SYMBOL(ib_send_cm_sidr_req);
2731
2732static void cm_format_sidr_req_event(struct cm_work *work,
2733				     struct ib_cm_id *listen_id)
2734{
2735	struct cm_sidr_req_msg *sidr_req_msg;
2736	struct ib_cm_sidr_req_event_param *param;
2737
2738	sidr_req_msg = (struct cm_sidr_req_msg *)
2739				work->mad_recv_wc->recv_buf.mad;
2740	param = &work->cm_event.param.sidr_req_rcvd;
2741	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
2742	param->listen_id = listen_id;
2743	param->port = work->port->port_num;
2744	work->cm_event.private_data = &sidr_req_msg->private_data;
2745}
2746
2747static int cm_sidr_req_handler(struct cm_work *work)
2748{
2749	struct ib_cm_id *cm_id;
2750	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
2751	struct cm_sidr_req_msg *sidr_req_msg;
2752	struct ib_wc *wc;
2753	unsigned long flags;
2754
2755	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
2756	if (IS_ERR(cm_id))
2757		return PTR_ERR(cm_id);
2758	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2759
2760	/* Record SGID/SLID and request ID for lookup. */
2761	sidr_req_msg = (struct cm_sidr_req_msg *)
2762				work->mad_recv_wc->recv_buf.mad;
2763	wc = work->mad_recv_wc->wc;
2764	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
2765	cm_id_priv->av.dgid.global.interface_id = 0;
2766	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2767				work->mad_recv_wc->recv_buf.grh,
2768				&cm_id_priv->av);
2769	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
2770	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
2771	cm_id_priv->tid = sidr_req_msg->hdr.tid;
2772	atomic_inc(&cm_id_priv->work_count);
2773
2774	spin_lock_irqsave(&cm.lock, flags);
2775	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
2776	if (cur_cm_id_priv) {
2777		spin_unlock_irqrestore(&cm.lock, flags);
2778		goto out; /* Duplicate message. */
2779	}
2780	cur_cm_id_priv = cm_find_listen(cm_id->device,
2781					sidr_req_msg->service_id,
2782					sidr_req_msg->private_data);
2783	if (!cur_cm_id_priv) {
2784		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
2785		spin_unlock_irqrestore(&cm.lock, flags);
2786		/* todo: reply with no match */
2787		goto out; /* No match. */
2788	}
2789	atomic_inc(&cur_cm_id_priv->refcount);
2790	spin_unlock_irqrestore(&cm.lock, flags);
2791
2792	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
2793	cm_id_priv->id.context = cur_cm_id_priv->id.context;
2794	cm_id_priv->id.service_id = sidr_req_msg->service_id;
2795	cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);
2796
2797	cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
2798	cm_process_work(cm_id_priv, work);
2799	cm_deref_id(cur_cm_id_priv);
2800	return 0;
2801out:
2802	ib_destroy_cm_id(&cm_id_priv->id);
2803	return -EINVAL;
2804}
2805
2806static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
2807			       struct cm_id_private *cm_id_priv,
2808			       struct ib_cm_sidr_rep_param *param)
2809{
2810	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
2811			  cm_id_priv->tid);
2812	sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
2813	sidr_rep_msg->status = param->status;
2814	cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
2815	sidr_rep_msg->service_id = cm_id_priv->id.service_id;
2816	sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
2817
2818	if (param->info && param->info_length)
2819		memcpy(sidr_rep_msg->info, param->info, param->info_length);
2820
2821	if (param->private_data && param->private_data_len)
2822		memcpy(sidr_rep_msg->private_data, param->private_data,
2823		       param->private_data_len);
2824}
2825
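/**
 * ib_send_cm_sidr_rep - Sends a service ID resolution reply, returning
 *   the resolved QP number and Q_Key to the requester, and returns the
 *   cm_id to the idle state.
 * @cm_id: Connection identifier associated with the received request.
 * @param: Service ID resolution reply information.
 */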
2826int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
2827			struct ib_cm_sidr_rep_param *param)
2828{
2829	struct cm_id_private *cm_id_priv;
2830	struct ib_mad_send_buf *msg;
2831	unsigned long flags;
2832	int ret;
2833
2834	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
2835	    (param->private_data &&
2836	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
2837		return -EINVAL;
2838
2839	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2840	spin_lock_irqsave(&cm_id_priv->lock, flags);
2841	if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
2842		ret = -EINVAL;
2843		goto error;
2844	}
2845
2846	ret = cm_alloc_msg(cm_id_priv, &msg);
2847	if (ret)
2848		goto error;
2849
2850	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
2851			   param);
2852	ret = ib_post_send_mad(msg, NULL);
2853	if (ret) {
2854		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2855		cm_free_msg(msg);
2856		return ret;
2857	}
2858	cm_id->state = IB_CM_IDLE;
2859	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2860
2861	spin_lock_irqsave(&cm.lock, flags);
2862	rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
2863	spin_unlock_irqrestore(&cm.lock, flags);
2864	return 0;
2865
2866error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2867	return ret;
2868}
2869EXPORT_SYMBOL(ib_send_cm_sidr_rep);
2870
2871static void cm_format_sidr_rep_event(struct cm_work *work)
2872{
2873	struct cm_sidr_rep_msg *sidr_rep_msg;
2874	struct ib_cm_sidr_rep_event_param *param;
2875
2876	sidr_rep_msg = (struct cm_sidr_rep_msg *)
2877				work->mad_recv_wc->recv_buf.mad;
2878	param = &work->cm_event.param.sidr_rep_rcvd;
2879	param->status = sidr_rep_msg->status;
2880	param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
2881	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
2882	param->info = &sidr_rep_msg->info;
2883	param->info_len = sidr_rep_msg->info_length;
2884	work->cm_event.private_data = &sidr_rep_msg->private_data;
2885}
2886
2887static int cm_sidr_rep_handler(struct cm_work *work)
2888{
2889	struct cm_sidr_rep_msg *sidr_rep_msg;
2890	struct cm_id_private *cm_id_priv;
2891	unsigned long flags;
2892
2893	sidr_rep_msg = (struct cm_sidr_rep_msg *)
2894				work->mad_recv_wc->recv_buf.mad;
2895	cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
2896	if (!cm_id_priv)
2897		return -EINVAL; /* Unmatched reply. */
2898
2899	spin_lock_irqsave(&cm_id_priv->lock, flags);
2900	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
2901		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2902		goto out;
2903	}
2904	cm_id_priv->id.state = IB_CM_IDLE;
2905	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2906	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2907
2908	cm_format_sidr_rep_event(work);
2909	cm_process_work(cm_id_priv, work);
2910	return 0;
2911out:
2912	cm_deref_id(cm_id_priv);
2913	return -EINVAL;
2914}
2915
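/*
 * Handle a send that completed in error.  Only the latest message for
 * the cm_id in its current state matters; anything else is discarded.
 * Failed REQ, REP, DREQ, and SIDR REQ sends are reported to the user
 * as the corresponding error event.
 */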
2916static void cm_process_send_error(struct ib_mad_send_buf *msg,
2917				  enum ib_wc_status wc_status)
2918{
2919	struct cm_id_private *cm_id_priv;
2920	struct ib_cm_event cm_event;
2921	enum ib_cm_state state;
2922	unsigned long flags;
2923	int ret;
2924
2925	memset(&cm_event, 0, sizeof cm_event);
2926	cm_id_priv = msg->context[0];
2927
2928	/* Discard old sends or ones without a response. */
2929	spin_lock_irqsave(&cm_id_priv->lock, flags);
2930	state = (enum ib_cm_state) (unsigned long) msg->context[1];
2931	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
2932		goto discard;
2933
2934	switch (state) {
2935	case IB_CM_REQ_SENT:
2936	case IB_CM_MRA_REQ_RCVD:
2937		cm_reset_to_idle(cm_id_priv);
2938		cm_event.event = IB_CM_REQ_ERROR;
2939		break;
2940	case IB_CM_REP_SENT:
2941	case IB_CM_MRA_REP_RCVD:
2942		cm_reset_to_idle(cm_id_priv);
2943		cm_event.event = IB_CM_REP_ERROR;
2944		break;
2945	case IB_CM_DREQ_SENT:
2946		cm_enter_timewait(cm_id_priv);
2947		cm_event.event = IB_CM_DREQ_ERROR;
2948		break;
2949	case IB_CM_SIDR_REQ_SENT:
2950		cm_id_priv->id.state = IB_CM_IDLE;
2951		cm_event.event = IB_CM_SIDR_REQ_ERROR;
2952		break;
2953	default:
2954		goto discard;
2955	}
2956	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2957	cm_event.param.send_status = wc_status;
2958
2959	/* No other events can occur on the cm_id at this point. */
2960	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
2961	cm_free_msg(msg);
2962	if (ret)
2963		ib_destroy_cm_id(&cm_id_priv->id);
2964	return;
2965discard:
2966	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2967	cm_free_msg(msg);
2968}
2969
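/*
 * Send completion handler: successful and flushed sends just release
 * the message; other failures are examined for a possible user event.
 */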
2970static void cm_send_handler(struct ib_mad_agent *mad_agent,
2971			    struct ib_mad_send_wc *mad_send_wc)
2972{
2973	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
2974
2975	switch (mad_send_wc->status) {
2976	case IB_WC_SUCCESS:
2977	case IB_WC_WR_FLUSH_ERR:
2978		cm_free_msg(msg);
2979		break;
2980	default:
2981		if (msg->context[0] && msg->context[1])
2982			cm_process_send_error(msg, mad_send_wc->status);
2983		else
2984			cm_free_msg(msg);
2985		break;
2986	}
2987}
2988
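/* Dispatch a queued work item to the handler for its event type. */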
2989static void cm_work_handler(struct work_struct *_work)
2990{
2991	struct cm_work *work = container_of(_work, struct cm_work, work.work);
2992	int ret;
2993
2994	switch (work->cm_event.event) {
2995	case IB_CM_REQ_RECEIVED:
2996		ret = cm_req_handler(work);
2997		break;
2998	case IB_CM_MRA_RECEIVED:
2999		ret = cm_mra_handler(work);
3000		break;
3001	case IB_CM_REJ_RECEIVED:
3002		ret = cm_rej_handler(work);
3003		break;
3004	case IB_CM_REP_RECEIVED:
3005		ret = cm_rep_handler(work);
3006		break;
3007	case IB_CM_RTU_RECEIVED:
3008		ret = cm_rtu_handler(work);
3009		break;
3010	case IB_CM_USER_ESTABLISHED:
3011		ret = cm_establish_handler(work);
3012		break;
3013	case IB_CM_DREQ_RECEIVED:
3014		ret = cm_dreq_handler(work);
3015		break;
3016	case IB_CM_DREP_RECEIVED:
3017		ret = cm_drep_handler(work);
3018		break;
3019	case IB_CM_SIDR_REQ_RECEIVED:
3020		ret = cm_sidr_req_handler(work);
3021		break;
3022	case IB_CM_SIDR_REP_RECEIVED:
3023		ret = cm_sidr_rep_handler(work);
3024		break;
3025	case IB_CM_LAP_RECEIVED:
3026		ret = cm_lap_handler(work);
3027		break;
3028	case IB_CM_APR_RECEIVED:
3029		ret = cm_apr_handler(work);
3030		break;
3031	case IB_CM_TIMEWAIT_EXIT:
3032		ret = cm_timewait_handler(work);
3033		break;
3034	default:
3035		ret = -EINVAL;
3036		break;
3037	}
3038	if (ret)
3039		cm_free_work(work);
3040}
3041
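/**
 * ib_cm_establish - Forces a connection into the established state.
 * @cm_id: Connection identifier to transition.
 *
 * Intended for users that receive data on a connected QP before the
 * RTU arrives, or when the RTU is lost.  The state change is applied
 * immediately; the IB_CM_USER_ESTABLISHED event is delivered from the
 * work queue for the reason described in the comment below.
 */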
3042int ib_cm_establish(struct ib_cm_id *cm_id)
3043{
3044	struct cm_id_private *cm_id_priv;
3045	struct cm_work *work;
3046	unsigned long flags;
3047	int ret = 0;
3048
3049	work = kmalloc(sizeof *work, GFP_ATOMIC);
3050	if (!work)
3051		return -ENOMEM;
3052
3053	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3054	spin_lock_irqsave(&cm_id_priv->lock, flags);
3055	switch (cm_id->state) {
3057	case IB_CM_REP_SENT:
3058	case IB_CM_MRA_REP_RCVD:
3059		cm_id->state = IB_CM_ESTABLISHED;
3060		break;
3061	case IB_CM_ESTABLISHED:
3062		ret = -EISCONN;
3063		break;
3064	default:
3065		ret = -EINVAL;
3066		break;
3067	}
3068	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3069
3070	if (ret) {
3071		kfree(work);
3072		goto out;
3073	}
3074
3075	/*
3076	 * The CM worker thread may try to destroy the cm_id before it
3077	 * can execute this work item.  To prevent potential deadlock,
3078	 * we need to find the cm_id once we're in the context of the
3079	 * worker thread, rather than holding a reference on it.
3080	 */
3081	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3082	work->local_id = cm_id->local_id;
3083	work->remote_id = cm_id->remote_id;
3084	work->mad_recv_wc = NULL;
3085	work->cm_event.event = IB_CM_USER_ESTABLISHED;
3086	queue_delayed_work(cm.wq, &work->work, 0);
3087out:
3088	return ret;
3089}
3090EXPORT_SYMBOL(ib_cm_establish);
3091
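/*
 * Receive handler invoked by the MAD layer: map the CM attribute ID to
 * an event type, allocate a work item with room for any path records
 * the handler will format, and queue it to the CM work queue.
 */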
3092static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3093			    struct ib_mad_recv_wc *mad_recv_wc)
3094{
3095	struct cm_work *work;
3096	enum ib_cm_event_type event;
3097	int paths = 0;
3098
3099	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3100	case CM_REQ_ATTR_ID:
3101		paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3102						    alt_local_lid != 0);
3103		event = IB_CM_REQ_RECEIVED;
3104		break;
3105	case CM_MRA_ATTR_ID:
3106		event = IB_CM_MRA_RECEIVED;
3107		break;
3108	case CM_REJ_ATTR_ID:
3109		event = IB_CM_REJ_RECEIVED;
3110		break;
3111	case CM_REP_ATTR_ID:
3112		event = IB_CM_REP_RECEIVED;
3113		break;
3114	case CM_RTU_ATTR_ID:
3115		event = IB_CM_RTU_RECEIVED;
3116		break;
3117	case CM_DREQ_ATTR_ID:
3118		event = IB_CM_DREQ_RECEIVED;
3119		break;
3120	case CM_DREP_ATTR_ID:
3121		event = IB_CM_DREP_RECEIVED;
3122		break;
3123	case CM_SIDR_REQ_ATTR_ID:
3124		event = IB_CM_SIDR_REQ_RECEIVED;
3125		break;
3126	case CM_SIDR_REP_ATTR_ID:
3127		event = IB_CM_SIDR_REP_RECEIVED;
3128		break;
3129	case CM_LAP_ATTR_ID:
3130		paths = 1;
3131		event = IB_CM_LAP_RECEIVED;
3132		break;
3133	case CM_APR_ATTR_ID:
3134		event = IB_CM_APR_RECEIVED;
3135		break;
3136	default:
3137		ib_free_recv_mad(mad_recv_wc);
3138		return;
3139	}
3140
3141	work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3142		       GFP_KERNEL);
3143	if (!work) {
3144		ib_free_recv_mad(mad_recv_wc);
3145		return;
3146	}
3147
3148	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3149	work->cm_event.event = event;
3150	work->mad_recv_wc = mad_recv_wc;
3151	work->port = (struct cm_port *)mad_agent->context;
3152	queue_delayed_work(cm.wq, &work->work, 0);
3153}
3154
3155static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3156				struct ib_qp_attr *qp_attr,
3157				int *qp_attr_mask)
3158{
3159	unsigned long flags;
3160	int ret;
3161
3162	spin_lock_irqsave(&cm_id_priv->lock, flags);
3163	switch (cm_id_priv->id.state) {
3164	case IB_CM_REQ_SENT:
3165	case IB_CM_MRA_REQ_RCVD:
3166	case IB_CM_REQ_RCVD:
3167	case IB_CM_MRA_REQ_SENT:
3168	case IB_CM_REP_RCVD:
3169	case IB_CM_MRA_REP_SENT:
3170	case IB_CM_REP_SENT:
3171	case IB_CM_MRA_REP_RCVD:
3172	case IB_CM_ESTABLISHED:
3173		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3174				IB_QP_PKEY_INDEX | IB_QP_PORT;
3175		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
3176					   IB_ACCESS_REMOTE_WRITE;
3177		if (cm_id_priv->responder_resources)
3178			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3179						    IB_ACCESS_REMOTE_ATOMIC;
3180		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3181		qp_attr->port_num = cm_id_priv->av.port->port_num;
3182		ret = 0;
3183		break;
3184	default:
3185		ret = -EINVAL;
3186		break;
3187	}
3188	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3189	return ret;
3190}
3191
3192static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3193			       struct ib_qp_attr *qp_attr,
3194			       int *qp_attr_mask)
3195{
3196	unsigned long flags;
3197	int ret;
3198
3199	spin_lock_irqsave(&cm_id_priv->lock, flags);
3200	switch (cm_id_priv->id.state) {
3201	case IB_CM_REQ_RCVD:
3202	case IB_CM_MRA_REQ_SENT:
3203	case IB_CM_REP_RCVD:
3204	case IB_CM_MRA_REP_SENT:
3205	case IB_CM_REP_SENT:
3206	case IB_CM_MRA_REP_RCVD:
3207	case IB_CM_ESTABLISHED:
3208		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3209				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3210		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3211		qp_attr->path_mtu = cm_id_priv->path_mtu;
3212		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3213		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3214		if (cm_id_priv->qp_type == IB_QPT_RC) {
3215			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3216					 IB_QP_MIN_RNR_TIMER;
3217			qp_attr->max_dest_rd_atomic =
3218					cm_id_priv->responder_resources;
3219			qp_attr->min_rnr_timer = 0;
3220		}
3221		if (cm_id_priv->alt_av.ah_attr.dlid) {
3222			*qp_attr_mask |= IB_QP_ALT_PATH;
3223			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3224			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3225		}
3226		ret = 0;
3227		break;
3228	default:
3229		ret = -EINVAL;
3230		break;
3231	}
3232	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3233	return ret;
3234}
3235
3236static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3237			       struct ib_qp_attr *qp_attr,
3238			       int *qp_attr_mask)
3239{
3240	unsigned long flags;
3241	int ret;
3242
3243	spin_lock_irqsave(&cm_id_priv->lock, flags);
3244	switch (cm_id_priv->id.state) {
3245	case IB_CM_REP_RCVD:
3246	case IB_CM_MRA_REP_SENT:
3247	case IB_CM_REP_SENT:
3248	case IB_CM_MRA_REP_RCVD:
3249	case IB_CM_ESTABLISHED:
3250		*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3251		qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3252		if (cm_id_priv->qp_type == IB_QPT_RC) {
3253			*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3254					 IB_QP_RNR_RETRY |
3255					 IB_QP_MAX_QP_RD_ATOMIC;
3256			qp_attr->timeout = cm_id_priv->local_ack_timeout;
3257			qp_attr->retry_cnt = cm_id_priv->retry_count;
3258			qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3259			qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3260		}
3261		if (cm_id_priv->alt_av.ah_attr.dlid) {
3262			*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3263			qp_attr->path_mig_state = IB_MIG_REARM;
3264		}
3265		ret = 0;
3266		break;
3267	default:
3268		ret = -EINVAL;
3269		break;
3270	}
3271	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3272	return ret;
3273}
3274
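/**
 * ib_cm_init_qp_attr - Initializes the QP attributes required to
 *   transition a connection's QP through the INIT, RTR, and RTS
 *   states.
 * @cm_id: Connection identifier associated with the QP.
 * @qp_attr: QP attributes to initialize; qp_attr->qp_state selects
 *   which transition to prepare.
 * @qp_attr_mask: Returns the attribute mask to pass to ib_modify_qp().
 *
 * An illustrative sketch of a caller (names are hypothetical):
 *
 *	qp_attr.qp_state = IB_QPS_INIT;
 *	if (!ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask))
 *		ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 *	(then repeat with IB_QPS_RTR and IB_QPS_RTS)
 */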
3275int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3276		       struct ib_qp_attr *qp_attr,
3277		       int *qp_attr_mask)
3278{
3279	struct cm_id_private *cm_id_priv;
3280	int ret;
3281
3282	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3283	switch (qp_attr->qp_state) {
3284	case IB_QPS_INIT:
3285		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3286		break;
3287	case IB_QPS_RTR:
3288		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3289		break;
3290	case IB_QPS_RTS:
3291		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3292		break;
3293	default:
3294		ret = -EINVAL;
3295		break;
3296	}
3297	return ret;
3298}
3299EXPORT_SYMBOL(ib_cm_init_qp_attr);
3300
3301static void cm_add_one(struct ib_device *device)
3302{
3303	struct cm_device *cm_dev;
3304	struct cm_port *port;
3305	struct ib_mad_reg_req reg_req = {
3306		.mgmt_class = IB_MGMT_CLASS_CM,
3307		.mgmt_class_version = IB_CM_CLASS_VERSION
3308	};
3309	struct ib_port_modify port_modify = {
3310		.set_port_cap_mask = IB_PORT_CM_SUP
3311	};
3312	unsigned long flags;
3313	int ret;
3314	u8 i;
3315
3316	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
3317		return;
3318
3319	cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
3320			 device->phys_port_cnt, GFP_KERNEL);
3321	if (!cm_dev)
3322		return;
3323
3324	cm_dev->device = device;
3325	cm_dev->ca_guid = device->node_guid;
3326
3327	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3328	for (i = 1; i <= device->phys_port_cnt; i++) {
3329		port = &cm_dev->port[i-1];
3330		port->cm_dev = cm_dev;
3331		port->port_num = i;
3332		port->mad_agent = ib_register_mad_agent(device, i,
3333							IB_QPT_GSI,
3334							&reg_req,
3335							0,
3336							cm_send_handler,
3337							cm_recv_handler,
3338							port);
3339		if (IS_ERR(port->mad_agent))
3340			goto error1;
3341
3342		ret = ib_modify_port(device, i, 0, &port_modify);
3343		if (ret)
3344			goto error2;
3345	}
3346	ib_set_client_data(device, &cm_client, cm_dev);
3347
3348	write_lock_irqsave(&cm.device_lock, flags);
3349	list_add_tail(&cm_dev->list, &cm.device_list);
3350	write_unlock_irqrestore(&cm.device_lock, flags);
3351	return;
3352
3353error2:
3354	ib_unregister_mad_agent(port->mad_agent);
3355error1:
3356	port_modify.set_port_cap_mask = 0;
3357	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3358	while (--i) {
3359		port = &cm_dev->port[i-1];
3360		ib_modify_port(device, port->port_num, 0, &port_modify);
3361		ib_unregister_mad_agent(port->mad_agent);
3362	}
3363	kfree(cm_dev);
3364}
3365
3366static void cm_remove_one(struct ib_device *device)
3367{
3368	struct cm_device *cm_dev;
3369	struct cm_port *port;
3370	struct ib_port_modify port_modify = {
3371		.clr_port_cap_mask = IB_PORT_CM_SUP
3372	};
3373	unsigned long flags;
3374	int i;
3375
3376	cm_dev = ib_get_client_data(device, &cm_client);
3377	if (!cm_dev)
3378		return;
3379
3380	write_lock_irqsave(&cm.device_lock, flags);
3381	list_del(&cm_dev->list);
3382	write_unlock_irqrestore(&cm.device_lock, flags);
3383
3384	for (i = 1; i <= device->phys_port_cnt; i++) {
3385		port = &cm_dev->port[i-1];
3386		ib_modify_port(device, port->port_num, 0, &port_modify);
3387		ib_unregister_mad_agent(port->mad_agent);
3388	}
3389	kfree(cm_dev);
3390}
3391
3392static int __init ib_cm_init(void)
3393{
3394	int ret;
3395
3396	memset(&cm, 0, sizeof cm);
3397	INIT_LIST_HEAD(&cm.device_list);
3398	rwlock_init(&cm.device_lock);
3399	spin_lock_init(&cm.lock);
3400	cm.listen_service_table = RB_ROOT;
3401	cm.listen_service_id = __constant_be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
3402	cm.remote_id_table = RB_ROOT;
3403	cm.remote_qp_table = RB_ROOT;
3404	cm.remote_sidr_table = RB_ROOT;
3405	idr_init(&cm.local_id_table);
3406	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
3407	idr_pre_get(&cm.local_id_table, GFP_KERNEL);
3408	INIT_LIST_HEAD(&cm.timewait_list);
3409
3410	cm.wq = create_workqueue("ib_cm");
3411	if (!cm.wq)
3412		return -ENOMEM;
3413
3414	ret = ib_register_client(&cm_client);
3415	if (ret)
3416		goto error;
3417
3418	return 0;
3419error:
3420	destroy_workqueue(cm.wq);
3421	return ret;
3422}
3423
3424static void __exit ib_cm_cleanup(void)
3425{
3426	struct cm_timewait_info *timewait_info, *tmp;
3427
3428	spin_lock_irq(&cm.lock);
3429	list_for_each_entry(timewait_info, &cm.timewait_list, list)
3430		cancel_delayed_work(&timewait_info->work.work);
3431	spin_unlock_irq(&cm.lock);
3432
3433	destroy_workqueue(cm.wq);
3434
3435	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
3436		list_del(&timewait_info->list);
3437		kfree(timewait_info);
3438	}
3439
3440	ib_unregister_client(&cm_client);
3441	idr_destroy(&cm.local_id_table);
3442}
3443
3444module_init(ib_cm_init);
3445module_exit(ib_cm_cleanup);