/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

FILE_LICENCE ( GPL2_OR_LATER );

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <byteswap.h>
#include <errno.h>
#include <assert.h>
#include <gpxe/list.h>
#include <gpxe/errortab.h>
#include <gpxe/if_arp.h>
#include <gpxe/netdevice.h>
#include <gpxe/iobuf.h>
#include <gpxe/ipoib.h>
#include <gpxe/process.h>
#include <gpxe/infiniband.h>
#include <gpxe/ib_mi.h>
#include <gpxe/ib_sma.h>

/** @file
 *
 * Infiniband protocol
 *
 */

/** List of Infiniband devices */
struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );

/** List of open Infiniband devices, in reverse order of opening */
static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );

/* Disambiguate the various possible EINPROGRESSes */
#define EINPROGRESS_INIT ( EINPROGRESS | EUNIQ_01 )
#define EINPROGRESS_ARMED ( EINPROGRESS | EUNIQ_02 )

/** Human-readable messages for the link statuses */
struct errortab infiniband_errors[] __errortab = {
	{ EINPROGRESS_INIT, "Initialising" },
	{ EINPROGRESS_ARMED, "Armed" },
};

/***************************************************************************
 *
 * Completion queues
 *
 ***************************************************************************
 */

/**
 * Create completion queue
 *
 * @v ibdev		Infiniband device
 * @v num_cqes		Number of completion queue entries
 * @v op		Completion queue operations
 * @ret cq		New completion queue, or NULL
 */
struct ib_completion_queue *
ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
	       struct ib_completion_queue_operations *op ) {
	struct ib_completion_queue *cq;
	int rc;

	DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );

	/* Allocate and initialise data structure */
	cq = zalloc ( sizeof ( *cq ) );
	if ( ! cq )
		goto err_alloc_cq;
	cq->ibdev = ibdev;
	list_add ( &cq->list, &ibdev->cqs );
	cq->num_cqes = num_cqes;
	INIT_LIST_HEAD ( &cq->work_queues );
	cq->op = op;

	/* Perform device-specific initialisation and get CQN */
	if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not initialise completion "
		       "queue: %s\n", ibdev, strerror ( rc ) );
		goto err_dev_create_cq;
	}

	DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
	       "with CQN %#lx\n", ibdev, num_cqes, cq,
	       ib_cq_get_drvdata ( cq ), cq->cqn );
	return cq;

	ibdev->op->destroy_cq ( ibdev, cq );
 err_dev_create_cq:
	list_del ( &cq->list );
	free ( cq );
 err_alloc_cq:
	return NULL;
}
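
/* Illustrative usage (a sketch, not part of the original file): a
 * consumer typically supplies its completion handlers via a static
 * operations table and then creates the queue.  The handler name
 * "my_complete_recv" and the queue size are hypothetical.
 *
 *	static struct ib_completion_queue_operations my_cq_op = {
 *		.complete_recv = my_complete_recv,
 *	};
 *	struct ib_completion_queue *cq;
 *
 *	cq = ib_create_cq ( ibdev, 32, &my_cq_op );
 *	if ( ! cq )
 *		return -ENOMEM;
 */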

/**
 * Destroy completion queue
 *
 * @v ibdev		Infiniband device
 * @v cq		Completion queue
 */
void ib_destroy_cq ( struct ib_device *ibdev,
		     struct ib_completion_queue *cq ) {
	DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
	       ibdev, cq->cqn );
	assert ( list_empty ( &cq->work_queues ) );
	ibdev->op->destroy_cq ( ibdev, cq );
	list_del ( &cq->list );
	free ( cq );
}

/**
 * Poll completion queue
 *
 * @v ibdev		Infiniband device
 * @v cq		Completion queue
 */
void ib_poll_cq ( struct ib_device *ibdev,
		  struct ib_completion_queue *cq ) {
	struct ib_work_queue *wq;

	/* Poll completion queue */
	ibdev->op->poll_cq ( ibdev, cq );

	/* Refill receive work queues */
	list_for_each_entry ( wq, &cq->work_queues, list ) {
		if ( ! wq->is_send )
			ib_refill_recv ( ibdev, wq->qp );
	}
}

/***************************************************************************
 *
 * Work queues
 *
 ***************************************************************************
 */

/**
 * Create queue pair
 *
 * @v ibdev		Infiniband device
 * @v type		Queue pair type
 * @v num_send_wqes	Number of send work queue entries
 * @v send_cq		Send completion queue
 * @v num_recv_wqes	Number of receive work queue entries
 * @v recv_cq		Receive completion queue
 * @ret qp		Queue pair, or NULL
 *
 * The queue pair will be left in the INIT state; you must call
 * ib_modify_qp() before it is ready to use for sending and receiving.
 */
struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
				      enum ib_queue_pair_type type,
				      unsigned int num_send_wqes,
				      struct ib_completion_queue *send_cq,
				      unsigned int num_recv_wqes,
				      struct ib_completion_queue *recv_cq ) {
	struct ib_queue_pair *qp;
	size_t total_size;
	int rc;

	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );

	/* Allocate and initialise data structure */
	total_size = ( sizeof ( *qp ) +
		       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
		       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
	qp = zalloc ( total_size );
	if ( ! qp )
		goto err_alloc_qp;
	qp->ibdev = ibdev;
	list_add ( &qp->list, &ibdev->qps );
	qp->type = type;
	qp->send.qp = qp;
	qp->send.is_send = 1;
	qp->send.cq = send_cq;
	list_add ( &qp->send.list, &send_cq->work_queues );
	qp->send.psn = ( random() & 0xffffffUL );
	qp->send.num_wqes = num_send_wqes;
	qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
	qp->recv.qp = qp;
	qp->recv.cq = recv_cq;
	list_add ( &qp->recv.list, &recv_cq->work_queues );
	qp->recv.psn = ( random() & 0xffffffUL );
	qp->recv.num_wqes = num_recv_wqes;
	qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
			    ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
	INIT_LIST_HEAD ( &qp->mgids );

	/* Perform device-specific initialisation and get QPN */
	if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
		       "%s\n", ibdev, strerror ( rc ) );
		goto err_dev_create_qp;
	}
	DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
	       ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
	       ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
	       qp->recv.iobufs );
	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
	       ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
	       ( ( ( void * ) qp ) + total_size ) );

	/* Calculate externally-visible QPN */
	switch ( type ) {
	case IB_QPT_SMI:
		qp->ext_qpn = IB_QPN_SMI;
		break;
	case IB_QPT_GSI:
		qp->ext_qpn = IB_QPN_GSI;
		break;
	default:
		qp->ext_qpn = qp->qpn;
		break;
	}
	if ( qp->ext_qpn != qp->qpn ) {
		DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
		       ibdev, qp->qpn, qp->ext_qpn );
	}

	return qp;

	ibdev->op->destroy_qp ( ibdev, qp );
 err_dev_create_qp:
	list_del ( &qp->send.list );
	list_del ( &qp->recv.list );
	list_del ( &qp->list );
	free ( qp );
 err_alloc_qp:
	return NULL;
}

/**
 * Modify queue pair
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @ret rc		Return status code
 */
int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
	int rc;

	DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );

	if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
		       ibdev, qp->qpn, strerror ( rc ) );
		return rc;
	}

	return 0;
}
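
/* Illustrative usage (a sketch, not part of the original file): a
 * typical unreliable-datagram consumer creates the queue pair, fills
 * in its queue key, and then calls ib_modify_qp() to bring it out of
 * the INIT state.  The queue pair type, queue sizes and "my_qkey"
 * are hypothetical values.
 *
 *	qp = ib_create_qp ( ibdev, IB_QPT_UD, 8, cq, 32, cq );
 *	if ( ! qp )
 *		return -ENOMEM;
 *	qp->qkey = my_qkey;
 *	if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
 *		ib_destroy_qp ( ibdev, qp );
 *		return rc;
 *	}
 */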

/**
 * Destroy queue pair
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 */
void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
	struct io_buffer *iobuf;
	unsigned int i;

	DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
	       ibdev, qp->qpn );

	assert ( list_empty ( &qp->mgids ) );

	/* Perform device-specific destruction */
	ibdev->op->destroy_qp ( ibdev, qp );

	/* Complete any remaining I/O buffers with errors */
	for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
		if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
			ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
	}
	for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
		if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
			ib_complete_recv ( ibdev, qp, NULL, iobuf,
					   -ECANCELED );
		}
	}

	/* Remove work queues from completion queue */
	list_del ( &qp->send.list );
	list_del ( &qp->recv.list );

	/* Free QP */
	list_del ( &qp->list );
	free ( qp );
}

/**
 * Find queue pair by QPN
 *
 * @v ibdev		Infiniband device
 * @v qpn		Queue pair number
 * @ret qp		Queue pair, or NULL
 */
struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
					unsigned long qpn ) {
	struct ib_queue_pair *qp;

	list_for_each_entry ( qp, &ibdev->qps, list ) {
		if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
			return qp;
	}
	return NULL;
}

/**
 * Find queue pair by multicast GID
 *
 * @v ibdev		Infiniband device
 * @v gid		Multicast GID
 * @ret qp		Queue pair, or NULL
 */
struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
					 struct ib_gid *gid ) {
	struct ib_queue_pair *qp;
	struct ib_multicast_gid *mgid;

	list_for_each_entry ( qp, &ibdev->qps, list ) {
		list_for_each_entry ( mgid, &qp->mgids, list ) {
			if ( memcmp ( &mgid->gid, gid,
				      sizeof ( mgid->gid ) ) == 0 ) {
				return qp;
			}
		}
	}
	return NULL;
}

/**
 * Find work queue belonging to completion queue
 *
 * @v cq		Completion queue
 * @v qpn		Queue pair number
 * @v is_send		Find send work queue (rather than receive)
 * @ret wq		Work queue, or NULL if not found
 */
struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
				    unsigned long qpn, int is_send ) {
	struct ib_work_queue *wq;

	list_for_each_entry ( wq, &cq->work_queues, list ) {
		if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
			return wq;
	}
	return NULL;
}
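
/* Illustrative usage (a sketch, not part of the original file):
 * ib_find_wq() is intended for a driver's poll_cq() method, which
 * must map the QPN found in a hardware completion entry back to its
 * software work queue.  The variables "cqe_qpn" and "cqe_is_send"
 * are hypothetical values extracted from such an entry.
 *
 *	wq = ib_find_wq ( cq, cqe_qpn, cqe_is_send );
 *	if ( ! wq ) {
 *		DBGC ( ibdev, "IBDEV %p CQN %#lx unknown QPN %#lx\n",
 *		       ibdev, cq->cqn, cqe_qpn );
 *		return;
 *	}
 *	qp = wq->qp;
 */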

/**
 * Post send work queue entry
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v av		Address vector
 * @v iobuf		I/O buffer
 * @ret rc		Return status code
 */
int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
		   struct ib_address_vector *av,
		   struct io_buffer *iobuf ) {
	struct ib_address_vector av_copy;
	int rc;

	/* Check queue fill level */
	if ( qp->send.fill >= qp->send.num_wqes ) {
		DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
		       ibdev, qp->qpn );
		return -ENOBUFS;
	}

	/* Use default address vector if none specified */
	if ( ! av )
		av = &qp->av;

	/* Make modifiable copy of address vector */
	memcpy ( &av_copy, av, sizeof ( av_copy ) );
	av = &av_copy;

	/* Fill in optional parameters in address vector */
	if ( ! av->qkey )
		av->qkey = qp->qkey;
	if ( ! av->rate )
		av->rate = IB_RATE_2_5;

	/* Post to hardware */
	if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
		return rc;
	}

	qp->send.fill++;
	return 0;
}
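
/* Illustrative usage (a sketch, not part of the original file): a
 * datagram send requires an address vector describing the
 * destination.  "dest_lid" and "dest_qpn" are hypothetical; leaving
 * the qkey as zero inherits the queue pair's own qkey, as handled
 * above.
 *
 *	struct ib_address_vector av;
 *
 *	memset ( &av, 0, sizeof ( av ) );
 *	av.lid = dest_lid;
 *	av.qpn = dest_qpn;
 *	if ( ( rc = ib_post_send ( ibdev, qp, &av, iobuf ) ) != 0 )
 *		return rc;
 */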

/**
 * Post receive work queue entry
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v iobuf		I/O buffer
 * @ret rc		Return status code
 */
int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
		   struct io_buffer *iobuf ) {
	int rc;

	/* Check that receive buffer is large enough for any packet */
	if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
		DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
		       ibdev, qp->qpn, iob_tailroom ( iobuf ) );
		return -EINVAL;
	}

	/* Check queue fill level */
	if ( qp->recv.fill >= qp->recv.num_wqes ) {
		DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
		       ibdev, qp->qpn );
		return -ENOBUFS;
	}

	/* Post to hardware */
	if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
		return rc;
	}

	qp->recv.fill++;
	return 0;
}

/**
 * Complete send work queue entry
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v iobuf		I/O buffer
 * @v rc		Completion status code
 */
void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
			struct io_buffer *iobuf, int rc ) {

	if ( qp->send.cq->op->complete_send ) {
		qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
	} else {
		free_iob ( iobuf );
	}
	qp->send.fill--;
}

/**
 * Complete receive work queue entry
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v av		Address vector
 * @v iobuf		I/O buffer
 * @v rc		Completion status code
 */
void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
			struct ib_address_vector *av,
			struct io_buffer *iobuf, int rc ) {

	if ( qp->recv.cq->op->complete_recv ) {
		qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
	} else {
		free_iob ( iobuf );
	}
	qp->recv.fill--;
}

/**
 * Refill receive work queue
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 */
void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
	struct io_buffer *iobuf;
	int rc;

	/* Keep filling while unfilled entries remain */
	while ( qp->recv.fill < qp->recv.num_wqes ) {

		/* Allocate I/O buffer */
		iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
		if ( ! iobuf ) {
			/* Non-fatal; we will refill on next attempt */
			return;
		}

		/* Post I/O buffer */
		if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
			DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
			       ibdev, strerror ( rc ) );
			free_iob ( iobuf );
			/* Give up */
			return;
		}
	}
}
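
/* Illustrative note (a sketch, not part of the original file):
 * consumers rarely call ib_refill_recv() directly, since ib_poll_cq()
 * refills every receive work queue after polling.  A receive
 * completion handler (the hypothetical "my_complete_recv" above)
 * therefore only has to consume, and eventually free, the buffer it
 * is given:
 *
 *	static void my_complete_recv ( struct ib_device *ibdev,
 *				       struct ib_queue_pair *qp,
 *				       struct ib_address_vector *av,
 *				       struct io_buffer *iobuf, int rc ) {
 *		if ( rc != 0 ) {
 *			free_iob ( iobuf );
 *			return;
 *		}
 *		... process the payload, then free_iob ( iobuf ) ...
 *	}
 */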

/***************************************************************************
 *
 * Link control
 *
 ***************************************************************************
 */

/**
 * Open port
 *
 * @v ibdev		Infiniband device
 * @ret rc		Return status code
 */
int ib_open ( struct ib_device *ibdev ) {
	int rc;

	/* Increment device open request counter */
	if ( ibdev->open_count++ > 0 ) {
		/* Device was already open; do nothing */
		return 0;
	}

	/* Create subnet management interface */
	ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
	if ( ! ibdev->smi ) {
		DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
		rc = -ENOMEM;
		goto err_create_smi;
	}

	/* Create subnet management agent */
	if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
		       ibdev, strerror ( rc ) );
		goto err_create_sma;
	}

	/* Create general services interface */
	ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
	if ( ! ibdev->gsi ) {
		DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
		rc = -ENOMEM;
		goto err_create_gsi;
	}

	/* Open device */
	if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not open: %s\n",
		       ibdev, strerror ( rc ) );
		goto err_open;
	}

	/* Add to head of open devices list */
	list_add ( &ibdev->open_list, &open_ib_devices );

	assert ( ibdev->open_count == 1 );
	return 0;

	ibdev->op->close ( ibdev );
 err_open:
	ib_destroy_mi ( ibdev, ibdev->gsi );
 err_create_gsi:
	ib_destroy_sma ( ibdev, ibdev->smi );
 err_create_sma:
	ib_destroy_mi ( ibdev, ibdev->smi );
 err_create_smi:
	assert ( ibdev->open_count == 1 );
	ibdev->open_count = 0;
	return rc;
}
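
/* Illustrative usage (a sketch, not part of the original file):
 * ib_open() and ib_close() are reference counted, so a consumer
 * simply brackets its use of the port:
 *
 *	if ( ( rc = ib_open ( ibdev ) ) != 0 )
 *		return rc;
 *	... use the device ...
 *	ib_close ( ibdev );
 */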

/**
 * Close port
 *
 * @v ibdev		Infiniband device
 */
void ib_close ( struct ib_device *ibdev ) {

	/* Decrement device open request counter */
	ibdev->open_count--;

	/* Close device if this was the last remaining requested opening */
	if ( ibdev->open_count == 0 ) {
		list_del ( &ibdev->open_list );
		ib_destroy_mi ( ibdev, ibdev->gsi );
		ib_destroy_sma ( ibdev, ibdev->smi );
		ib_destroy_mi ( ibdev, ibdev->smi );
		ibdev->op->close ( ibdev );
	}
}

/**
 * Get link state
 *
 * @v ibdev		Infiniband device
 * @ret rc		Link status code
 */
int ib_link_rc ( struct ib_device *ibdev ) {
	switch ( ibdev->port_state ) {
	case IB_PORT_STATE_DOWN:	return -ENOTCONN;
	case IB_PORT_STATE_INIT:	return -EINPROGRESS_INIT;
	case IB_PORT_STATE_ARMED:	return -EINPROGRESS_ARMED;
	case IB_PORT_STATE_ACTIVE:	return 0;
	default:			return -EINVAL;
	}
}
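
/* Illustrative usage (a sketch, not part of the original file): the
 * status code maps onto the human-readable messages in
 * infiniband_errors[] above, so a caller can report why the link is
 * not yet usable:
 *
 *	if ( ( rc = ib_link_rc ( ibdev ) ) != 0 )
 *		printf ( "Link not ready: %s\n", strerror ( rc ) );
 */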

/***************************************************************************
 *
 * Multicast
 *
 ***************************************************************************
 */

/**
 * Attach to multicast group
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v gid		Multicast GID
 * @ret rc		Return status code
 *
 * Note that this function handles only the local device's attachment
 * to the multicast GID; it does not issue the relevant MADs to join
 * the multicast group on the subnet.
 */
int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
		      struct ib_gid *gid ) {
	struct ib_multicast_gid *mgid;
	int rc;

	/* Add to software multicast GID list */
	mgid = zalloc ( sizeof ( *mgid ) );
	if ( ! mgid ) {
		rc = -ENOMEM;
		goto err_alloc_mgid;
	}
	memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
	list_add ( &mgid->list, &qp->mgids );

	/* Add to hardware multicast GID list */
	if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
		goto err_dev_mcast_attach;

	return 0;

 err_dev_mcast_attach:
	list_del ( &mgid->list );
	free ( mgid );
 err_alloc_mgid:
	return rc;
}
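
/* Illustrative usage (a sketch, not part of the original file):
 * attaching allows the queue pair to receive packets addressed to the
 * multicast GID, but the group itself must still be joined on the
 * subnet (e.g. via the management interfaces).  "mcast_gid" is a
 * hypothetical GID; detach mirrors attach:
 *
 *	if ( ( rc = ib_mcast_attach ( ibdev, qp, &mcast_gid ) ) != 0 )
 *		return rc;
 *	...
 *	ib_mcast_detach ( ibdev, qp, &mcast_gid );
 */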

/**
 * Detach from multicast group
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v gid		Multicast GID
 */
void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
		       struct ib_gid *gid ) {
	struct ib_multicast_gid *mgid;

	/* Remove from hardware multicast GID list */
	ibdev->op->mcast_detach ( ibdev, qp, gid );

	/* Remove from software multicast GID list */
	list_for_each_entry ( mgid, &qp->mgids, list ) {
		if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
			list_del ( &mgid->list );
			free ( mgid );
			break;
		}
	}
}

/***************************************************************************
 *
 * Miscellaneous
 *
 ***************************************************************************
 */

/**
 * Get Infiniband HCA information
 *
 * @v ibdev		Infiniband device
 * @ret hca_guid	HCA GUID
 * @ret num_ports	Number of ports
 */
int ib_get_hca_info ( struct ib_device *ibdev,
		      struct ib_gid_half *hca_guid ) {
	struct ib_device *tmp;
	int num_ports = 0;

	/* Search for IB devices with the same physical device to
	 * identify port count and a suitable Node GUID.
	 */
	for_each_ibdev ( tmp ) {
		if ( tmp->dev != ibdev->dev )
			continue;
		if ( num_ports == 0 ) {
			memcpy ( hca_guid, &tmp->gid.u.half[1],
				 sizeof ( *hca_guid ) );
		}
		num_ports++;
	}
	return num_ports;
}
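
/* Illustrative usage (a sketch, not part of the original file): the
 * node GUID and port count cover every port registered for the same
 * physical device.
 *
 *	struct ib_gid_half hca_guid;
 *	int num_ports;
 *
 *	num_ports = ib_get_hca_info ( ibdev, &hca_guid );
 */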

/**
 * Set port information
 *
 * @v ibdev		Infiniband device
 * @v mad		Set port information MAD
 * @ret rc		Return status code
 */
int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
	int rc;

	/* Adapters with embedded SMAs do not need to support this method */
	if ( ! ibdev->op->set_port_info ) {
		DBGC ( ibdev, "IBDEV %p does not support setting port "
		       "information\n", ibdev );
		return -ENOTSUP;
	}

	if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
		       ibdev, strerror ( rc ) );
		return rc;
	}

	return 0;
}

/**
 * Set partition key table
 *
 * @v ibdev		Infiniband device
 * @v mad		Set partition key table MAD
 * @ret rc		Return status code
 */
int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
	int rc;

	/* Adapters with embedded SMAs do not need to support this method */
	if ( ! ibdev->op->set_pkey_table ) {
		DBGC ( ibdev, "IBDEV %p does not support setting partition "
		       "key table\n", ibdev );
		return -ENOTSUP;
	}

	if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not set partition key table: "
		       "%s\n", ibdev, strerror ( rc ) );
		return rc;
	}

	return 0;
}

/***************************************************************************
 *
 * Event queues
 *
 ***************************************************************************
 */

/**
 * Handle Infiniband link state change
 *
 * @v ibdev		Infiniband device
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {

	/* Notify IPoIB of link state change */
	ipoib_link_state_changed ( ibdev );
}

/**
 * Poll event queue
 *
 * @v ibdev		Infiniband device
 */
void ib_poll_eq ( struct ib_device *ibdev ) {
	struct ib_completion_queue *cq;

	/* Poll device's event queue */
	ibdev->op->poll_eq ( ibdev );

	/* Poll all completion queues */
	list_for_each_entry ( cq, &ibdev->cqs, list )
		ib_poll_cq ( ibdev, cq );
}

/**
 * Single-step the Infiniband event queue
 *
 * @v process		Infiniband event queue process
 */
static void ib_step ( struct process *process __unused ) {
	struct ib_device *ibdev;

	for_each_ibdev ( ibdev )
		ib_poll_eq ( ibdev );
}

/** Infiniband event queue process */
struct process ib_process __permanent_process = {
	.list = LIST_HEAD_INIT ( ib_process.list ),
	.step = ib_step,
};

/***************************************************************************
 *
 * Infiniband device creation/destruction
 *
 ***************************************************************************
 */

/**
 * Allocate Infiniband device
 *
 * @v priv_size		Size of driver private data area
 * @ret ibdev		Infiniband device, or NULL
 */
struct ib_device * alloc_ibdev ( size_t priv_size ) {
	struct ib_device *ibdev;
	void *drv_priv;
	size_t total_len;

	total_len = ( sizeof ( *ibdev ) + priv_size );
	ibdev = zalloc ( total_len );
	if ( ibdev ) {
		drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
		ib_set_drvdata ( ibdev, drv_priv );
		INIT_LIST_HEAD ( &ibdev->cqs );
		INIT_LIST_HEAD ( &ibdev->qps );
		ibdev->port_state = IB_PORT_STATE_DOWN;
		ibdev->lid = IB_LID_NONE;
		ibdev->pkey = IB_PKEY_DEFAULT;
	}
	return ibdev;
}

/**
 * Register Infiniband device
 *
 * @v ibdev		Infiniband device
 * @ret rc		Return status code
 */
int register_ibdev ( struct ib_device *ibdev ) {
	int rc;

	/* Add to device list */
	ibdev_get ( ibdev );
	list_add_tail ( &ibdev->list, &ib_devices );

	/* Add IPoIB device */
	if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
		       ibdev, strerror ( rc ) );
		goto err_ipoib_probe;
	}

	DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
	       ibdev->dev->name );
	return 0;

 err_ipoib_probe:
	list_del ( &ibdev->list );
	ibdev_put ( ibdev );
	return rc;
}
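
/* Illustrative driver probe sketch (not part of the original file): a
 * hardware driver allocates the device, fills in its operations table
 * and underlying physical device, and then registers it.  "my_priv",
 * "my_ib_op" and "dev" are hypothetical driver-specific names.
 *
 *	ibdev = alloc_ibdev ( sizeof ( struct my_priv ) );
 *	if ( ! ibdev )
 *		return -ENOMEM;
 *	ibdev->op = &my_ib_op;
 *	ibdev->dev = dev;
 *	if ( ( rc = register_ibdev ( ibdev ) ) != 0 ) {
 *		ibdev_put ( ibdev );
 *		return rc;
 *	}
 */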

/**
 * Unregister Infiniband device
 *
 * @v ibdev		Infiniband device
 */
void unregister_ibdev ( struct ib_device *ibdev ) {

	/* Remove IPoIB device */
	ipoib_remove ( ibdev );

	/* Remove from device list */
	list_del ( &ibdev->list );
	ibdev_put ( ibdev );
	DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
}

/**
 * Find Infiniband device by GID
 *
 * @v gid		GID
 * @ret ibdev		Infiniband device, or NULL
 */
struct ib_device * find_ibdev ( struct ib_gid *gid ) {
	struct ib_device *ibdev;

	for_each_ibdev ( ibdev ) {
		if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
			return ibdev;
	}
	return NULL;
}

/**
 * Get most recently opened Infiniband device
 *
 * @ret ibdev		Most recently opened Infiniband device, or NULL
 */
struct ib_device * last_opened_ibdev ( void ) {
	struct ib_device *ibdev;

	list_for_each_entry ( ibdev, &open_ib_devices, open_list ) {
		assert ( ibdev->open_count != 0 );
		return ibdev;
	}

	return NULL;
}