1/*
2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19FILE_LICENCE ( GPL2_OR_LATER );
20
21#include <stdint.h>
22#include <stdio.h>
23#include <unistd.h>
24#include <string.h>
25#include <byteswap.h>
26#include <errno.h>
27#include <gpxe/errortab.h>
28#include <gpxe/if_arp.h>
29#include <gpxe/iobuf.h>
30#include <gpxe/netdevice.h>
31#include <gpxe/infiniband.h>
32#include <gpxe/ib_pathrec.h>
33#include <gpxe/ib_mcast.h>
34#include <gpxe/ipoib.h>
35
36/** @file
37 *
38 * IP over Infiniband
39 */
40
/** Send work queue depth requested when creating the IPoIB queue pair */
#define IPOIB_NUM_SEND_WQES 2

/** Receive work queue depth requested when creating the IPoIB queue pair */
#define IPOIB_NUM_RECV_WQES 4

/** Number of entries requested when creating the IPoIB completion queue */
#define IPOIB_NUM_CQES 8
49
50/** An IPoIB device */
51struct ipoib_device {
52	/** Network device */
53	struct net_device *netdev;
54	/** Underlying Infiniband device */
55	struct ib_device *ibdev;
56	/** Completion queue */
57	struct ib_completion_queue *cq;
58	/** Queue pair */
59	struct ib_queue_pair *qp;
60	/** Broadcast MAC */
61	struct ipoib_mac broadcast;
62	/** Joined to IPv4 broadcast multicast group
63	 *
64	 * This flag indicates whether or not we have initiated the
65	 * join to the IPv4 broadcast multicast group.
66	 */
67	int broadcast_joined;
68	/** IPv4 broadcast multicast group membership */
69	struct ib_mc_membership broadcast_membership;
70};
71
72/** Broadcast IPoIB address */
73static struct ipoib_mac ipoib_broadcast = {
74	.flags__qpn = htonl ( IB_QPN_BROADCAST ),
75	.gid.u.bytes = 	{ 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
76			  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
77};
78
79/** Link status for "broadcast join in progress" */
80#define EINPROGRESS_JOINING ( EINPROGRESS | EUNIQ_01 )
81
82/** Human-readable message for the link status */
83struct errortab ipoib_errors[] __errortab = {
84	{ EINPROGRESS_JOINING, "Joining" },
85};
86
87/****************************************************************************
88 *
89 * IPoIB peer cache
90 *
91 ****************************************************************************
92 */
93
94/**
95 * IPoIB peer address
96 *
97 * The IPoIB link-layer header is only four bytes long and so does not
98 * have sufficient room to store IPoIB MAC address(es).  We therefore
99 * maintain a cache of MAC addresses identified by a single-byte key,
100 * and abuse the spare two bytes within the link-layer header to
101 * communicate these MAC addresses between the link-layer code and the
102 * netdevice driver.
103 */
104struct ipoib_peer {
105	/** Key */
106	uint8_t key;
107	/** MAC address */
108	struct ipoib_mac mac;
109};
110
111/** Number of IPoIB peer cache entries
112 *
113 * Must be a power of two.
114 */
115#define IPOIB_NUM_CACHED_PEERS 4
116
117/** IPoIB peer address cache */
118static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
119
120/** Oldest IPoIB peer cache entry index */
121static unsigned int ipoib_peer_cache_idx = 1;
122
123/**
124 * Look up cached peer by key
125 *
126 * @v key		Peer cache key
127 * @ret peer		Peer cache entry, or NULL
128 */
129static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
130	struct ipoib_peer *peer;
131	unsigned int i;
132
133	for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
134		peer = &ipoib_peer_cache[i];
135		if ( peer->key == key )
136			return peer;
137	}
138
139	if ( key != 0 ) {
140		DBG ( "IPoIB warning: peer cache lost track of key %x while "
141		      "still in use\n", key );
142	}
143	return NULL;
144}
145
146/**
147 * Store GID and QPN in peer cache
148 *
149 * @v mac		Peer MAC address
150 * @ret peer		Peer cache entry
151 */
152static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
153	struct ipoib_peer *peer;
154	unsigned int key;
155	unsigned int i;
156
157	/* Look for existing cache entry */
158	for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
159		peer = &ipoib_peer_cache[i];
160		if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
161			return peer;
162	}
163
164	/* No entry found: create a new one */
165	key = ipoib_peer_cache_idx++;
166	peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
167	if ( peer->key )
168		DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
169
170	memset ( peer, 0, sizeof ( *peer ) );
171	peer->key = key;
172	memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
173	DBG ( "IPoIB peer %x has MAC %s\n",
174	      peer->key, ipoib_ntoa ( &peer->mac ) );
175	return peer;
176}
177
178/****************************************************************************
179 *
180 * IPoIB link layer
181 *
182 ****************************************************************************
183 */
184
185/**
186 * Add IPoIB link-layer header
187 *
188 * @v netdev		Network device
189 * @v iobuf		I/O buffer
190 * @v ll_dest		Link-layer destination address
191 * @v ll_source		Source link-layer address
192 * @v net_proto		Network-layer protocol, in network-byte order
193 * @ret rc		Return status code
194 */
195static int ipoib_push ( struct net_device *netdev __unused,
196			struct io_buffer *iobuf, const void *ll_dest,
197			const void *ll_source __unused, uint16_t net_proto ) {
198	struct ipoib_hdr *ipoib_hdr =
199		iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
200	const struct ipoib_mac *dest_mac = ll_dest;
201	const struct ipoib_mac *src_mac = ll_source;
202	struct ipoib_peer *dest;
203	struct ipoib_peer *src;
204
205	/* Add link-layer addresses to cache */
206	dest = ipoib_cache_peer ( dest_mac );
207	src = ipoib_cache_peer ( src_mac );
208
209	/* Build IPoIB header */
210	ipoib_hdr->proto = net_proto;
211	ipoib_hdr->u.peer.dest = dest->key;
212	ipoib_hdr->u.peer.src = src->key;
213
214	return 0;
215}
216
217/**
218 * Remove IPoIB link-layer header
219 *
220 * @v netdev		Network device
221 * @v iobuf		I/O buffer
222 * @ret ll_dest		Link-layer destination address
223 * @ret ll_source	Source link-layer address
224 * @ret net_proto	Network-layer protocol, in network-byte order
225 * @ret rc		Return status code
226 */
227static int ipoib_pull ( struct net_device *netdev,
228			struct io_buffer *iobuf, const void **ll_dest,
229			const void **ll_source, uint16_t *net_proto ) {
230	struct ipoib_device *ipoib = netdev->priv;
231	struct ipoib_hdr *ipoib_hdr = iobuf->data;
232	struct ipoib_peer *dest;
233	struct ipoib_peer *source;
234
235	/* Sanity check */
236	if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
237		DBG ( "IPoIB packet too short for link-layer header\n" );
238		DBG_HD ( iobuf->data, iob_len ( iobuf ) );
239		return -EINVAL;
240	}
241
242	/* Strip off IPoIB header */
243	iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
244
245	/* Identify source and destination addresses, and clear
246	 * reserved word in IPoIB header
247	 */
248	dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
249	source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
250	ipoib_hdr->u.reserved = 0;
251
252	/* Fill in required fields */
253	*ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
254	*ll_source = ( source ? &source->mac : &ipoib->broadcast );
255	*net_proto = ipoib_hdr->proto;
256
257	return 0;
258}
259
260/**
261 * Initialise IPoIB link-layer address
262 *
263 * @v hw_addr		Hardware address
264 * @v ll_addr		Link-layer address
265 */
266static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
267	const struct ib_gid_half *guid = hw_addr;
268	struct ipoib_mac *mac = ll_addr;
269
270	memset ( mac, 0, sizeof ( *mac ) );
271	memcpy ( &mac->gid.u.half[1], guid, sizeof ( mac->gid.u.half[1] ) );
272}
273
274/**
275 * Transcribe IPoIB link-layer address
276 *
277 * @v ll_addr	Link-layer address
278 * @ret string	Link-layer address in human-readable format
279 */
280const char * ipoib_ntoa ( const void *ll_addr ) {
281	static char buf[45];
282	const struct ipoib_mac *mac = ll_addr;
283
284	snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
285		   htonl ( mac->flags__qpn ), htonl ( mac->gid.u.dwords[0] ),
286		   htonl ( mac->gid.u.dwords[1] ),
287		   htonl ( mac->gid.u.dwords[2] ),
288		   htonl ( mac->gid.u.dwords[3] ) );
289	return buf;
290}
291
292/**
293 * Hash multicast address
294 *
295 * @v af		Address family
296 * @v net_addr		Network-layer address
297 * @v ll_addr		Link-layer address to fill in
298 * @ret rc		Return status code
299 */
300static int ipoib_mc_hash ( unsigned int af __unused,
301			   const void *net_addr __unused,
302			   void *ll_addr __unused ) {
303
304	return -ENOTSUP;
305}
306
307/**
308 * Generate Mellanox Ethernet-compatible compressed link-layer address
309 *
310 * @v ll_addr		Link-layer address
311 * @v eth_addr		Ethernet-compatible address to fill in
312 */
313static int ipoib_mlx_eth_addr ( const struct ib_gid_half *guid,
314				uint8_t *eth_addr ) {
315	eth_addr[0] = ( ( guid->u.bytes[3] == 2 ) ? 0x00 : 0x02 );
316	eth_addr[1] = guid->u.bytes[1];
317	eth_addr[2] = guid->u.bytes[2];
318	eth_addr[3] = guid->u.bytes[5];
319	eth_addr[4] = guid->u.bytes[6];
320	eth_addr[5] = guid->u.bytes[7];
321	return 0;
322}
323
/**
 * An IPoIB Ethernet-compatible compressed link-layer address generator
 *
 * ipoib_eth_addr() selects a generator by comparing the two byte
 * fields against bytes 1 and 2 of the GUID.
 */
struct ipoib_eth_addr_handler {
	/** Value that GUID byte 1 must have */
	uint8_t byte1;
	/** Value that GUID byte 2 must have */
	uint8_t byte2;
	/** Generator invoked for a matching GUID */
	int ( * eth_addr ) ( const struct ib_gid_half *guid,
			     uint8_t *eth_addr );
};
334
335/** IPoIB Ethernet-compatible compressed link-layer address generators */
336static struct ipoib_eth_addr_handler ipoib_eth_addr_handlers[] = {
337	{ 0x02, 0xc9, ipoib_mlx_eth_addr },
338};
339
340/**
341 * Generate Ethernet-compatible compressed link-layer address
342 *
343 * @v ll_addr		Link-layer address
344 * @v eth_addr		Ethernet-compatible address to fill in
345 */
346static int ipoib_eth_addr ( const void *ll_addr, void *eth_addr ) {
347	const struct ipoib_mac *ipoib_addr = ll_addr;
348	const struct ib_gid_half *guid = &ipoib_addr->gid.u.half[1];
349	struct ipoib_eth_addr_handler *handler;
350	unsigned int i;
351
352	for ( i = 0 ; i < ( sizeof ( ipoib_eth_addr_handlers ) /
353			    sizeof ( ipoib_eth_addr_handlers[0] ) ) ; i++ ) {
354		handler = &ipoib_eth_addr_handlers[i];
355		if ( ( handler->byte1 == guid->u.bytes[1] ) &&
356		     ( handler->byte2 == guid->u.bytes[2] ) ) {
357			return handler->eth_addr ( guid, eth_addr );
358		}
359	}
360	return -ENOTSUP;
361}
362
363/** IPoIB protocol */
364struct ll_protocol ipoib_protocol __ll_protocol = {
365	.name		= "IPoIB",
366	.ll_proto	= htons ( ARPHRD_INFINIBAND ),
367	.hw_addr_len	= sizeof ( struct ib_gid_half ),
368	.ll_addr_len	= IPOIB_ALEN,
369	.ll_header_len	= IPOIB_HLEN,
370	.push		= ipoib_push,
371	.pull		= ipoib_pull,
372	.init_addr	= ipoib_init_addr,
373	.ntoa		= ipoib_ntoa,
374	.mc_hash	= ipoib_mc_hash,
375	.eth_addr	= ipoib_eth_addr,
376};
377
378/**
379 * Allocate IPoIB device
380 *
381 * @v priv_size		Size of driver private data
382 * @ret netdev		Network device, or NULL
383 */
384struct net_device * alloc_ipoibdev ( size_t priv_size ) {
385	struct net_device *netdev;
386
387	netdev = alloc_netdev ( priv_size );
388	if ( netdev ) {
389		netdev->ll_protocol = &ipoib_protocol;
390		netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
391		netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
392	}
393	return netdev;
394}
395
396/****************************************************************************
397 *
398 * IPoIB network device
399 *
400 ****************************************************************************
401 */
402
403/**
404 * Transmit packet via IPoIB network device
405 *
406 * @v netdev		Network device
407 * @v iobuf		I/O buffer
408 * @ret rc		Return status code
409 */
410static int ipoib_transmit ( struct net_device *netdev,
411			    struct io_buffer *iobuf ) {
412	struct ipoib_device *ipoib = netdev->priv;
413	struct ib_device *ibdev = ipoib->ibdev;
414	struct ipoib_hdr *ipoib_hdr;
415	struct ipoib_peer *dest;
416	struct ib_address_vector av;
417	int rc;
418
419	/* Sanity check */
420	if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
421		DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
422		return -EINVAL;
423	}
424	ipoib_hdr = iobuf->data;
425
426	/* Attempting transmission while link is down will put the
427	 * queue pair into an error state, so don't try it.
428	 */
429	if ( ! ib_link_ok ( ibdev ) )
430		return -ENETUNREACH;
431
432	/* Identify destination address */
433	dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
434	if ( ! dest )
435		return -ENXIO;
436	ipoib_hdr->u.reserved = 0;
437
438	/* Construct address vector */
439	memset ( &av, 0, sizeof ( av ) );
440	av.qpn = ( ntohl ( dest->mac.flags__qpn ) & IB_QPN_MASK );
441	av.gid_present = 1;
442	memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
443	if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
444		/* Path not resolved yet */
445		return rc;
446	}
447
448	return ib_post_send ( ibdev, ipoib->qp, &av, iobuf );
449}
450
451/**
452 * Handle IPoIB send completion
453 *
454 * @v ibdev		Infiniband device
455 * @v qp		Queue pair
456 * @v iobuf		I/O buffer
457 * @v rc		Completion status code
458 */
459static void ipoib_complete_send ( struct ib_device *ibdev __unused,
460				  struct ib_queue_pair *qp,
461				  struct io_buffer *iobuf, int rc ) {
462	struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
463
464	netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
465}
466
467/**
468 * Handle IPoIB receive completion
469 *
470 * @v ibdev		Infiniband device
471 * @v qp		Queue pair
472 * @v av		Address vector, or NULL
473 * @v iobuf		I/O buffer
474 * @v rc		Completion status code
475 */
476static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
477				  struct ib_queue_pair *qp,
478				  struct ib_address_vector *av,
479				  struct io_buffer *iobuf, int rc ) {
480	struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
481	struct net_device *netdev = ipoib->netdev;
482	struct ipoib_hdr *ipoib_hdr;
483	struct ipoib_mac ll_src;
484	struct ipoib_peer *src;
485
486	if ( rc != 0 ) {
487		netdev_rx_err ( netdev, iobuf, rc );
488		return;
489	}
490
491	/* Sanity check */
492	if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
493		DBGC ( ipoib, "IPoIB %p received packet too short to "
494		       "contain IPoIB header\n", ipoib );
495		DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
496		netdev_rx_err ( netdev, iobuf, -EIO );
497		return;
498	}
499	ipoib_hdr = iobuf->data;
500
501	/* Parse source address */
502	if ( av->gid_present ) {
503		ll_src.flags__qpn = htonl ( av->qpn );
504		memcpy ( &ll_src.gid, &av->gid, sizeof ( ll_src.gid ) );
505		src = ipoib_cache_peer ( &ll_src );
506		ipoib_hdr->u.peer.src = src->key;
507	}
508
509	/* Hand off to network layer */
510	netdev_rx ( netdev, iobuf );
511}
512
513/** IPoIB completion operations */
514static struct ib_completion_queue_operations ipoib_cq_op = {
515	.complete_send = ipoib_complete_send,
516	.complete_recv = ipoib_complete_recv,
517};
518
519/**
520 * Poll IPoIB network device
521 *
522 * @v netdev		Network device
523 */
524static void ipoib_poll ( struct net_device *netdev ) {
525	struct ipoib_device *ipoib = netdev->priv;
526	struct ib_device *ibdev = ipoib->ibdev;
527
528	ib_poll_eq ( ibdev );
529}
530
531/**
532 * Enable/disable interrupts on IPoIB network device
533 *
534 * @v netdev		Network device
535 * @v enable		Interrupts should be enabled
536 */
537static void ipoib_irq ( struct net_device *netdev __unused,
538			int enable __unused ) {
539	/* No implementation */
540}
541
542/**
543 * Handle IPv4 broadcast multicast group join completion
544 *
545 * @v ibdev		Infiniband device
546 * @v qp		Queue pair
547 * @v membership	Multicast group membership
548 * @v rc		Status code
549 * @v mad		Response MAD (or NULL on error)
550 */
551void ipoib_join_complete ( struct ib_device *ibdev __unused,
552			   struct ib_queue_pair *qp __unused,
553			   struct ib_mc_membership *membership, int rc,
554			   union ib_mad *mad __unused ) {
555	struct ipoib_device *ipoib = container_of ( membership,
556				   struct ipoib_device, broadcast_membership );
557
558	/* Record join status as link status */
559	netdev_link_err ( ipoib->netdev, rc );
560}
561
562/**
563 * Join IPv4 broadcast multicast group
564 *
565 * @v ipoib		IPoIB device
566 * @ret rc		Return status code
567 */
568static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
569	int rc;
570
571	if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
572				    &ipoib->broadcast_membership,
573				    &ipoib->broadcast.gid,
574				    ipoib_join_complete ) ) != 0 ) {
575		DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
576		       ipoib, strerror ( rc ) );
577		return rc;
578	}
579	ipoib->broadcast_joined = 1;
580
581	return 0;
582}
583
584/**
585 * Leave IPv4 broadcast multicast group
586 *
587 * @v ipoib		IPoIB device
588 */
589static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
590
591	if ( ipoib->broadcast_joined ) {
592		ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
593				 &ipoib->broadcast_membership );
594		ipoib->broadcast_joined = 0;
595	}
596}
597
598/**
599 * Open IPoIB network device
600 *
601 * @v netdev		Network device
602 * @ret rc		Return status code
603 */
604static int ipoib_open ( struct net_device *netdev ) {
605	struct ipoib_device *ipoib = netdev->priv;
606	struct ib_device *ibdev = ipoib->ibdev;
607	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
608	int rc;
609
610	/* Open IB device */
611	if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
612		DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
613		       ipoib, strerror ( rc ) );
614		goto err_ib_open;
615	}
616
617	/* Allocate completion queue */
618	ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op );
619	if ( ! ipoib->cq ) {
620		DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
621		       ipoib );
622		rc = -ENOMEM;
623		goto err_create_cq;
624	}
625
626	/* Allocate queue pair */
627	ipoib->qp = ib_create_qp ( ibdev, IB_QPT_UD,
628				   IPOIB_NUM_SEND_WQES, ipoib->cq,
629				   IPOIB_NUM_RECV_WQES, ipoib->cq );
630	if ( ! ipoib->qp ) {
631		DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
632		       ipoib );
633		rc = -ENOMEM;
634		goto err_create_qp;
635	}
636	ib_qp_set_ownerdata ( ipoib->qp, ipoib );
637
638	/* Update MAC address with QPN */
639	mac->flags__qpn = htonl ( ipoib->qp->qpn );
640
641	/* Fill receive rings */
642	ib_refill_recv ( ibdev, ipoib->qp );
643
644	/* Fake a link status change to join the broadcast group */
645	ipoib_link_state_changed ( ibdev );
646
647	return 0;
648
649	ib_destroy_qp ( ibdev, ipoib->qp );
650 err_create_qp:
651	ib_destroy_cq ( ibdev, ipoib->cq );
652 err_create_cq:
653	ib_close ( ibdev );
654 err_ib_open:
655	return rc;
656}
657
658/**
659 * Close IPoIB network device
660 *
661 * @v netdev		Network device
662 */
663static void ipoib_close ( struct net_device *netdev ) {
664	struct ipoib_device *ipoib = netdev->priv;
665	struct ib_device *ibdev = ipoib->ibdev;
666	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
667
668	/* Leave broadcast group */
669	ipoib_leave_broadcast_group ( ipoib );
670
671	/* Remove QPN from MAC address */
672	mac->flags__qpn = 0;
673
674	/* Tear down the queues */
675	ib_destroy_qp ( ibdev, ipoib->qp );
676	ib_destroy_cq ( ibdev, ipoib->cq );
677
678	/* Close IB device */
679	ib_close ( ibdev );
680}
681
682/** IPoIB network device operations */
683static struct net_device_operations ipoib_operations = {
684	.open		= ipoib_open,
685	.close		= ipoib_close,
686	.transmit	= ipoib_transmit,
687	.poll		= ipoib_poll,
688	.irq		= ipoib_irq,
689};
690
691/**
692 * Handle link status change
693 *
694 * @v ibdev		Infiniband device
695 */
696void ipoib_link_state_changed ( struct ib_device *ibdev ) {
697	struct net_device *netdev = ib_get_ownerdata ( ibdev );
698	struct ipoib_device *ipoib = netdev->priv;
699	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
700	int rc;
701
702	/* Leave existing broadcast group */
703	ipoib_leave_broadcast_group ( ipoib );
704
705	/* Update MAC address based on potentially-new GID prefix */
706	memcpy ( &mac->gid.u.half[0], &ibdev->gid.u.half[0],
707		 sizeof ( mac->gid.u.half[0] ) );
708
709	/* Update broadcast GID based on potentially-new partition key */
710	ipoib->broadcast.gid.u.words[2] =
711		htons ( ibdev->pkey | IB_PKEY_FULL );
712
713	/* Set net device link state to reflect Infiniband link state */
714	rc = ib_link_rc ( ibdev );
715	netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
716
717	/* Join new broadcast group */
718	if ( ib_link_ok ( ibdev ) &&
719	     ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
720		DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
721		       "%s\n", ipoib, strerror ( rc ) );
722		netdev_link_err ( netdev, rc );
723		return;
724	}
725}
726
727/**
728 * Probe IPoIB device
729 *
730 * @v ibdev		Infiniband device
731 * @ret rc		Return status code
732 */
733int ipoib_probe ( struct ib_device *ibdev ) {
734	struct net_device *netdev;
735	struct ipoib_device *ipoib;
736	int rc;
737
738	/* Allocate network device */
739	netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
740	if ( ! netdev )
741		return -ENOMEM;
742	netdev_init ( netdev, &ipoib_operations );
743	ipoib = netdev->priv;
744	ib_set_ownerdata ( ibdev, netdev );
745	netdev->dev = ibdev->dev;
746	memset ( ipoib, 0, sizeof ( *ipoib ) );
747	ipoib->netdev = netdev;
748	ipoib->ibdev = ibdev;
749
750	/* Extract hardware address */
751	memcpy ( netdev->hw_addr, &ibdev->gid.u.half[1],
752		 sizeof ( ibdev->gid.u.half[1] ) );
753
754	/* Set default broadcast address */
755	memcpy ( &ipoib->broadcast, &ipoib_broadcast,
756		 sizeof ( ipoib->broadcast ) );
757	netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
758
759	/* Register network device */
760	if ( ( rc = register_netdev ( netdev ) ) != 0 )
761		goto err_register_netdev;
762
763	return 0;
764
765 err_register_netdev:
766	netdev_nullify ( netdev );
767	netdev_put ( netdev );
768	return rc;
769}
770
/**
 * Remove IPoIB device
 *
 * @v ibdev		Infiniband device
 *
 * Unregisters and releases the network device that ipoib_probe()
 * attached to this Infiniband device.
 */
void ipoib_remove ( struct ib_device *ibdev ) {
	struct net_device *netdev;

	/* Recover the network device recorded at probe time */
	netdev = ib_get_ownerdata ( ibdev );

	unregister_netdev ( netdev );
	netdev_nullify ( netdev );
	netdev_put ( netdev );
}
783