1/************************************************* -*- linux-c -*-
2 * Myricom 10Gb Network Interface Card Software
3 * Copyright 2009, Myricom, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 ****************************************************************/
18
19FILE_LICENCE ( GPL2_ONLY );
20
21/*
22 * Author: Glenn Brown <glenn@myri.com>
23 */
24
25/*
26 * General Theory of Operation
27 *
28 * This is a minimal Myricom 10 gigabit Ethernet driver for network
29 * boot.
30 *
31 * Initialization
32 *
33 * myri10ge_pci_probe() is called by gPXE during initialization.
34 * Minimal NIC initialization is performed to minimize resources
35 * consumed when the driver is resident but unused.
36 *
37 * Network Boot
38 *
39 * myri10ge_net_open() is called by gPXE before attempting to network
40 * boot from the card.  Packet buffers are allocated and the NIC
41 * interface is initialized.
42 *
43 * Transmit
44 *
45 * myri10ge_net_transmit() enqueues frames for transmission by writing
 * descriptors to the NIC's tx ring.  For simplicity and to avoid
47 * copies, we always have the NIC DMA up the packet.  The sent I/O
48 * buffer is released once the NIC signals myri10ge_interrupt_handler()
49 * that the send has completed.
50 *
51 * Receive
52 *
53 * Receives are posted to the NIC's receive ring.  The NIC fills a
54 * DMAable receive_completion ring with completion notifications.
55 * myri10ge_net_poll() polls for these receive notifications, posts
56 * replacement receive buffers to the NIC, and passes received frames
57 * to netdev_rx().
58 */
59
60/*
61 * Debugging levels:
62 *	- DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
63 *	  TX overflow, corrupted packets, ...
64 *	- DBG2() is for successful events, like packet received,
65 *	  packet transmitted, and other general notifications.
66 *	- DBGP() prints the name of each called function on entry
67 */
68
69#include <stdint.h>
70
71#include <byteswap.h>
72#include <errno.h>
73#include <gpxe/ethernet.h>
74#include <gpxe/if_ether.h>
75#include <gpxe/iobuf.h>
76#include <gpxe/malloc.h>
77#include <gpxe/netdevice.h>
78#include <gpxe/pci.h>
79#include <gpxe/timer.h>
80
81#include "myri10ge_mcp.h"
82
83/****************************************************************
84 * Forward declarations
85 ****************************************************************/
86
87/* PCI driver entry points */
88
static int myri10ge_pci_probe ( struct pci_device *pci,
				const struct pci_device_id *id );
static void myri10ge_pci_remove ( struct pci_device *pci );

/* Network device operations (installed into the net_device by
   myri10ge_pci_probe()) */

static void myri10ge_net_close ( struct net_device *netdev );
static void myri10ge_net_irq ( struct net_device *netdev, int enable );
static int myri10ge_net_open ( struct net_device *netdev );
static void myri10ge_net_poll ( struct net_device *netdev );
static int myri10ge_net_transmit ( struct net_device *netdev,
				   struct io_buffer *iobuf );
100
101/****************************************************************
102 * Constants
103 ****************************************************************/
104
/* Largest valid index of each host-side ring.  Ring counters are
   wrapped by AND-ing with these masks, so every value must have the
   form 2**N-1. */

#define MYRI10GE_TRANSMIT_WRAP			0x01U
#define MYRI10GE_RECEIVE_WRAP			0x07U
#define MYRI10GE_RECEIVE_COMPLETION_WRAP	0x1FU
110
111/****************************************************************
112 * Driver internal data types.
113 ****************************************************************/
114
115/* Structure holding all DMA buffers for a NIC, which we will
116   allocated as contiguous read/write DMAable memory when the NIC is
117   initialized. */
118
119struct myri10ge_dma_buffers
120{
121	/* The NIC DMAs receive completion notifications into this ring */
122
123	mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
124
125	/* Interrupt details are DMAd here before interrupting. */
126
127	mcp_irq_data_t irq_data; /* 64B */
128
129	/* NIC command completion status is DMAd here. */
130
131	mcp_cmd_response_t command_response; /* 8B */
132};
133
134struct myri10ge_private
135{
136	/* Interrupt support */
137
138	uint32	*irq_claim;	/* in NIC SRAM */
139	uint32	*irq_deassert;	/* in NIC SRAM */
140
141	/* DMA buffers. */
142
143	struct myri10ge_dma_buffers	*dma;
144
145	/*
146	 * Transmit state.
147	 *
148	 * The counts here are uint32 for easy comparison with
149	 * priv->dma->irq_data.send_done_count and with each other.
150	 */
151
152	mcp_kreq_ether_send_t	*transmit_ring;	/* in NIC SRAM */
153	uint32                   transmit_ring_wrap;
154	uint32                   transmits_posted;
155	uint32                   transmits_done;
156	struct io_buffer	*transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
157
158	/*
159	 * Receive state.
160	 */
161
162	mcp_kreq_ether_recv_t	*receive_post_ring;	/* in NIC SRAM */
163	unsigned int             receive_post_ring_wrap;
164	unsigned int             receives_posted;
165	unsigned int             receives_done;
166	struct io_buffer	*receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
167
168	/* Address for writing commands to the firmware.
169	   BEWARE: the value must be written 32 bits at a time. */
170
171	mcp_cmd_t	*command;
172};
173
174/****************************************************************
175 * Driver internal functions.
176 ****************************************************************/
177
/* Debug aid: print the transmit and receive done/posted counters along
   with the name of the enclosing function.  Use this only after one
   of the printed values has changed. */

#define DBG2_RINGS( priv )						\
	DBG2 ( "tx %x/%x rx %x/%x in %s() \n",				\
	       ( priv )->transmits_done,				\
	       ( priv )->transmits_posted,				\
	       ( priv )->receives_done,					\
	       ( priv )->receives_posted,				\
	       __FUNCTION__ )
186
187/*
188 * Return a pointer to the driver private data for a network device.
189 *
190 * @v netdev	Network device created by this driver.
191 * @ret priv	The corresponding driver private data.
192 */
193static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
194{
195	/* Our private data always follows the network device in memory,
196	   since we use alloc_netdev() to allocate the storage. */
197
198	return ( struct myri10ge_private * ) ( nd + 1 );
199}
200
201/*
202 * Pass a receive buffer to the NIC to be filled.
203 *
204 * @v priv	The network device to receive the buffer.
205 * @v iob	The I/O buffer to fill.
206 *
207 * Receive buffers are filled in FIFO order.
208 */
209static void myri10ge_post_receive ( struct myri10ge_private *priv,
210				    struct io_buffer *iob )
211{
212	unsigned int		 receives_posted;
213	mcp_kreq_ether_recv_t	*request;
214
215	/* Record the posted I/O buffer, to be passed to netdev_rx() on
216	   receive. */
217
218	receives_posted = priv->receives_posted;
219	priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
220
221	/* Post the receive. */
222
223	request = &priv->receive_post_ring[receives_posted
224					   & priv->receive_post_ring_wrap];
225	request->addr_high = 0;
226	wmb();
227	request->addr_low = htonl ( virt_to_bus ( iob->data ) );
228	priv->receives_posted = ++receives_posted;
229}
230
231/*
232 * Execute a command on the NIC.
233 *
234 * @v priv	NIC to perform the command.
235 * @v cmd	The command to perform.
236 * @v data	I/O copy buffer for parameters/results
237 * @ret rc	0 on success, else an error code.
238 */
239static int myri10ge_command ( struct myri10ge_private *priv,
240			      uint32 cmd,
241			      uint32 data[3] )
242{
243	int				 i;
244	mcp_cmd_t			*command;
245	uint32				 result;
246	unsigned int			 slept_ms;
247	volatile mcp_cmd_response_t	*response;
248
249	DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
250	command = priv->command;
251	response = &priv->dma->command_response;
252
253	/* Mark the command as incomplete. */
254
255	response->result = 0xFFFFFFFF;
256
257	/* Pass the command to the NIC. */
258
259	command->cmd		    = htonl ( cmd );
260	command->data0		    = htonl ( data[0] );
261	command->data1		    = htonl ( data[1] );
262	command->data2		    = htonl ( data[2] );
263	command->response_addr.high = 0;
264	command->response_addr.low
265		= htonl ( virt_to_bus ( &priv->dma->command_response ) );
266	for ( i=0; i<36; i+=4 )
267		* ( uint32 * ) &command->pad[i] = 0;
268	wmb();
269	* ( uint32 * ) &command->pad[36] = 0;
270
271	/* Wait up to 2 seconds for a response. */
272
273	for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
274		result = response->result;
275		if ( result == 0 ) {
276			data[0] = ntohl ( response->data );
277			return 0;
278		} else if ( result != 0xFFFFFFFF ) {
279			DBG ( "cmd%d:0x%x\n",
280			      cmd,
281			      ntohl ( response->result ) );
282			return -EIO;
283		}
284		udelay ( 1000 );
285		rmb();
286	}
287	DBG ( "cmd%d:timed out\n", cmd );
288	return -ETIMEDOUT;
289}
290
291/*
292 * Handle any pending interrupt.
293 *
294 * @v netdev		Device being polled for interrupts.
295 *
296 * This is called periodically to let the driver check for interrupts.
297 */
298static void myri10ge_interrupt_handler ( struct net_device *netdev )
299{
300	struct myri10ge_private *priv;
301	mcp_irq_data_t		*irq_data;
302	uint8			 valid;
303
304	priv = myri10ge_priv ( netdev );
305	irq_data = &priv->dma->irq_data;
306
307	/* Return if there was no interrupt. */
308
309	rmb();
310	valid = irq_data->valid;
311	if ( !valid )
312		return;
313	DBG2 ( "irq " );
314
315	/* Tell the NIC to deassert the interrupt and clear
316	   irq_data->valid.*/
317
318	*priv->irq_deassert = 0;	/* any value is OK. */
319	mb();
320
321	/* Handle any new receives. */
322
323	if ( valid & 1 ) {
324
325		/* Pass the receive interrupt token back to the NIC. */
326
327		DBG2 ( "rx " );
328		*priv->irq_claim = htonl ( 3 );
329		wmb();
330	}
331
332	/* Handle any sent packet by freeing its I/O buffer, now that
333	   we know it has been DMAd. */
334
335	if ( valid & 2 ) {
336		unsigned int nic_done_count;
337
338		DBG2 ( "snt " );
339		nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
340		while ( priv->transmits_done != nic_done_count ) {
341			struct io_buffer *iob;
342
343			iob = priv->transmit_iob [priv->transmits_done
344						  & MYRI10GE_TRANSMIT_WRAP];
345			DBG2 ( "%p ", iob );
346			netdev_tx_complete ( netdev, iob );
347			++priv->transmits_done;
348		}
349	}
350
351	/* Record any statistics update. */
352
353	if ( irq_data->stats_updated ) {
354
355		/* Update the link status. */
356
357		DBG2 ( "stats " );
358		if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
359			netdev_link_up ( netdev );
360		else
361			netdev_link_down ( netdev );
362
363		/* Ignore all error counters from the NIC. */
364	}
365
366	/* Wait for the interrupt to be deasserted, as indicated by
367	   irq_data->valid, which is set by the NIC after the deassert. */
368
369	DBG2 ( "wait " );
370	do {
371		mb();
372	} while ( irq_data->valid );
373
374	/* Claim the interrupt to enable future interrupt generation. */
375
376	DBG2 ( "claim\n" );
377	* ( priv->irq_claim + 1 ) = htonl ( 3 );
378	mb();
379}
380
/* Register offsets and values for reading the STRING_SPECS through
   the Myricom vendor-specific PCI configuration space capability.
   The register macros expect a variable `vs` holding the capability's
   config-space offset to be in scope. */

#define VS_MODE ( vs + 0x10 )
#define 	VS_MODE_READ32 0x3
#define 	VS_MODE_LOCATE 0x8
#define VS_DATA ( vs + 0x14 )
#define VS_ADDR ( vs + 0x18 )
#define 		VS_LOCATE_STRING_SPECS 0x3
390
391/*
392 * Read MAC address from its 'string specs' via the vendor-specific
393 * capability.  (This capability allows NIC SRAM and ROM to be read
394 * before it is mapped.)
395 *
396 * @v pci		The device.
397 * @v mac		Buffer to store the MAC address.
398 * @ret rc		Returns 0 on success, else an error code.
399 */
400static int mac_address_from_string_specs ( struct pci_device *pci,
401						   uint8 mac[ETH_ALEN] )
402{
403	char string_specs[256];
404	char *ptr, *limit;
405	char *to = string_specs;
406	uint32 addr;
407	uint32 len;
408	unsigned int vs;
409	int mac_set = 0;
410
411	/* Find the "vendor specific" capability. */
412
413	vs = pci_find_capability ( pci, 9 );
414	if ( vs == 0 ) {
415		DBG ( "no VS\n" );
416		return -ENOTSUP;
417	}
418
419	/* Locate the String specs in LANai SRAM. */
420
421	pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
422	pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
423	pci_read_config_dword ( pci, VS_ADDR, &addr );
424	pci_read_config_dword ( pci, VS_DATA, &len );
425	DBG2 ( "ss@%x,%x\n", addr, len );
426
427	/* Copy in the string specs.  Use 32-bit reads for performance. */
428
429	if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
430		DBG ( "SS too big\n" );
431		return -ENOTSUP;
432	}
433
434	pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
435	while ( len >= 4 ) {
436		uint32 tmp;
437
438		pci_write_config_byte ( pci, VS_ADDR, addr );
439		pci_read_config_dword ( pci, VS_DATA, &tmp );
440		tmp = ntohl ( tmp );
441		memcpy ( to, &tmp, 4 );
442		to += 4;
443		addr += 4;
444		len -= 4;
445	}
446	pci_write_config_byte ( pci, VS_MODE, 0 );
447
448	/* Parse the string specs. */
449
450	DBG2 ( "STRING_SPECS:\n" );
451	ptr = string_specs;
452	limit = string_specs + sizeof ( string_specs );
453	while ( *ptr != '\0' && ptr < limit ) {
454		DBG2 ( "%s\n", ptr );
455		if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
456			unsigned int i;
457
458			ptr += 4;
459			for ( i=0; i<6; i++ ) {
460				if ( ( ptr + 2 ) > limit ) {
461					DBG ( "bad MAC addr\n" );
462					return -ENOTSUP;
463				}
464				mac[i] = strtoul ( ptr, &ptr, 16 );
465				ptr += 1;
466			}
467			mac_set = 1;
468		}
469		else
470			while ( ptr < limit && *ptr++ );
471	}
472
473	/* Verify we parsed all we need. */
474
475	if ( !mac_set ) {
476		DBG ( "no MAC addr\n" );
477		return -ENOTSUP;
478	}
479
480	DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
481	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
482
483	return 0;
484}
485
486/****************************************************************
487 * gPXE PCI Device Driver API functions
488 ****************************************************************/
489
490/*
491 * Initialize the PCI device.
492 *
493 * @v pci 		The device's associated pci_device structure.
494 * @v id  		The PCI device + vendor id.
495 * @ret rc		Returns zero if successfully initialized.
496 *
497 * This function is called very early on, while gPXE is initializing.
498 * This is a gPXE PCI Device Driver API function.
499 */
500static int myri10ge_pci_probe ( struct pci_device *pci,
501				const struct pci_device_id *id __unused )
502{
503	static struct net_device_operations myri10ge_operations = {
504		.open     = myri10ge_net_open,
505		.close    = myri10ge_net_close,
506		.transmit = myri10ge_net_transmit,
507		.poll     = myri10ge_net_poll,
508		.irq      = myri10ge_net_irq
509	};
510
511	const char *dbg;
512	int rc;
513	struct net_device *netdev;
514	struct myri10ge_private *priv;
515
516	DBGP ( "myri10ge_pci_probe: " );
517
518	netdev = alloc_etherdev ( sizeof ( *priv ) );
519	if ( !netdev ) {
520		rc = -ENOMEM;
521		dbg = "alloc_etherdev";
522		goto abort_with_nothing;
523	}
524
525	netdev_init ( netdev, &myri10ge_operations );
526	priv = myri10ge_priv ( netdev );
527
528	pci_set_drvdata ( pci, netdev );
529	netdev->dev = &pci->dev;
530
531	/* Make sure interrupts are disabled. */
532
533	myri10ge_net_irq ( netdev, 0 );
534
535	/* Read the NIC HW address. */
536
537	rc = mac_address_from_string_specs ( pci, netdev->hw_addr );
538	if ( rc ) {
539		dbg = "mac_from_ss";
540		goto abort_with_netdev_init;
541	}
542	DBGP ( "mac " );
543
544	/* Enable bus master, etc. */
545
546	adjust_pci_device ( pci );
547	DBGP ( "pci " );
548
549	/* Register the initialized network device. */
550
551	rc = register_netdev ( netdev );
552	if ( rc ) {
553		dbg = "register_netdev";
554		goto abort_with_netdev_init;
555	}
556
557	DBGP ( "done\n" );
558
559	return 0;
560
561abort_with_netdev_init:
562	netdev_nullify ( netdev );
563	netdev_put ( netdev );
564abort_with_nothing:
565	DBG ( "%s:%s\n", dbg, strerror ( rc ) );
566	return rc;
567}
568
/*
 * Detach the driver from a PCI device.
 *
 * @v pci		PCI device to remove.
 *
 * Unregisters and releases the network device that
 * myri10ge_pci_probe() stored as the PCI driver data.
 * This is a PCI Device Driver API function.
 */
static void myri10ge_pci_remove ( struct pci_device *pci )
{
	struct net_device	*netdev = pci_get_drvdata ( pci );

	DBGP ( "myri10ge_pci_remove\n" );

	unregister_netdev ( netdev );
	netdev_nullify ( netdev );
	netdev_put ( netdev );
}
587
588/****************************************************************
589 * gPXE Network Device Driver Operations
590 ****************************************************************/
591
592/*
593 * Close a network device.
594 *
595 * @v netdev		Device to close.
596 *
597 * This is a gPXE Network Device Driver API function.
598 */
599static void myri10ge_net_close ( struct net_device *netdev )
600{
601	struct myri10ge_private *priv;
602	uint32			 data[3];
603
604	DBGP ( "myri10ge_net_close\n" );
605	priv = myri10ge_priv ( netdev );
606
607	/* disable interrupts */
608
609	myri10ge_net_irq ( netdev, 0 );
610
611	/* Reset the NIC interface, so we won't get any more events from
612	   the NIC. */
613
614	myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
615
616	/* Free receive buffers that were never filled. */
617
618	while ( priv->receives_done != priv->receives_posted ) {
619		free_iob ( priv->receive_iob[priv->receives_done
620					     & MYRI10GE_RECEIVE_WRAP] );
621		++priv->receives_done;
622	}
623
624	/* Release DMAable memory. */
625
626	free_dma ( priv->dma, sizeof ( *priv->dma ) );
627
628	/* Erase all state from the open. */
629
630	memset ( priv, 0, sizeof ( *priv ) );
631
632	DBG2_RINGS ( priv );
633}
634
635/*
636 * Enable or disable IRQ masking.
637 *
638 * @v netdev		Device to control.
639 * @v enable		Zero to mask off IRQ, non-zero to enable IRQ.
640 *
641 * This is a gPXE Network Driver API function.
642 */
643static void myri10ge_net_irq ( struct net_device *netdev, int enable )
644{
645	struct pci_device	*pci_dev;
646	uint16			 val;
647
648	DBGP ( "myri10ge_net_irq\n" );
649	pci_dev = ( struct pci_device * ) netdev->dev;
650
651	/* Adjust the Interrupt Disable bit in the Command register of the
652	   PCI Device. */
653
654	pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
655	if ( enable )
656		val &= ~PCI_COMMAND_INTX_DISABLE;
657	else
658		val |= PCI_COMMAND_INTX_DISABLE;
659	pci_write_config_word ( pci_dev, PCI_COMMAND, val );
660}
661
662/*
663 * Opens a network device.
664 *
665 * @v netdev		Device to be opened.
666 * @ret rc  		Non-zero if failed to open.
667 *
668 * This enables tx and rx on the device.
669 * This is a gPXE Network Device Driver API function.
670 */
671static int myri10ge_net_open ( struct net_device *netdev )
672{
673	const char		*dbg;	/* printed upon error return */
674	int			 rc;
675	struct io_buffer	*iob;
676	struct myri10ge_private *priv;
677	uint32			 data[3];
678	struct pci_device	*pci_dev;
679	void			*membase;
680
681	DBGP ( "myri10ge_net_open\n" );
682	priv	= myri10ge_priv ( netdev );
683	pci_dev = ( struct pci_device * ) netdev->dev;
684	membase = phys_to_virt ( pci_dev->membase );
685
686	/* Compute address for passing commands to the firmware. */
687
688	priv->command = membase + MXGEFW_ETH_CMD;
689
690	/* Ensure interrupts are disabled. */
691
692	myri10ge_net_irq ( netdev, 0 );
693
694	/* Allocate cleared DMAable buffers. */
695
696	priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
697	if ( !priv->dma ) {
698		rc = -ENOMEM;
699		dbg = "DMA";
700		goto abort_with_nothing;
701	}
702	memset ( priv->dma, 0, sizeof ( *priv->dma ) );
703
704	/* Simplify following code. */
705
706#define TRY( prefix, base, suffix ) do {		\
707		rc = myri10ge_command ( priv,		\
708					MXGEFW_		\
709					## prefix	\
710					## base		\
711					## suffix,	\
712					data );		\
713		if ( rc ) {				\
714			dbg = #base;			\
715			goto abort_with_dma;		\
716		}					\
717	} while ( 0 )
718
719	/* Send a reset command to the card to see if it is alive,
720	   and to reset its queue state. */
721
722	TRY ( CMD_, RESET , );
723
724	/* Set the interrupt queue size. */
725
726	data[0] = ( sizeof ( priv->dma->receive_completion )
727		    | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
728	TRY ( CMD_SET_ , INTRQ_SIZE , );
729
730	/* Set the interrupt queue DMA address. */
731
732	data[0] = virt_to_bus ( &priv->dma->receive_completion );
733	data[1] = 0;
734	TRY ( CMD_SET_, INTRQ_DMA, );
735
736	/* Get the NIC interrupt claim address. */
737
738	TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
739	priv->irq_claim = membase + data[0];
740
741	/* Get the NIC interrupt assert address. */
742
743	TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
744	priv->irq_deassert = membase + data[0];
745
746	/* Disable interrupt coalescing, which is inappropriate for the
747	   minimal buffering we provide. */
748
749	TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
750	* ( ( uint32 * ) ( membase + data[0] ) ) = 0;
751
752	/* Set the NIC mac address. */
753
754	data[0] = ( netdev->ll_addr[0] << 24
755		    | netdev->ll_addr[1] << 16
756		    | netdev->ll_addr[2] << 8
757		    | netdev->ll_addr[3] );
758	data[1] = ( ( netdev->ll_addr[4] << 8 )
759		     | netdev->ll_addr[5] );
760	TRY ( SET_ , MAC_ADDRESS , );
761
762	/* Enable multicast receives, because some gPXE clients don't work
763	   without multicast. . */
764
765	TRY ( ENABLE_ , ALLMULTI , );
766
767	/* Disable Ethernet flow control, so the NIC cannot deadlock the
768	   network under any circumstances. */
769
770	TRY ( DISABLE_ , FLOW , _CONTROL );
771
772	/* Compute transmit ring sizes. */
773
774	data[0] = 0;		/* slice 0 */
775	TRY ( CMD_GET_, SEND_RING, _SIZE );
776	priv->transmit_ring_wrap
777		= data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
778	if ( priv->transmit_ring_wrap
779	     & ( priv->transmit_ring_wrap + 1 ) ) {
780		rc = -EPROTO;
781		dbg = "TX_RING";
782		goto abort_with_dma;
783	}
784
785	/* Compute receive ring sizes. */
786
787	data[0] = 0;		/* slice 0 */
788	TRY ( CMD_GET_ , RX_RING , _SIZE );
789	priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
790	if ( priv->receive_post_ring_wrap
791	     & ( priv->receive_post_ring_wrap + 1 ) ) {
792		rc = -EPROTO;
793		dbg = "RX_RING";
794		goto abort_with_dma;
795	}
796
797	/* Get NIC transmit ring address. */
798
799	data[0] = 0;		/* slice 0. */
800	TRY ( CMD_GET_, SEND, _OFFSET );
801	priv->transmit_ring = membase + data[0];
802
803	/* Get the NIC receive ring address. */
804
805	data[0] = 0;		/* slice 0. */
806	TRY ( CMD_GET_, SMALL_RX, _OFFSET );
807	priv->receive_post_ring = membase + data[0];
808
809	/* Set the Nic MTU. */
810
811	data[0] = ETH_FRAME_LEN;
812	TRY ( CMD_SET_, MTU, );
813
814	/* Tell the NIC our buffer sizes. ( We use only small buffers, so we
815	   set both buffer sizes to the same value, which will force all
816	   received frames to use small buffers. ) */
817
818	data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
819	TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
820	data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
821	TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
822
823        /* Tell firmware where to DMA IRQ data */
824
825	data[0] = virt_to_bus ( &priv->dma->irq_data );
826	data[1] = 0;
827	data[2] = sizeof ( priv->dma->irq_data );
828	TRY ( CMD_SET_, STATS_DMA_V2, );
829
830	/* Post receives. */
831
832	while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
833
834		/* Reserve 2 extra bytes at the start of packets, since
835		   the firmware always skips the first 2 bytes of the buffer
836		   so TCP headers will be aligned. */
837
838		iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
839		if ( !iob ) {
840			rc = -ENOMEM;
841			dbg = "alloc_iob";
842			goto abort_with_receives_posted;
843		}
844		iob_reserve ( iob, MXGEFW_PAD );
845		myri10ge_post_receive ( priv, iob );
846	}
847
848	/* Bring up the link. */
849
850	TRY ( CMD_, ETHERNET_UP, );
851
852	DBG2_RINGS ( priv );
853	return 0;
854
855abort_with_receives_posted:
856	while ( priv->receives_posted-- )
857		free_iob ( priv->receive_iob[priv->receives_posted] );
858abort_with_dma:
859	/* Because the link is not up, we don't have to reset the NIC here. */
860	free_dma ( priv->dma, sizeof ( *priv->dma ) );
861abort_with_nothing:
862	/* Erase all signs of the failed open. */
863	memset ( priv, 0, sizeof ( *priv ) );
864	DBG ( "%s: %s\n", dbg, strerror ( rc ) );
865	return ( rc );
866}
867
868/*
869 * This function allows a driver to process events during operation.
870 *
871 * @v netdev		Device being polled.
872 *
873 * This is called periodically by gPXE to let the driver check the status of
874 * transmitted packets and to allow the driver to check for received packets.
875 * This is a gPXE Network Device Driver API function.
876 */
877static void myri10ge_net_poll ( struct net_device *netdev )
878{
879	struct io_buffer		*iob;
880	struct io_buffer		*replacement;
881	struct myri10ge_dma_buffers	*dma;
882	struct myri10ge_private		*priv;
883	unsigned int			 length;
884	unsigned int			 orig_receives_posted;
885
886	DBGP ( "myri10ge_net_poll\n" );
887	priv = myri10ge_priv ( netdev );
888	dma  = priv->dma;
889
890	/* Process any pending interrupt. */
891
892	myri10ge_interrupt_handler ( netdev );
893
894	/* Pass up received frames, but limit ourselves to receives posted
895	   before this function was called, so we cannot livelock if
896	   receives are arriving faster than we process them. */
897
898	orig_receives_posted = priv->receives_posted;
899	while ( priv->receives_done != orig_receives_posted ) {
900
901		/* Stop if there is no pending receive. */
902
903		length = ntohs ( dma->receive_completion
904				 [priv->receives_done
905				  & MYRI10GE_RECEIVE_COMPLETION_WRAP]
906				 .length );
907		if ( length == 0 )
908			break;
909
910		/* Allocate a replacement buffer.  If none is available,
911		   stop passing up packets until a buffer is available.
912
913		   Reserve 2 extra bytes at the start of packets, since
914		   the firmware always skips the first 2 bytes of the buffer
915		   so TCP headers will be aligned. */
916
917		replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
918		if ( !replacement ) {
919			DBG ( "NO RX BUF\n" );
920			break;
921		}
922		iob_reserve ( replacement, MXGEFW_PAD );
923
924		/* Pass up the received frame. */
925
926		iob = priv->receive_iob[priv->receives_done
927					& MYRI10GE_RECEIVE_WRAP];
928		iob_put ( iob, length );
929		netdev_rx ( netdev, iob );
930
931		/* We have consumed the packet, so clear the receive
932		   notification. */
933
934		dma->receive_completion [priv->receives_done
935					 & MYRI10GE_RECEIVE_COMPLETION_WRAP]
936			.length = 0;
937		wmb();
938
939		/* Replace the passed-up I/O buffer. */
940
941		myri10ge_post_receive ( priv, replacement );
942		++priv->receives_done;
943		DBG2_RINGS ( priv );
944	}
945}
946
947/*
948 * This transmits a packet.
949 *
950 * @v netdev		Device to transmit from.
951 * @v iobuf 		Data to transmit.
952 * @ret rc  		Non-zero if failed to transmit.
953 *
954 * This is a gPXE Network Driver API function.
955 */
956static int myri10ge_net_transmit ( struct net_device *netdev,
957				   struct io_buffer *iobuf )
958{
959	mcp_kreq_ether_send_t	*kreq;
960	size_t			 len;
961	struct myri10ge_private *priv;
962	uint32			 transmits_posted;
963
964	DBGP ( "myri10ge_net_transmit\n" );
965	priv = myri10ge_priv ( netdev );
966
967	/* Confirm space in the send ring. */
968
969	transmits_posted = priv->transmits_posted;
970	if ( transmits_posted - priv->transmits_done
971	     > MYRI10GE_TRANSMIT_WRAP ) {
972		DBG ( "TX ring full\n" );
973		return -ENOBUFS;
974	}
975
976	DBG2 ( "TX %p+%d ", iobuf->data, iob_len ( iobuf ) );
977	DBG2_HD ( iobuf->data, 14 );
978
979	/* Record the packet being transmitted, so we can later report
980	   send completion. */
981
982	priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
983
984	/* Copy and pad undersized frames, because the NIC does not pad,
985	   and we would rather copy small frames than do a gather. */
986
987	len = iob_len ( iobuf );
988	if ( len < ETH_ZLEN ) {
989		iob_pad ( iobuf, ETH_ZLEN );
990		len = ETH_ZLEN;
991	}
992
993	/* Enqueue the packet by writing a descriptor to the NIC.
994	   This is a bit tricky because the HW requires 32-bit writes,
995	   but the structure has smaller fields. */
996
997	kreq = &priv->transmit_ring[transmits_posted
998				    & priv->transmit_ring_wrap];
999	kreq->addr_high = 0;
1000	kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
1001	( ( uint32 * ) kreq ) [2] = htonl (
1002		0x0000 << 16	 /* pseudo_header_offset */
1003		| ( len & 0xFFFF ) /* length */
1004		);
1005	wmb();
1006	( ( uint32 * ) kreq ) [3] = htonl (
1007		0x00 << 24	/* pad */
1008		| 0x01 << 16	/* rdma_count */
1009		| 0x00 << 8	/* cksum_offset */
1010		| ( MXGEFW_FLAGS_SMALL
1011		    | MXGEFW_FLAGS_FIRST
1012		    | MXGEFW_FLAGS_NO_TSO ) /* flags */
1013		);
1014	wmb();
1015
1016	/* Mark the slot as consumed and return. */
1017
1018	priv->transmits_posted = ++transmits_posted;
1019	DBG2_RINGS ( priv );
1020	return 0;
1021}
1022
1023static struct pci_device_id myri10ge_nics[] = {
1024	/* Each of these macros must be a single line to satisfy a script. */
1025	PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
1026};
1027
1028struct pci_driver myri10ge_driver __pci_driver = {
1029	.ids      = myri10ge_nics,
1030	.id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
1031	.probe    = myri10ge_pci_probe,
1032	.remove   = myri10ge_pci_remove
1033};
1034
1035/*
1036 * Local variables:
1037 *  c-basic-offset: 8
1038 *  c-indent-level: 8
1039 *  tab-width: 8
1040 * End:
1041 */
1042