efx.c revision fadac6aae1b8d9344beaa18aa9035869d773fd98
1/****************************************************************************
2 * Driver for Solarflare Solarstorm network controllers and boards
3 * Copyright 2005-2006 Fen Systems Ltd.
4 * Copyright 2005-2011 Solarflare Communications Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
9 */
10
11#include <linux/module.h>
12#include <linux/pci.h>
13#include <linux/netdevice.h>
14#include <linux/etherdevice.h>
15#include <linux/delay.h>
16#include <linux/notifier.h>
17#include <linux/ip.h>
18#include <linux/tcp.h>
19#include <linux/in.h>
20#include <linux/crc32.h>
21#include <linux/ethtool.h>
22#include <linux/topology.h>
23#include <linux/gfp.h>
24#include <linux/cpu_rmap.h>
25#include "net_driver.h"
26#include "efx.h"
27#include "nic.h"
28
29#include "mcdi.h"
30#include "workarounds.h"
31
32/**************************************************************************
33 *
34 * Type name strings
35 *
36 **************************************************************************
37 */
38
39/* Loopback mode names (see LOOPBACK_MODE()) */
40const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
41const char *const efx_loopback_mode_names[] = {
42	[LOOPBACK_NONE]		= "NONE",
43	[LOOPBACK_DATA]		= "DATAPATH",
44	[LOOPBACK_GMAC]		= "GMAC",
45	[LOOPBACK_XGMII]	= "XGMII",
46	[LOOPBACK_XGXS]		= "XGXS",
47	[LOOPBACK_XAUI]		= "XAUI",
48	[LOOPBACK_GMII]		= "GMII",
49	[LOOPBACK_SGMII]	= "SGMII",
50	[LOOPBACK_XGBR]		= "XGBR",
51	[LOOPBACK_XFI]		= "XFI",
52	[LOOPBACK_XAUI_FAR]	= "XAUI_FAR",
53	[LOOPBACK_GMII_FAR]	= "GMII_FAR",
54	[LOOPBACK_SGMII_FAR]	= "SGMII_FAR",
55	[LOOPBACK_XFI_FAR]	= "XFI_FAR",
56	[LOOPBACK_GPHY]		= "GPHY",
57	[LOOPBACK_PHYXS]	= "PHYXS",
58	[LOOPBACK_PCS]		= "PCS",
59	[LOOPBACK_PMAPMD]	= "PMA/PMD",
60	[LOOPBACK_XPORT]	= "XPORT",
61	[LOOPBACK_XGMII_WS]	= "XGMII_WS",
62	[LOOPBACK_XAUI_WS]	= "XAUI_WS",
63	[LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
64	[LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
65	[LOOPBACK_GMII_WS]	= "GMII_WS",
66	[LOOPBACK_XFI_WS]	= "XFI_WS",
67	[LOOPBACK_XFI_WS_FAR]	= "XFI_WS_FAR",
68	[LOOPBACK_PHYXS_WS]	= "PHYXS_WS",
69};
70
71const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
72const char *const efx_reset_type_names[] = {
73	[RESET_TYPE_INVISIBLE]     = "INVISIBLE",
74	[RESET_TYPE_ALL]           = "ALL",
75	[RESET_TYPE_WORLD]         = "WORLD",
76	[RESET_TYPE_DISABLE]       = "DISABLE",
77	[RESET_TYPE_TX_WATCHDOG]   = "TX_WATCHDOG",
78	[RESET_TYPE_INT_ERROR]     = "INT_ERROR",
79	[RESET_TYPE_RX_RECOVERY]   = "RX_RECOVERY",
80	[RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH",
81	[RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH",
82	[RESET_TYPE_TX_SKIP]       = "TX_SKIP",
83	[RESET_TYPE_MC_FAILURE]    = "MC_FAILURE",
84};
85
/* 9 KiB (9216 bytes) */
#define EFX_MAX_MTU (9 << 10)

/* Work queue onto which a reset is queued whenever any NIC has a
 * hardware failure.  A single queue is shared by all NICs rather than
 * one per NIC: efx_reset_work() acquires the rtnl lock, so resets are
 * naturally serialised anyway.
 */
static struct workqueue_struct *reset_workqueue;
93
94/**************************************************************************
95 *
96 * Configurable values
97 *
98 *************************************************************************/
99
100/*
101 * Use separate channels for TX and RX events
102 *
103 * Set this to 1 to use separate channels for TX and RX. It allows us
104 * to control interrupt affinity separately for TX and RX.
105 *
106 * This is only used in MSI-X interrupt mode
107 */
108static unsigned int separate_tx_channels;
109module_param(separate_tx_channels, uint, 0444);
110MODULE_PARM_DESC(separate_tx_channels,
111		 "Use separate channels for TX and RX");
112
113/* This is the weight assigned to each of the (per-channel) virtual
114 * NAPI devices.
115 */
116static int napi_weight = 64;
117
118/* This is the time (in jiffies) between invocations of the hardware
119 * monitor.  On Falcon-based NICs, this will:
120 * - Check the on-board hardware monitor;
121 * - Poll the link state and reconfigure the hardware as necessary.
122 */
123static unsigned int efx_monitor_interval = 1 * HZ;
124
125/* Initial interrupt moderation settings.  They can be modified after
126 * module load with ethtool.
127 *
128 * The default for RX should strike a balance between increasing the
129 * round-trip latency and reducing overhead.
130 */
131static unsigned int rx_irq_mod_usec = 60;
132
133/* Initial interrupt moderation settings.  They can be modified after
134 * module load with ethtool.
135 *
136 * This default is chosen to ensure that a 10G link does not go idle
137 * while a TX queue is stopped after it has become full.  A queue is
138 * restarted when it drops below half full.  The time this takes (assuming
139 * worst case 3 descriptors per packet and 1024 descriptors) is
140 *   512 / 3 * 1.2 = 205 usec.
141 */
142static unsigned int tx_irq_mod_usec = 150;
143
144/* This is the first interrupt mode to try out of:
145 * 0 => MSI-X
146 * 1 => MSI
147 * 2 => legacy
148 */
149static unsigned int interrupt_mode;
150
151/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
152 * i.e. the number of CPUs among which we may distribute simultaneous
153 * interrupt handling.
154 *
155 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
156 * The default (0) means to assign an interrupt to each core.
157 */
158static unsigned int rss_cpus;
159module_param(rss_cpus, uint, 0444);
160MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
161
162static int phy_flash_cfg;
163module_param(phy_flash_cfg, int, 0644);
164MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
165
166static unsigned irq_adapt_low_thresh = 10000;
167module_param(irq_adapt_low_thresh, uint, 0644);
168MODULE_PARM_DESC(irq_adapt_low_thresh,
169		 "Threshold score for reducing IRQ moderation");
170
171static unsigned irq_adapt_high_thresh = 20000;
172module_param(irq_adapt_high_thresh, uint, 0644);
173MODULE_PARM_DESC(irq_adapt_high_thresh,
174		 "Threshold score for increasing IRQ moderation");
175
176static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
177			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
178			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
179			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
180module_param(debug, uint, 0);
181MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
182
183/**************************************************************************
184 *
185 * Utility functions and prototypes
186 *
187 *************************************************************************/
188
/* Forward declarations of static helpers, grouped by purpose */

/* Whole-NIC start/stop and structure teardown */
static void efx_start_all(struct efx_nic *efx);
static void efx_stop_all(struct efx_nic *efx);
static void efx_fini_struct(struct efx_nic *efx);

/* NAPI setup and teardown */
static void efx_init_napi(struct efx_nic *efx);
static void efx_fini_napi(struct efx_nic *efx);
static void efx_fini_napi_channel(struct efx_channel *channel);

/* Channel and port removal */
static void efx_remove_channels(struct efx_nic *efx);
static void efx_remove_port(struct efx_nic *efx);
197
/* Assert that the rtnl lock is held whenever the NIC is in
 * STATE_RUNNING or STATE_DISABLED; in any other state nothing is
 * checked.
 *
 * The argument is parenthesised so that any pointer expression
 * (e.g. "base + 1") expands correctly.  Note that it is evaluated
 * twice, so it must not have side effects.
 */
#define EFX_ASSERT_RESET_SERIALISED(efx)		\
	do {						\
		if (((efx)->state == STATE_RUNNING) ||	\
		    ((efx)->state == STATE_DISABLED))	\
			ASSERT_RTNL();			\
	} while (0)
204
205/**************************************************************************
206 *
207 * Event queue processing
208 *
209 *************************************************************************/
210
211/* Process channel's event queue
212 *
213 * This function is responsible for processing the event queue of a
214 * single channel.  The caller must guarantee that this function will
215 * never be concurrently called more than once on the same channel,
216 * though different channels may be being processed concurrently.
217 */
218static int efx_process_channel(struct efx_channel *channel, int budget)
219{
220	struct efx_nic *efx = channel->efx;
221	int spent;
222
223	if (unlikely(efx->reset_pending || !channel->enabled))
224		return 0;
225
226	spent = efx_nic_process_eventq(channel, budget);
227	if (spent && efx_channel_has_rx_queue(channel)) {
228		struct efx_rx_queue *rx_queue =
229			efx_channel_get_rx_queue(channel);
230
231		/* Deliver last RX packet. */
232		if (channel->rx_pkt) {
233			__efx_rx_packet(channel, channel->rx_pkt);
234			channel->rx_pkt = NULL;
235		}
236
237		efx_rx_strategy(channel);
238		efx_fast_push_rx_descriptors(rx_queue);
239	}
240
241	return spent;
242}
243
/* Mark channel as finished processing
 *
 * Clears the channel's work_pending flag and then acknowledges the
 * events read so far via efx_nic_eventq_read_ack().
 *
 * Note that since we will not receive further interrupts for this
 * channel before we finish processing and call the eventq_read_ack()
 * method, there is no need to use the interrupt hold-off timers.
 */
static inline void efx_channel_processed(struct efx_channel *channel)
{
	/* The interrupt handler for this channel may set work_pending
	 * as soon as we acknowledge the events we've seen.  Make sure
	 * it's cleared before then. */
	channel->work_pending = false;
	/* Order the store above before the acknowledgement below */
	smp_wmb();

	efx_nic_eventq_read_ack(channel);
}
260
261/* NAPI poll handler
262 *
263 * NAPI guarantees serialisation of polls of the same device, which
264 * provides the guarantee required by efx_process_channel().
265 */
266static int efx_poll(struct napi_struct *napi, int budget)
267{
268	struct efx_channel *channel =
269		container_of(napi, struct efx_channel, napi_str);
270	struct efx_nic *efx = channel->efx;
271	int spent;
272
273	netif_vdbg(efx, intr, efx->net_dev,
274		   "channel %d NAPI poll executing on CPU %d\n",
275		   channel->channel, raw_smp_processor_id());
276
277	spent = efx_process_channel(channel, budget);
278
279	if (spent < budget) {
280		if (channel->channel < efx->n_rx_channels &&
281		    efx->irq_rx_adaptive &&
282		    unlikely(++channel->irq_count == 1000)) {
283			if (unlikely(channel->irq_mod_score <
284				     irq_adapt_low_thresh)) {
285				if (channel->irq_moderation > 1) {
286					channel->irq_moderation -= 1;
287					efx->type->push_irq_moderation(channel);
288				}
289			} else if (unlikely(channel->irq_mod_score >
290					    irq_adapt_high_thresh)) {
291				if (channel->irq_moderation <
292				    efx->irq_rx_moderation) {
293					channel->irq_moderation += 1;
294					efx->type->push_irq_moderation(channel);
295				}
296			}
297			channel->irq_count = 0;
298			channel->irq_mod_score = 0;
299		}
300
301		efx_filter_rfs_expire(channel);
302
303		/* There is no race here; although napi_disable() will
304		 * only wait for napi_complete(), this isn't a problem
305		 * since efx_channel_processed() will have no effect if
306		 * interrupts have already been disabled.
307		 */
308		napi_complete(napi);
309		efx_channel_processed(channel);
310	}
311
312	return spent;
313}
314
315/* Process the eventq of the specified channel immediately on this CPU
316 *
317 * Disable hardware generated interrupts, wait for any existing
318 * processing to finish, then directly poll (and ack ) the eventq.
319 * Finally reenable NAPI and interrupts.
320 *
321 * This is for use only during a loopback self-test.  It must not
322 * deliver any packets up the stack as this can result in deadlock.
323 */
324void efx_process_channel_now(struct efx_channel *channel)
325{
326	struct efx_nic *efx = channel->efx;
327
328	BUG_ON(channel->channel >= efx->n_channels);
329	BUG_ON(!channel->enabled);
330	BUG_ON(!efx->loopback_selftest);
331
332	/* Disable interrupts and wait for ISRs to complete */
333	efx_nic_disable_interrupts(efx);
334	if (efx->legacy_irq) {
335		synchronize_irq(efx->legacy_irq);
336		efx->legacy_irq_enabled = false;
337	}
338	if (channel->irq)
339		synchronize_irq(channel->irq);
340
341	/* Wait for any NAPI processing to complete */
342	napi_disable(&channel->napi_str);
343
344	/* Poll the channel */
345	efx_process_channel(channel, channel->eventq_mask + 1);
346
347	/* Ack the eventq. This may cause an interrupt to be generated
348	 * when they are reenabled */
349	efx_channel_processed(channel);
350
351	napi_enable(&channel->napi_str);
352	if (efx->legacy_irq)
353		efx->legacy_irq_enabled = true;
354	efx_nic_enable_interrupts(efx);
355}
356
357/* Create event queue
358 * Event queue memory allocations are done only once.  If the channel
359 * is reset, the memory buffer will be reused; this guards against
360 * errors during channel reset and also simplifies interrupt handling.
361 */
362static int efx_probe_eventq(struct efx_channel *channel)
363{
364	struct efx_nic *efx = channel->efx;
365	unsigned long entries;
366
367	netif_dbg(efx, probe, efx->net_dev,
368		  "chan %d create event queue\n", channel->channel);
369
370	/* Build an event queue with room for one event per tx and rx buffer,
371	 * plus some extra for link state events and MCDI completions. */
372	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
373	EFX_BUG_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
374	channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
375
376	return efx_nic_probe_eventq(channel);
377}
378
379/* Prepare channel's event queue */
380static void efx_init_eventq(struct efx_channel *channel)
381{
382	netif_dbg(channel->efx, drv, channel->efx->net_dev,
383		  "chan %d init event queue\n", channel->channel);
384
385	channel->eventq_read_ptr = 0;
386
387	efx_nic_init_eventq(channel);
388}
389
390static void efx_fini_eventq(struct efx_channel *channel)
391{
392	netif_dbg(channel->efx, drv, channel->efx->net_dev,
393		  "chan %d fini event queue\n", channel->channel);
394
395	efx_nic_fini_eventq(channel);
396}
397
398static void efx_remove_eventq(struct efx_channel *channel)
399{
400	netif_dbg(channel->efx, drv, channel->efx->net_dev,
401		  "chan %d remove event queue\n", channel->channel);
402
403	efx_nic_remove_eventq(channel);
404}
405
406/**************************************************************************
407 *
408 * Channel handling
409 *
410 *************************************************************************/
411
412/* Allocate and initialise a channel structure, optionally copying
413 * parameters (but not resources) from an old channel structure. */
414static struct efx_channel *
415efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
416{
417	struct efx_channel *channel;
418	struct efx_rx_queue *rx_queue;
419	struct efx_tx_queue *tx_queue;
420	int j;
421
422	if (old_channel) {
423		channel = kmalloc(sizeof(*channel), GFP_KERNEL);
424		if (!channel)
425			return NULL;
426
427		*channel = *old_channel;
428
429		channel->napi_dev = NULL;
430		memset(&channel->eventq, 0, sizeof(channel->eventq));
431
432		rx_queue = &channel->rx_queue;
433		rx_queue->buffer = NULL;
434		memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
435
436		for (j = 0; j < EFX_TXQ_TYPES; j++) {
437			tx_queue = &channel->tx_queue[j];
438			if (tx_queue->channel)
439				tx_queue->channel = channel;
440			tx_queue->buffer = NULL;
441			memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
442		}
443	} else {
444		channel = kzalloc(sizeof(*channel), GFP_KERNEL);
445		if (!channel)
446			return NULL;
447
448		channel->efx = efx;
449		channel->channel = i;
450
451		for (j = 0; j < EFX_TXQ_TYPES; j++) {
452			tx_queue = &channel->tx_queue[j];
453			tx_queue->efx = efx;
454			tx_queue->queue = i * EFX_TXQ_TYPES + j;
455			tx_queue->channel = channel;
456		}
457	}
458
459	rx_queue = &channel->rx_queue;
460	rx_queue->efx = efx;
461	setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
462		    (unsigned long)rx_queue);
463
464	return channel;
465}
466
467static int efx_probe_channel(struct efx_channel *channel)
468{
469	struct efx_tx_queue *tx_queue;
470	struct efx_rx_queue *rx_queue;
471	int rc;
472
473	netif_dbg(channel->efx, probe, channel->efx->net_dev,
474		  "creating channel %d\n", channel->channel);
475
476	rc = efx_probe_eventq(channel);
477	if (rc)
478		goto fail1;
479
480	efx_for_each_channel_tx_queue(tx_queue, channel) {
481		rc = efx_probe_tx_queue(tx_queue);
482		if (rc)
483			goto fail2;
484	}
485
486	efx_for_each_channel_rx_queue(rx_queue, channel) {
487		rc = efx_probe_rx_queue(rx_queue);
488		if (rc)
489			goto fail3;
490	}
491
492	channel->n_rx_frm_trunc = 0;
493
494	return 0;
495
496 fail3:
497	efx_for_each_channel_rx_queue(rx_queue, channel)
498		efx_remove_rx_queue(rx_queue);
499 fail2:
500	efx_for_each_channel_tx_queue(tx_queue, channel)
501		efx_remove_tx_queue(tx_queue);
502 fail1:
503	return rc;
504}
505
506
507static void efx_set_channel_names(struct efx_nic *efx)
508{
509	struct efx_channel *channel;
510	const char *type = "";
511	int number;
512
513	efx_for_each_channel(channel, efx) {
514		number = channel->channel;
515		if (efx->n_channels > efx->n_rx_channels) {
516			if (channel->channel < efx->n_rx_channels) {
517				type = "-rx";
518			} else {
519				type = "-tx";
520				number -= efx->n_rx_channels;
521			}
522		}
523		snprintf(efx->channel_name[channel->channel],
524			 sizeof(efx->channel_name[0]),
525			 "%s%s-%d", efx->name, type, number);
526	}
527}
528
529static int efx_probe_channels(struct efx_nic *efx)
530{
531	struct efx_channel *channel;
532	int rc;
533
534	/* Restart special buffer allocation */
535	efx->next_buffer_table = 0;
536
537	efx_for_each_channel(channel, efx) {
538		rc = efx_probe_channel(channel);
539		if (rc) {
540			netif_err(efx, probe, efx->net_dev,
541				  "failed to create channel %d\n",
542				  channel->channel);
543			goto fail;
544		}
545	}
546	efx_set_channel_names(efx);
547
548	return 0;
549
550fail:
551	efx_remove_channels(efx);
552	return rc;
553}
554
555/* Channels are shutdown and reinitialised whilst the NIC is running
556 * to propagate configuration changes (mtu, checksum offload), or
557 * to clear hardware error conditions
558 */
559static void efx_init_channels(struct efx_nic *efx)
560{
561	struct efx_tx_queue *tx_queue;
562	struct efx_rx_queue *rx_queue;
563	struct efx_channel *channel;
564
565	/* Calculate the rx buffer allocation parameters required to
566	 * support the current MTU, including padding for header
567	 * alignment and overruns.
568	 */
569	efx->rx_buffer_len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) +
570			      EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
571			      efx->type->rx_buffer_hash_size +
572			      efx->type->rx_buffer_padding);
573	efx->rx_buffer_order = get_order(efx->rx_buffer_len +
574					 sizeof(struct efx_rx_page_state));
575
576	/* Initialise the channels */
577	efx_for_each_channel(channel, efx) {
578		netif_dbg(channel->efx, drv, channel->efx->net_dev,
579			  "init chan %d\n", channel->channel);
580
581		efx_init_eventq(channel);
582
583		efx_for_each_channel_tx_queue(tx_queue, channel)
584			efx_init_tx_queue(tx_queue);
585
586		/* The rx buffer allocation strategy is MTU dependent */
587		efx_rx_strategy(channel);
588
589		efx_for_each_channel_rx_queue(rx_queue, channel)
590			efx_init_rx_queue(rx_queue);
591
592		WARN_ON(channel->rx_pkt != NULL);
593		efx_rx_strategy(channel);
594	}
595}
596
597/* This enables event queue processing and packet transmission.
598 *
599 * Note that this function is not allowed to fail, since that would
600 * introduce too much complexity into the suspend/resume path.
601 */
602static void efx_start_channel(struct efx_channel *channel)
603{
604	struct efx_rx_queue *rx_queue;
605
606	netif_dbg(channel->efx, ifup, channel->efx->net_dev,
607		  "starting chan %d\n", channel->channel);
608
609	/* The interrupt handler for this channel may set work_pending
610	 * as soon as we enable it.  Make sure it's cleared before
611	 * then.  Similarly, make sure it sees the enabled flag set. */
612	channel->work_pending = false;
613	channel->enabled = true;
614	smp_wmb();
615
616	/* Fill the queues before enabling NAPI */
617	efx_for_each_channel_rx_queue(rx_queue, channel)
618		efx_fast_push_rx_descriptors(rx_queue);
619
620	napi_enable(&channel->napi_str);
621}
622
623/* This disables event queue processing and packet transmission.
624 * This function does not guarantee that all queue processing
625 * (e.g. RX refill) is complete.
626 */
627static void efx_stop_channel(struct efx_channel *channel)
628{
629	if (!channel->enabled)
630		return;
631
632	netif_dbg(channel->efx, ifdown, channel->efx->net_dev,
633		  "stop chan %d\n", channel->channel);
634
635	channel->enabled = false;
636	napi_disable(&channel->napi_str);
637}
638
639static void efx_fini_channels(struct efx_nic *efx)
640{
641	struct efx_channel *channel;
642	struct efx_tx_queue *tx_queue;
643	struct efx_rx_queue *rx_queue;
644	int rc;
645
646	EFX_ASSERT_RESET_SERIALISED(efx);
647	BUG_ON(efx->port_enabled);
648
649	rc = efx_nic_flush_queues(efx);
650	if (rc && EFX_WORKAROUND_7803(efx)) {
651		/* Schedule a reset to recover from the flush failure. The
652		 * descriptor caches reference memory we're about to free,
653		 * but falcon_reconfigure_mac_wrapper() won't reconnect
654		 * the MACs because of the pending reset. */
655		netif_err(efx, drv, efx->net_dev,
656			  "Resetting to recover from flush failure\n");
657		efx_schedule_reset(efx, RESET_TYPE_ALL);
658	} else if (rc) {
659		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
660	} else {
661		netif_dbg(efx, drv, efx->net_dev,
662			  "successfully flushed all queues\n");
663	}
664
665	efx_for_each_channel(channel, efx) {
666		netif_dbg(channel->efx, drv, channel->efx->net_dev,
667			  "shut down chan %d\n", channel->channel);
668
669		efx_for_each_channel_rx_queue(rx_queue, channel)
670			efx_fini_rx_queue(rx_queue);
671		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
672			efx_fini_tx_queue(tx_queue);
673		efx_fini_eventq(channel);
674	}
675}
676
677static void efx_remove_channel(struct efx_channel *channel)
678{
679	struct efx_tx_queue *tx_queue;
680	struct efx_rx_queue *rx_queue;
681
682	netif_dbg(channel->efx, drv, channel->efx->net_dev,
683		  "destroy chan %d\n", channel->channel);
684
685	efx_for_each_channel_rx_queue(rx_queue, channel)
686		efx_remove_rx_queue(rx_queue);
687	efx_for_each_possible_channel_tx_queue(tx_queue, channel)
688		efx_remove_tx_queue(tx_queue);
689	efx_remove_eventq(channel);
690}
691
692static void efx_remove_channels(struct efx_nic *efx)
693{
694	struct efx_channel *channel;
695
696	efx_for_each_channel(channel, efx)
697		efx_remove_channel(channel);
698}
699
700int
701efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
702{
703	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
704	u32 old_rxq_entries, old_txq_entries;
705	unsigned i;
706	int rc;
707
708	efx_stop_all(efx);
709	efx_fini_channels(efx);
710
711	/* Clone channels */
712	memset(other_channel, 0, sizeof(other_channel));
713	for (i = 0; i < efx->n_channels; i++) {
714		channel = efx_alloc_channel(efx, i, efx->channel[i]);
715		if (!channel) {
716			rc = -ENOMEM;
717			goto out;
718		}
719		other_channel[i] = channel;
720	}
721
722	/* Swap entry counts and channel pointers */
723	old_rxq_entries = efx->rxq_entries;
724	old_txq_entries = efx->txq_entries;
725	efx->rxq_entries = rxq_entries;
726	efx->txq_entries = txq_entries;
727	for (i = 0; i < efx->n_channels; i++) {
728		channel = efx->channel[i];
729		efx->channel[i] = other_channel[i];
730		other_channel[i] = channel;
731	}
732
733	rc = efx_probe_channels(efx);
734	if (rc)
735		goto rollback;
736
737	efx_init_napi(efx);
738
739	/* Destroy old channels */
740	for (i = 0; i < efx->n_channels; i++) {
741		efx_fini_napi_channel(other_channel[i]);
742		efx_remove_channel(other_channel[i]);
743	}
744out:
745	/* Free unused channel structures */
746	for (i = 0; i < efx->n_channels; i++)
747		kfree(other_channel[i]);
748
749	efx_init_channels(efx);
750	efx_start_all(efx);
751	return rc;
752
753rollback:
754	/* Swap back */
755	efx->rxq_entries = old_rxq_entries;
756	efx->txq_entries = old_txq_entries;
757	for (i = 0; i < efx->n_channels; i++) {
758		channel = efx->channel[i];
759		efx->channel[i] = other_channel[i];
760		other_channel[i] = channel;
761	}
762	goto out;
763}
764
765void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
766{
767	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
768}
769
770/**************************************************************************
771 *
772 * Port handling
773 *
774 **************************************************************************/
775
776/* This ensures that the kernel is kept informed (via
777 * netif_carrier_on/off) of the link status, and also maintains the
778 * link status's stop on the port's TX queue.
779 */
780void efx_link_status_changed(struct efx_nic *efx)
781{
782	struct efx_link_state *link_state = &efx->link_state;
783
784	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
785	 * that no events are triggered between unregister_netdev() and the
786	 * driver unloading. A more general condition is that NETDEV_CHANGE
787	 * can only be generated between NETDEV_UP and NETDEV_DOWN */
788	if (!netif_running(efx->net_dev))
789		return;
790
791	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
792		efx->n_link_state_changes++;
793
794		if (link_state->up)
795			netif_carrier_on(efx->net_dev);
796		else
797			netif_carrier_off(efx->net_dev);
798	}
799
800	/* Status message for kernel log */
801	if (link_state->up)
802		netif_info(efx, link, efx->net_dev,
803			   "link up at %uMbps %s-duplex (MTU %d)%s\n",
804			   link_state->speed, link_state->fd ? "full" : "half",
805			   efx->net_dev->mtu,
806			   (efx->promiscuous ? " [PROMISC]" : ""));
807	else
808		netif_info(efx, link, efx->net_dev, "link down\n");
809}
810
811void efx_link_set_advertising(struct efx_nic *efx, u32 advertising)
812{
813	efx->link_advertising = advertising;
814	if (advertising) {
815		if (advertising & ADVERTISED_Pause)
816			efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
817		else
818			efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
819		if (advertising & ADVERTISED_Asym_Pause)
820			efx->wanted_fc ^= EFX_FC_TX;
821	}
822}
823
824void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
825{
826	efx->wanted_fc = wanted_fc;
827	if (efx->link_advertising) {
828		if (wanted_fc & EFX_FC_RX)
829			efx->link_advertising |= (ADVERTISED_Pause |
830						  ADVERTISED_Asym_Pause);
831		else
832			efx->link_advertising &= ~(ADVERTISED_Pause |
833						   ADVERTISED_Asym_Pause);
834		if (wanted_fc & EFX_FC_TX)
835			efx->link_advertising ^= ADVERTISED_Asym_Pause;
836	}
837}
838
839static void efx_fini_port(struct efx_nic *efx);
840
841/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
842 * the MAC appropriately. All other PHY configuration changes are pushed
843 * through phy_op->set_settings(), and pushed asynchronously to the MAC
844 * through efx_monitor().
845 *
846 * Callers must hold the mac_lock
847 */
848int __efx_reconfigure_port(struct efx_nic *efx)
849{
850	enum efx_phy_mode phy_mode;
851	int rc;
852
853	WARN_ON(!mutex_is_locked(&efx->mac_lock));
854
855	/* Serialise the promiscuous flag with efx_set_rx_mode. */
856	netif_addr_lock_bh(efx->net_dev);
857	netif_addr_unlock_bh(efx->net_dev);
858
859	/* Disable PHY transmit in mac level loopbacks */
860	phy_mode = efx->phy_mode;
861	if (LOOPBACK_INTERNAL(efx))
862		efx->phy_mode |= PHY_MODE_TX_DISABLED;
863	else
864		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
865
866	rc = efx->type->reconfigure_port(efx);
867
868	if (rc)
869		efx->phy_mode = phy_mode;
870
871	return rc;
872}
873
874/* Reinitialise the MAC to pick up new PHY settings, even if the port is
875 * disabled. */
876int efx_reconfigure_port(struct efx_nic *efx)
877{
878	int rc;
879
880	EFX_ASSERT_RESET_SERIALISED(efx);
881
882	mutex_lock(&efx->mac_lock);
883	rc = __efx_reconfigure_port(efx);
884	mutex_unlock(&efx->mac_lock);
885
886	return rc;
887}
888
889/* Asynchronous work item for changing MAC promiscuity and multicast
890 * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
891 * MAC directly. */
892static void efx_mac_work(struct work_struct *data)
893{
894	struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
895
896	mutex_lock(&efx->mac_lock);
897	if (efx->port_enabled)
898		efx->type->reconfigure_mac(efx);
899	mutex_unlock(&efx->mac_lock);
900}
901
902static int efx_probe_port(struct efx_nic *efx)
903{
904	int rc;
905
906	netif_dbg(efx, probe, efx->net_dev, "create port\n");
907
908	if (phy_flash_cfg)
909		efx->phy_mode = PHY_MODE_SPECIAL;
910
911	/* Connect up MAC/PHY operations table */
912	rc = efx->type->probe_port(efx);
913	if (rc)
914		return rc;
915
916	/* Initialise MAC address to permanent address */
917	memcpy(efx->net_dev->dev_addr, efx->net_dev->perm_addr, ETH_ALEN);
918
919	return 0;
920}
921
922static int efx_init_port(struct efx_nic *efx)
923{
924	int rc;
925
926	netif_dbg(efx, drv, efx->net_dev, "init port\n");
927
928	mutex_lock(&efx->mac_lock);
929
930	rc = efx->phy_op->init(efx);
931	if (rc)
932		goto fail1;
933
934	efx->port_initialized = true;
935
936	/* Reconfigure the MAC before creating dma queues (required for
937	 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
938	efx->type->reconfigure_mac(efx);
939
940	/* Ensure the PHY advertises the correct flow control settings */
941	rc = efx->phy_op->reconfigure(efx);
942	if (rc)
943		goto fail2;
944
945	mutex_unlock(&efx->mac_lock);
946	return 0;
947
948fail2:
949	efx->phy_op->fini(efx);
950fail1:
951	mutex_unlock(&efx->mac_lock);
952	return rc;
953}
954
955static void efx_start_port(struct efx_nic *efx)
956{
957	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
958	BUG_ON(efx->port_enabled);
959
960	mutex_lock(&efx->mac_lock);
961	efx->port_enabled = true;
962
963	/* efx_mac_work() might have been scheduled after efx_stop_port(),
964	 * and then cancelled by efx_flush_all() */
965	efx->type->reconfigure_mac(efx);
966
967	mutex_unlock(&efx->mac_lock);
968}
969
970/* Prevent efx_mac_work() and efx_monitor() from working */
971static void efx_stop_port(struct efx_nic *efx)
972{
973	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
974
975	mutex_lock(&efx->mac_lock);
976	efx->port_enabled = false;
977	mutex_unlock(&efx->mac_lock);
978
979	/* Serialise against efx_set_multicast_list() */
980	netif_addr_lock_bh(efx->net_dev);
981	netif_addr_unlock_bh(efx->net_dev);
982}
983
984static void efx_fini_port(struct efx_nic *efx)
985{
986	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
987
988	if (!efx->port_initialized)
989		return;
990
991	efx->phy_op->fini(efx);
992	efx->port_initialized = false;
993
994	efx->link_state.up = false;
995	efx_link_status_changed(efx);
996}
997
998static void efx_remove_port(struct efx_nic *efx)
999{
1000	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");
1001
1002	efx->type->remove_port(efx);
1003}
1004
1005/**************************************************************************
1006 *
1007 * NIC handling
1008 *
1009 **************************************************************************/
1010
1011/* This configures the PCI device to enable I/O and DMA. */
1012static int efx_init_io(struct efx_nic *efx)
1013{
1014	struct pci_dev *pci_dev = efx->pci_dev;
1015	dma_addr_t dma_mask = efx->type->max_dma_mask;
1016	int rc;
1017
1018	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
1019
1020	rc = pci_enable_device(pci_dev);
1021	if (rc) {
1022		netif_err(efx, probe, efx->net_dev,
1023			  "failed to enable PCI device\n");
1024		goto fail1;
1025	}
1026
1027	pci_set_master(pci_dev);
1028
1029	/* Set the PCI DMA mask.  Try all possibilities from our
1030	 * genuine mask down to 32 bits, because some architectures
1031	 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
1032	 * masks event though they reject 46 bit masks.
1033	 */
1034	while (dma_mask > 0x7fffffffUL) {
1035		if (pci_dma_supported(pci_dev, dma_mask)) {
1036			rc = pci_set_dma_mask(pci_dev, dma_mask);
1037			if (rc == 0)
1038				break;
1039		}
1040		dma_mask >>= 1;
1041	}
1042	if (rc) {
1043		netif_err(efx, probe, efx->net_dev,
1044			  "could not find a suitable DMA mask\n");
1045		goto fail2;
1046	}
1047	netif_dbg(efx, probe, efx->net_dev,
1048		  "using DMA mask %llx\n", (unsigned long long) dma_mask);
1049	rc = pci_set_consistent_dma_mask(pci_dev, dma_mask);
1050	if (rc) {
1051		/* pci_set_consistent_dma_mask() is not *allowed* to
1052		 * fail with a mask that pci_set_dma_mask() accepted,
1053		 * but just in case...
1054		 */
1055		netif_err(efx, probe, efx->net_dev,
1056			  "failed to set consistent DMA mask\n");
1057		goto fail2;
1058	}
1059
1060	efx->membase_phys = pci_resource_start(efx->pci_dev, EFX_MEM_BAR);
1061	rc = pci_request_region(pci_dev, EFX_MEM_BAR, "sfc");
1062	if (rc) {
1063		netif_err(efx, probe, efx->net_dev,
1064			  "request for memory BAR failed\n");
1065		rc = -EIO;
1066		goto fail3;
1067	}
1068	efx->membase = ioremap_nocache(efx->membase_phys,
1069				       efx->type->mem_map_size);
1070	if (!efx->membase) {
1071		netif_err(efx, probe, efx->net_dev,
1072			  "could not map memory BAR at %llx+%x\n",
1073			  (unsigned long long)efx->membase_phys,
1074			  efx->type->mem_map_size);
1075		rc = -ENOMEM;
1076		goto fail4;
1077	}
1078	netif_dbg(efx, probe, efx->net_dev,
1079		  "memory BAR at %llx+%x (virtual %p)\n",
1080		  (unsigned long long)efx->membase_phys,
1081		  efx->type->mem_map_size, efx->membase);
1082
1083	return 0;
1084
1085 fail4:
1086	pci_release_region(efx->pci_dev, EFX_MEM_BAR);
1087 fail3:
1088	efx->membase_phys = 0;
1089 fail2:
1090	pci_disable_device(efx->pci_dev);
1091 fail1:
1092	return rc;
1093}
1094
1095static void efx_fini_io(struct efx_nic *efx)
1096{
1097	netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
1098
1099	if (efx->membase) {
1100		iounmap(efx->membase);
1101		efx->membase = NULL;
1102	}
1103
1104	if (efx->membase_phys) {
1105		pci_release_region(efx->pci_dev, EFX_MEM_BAR);
1106		efx->membase_phys = 0;
1107	}
1108
1109	pci_disable_device(efx->pci_dev);
1110}
1111
1112static int efx_wanted_parallelism(void)
1113{
1114	cpumask_var_t thread_mask;
1115	int count;
1116	int cpu;
1117
1118	if (rss_cpus)
1119		return rss_cpus;
1120
1121	if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
1122		printk(KERN_WARNING
1123		       "sfc: RSS disabled due to allocation failure\n");
1124		return 1;
1125	}
1126
1127	count = 0;
1128	for_each_online_cpu(cpu) {
1129		if (!cpumask_test_cpu(cpu, thread_mask)) {
1130			++count;
1131			cpumask_or(thread_mask, thread_mask,
1132				   topology_thread_cpumask(cpu));
1133		}
1134	}
1135
1136	free_cpumask_var(thread_mask);
1137	return count;
1138}
1139
/* With CONFIG_RFS_ACCEL, allocate the net device's rx_cpu_rmap and add
 * the MSI-X vector of each RX channel to it.  On any failure the map is
 * freed again and the pointer reset to NULL.  Without CONFIG_RFS_ACCEL
 * this does nothing.
 *
 * Returns 0 on success, -ENOMEM if the map cannot be allocated, or the
 * error from irq_cpu_rmap_add().
 */
static int
efx_init_rx_cpu_rmap(struct efx_nic *efx, struct msix_entry *xentries)
{
#ifdef CONFIG_RFS_ACCEL
	struct net_device *net_dev = efx->net_dev;
	int chan, err;

	net_dev->rx_cpu_rmap = alloc_irq_cpu_rmap(efx->n_rx_channels);
	if (!net_dev->rx_cpu_rmap)
		return -ENOMEM;

	for (chan = 0; chan < efx->n_rx_channels; chan++) {
		err = irq_cpu_rmap_add(net_dev->rx_cpu_rmap,
				       xentries[chan].vector);
		if (!err)
			continue;

		free_irq_cpu_rmap(net_dev->rx_cpu_rmap);
		net_dev->rx_cpu_rmap = NULL;
		return err;
	}
#endif
	return 0;
}
1161
1162/* Probe the number and type of interrupts we are able to obtain, and
1163 * the resulting numbers of channels and RX queues.
1164 */
1165static int efx_probe_interrupts(struct efx_nic *efx)
1166{
1167	int max_channels =
1168		min_t(int, efx->type->phys_addr_channels, EFX_MAX_CHANNELS);
1169	int rc, i;
1170
1171	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
1172		struct msix_entry xentries[EFX_MAX_CHANNELS];
1173		int n_channels;
1174
1175		n_channels = efx_wanted_parallelism();
1176		if (separate_tx_channels)
1177			n_channels *= 2;
1178		n_channels = min(n_channels, max_channels);
1179
1180		for (i = 0; i < n_channels; i++)
1181			xentries[i].entry = i;
1182		rc = pci_enable_msix(efx->pci_dev, xentries, n_channels);
1183		if (rc > 0) {
1184			netif_err(efx, drv, efx->net_dev,
1185				  "WARNING: Insufficient MSI-X vectors"
1186				  " available (%d < %d).\n", rc, n_channels);
1187			netif_err(efx, drv, efx->net_dev,
1188				  "WARNING: Performance may be reduced.\n");
1189			EFX_BUG_ON_PARANOID(rc >= n_channels);
1190			n_channels = rc;
1191			rc = pci_enable_msix(efx->pci_dev, xentries,
1192					     n_channels);
1193		}
1194
1195		if (rc == 0) {
1196			efx->n_channels = n_channels;
1197			if (separate_tx_channels) {
1198				efx->n_tx_channels =
1199					max(efx->n_channels / 2, 1U);
1200				efx->n_rx_channels =
1201					max(efx->n_channels -
1202					    efx->n_tx_channels, 1U);
1203			} else {
1204				efx->n_tx_channels = efx->n_channels;
1205				efx->n_rx_channels = efx->n_channels;
1206			}
1207			rc = efx_init_rx_cpu_rmap(efx, xentries);
1208			if (rc) {
1209				pci_disable_msix(efx->pci_dev);
1210				return rc;
1211			}
1212			for (i = 0; i < n_channels; i++)
1213				efx_get_channel(efx, i)->irq =
1214					xentries[i].vector;
1215		} else {
1216			/* Fall back to single channel MSI */
1217			efx->interrupt_mode = EFX_INT_MODE_MSI;
1218			netif_err(efx, drv, efx->net_dev,
1219				  "could not enable MSI-X\n");
1220		}
1221	}
1222
1223	/* Try single interrupt MSI */
1224	if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
1225		efx->n_channels = 1;
1226		efx->n_rx_channels = 1;
1227		efx->n_tx_channels = 1;
1228		rc = pci_enable_msi(efx->pci_dev);
1229		if (rc == 0) {
1230			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
1231		} else {
1232			netif_err(efx, drv, efx->net_dev,
1233				  "could not enable MSI\n");
1234			efx->interrupt_mode = EFX_INT_MODE_LEGACY;
1235		}
1236	}
1237
1238	/* Assume legacy interrupts */
1239	if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
1240		efx->n_channels = 1 + (separate_tx_channels ? 1 : 0);
1241		efx->n_rx_channels = 1;
1242		efx->n_tx_channels = 1;
1243		efx->legacy_irq = efx->pci_dev->irq;
1244	}
1245
1246	return 0;
1247}
1248
1249static void efx_remove_interrupts(struct efx_nic *efx)
1250{
1251	struct efx_channel *channel;
1252
1253	/* Remove MSI/MSI-X interrupts */
1254	efx_for_each_channel(channel, efx)
1255		channel->irq = 0;
1256	pci_disable_msi(efx->pci_dev);
1257	pci_disable_msix(efx->pci_dev);
1258
1259	/* Remove legacy interrupt */
1260	efx->legacy_irq = 0;
1261}
1262
1263static void efx_set_channels(struct efx_nic *efx)
1264{
1265	struct efx_channel *channel;
1266	struct efx_tx_queue *tx_queue;
1267
1268	efx->tx_channel_offset =
1269		separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0;
1270
1271	/* We need to adjust the TX queue numbers if we have separate
1272	 * RX-only and TX-only channels.
1273	 */
1274	efx_for_each_channel(channel, efx) {
1275		efx_for_each_channel_tx_queue(tx_queue, channel)
1276			tx_queue->queue -= (efx->tx_channel_offset *
1277					    EFX_TXQ_TYPES);
1278	}
1279}
1280
1281static int efx_probe_nic(struct efx_nic *efx)
1282{
1283	size_t i;
1284	int rc;
1285
1286	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");
1287
1288	/* Carry out hardware-type specific initialisation */
1289	rc = efx->type->probe(efx);
1290	if (rc)
1291		return rc;
1292
1293	/* Determine the number of channels and queues by trying to hook
1294	 * in MSI-X interrupts. */
1295	rc = efx_probe_interrupts(efx);
1296	if (rc)
1297		goto fail;
1298
1299	if (efx->n_channels > 1)
1300		get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
1301	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
1302		efx->rx_indir_table[i] =
1303			ethtool_rxfh_indir_default(i, efx->n_rx_channels);
1304
1305	efx_set_channels(efx);
1306	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
1307	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
1308
1309	/* Initialise the interrupt moderation settings */
1310	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
1311				true);
1312
1313	return 0;
1314
1315fail:
1316	efx->type->remove(efx);
1317	return rc;
1318}
1319
1320static void efx_remove_nic(struct efx_nic *efx)
1321{
1322	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");
1323
1324	efx_remove_interrupts(efx);
1325	efx->type->remove(efx);
1326}
1327
1328/**************************************************************************
1329 *
1330 * NIC startup/shutdown
1331 *
1332 *************************************************************************/
1333
1334static int efx_probe_all(struct efx_nic *efx)
1335{
1336	int rc;
1337
1338	rc = efx_probe_nic(efx);
1339	if (rc) {
1340		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
1341		goto fail1;
1342	}
1343
1344	rc = efx_probe_port(efx);
1345	if (rc) {
1346		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
1347		goto fail2;
1348	}
1349
1350	efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;
1351	rc = efx_probe_channels(efx);
1352	if (rc)
1353		goto fail3;
1354
1355	rc = efx_probe_filters(efx);
1356	if (rc) {
1357		netif_err(efx, probe, efx->net_dev,
1358			  "failed to create filter tables\n");
1359		goto fail4;
1360	}
1361
1362	return 0;
1363
1364 fail4:
1365	efx_remove_channels(efx);
1366 fail3:
1367	efx_remove_port(efx);
1368 fail2:
1369	efx_remove_nic(efx);
1370 fail1:
1371	return rc;
1372}
1373
1374/* Called after previous invocation(s) of efx_stop_all, restarts the
1375 * port, kernel transmit queue, NAPI processing and hardware interrupts,
1376 * and ensures that the port is scheduled to be reconfigured.
1377 * This function is safe to call multiple times when the NIC is in any
1378 * state. */
1379static void efx_start_all(struct efx_nic *efx)
1380{
1381	struct efx_channel *channel;
1382
1383	EFX_ASSERT_RESET_SERIALISED(efx);
1384
1385	/* Check that it is appropriate to restart the interface. All
1386	 * of these flags are safe to read under just the rtnl lock */
1387	if (efx->port_enabled)
1388		return;
1389	if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT))
1390		return;
1391	if (!netif_running(efx->net_dev))
1392		return;
1393
1394	/* Mark the port as enabled so port reconfigurations can start, then
1395	 * restart the transmit interface early so the watchdog timer stops */
1396	efx_start_port(efx);
1397
1398	if (netif_device_present(efx->net_dev))
1399		netif_tx_wake_all_queues(efx->net_dev);
1400
1401	efx_for_each_channel(channel, efx)
1402		efx_start_channel(channel);
1403
1404	if (efx->legacy_irq)
1405		efx->legacy_irq_enabled = true;
1406	efx_nic_enable_interrupts(efx);
1407
1408	/* Switch to event based MCDI completions after enabling interrupts.
1409	 * If a reset has been scheduled, then we need to stay in polled mode.
1410	 * Rather than serialising efx_mcdi_mode_event() [which sleeps] and
1411	 * reset_pending [modified from an atomic context], we instead guarantee
1412	 * that efx_mcdi_mode_poll() isn't reverted erroneously */
1413	efx_mcdi_mode_event(efx);
1414	if (efx->reset_pending)
1415		efx_mcdi_mode_poll(efx);
1416
1417	/* Start the hardware monitor if there is one. Otherwise (we're link
1418	 * event driven), we have to poll the PHY because after an event queue
1419	 * flush, we could have a missed a link state change */
1420	if (efx->type->monitor != NULL) {
1421		queue_delayed_work(efx->workqueue, &efx->monitor_work,
1422				   efx_monitor_interval);
1423	} else {
1424		mutex_lock(&efx->mac_lock);
1425		if (efx->phy_op->poll(efx))
1426			efx_link_status_changed(efx);
1427		mutex_unlock(&efx->mac_lock);
1428	}
1429
1430	efx->type->start_stats(efx);
1431}
1432
1433/* Flush all delayed work. Should only be called when no more delayed work
1434 * will be scheduled. This doesn't flush pending online resets (efx_reset),
1435 * since we're holding the rtnl_lock at this point. */
1436static void efx_flush_all(struct efx_nic *efx)
1437{
1438	/* Make sure the hardware monitor is stopped */
1439	cancel_delayed_work_sync(&efx->monitor_work);
1440	/* Stop scheduled port reconfigurations */
1441	cancel_work_sync(&efx->mac_work);
1442}
1443
1444/* Quiesce hardware and software without bringing the link down.
1445 * Safe to call multiple times, when the nic and interface is in any
1446 * state. The caller is guaranteed to subsequently be in a position
1447 * to modify any hardware and software state they see fit without
1448 * taking locks. */
1449static void efx_stop_all(struct efx_nic *efx)
1450{
1451	struct efx_channel *channel;
1452
1453	EFX_ASSERT_RESET_SERIALISED(efx);
1454
1455	/* port_enabled can be read safely under the rtnl lock */
1456	if (!efx->port_enabled)
1457		return;
1458
1459	efx->type->stop_stats(efx);
1460
1461	/* Switch to MCDI polling on Siena before disabling interrupts */
1462	efx_mcdi_mode_poll(efx);
1463
1464	/* Disable interrupts and wait for ISR to complete */
1465	efx_nic_disable_interrupts(efx);
1466	if (efx->legacy_irq) {
1467		synchronize_irq(efx->legacy_irq);
1468		efx->legacy_irq_enabled = false;
1469	}
1470	efx_for_each_channel(channel, efx) {
1471		if (channel->irq)
1472			synchronize_irq(channel->irq);
1473	}
1474
1475	/* Stop all NAPI processing and synchronous rx refills */
1476	efx_for_each_channel(channel, efx)
1477		efx_stop_channel(channel);
1478
1479	/* Stop all asynchronous port reconfigurations. Since all
1480	 * event processing has already been stopped, there is no
1481	 * window to loose phy events */
1482	efx_stop_port(efx);
1483
1484	/* Flush efx_mac_work(), refill_workqueue, monitor_work */
1485	efx_flush_all(efx);
1486
1487	/* Stop the kernel transmit interface late, so the watchdog
1488	 * timer isn't ticking over the flush */
1489	netif_tx_stop_all_queues(efx->net_dev);
1490	netif_tx_lock_bh(efx->net_dev);
1491	netif_tx_unlock_bh(efx->net_dev);
1492}
1493
/* Remove everything efx_probe_all() created, in the reverse of the
 * order it was probed: filters, channels, port, NIC.
 */
static void efx_remove_all(struct efx_nic *efx)
{
	efx_remove_filters(efx);
	efx_remove_channels(efx);

	efx_remove_port(efx);
	efx_remove_nic(efx);
}
1501
1502/**************************************************************************
1503 *
1504 * Interrupt moderation
1505 *
1506 **************************************************************************/
1507
/* Convert an interrupt moderation interval in microseconds into timer
 * ticks of @quantum_ns nanoseconds each, rounding down.  Zero stays
 * zero; any other interval yields at least one tick.
 */
static unsigned int irq_mod_ticks(unsigned int usecs, unsigned int quantum_ns)
{
	unsigned int nsecs = usecs * 1000;
	unsigned int ticks;

	if (!usecs)
		return 0;

	ticks = nsecs / quantum_ns;

	/* never round down to 0 */
	return ticks ? ticks : 1;
}
1516
1517/* Set interrupt moderation parameters */
1518int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
1519			    unsigned int rx_usecs, bool rx_adaptive,
1520			    bool rx_may_override_tx)
1521{
1522	struct efx_channel *channel;
1523	unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max *
1524						efx->timer_quantum_ns,
1525						1000);
1526	unsigned int tx_ticks;
1527	unsigned int rx_ticks;
1528
1529	EFX_ASSERT_RESET_SERIALISED(efx);
1530
1531	if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max)
1532		return -EINVAL;
1533
1534	tx_ticks = irq_mod_ticks(tx_usecs, efx->timer_quantum_ns);
1535	rx_ticks = irq_mod_ticks(rx_usecs, efx->timer_quantum_ns);
1536
1537	if (tx_ticks != rx_ticks && efx->tx_channel_offset == 0 &&
1538	    !rx_may_override_tx) {
1539		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
1540			  "RX and TX IRQ moderation must be equal\n");
1541		return -EINVAL;
1542	}
1543
1544	efx->irq_rx_adaptive = rx_adaptive;
1545	efx->irq_rx_moderation = rx_ticks;
1546	efx_for_each_channel(channel, efx) {
1547		if (efx_channel_has_rx_queue(channel))
1548			channel->irq_moderation = rx_ticks;
1549		else if (efx_channel_has_tx_queues(channel))
1550			channel->irq_moderation = tx_ticks;
1551	}
1552
1553	return 0;
1554}
1555
1556void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
1557			    unsigned int *rx_usecs, bool *rx_adaptive)
1558{
1559	/* We must round up when converting ticks to microseconds
1560	 * because we round down when converting the other way.
1561	 */
1562
1563	*rx_adaptive = efx->irq_rx_adaptive;
1564	*rx_usecs = DIV_ROUND_UP(efx->irq_rx_moderation *
1565				 efx->timer_quantum_ns,
1566				 1000);
1567
1568	/* If channels are shared between RX and TX, so is IRQ
1569	 * moderation.  Otherwise, IRQ moderation is the same for all
1570	 * TX channels and is not adaptive.
1571	 */
1572	if (efx->tx_channel_offset == 0)
1573		*tx_usecs = *rx_usecs;
1574	else
1575		*tx_usecs = DIV_ROUND_UP(
1576			efx->channel[efx->tx_channel_offset]->irq_moderation *
1577			efx->timer_quantum_ns,
1578			1000);
1579}
1580
1581/**************************************************************************
1582 *
1583 * Hardware monitor
1584 *
1585 **************************************************************************/
1586
1587/* Run periodically off the general workqueue */
1588static void efx_monitor(struct work_struct *data)
1589{
1590	struct efx_nic *efx = container_of(data, struct efx_nic,
1591					   monitor_work.work);
1592
1593	netif_vdbg(efx, timer, efx->net_dev,
1594		   "hardware monitor executing on CPU %d\n",
1595		   raw_smp_processor_id());
1596	BUG_ON(efx->type->monitor == NULL);
1597
1598	/* If the mac_lock is already held then it is likely a port
1599	 * reconfiguration is already in place, which will likely do
1600	 * most of the work of monitor() anyway. */
1601	if (mutex_trylock(&efx->mac_lock)) {
1602		if (efx->port_enabled)
1603			efx->type->monitor(efx);
1604		mutex_unlock(&efx->mac_lock);
1605	}
1606
1607	queue_delayed_work(efx->workqueue, &efx->monitor_work,
1608			   efx_monitor_interval);
1609}
1610
1611/**************************************************************************
1612 *
1613 * ioctls
1614 *
1615 *************************************************************************/
1616
1617/* Net device ioctl
1618 * Context: process, rtnl_lock() held.
1619 */
1620static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
1621{
1622	struct efx_nic *efx = netdev_priv(net_dev);
1623	struct mii_ioctl_data *data = if_mii(ifr);
1624
1625	EFX_ASSERT_RESET_SERIALISED(efx);
1626
1627	/* Convert phy_id from older PRTAD/DEVAD format */
1628	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
1629	    (data->phy_id & 0xfc00) == 0x0400)
1630		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;
1631
1632	return mdio_mii_ioctl(&efx->mdio, data, cmd);
1633}
1634
1635/**************************************************************************
1636 *
1637 * NAPI interface
1638 *
1639 **************************************************************************/
1640
1641static void efx_init_napi(struct efx_nic *efx)
1642{
1643	struct efx_channel *channel;
1644
1645	efx_for_each_channel(channel, efx) {
1646		channel->napi_dev = efx->net_dev;
1647		netif_napi_add(channel->napi_dev, &channel->napi_str,
1648			       efx_poll, napi_weight);
1649	}
1650}
1651
1652static void efx_fini_napi_channel(struct efx_channel *channel)
1653{
1654	if (channel->napi_dev)
1655		netif_napi_del(&channel->napi_str);
1656	channel->napi_dev = NULL;
1657}
1658
1659static void efx_fini_napi(struct efx_nic *efx)
1660{
1661	struct efx_channel *channel;
1662
1663	efx_for_each_channel(channel, efx)
1664		efx_fini_napi_channel(channel);
1665}
1666
1667/**************************************************************************
1668 *
1669 * Kernel netpoll interface
1670 *
1671 *************************************************************************/
1672
#ifdef CONFIG_NET_POLL_CONTROLLER

/* Netpoll entry point: schedule every channel.
 *
 * Although in the common case interrupts will be disabled, this is not
 * guaranteed. However, all our work happens inside the NAPI callback,
 * so no locking is required.
 */
static void efx_netpoll(struct net_device *net_dev)
{
	struct efx_channel *channel;
	struct efx_nic *efx = netdev_priv(net_dev);

	efx_for_each_channel(channel, efx) {
		efx_schedule_channel(channel);
	}
}

#endif
1689
1690/**************************************************************************
1691 *
1692 * Kernel net device interface
1693 *
1694 *************************************************************************/
1695
1696/* Context: process, rtnl_lock() held. */
1697static int efx_net_open(struct net_device *net_dev)
1698{
1699	struct efx_nic *efx = netdev_priv(net_dev);
1700	EFX_ASSERT_RESET_SERIALISED(efx);
1701
1702	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
1703		  raw_smp_processor_id());
1704
1705	if (efx->state == STATE_DISABLED)
1706		return -EIO;
1707	if (efx->phy_mode & PHY_MODE_SPECIAL)
1708		return -EBUSY;
1709	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
1710		return -EIO;
1711
1712	/* Notify the kernel of the link state polled during driver load,
1713	 * before the monitor starts running */
1714	efx_link_status_changed(efx);
1715
1716	efx_start_all(efx);
1717	return 0;
1718}
1719
1720/* Context: process, rtnl_lock() held.
1721 * Note that the kernel will ignore our return code; this method
1722 * should really be a void.
1723 */
1724static int efx_net_stop(struct net_device *net_dev)
1725{
1726	struct efx_nic *efx = netdev_priv(net_dev);
1727
1728	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
1729		  raw_smp_processor_id());
1730
1731	if (efx->state != STATE_DISABLED) {
1732		/* Stop the device and flush all the channels */
1733		efx_stop_all(efx);
1734		efx_fini_channels(efx);
1735		efx_init_channels(efx);
1736	}
1737
1738	return 0;
1739}
1740
1741/* Context: process, dev_base_lock or RTNL held, non-blocking. */
1742static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev,
1743					       struct rtnl_link_stats64 *stats)
1744{
1745	struct efx_nic *efx = netdev_priv(net_dev);
1746	struct efx_mac_stats *mac_stats = &efx->mac_stats;
1747
1748	spin_lock_bh(&efx->stats_lock);
1749
1750	efx->type->update_stats(efx);
1751
1752	stats->rx_packets = mac_stats->rx_packets;
1753	stats->tx_packets = mac_stats->tx_packets;
1754	stats->rx_bytes = mac_stats->rx_bytes;
1755	stats->tx_bytes = mac_stats->tx_bytes;
1756	stats->rx_dropped = efx->n_rx_nodesc_drop_cnt;
1757	stats->multicast = mac_stats->rx_multicast;
1758	stats->collisions = mac_stats->tx_collision;
1759	stats->rx_length_errors = (mac_stats->rx_gtjumbo +
1760				   mac_stats->rx_length_error);
1761	stats->rx_crc_errors = mac_stats->rx_bad;
1762	stats->rx_frame_errors = mac_stats->rx_align_error;
1763	stats->rx_fifo_errors = mac_stats->rx_overflow;
1764	stats->rx_missed_errors = mac_stats->rx_missed;
1765	stats->tx_window_errors = mac_stats->tx_late_collision;
1766
1767	stats->rx_errors = (stats->rx_length_errors +
1768			    stats->rx_crc_errors +
1769			    stats->rx_frame_errors +
1770			    mac_stats->rx_symbol_error);
1771	stats->tx_errors = (stats->tx_window_errors +
1772			    mac_stats->tx_bad);
1773
1774	spin_unlock_bh(&efx->stats_lock);
1775
1776	return stats;
1777}
1778
1779/* Context: netif_tx_lock held, BHs disabled. */
1780static void efx_watchdog(struct net_device *net_dev)
1781{
1782	struct efx_nic *efx = netdev_priv(net_dev);
1783
1784	netif_err(efx, tx_err, efx->net_dev,
1785		  "TX stuck with port_enabled=%d: resetting channels\n",
1786		  efx->port_enabled);
1787
1788	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
1789}
1790
1791
1792/* Context: process, rtnl_lock() held. */
1793static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
1794{
1795	struct efx_nic *efx = netdev_priv(net_dev);
1796
1797	EFX_ASSERT_RESET_SERIALISED(efx);
1798
1799	if (new_mtu > EFX_MAX_MTU)
1800		return -EINVAL;
1801
1802	efx_stop_all(efx);
1803
1804	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
1805
1806	efx_fini_channels(efx);
1807
1808	mutex_lock(&efx->mac_lock);
1809	/* Reconfigure the MAC before enabling the dma queues so that
1810	 * the RX buffers don't overflow */
1811	net_dev->mtu = new_mtu;
1812	efx->type->reconfigure_mac(efx);
1813	mutex_unlock(&efx->mac_lock);
1814
1815	efx_init_channels(efx);
1816
1817	efx_start_all(efx);
1818	return 0;
1819}
1820
1821static int efx_set_mac_address(struct net_device *net_dev, void *data)
1822{
1823	struct efx_nic *efx = netdev_priv(net_dev);
1824	struct sockaddr *addr = data;
1825	char *new_addr = addr->sa_data;
1826
1827	EFX_ASSERT_RESET_SERIALISED(efx);
1828
1829	if (!is_valid_ether_addr(new_addr)) {
1830		netif_err(efx, drv, efx->net_dev,
1831			  "invalid ethernet MAC address requested: %pM\n",
1832			  new_addr);
1833		return -EINVAL;
1834	}
1835
1836	memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len);
1837
1838	/* Reconfigure the MAC */
1839	mutex_lock(&efx->mac_lock);
1840	efx->type->reconfigure_mac(efx);
1841	mutex_unlock(&efx->mac_lock);
1842
1843	return 0;
1844}
1845
1846/* Context: netif_addr_lock held, BHs disabled. */
1847static void efx_set_rx_mode(struct net_device *net_dev)
1848{
1849	struct efx_nic *efx = netdev_priv(net_dev);
1850	struct netdev_hw_addr *ha;
1851	union efx_multicast_hash *mc_hash = &efx->multicast_hash;
1852	u32 crc;
1853	int bit;
1854
1855	efx->promiscuous = !!(net_dev->flags & IFF_PROMISC);
1856
1857	/* Build multicast hash table */
1858	if (efx->promiscuous || (net_dev->flags & IFF_ALLMULTI)) {
1859		memset(mc_hash, 0xff, sizeof(*mc_hash));
1860	} else {
1861		memset(mc_hash, 0x00, sizeof(*mc_hash));
1862		netdev_for_each_mc_addr(ha, net_dev) {
1863			crc = ether_crc_le(ETH_ALEN, ha->addr);
1864			bit = crc & (EFX_MCAST_HASH_ENTRIES - 1);
1865			set_bit_le(bit, mc_hash->byte);
1866		}
1867
1868		/* Broadcast packets go through the multicast hash filter.
1869		 * ether_crc_le() of the broadcast address is 0xbe2612ff
1870		 * so we always add bit 0xff to the mask.
1871		 */
1872		set_bit_le(0xff, mc_hash->byte);
1873	}
1874
1875	if (efx->port_enabled)
1876		queue_work(efx->workqueue, &efx->mac_work);
1877	/* Otherwise efx_start_port() will do this */
1878}
1879
1880static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
1881{
1882	struct efx_nic *efx = netdev_priv(net_dev);
1883
1884	/* If disabling RX n-tuple filtering, clear existing filters */
1885	if (net_dev->features & ~data & NETIF_F_NTUPLE)
1886		efx_filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
1887
1888	return 0;
1889}
1890
1891static const struct net_device_ops efx_netdev_ops = {
1892	.ndo_open		= efx_net_open,
1893	.ndo_stop		= efx_net_stop,
1894	.ndo_get_stats64	= efx_net_stats,
1895	.ndo_tx_timeout		= efx_watchdog,
1896	.ndo_start_xmit		= efx_hard_start_xmit,
1897	.ndo_validate_addr	= eth_validate_addr,
1898	.ndo_do_ioctl		= efx_ioctl,
1899	.ndo_change_mtu		= efx_change_mtu,
1900	.ndo_set_mac_address	= efx_set_mac_address,
1901	.ndo_set_rx_mode	= efx_set_rx_mode,
1902	.ndo_set_features	= efx_set_features,
1903#ifdef CONFIG_NET_POLL_CONTROLLER
1904	.ndo_poll_controller = efx_netpoll,
1905#endif
1906	.ndo_setup_tc		= efx_setup_tc,
1907#ifdef CONFIG_RFS_ACCEL
1908	.ndo_rx_flow_steer	= efx_filter_rfs,
1909#endif
1910};
1911
1912static void efx_update_name(struct efx_nic *efx)
1913{
1914	strcpy(efx->name, efx->net_dev->name);
1915	efx_mtd_rename(efx);
1916	efx_set_channel_names(efx);
1917}
1918
1919static int efx_netdev_event(struct notifier_block *this,
1920			    unsigned long event, void *ptr)
1921{
1922	struct net_device *net_dev = ptr;
1923
1924	if (net_dev->netdev_ops == &efx_netdev_ops &&
1925	    event == NETDEV_CHANGENAME)
1926		efx_update_name(netdev_priv(net_dev));
1927
1928	return NOTIFY_DONE;
1929}
1930
1931static struct notifier_block efx_netdev_notifier = {
1932	.notifier_call = efx_netdev_event,
1933};
1934
1935static ssize_t
1936show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
1937{
1938	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
1939	return sprintf(buf, "%d\n", efx->phy_type);
1940}
1941static DEVICE_ATTR(phy_type, 0644, show_phy_type, NULL);
1942
1943static int efx_register_netdev(struct efx_nic *efx)
1944{
1945	struct net_device *net_dev = efx->net_dev;
1946	struct efx_channel *channel;
1947	int rc;
1948
1949	net_dev->watchdog_timeo = 5 * HZ;
1950	net_dev->irq = efx->pci_dev->irq;
1951	net_dev->netdev_ops = &efx_netdev_ops;
1952	SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);
1953
1954	rtnl_lock();
1955
1956	rc = dev_alloc_name(net_dev, net_dev->name);
1957	if (rc < 0)
1958		goto fail_locked;
1959	efx_update_name(efx);
1960
1961	rc = register_netdevice(net_dev);
1962	if (rc)
1963		goto fail_locked;
1964
1965	efx_for_each_channel(channel, efx) {
1966		struct efx_tx_queue *tx_queue;
1967		efx_for_each_channel_tx_queue(tx_queue, channel)
1968			efx_init_tx_queue_core_txq(tx_queue);
1969	}
1970
1971	/* Always start with carrier off; PHY events will detect the link */
1972	netif_carrier_off(net_dev);
1973
1974	rtnl_unlock();
1975
1976	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
1977	if (rc) {
1978		netif_err(efx, drv, efx->net_dev,
1979			  "failed to init net dev attributes\n");
1980		goto fail_registered;
1981	}
1982
1983	return 0;
1984
1985fail_locked:
1986	rtnl_unlock();
1987	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
1988	return rc;
1989
1990fail_registered:
1991	unregister_netdev(net_dev);
1992	return rc;
1993}
1994
1995static void efx_unregister_netdev(struct efx_nic *efx)
1996{
1997	struct efx_channel *channel;
1998	struct efx_tx_queue *tx_queue;
1999
2000	if (!efx->net_dev)
2001		return;
2002
2003	BUG_ON(netdev_priv(efx->net_dev) != efx);
2004
2005	/* Free up any skbs still remaining. This has to happen before
2006	 * we try to unregister the netdev as running their destructors
2007	 * may be needed to get the device ref. count to 0. */
2008	efx_for_each_channel(channel, efx) {
2009		efx_for_each_channel_tx_queue(tx_queue, channel)
2010			efx_release_tx_buffers(tx_queue);
2011	}
2012
2013	strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
2014	device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
2015	unregister_netdev(efx->net_dev);
2016}
2017
2018/**************************************************************************
2019 *
2020 * Device reset and suspend
2021 *
2022 **************************************************************************/
2023
2024/* Tears down the entire software state and most of the hardware state
2025 * before reset.  */
2026void efx_reset_down(struct efx_nic *efx, enum reset_type method)
2027{
2028	EFX_ASSERT_RESET_SERIALISED(efx);
2029
2030	efx_stop_all(efx);
2031	mutex_lock(&efx->mac_lock);
2032
2033	efx_fini_channels(efx);
2034	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE)
2035		efx->phy_op->fini(efx);
2036	efx->type->fini(efx);
2037}
2038
2039/* This function will always ensure that the locks acquired in
2040 * efx_reset_down() are released. A failure return code indicates
2041 * that we were unable to reinitialise the hardware, and the
2042 * driver should be disabled. If ok is false, then the rx and tx
2043 * engines are not restarted, pending a RESET_DISABLE. */
2044int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
2045{
2046	int rc;
2047
2048	EFX_ASSERT_RESET_SERIALISED(efx);
2049
2050	rc = efx->type->init(efx);
2051	if (rc) {
2052		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
2053		goto fail;
2054	}
2055
2056	if (!ok)
2057		goto fail;
2058
2059	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) {
2060		rc = efx->phy_op->init(efx);
2061		if (rc)
2062			goto fail;
2063		if (efx->phy_op->reconfigure(efx))
2064			netif_err(efx, drv, efx->net_dev,
2065				  "could not restore PHY settings\n");
2066	}
2067
2068	efx->type->reconfigure_mac(efx);
2069
2070	efx_init_channels(efx);
2071	efx_restore_filters(efx);
2072
2073	mutex_unlock(&efx->mac_lock);
2074
2075	efx_start_all(efx);
2076
2077	return 0;
2078
2079fail:
2080	efx->port_initialized = false;
2081
2082	mutex_unlock(&efx->mac_lock);
2083
2084	return rc;
2085}
2086
2087/* Reset the NIC using the specified method.  Note that the reset may
2088 * fail, in which case the card will be left in an unusable state.
2089 *
2090 * Caller must hold the rtnl_lock.
2091 */
2092int efx_reset(struct efx_nic *efx, enum reset_type method)
2093{
2094	int rc, rc2;
2095	bool disabled;
2096
2097	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
2098		   RESET_TYPE(method));
2099
2100	netif_device_detach(efx->net_dev);
2101	efx_reset_down(efx, method);
2102
2103	rc = efx->type->reset(efx, method);
2104	if (rc) {
2105		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
2106		goto out;
2107	}
2108
2109	/* Clear flags for the scopes we covered.  We assume the NIC and
2110	 * driver are now quiescent so that there is no race here.
2111	 */
2112	efx->reset_pending &= -(1 << (method + 1));
2113
2114	/* Reinitialise bus-mastering, which may have been turned off before
2115	 * the reset was scheduled. This is still appropriate, even in the
2116	 * RESET_TYPE_DISABLE since this driver generally assumes the hardware
2117	 * can respond to requests. */
2118	pci_set_master(efx->pci_dev);
2119
2120out:
2121	/* Leave device stopped if necessary */
2122	disabled = rc || method == RESET_TYPE_DISABLE;
2123	rc2 = efx_reset_up(efx, method, !disabled);
2124	if (rc2) {
2125		disabled = true;
2126		if (!rc)
2127			rc = rc2;
2128	}
2129
2130	if (disabled) {
2131		dev_close(efx->net_dev);
2132		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
2133		efx->state = STATE_DISABLED;
2134	} else {
2135		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
2136		netif_device_attach(efx->net_dev);
2137	}
2138	return rc;
2139}
2140
2141/* The worker thread exists so that code that cannot sleep can
2142 * schedule a reset for later.
2143 */
2144static void efx_reset_work(struct work_struct *data)
2145{
2146	struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
2147	unsigned long pending = ACCESS_ONCE(efx->reset_pending);
2148
2149	if (!pending)
2150		return;
2151
2152	/* If we're not RUNNING then don't reset. Leave the reset_pending
2153	 * flags set so that efx_pci_probe_main will be retried */
2154	if (efx->state != STATE_RUNNING) {
2155		netif_info(efx, drv, efx->net_dev,
2156			   "scheduled reset quenched. NIC not RUNNING\n");
2157		return;
2158	}
2159
2160	rtnl_lock();
2161	(void)efx_reset(efx, fls(pending) - 1);
2162	rtnl_unlock();
2163}
2164
2165void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
2166{
2167	enum reset_type method;
2168
2169	switch (type) {
2170	case RESET_TYPE_INVISIBLE:
2171	case RESET_TYPE_ALL:
2172	case RESET_TYPE_WORLD:
2173	case RESET_TYPE_DISABLE:
2174		method = type;
2175		netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
2176			  RESET_TYPE(method));
2177		break;
2178	default:
2179		method = efx->type->map_reset_reason(type);
2180		netif_dbg(efx, drv, efx->net_dev,
2181			  "scheduling %s reset for %s\n",
2182			  RESET_TYPE(method), RESET_TYPE(type));
2183		break;
2184	}
2185
2186	set_bit(method, &efx->reset_pending);
2187
2188	/* efx_process_channel() will no longer read events once a
2189	 * reset is scheduled. So switch back to poll'd MCDI completions. */
2190	efx_mcdi_mode_poll(efx);
2191
2192	queue_work(reset_workqueue, &efx->reset_work);
2193}
2194
2195/**************************************************************************
2196 *
2197 * List of NICs we support
2198 *
2199 **************************************************************************/
2200
2201/* PCI device ID table */
2202static DEFINE_PCI_DEVICE_TABLE(efx_pci_table) = {
2203	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
2204		    PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0),
2205	 .driver_data = (unsigned long) &falcon_a1_nic_type},
2206	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
2207		    PCI_DEVICE_ID_SOLARFLARE_SFC4000B),
2208	 .driver_data = (unsigned long) &falcon_b0_nic_type},
2209	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),	/* SFC9020 */
2210	 .driver_data = (unsigned long) &siena_a0_nic_type},
2211	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),	/* SFL9021 */
2212	 .driver_data = (unsigned long) &siena_a0_nic_type},
2213	{0}			/* end of list */
2214};
2215
2216/**************************************************************************
2217 *
2218 * Dummy PHY/MAC operations
2219 *
2220 * Can be used for some unimplemented operations
2221 * Needed so all function pointers are valid and do not have to be tested
2222 * before use
2223 *
2224 **************************************************************************/
/* No-op placeholder for int-returning port operations; always succeeds */
int efx_port_dummy_op_int(struct efx_nic *efx)
{
	const int success = 0;

	return success;
}
/* No-op placeholder for port operations that return nothing */
void efx_port_dummy_op_void(struct efx_nic *efx)
{
}
2230
/* No-op placeholder for the PHY poll operation; always returns false */
static bool efx_port_dummy_op_poll(struct efx_nic *efx)
{
	const bool result = false;

	return result;
}
2235
2236static const struct efx_phy_operations efx_dummy_phy_operations = {
2237	.init		 = efx_port_dummy_op_int,
2238	.reconfigure	 = efx_port_dummy_op_int,
2239	.poll		 = efx_port_dummy_op_poll,
2240	.fini		 = efx_port_dummy_op_void,
2241};
2242
2243/**************************************************************************
2244 *
2245 * Data housekeeping
2246 *
2247 **************************************************************************/
2248
2249/* This zeroes out and then fills in the invariants in a struct
2250 * efx_nic (including all sub-structures).
2251 */
2252static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
2253			   struct pci_dev *pci_dev, struct net_device *net_dev)
2254{
2255	int i;
2256
2257	/* Initialise common structures */
2258	memset(efx, 0, sizeof(*efx));
2259	spin_lock_init(&efx->biu_lock);
2260#ifdef CONFIG_SFC_MTD
2261	INIT_LIST_HEAD(&efx->mtd_list);
2262#endif
2263	INIT_WORK(&efx->reset_work, efx_reset_work);
2264	INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
2265	efx->pci_dev = pci_dev;
2266	efx->msg_enable = debug;
2267	efx->state = STATE_INIT;
2268	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
2269
2270	efx->net_dev = net_dev;
2271	spin_lock_init(&efx->stats_lock);
2272	mutex_init(&efx->mac_lock);
2273	efx->phy_op = &efx_dummy_phy_operations;
2274	efx->mdio.dev = net_dev;
2275	INIT_WORK(&efx->mac_work, efx_mac_work);
2276
2277	for (i = 0; i < EFX_MAX_CHANNELS; i++) {
2278		efx->channel[i] = efx_alloc_channel(efx, i, NULL);
2279		if (!efx->channel[i])
2280			goto fail;
2281	}
2282
2283	efx->type = type;
2284
2285	EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS);
2286
2287	/* Higher numbered interrupt modes are less capable! */
2288	efx->interrupt_mode = max(efx->type->max_interrupt_mode,
2289				  interrupt_mode);
2290
2291	/* Would be good to use the net_dev name, but we're too early */
2292	snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
2293		 pci_name(pci_dev));
2294	efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
2295	if (!efx->workqueue)
2296		goto fail;
2297
2298	return 0;
2299
2300fail:
2301	efx_fini_struct(efx);
2302	return -ENOMEM;
2303}
2304
2305static void efx_fini_struct(struct efx_nic *efx)
2306{
2307	int i;
2308
2309	for (i = 0; i < EFX_MAX_CHANNELS; i++)
2310		kfree(efx->channel[i]);
2311
2312	if (efx->workqueue) {
2313		destroy_workqueue(efx->workqueue);
2314		efx->workqueue = NULL;
2315	}
2316}
2317
2318/**************************************************************************
2319 *
2320 * PCI interface
2321 *
2322 **************************************************************************/
2323
2324/* Main body of final NIC shutdown code
2325 * This is called only at module unload (or hotplug removal).
2326 */
2327static void efx_pci_remove_main(struct efx_nic *efx)
2328{
2329#ifdef CONFIG_RFS_ACCEL
2330	free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
2331	efx->net_dev->rx_cpu_rmap = NULL;
2332#endif
2333	efx_nic_fini_interrupt(efx);
2334	efx_fini_channels(efx);
2335	efx_fini_port(efx);
2336	efx->type->fini(efx);
2337	efx_fini_napi(efx);
2338	efx_remove_all(efx);
2339}
2340
2341/* Final NIC shutdown
2342 * This is called only at module unload (or hotplug removal).
2343 */
2344static void efx_pci_remove(struct pci_dev *pci_dev)
2345{
2346	struct efx_nic *efx;
2347
2348	efx = pci_get_drvdata(pci_dev);
2349	if (!efx)
2350		return;
2351
2352	/* Mark the NIC as fini, then stop the interface */
2353	rtnl_lock();
2354	efx->state = STATE_FINI;
2355	dev_close(efx->net_dev);
2356
2357	/* Allow any queued efx_resets() to complete */
2358	rtnl_unlock();
2359
2360	efx_unregister_netdev(efx);
2361
2362	efx_mtd_remove(efx);
2363
2364	/* Wait for any scheduled resets to complete. No more will be
2365	 * scheduled from this point because efx_stop_all() has been
2366	 * called, we are no longer registered with driverlink, and
2367	 * the net_device's have been removed. */
2368	cancel_work_sync(&efx->reset_work);
2369
2370	efx_pci_remove_main(efx);
2371
2372	efx_fini_io(efx);
2373	netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
2374
2375	pci_set_drvdata(pci_dev, NULL);
2376	efx_fini_struct(efx);
2377	free_netdev(efx->net_dev);
2378};
2379
2380/* Main body of NIC initialisation
2381 * This is called at module load (or hotplug insertion, theoretically).
2382 */
2383static int efx_pci_probe_main(struct efx_nic *efx)
2384{
2385	int rc;
2386
2387	/* Do start-of-day initialisation */
2388	rc = efx_probe_all(efx);
2389	if (rc)
2390		goto fail1;
2391
2392	efx_init_napi(efx);
2393
2394	rc = efx->type->init(efx);
2395	if (rc) {
2396		netif_err(efx, probe, efx->net_dev,
2397			  "failed to initialise NIC\n");
2398		goto fail3;
2399	}
2400
2401	rc = efx_init_port(efx);
2402	if (rc) {
2403		netif_err(efx, probe, efx->net_dev,
2404			  "failed to initialise port\n");
2405		goto fail4;
2406	}
2407
2408	efx_init_channels(efx);
2409
2410	rc = efx_nic_init_interrupt(efx);
2411	if (rc)
2412		goto fail5;
2413
2414	return 0;
2415
2416 fail5:
2417	efx_fini_channels(efx);
2418	efx_fini_port(efx);
2419 fail4:
2420	efx->type->fini(efx);
2421 fail3:
2422	efx_fini_napi(efx);
2423	efx_remove_all(efx);
2424 fail1:
2425	return rc;
2426}
2427
2428/* NIC initialisation
2429 *
2430 * This is called at module load (or hotplug insertion,
2431 * theoretically).  It sets up PCI mappings, resets the NIC,
2432 * sets up and registers the network devices with the kernel and hooks
2433 * the interrupt service routine.  It does not prepare the device for
2434 * transmission; this is left to the first time one of the network
2435 * interfaces is brought up (i.e. efx_net_open).
2436 */
2437static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
2438				   const struct pci_device_id *entry)
2439{
2440	const struct efx_nic_type *type = (const struct efx_nic_type *) entry->driver_data;
2441	struct net_device *net_dev;
2442	struct efx_nic *efx;
2443	int rc;
2444
2445	/* Allocate and initialise a struct net_device and struct efx_nic */
2446	net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
2447				     EFX_MAX_RX_QUEUES);
2448	if (!net_dev)
2449		return -ENOMEM;
2450	net_dev->features |= (type->offload_features | NETIF_F_SG |
2451			      NETIF_F_HIGHDMA | NETIF_F_TSO |
2452			      NETIF_F_RXCSUM);
2453	if (type->offload_features & NETIF_F_V6_CSUM)
2454		net_dev->features |= NETIF_F_TSO6;
2455	/* Mask for features that also apply to VLAN devices */
2456	net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG |
2457				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
2458				   NETIF_F_RXCSUM);
2459	/* All offloads can be toggled */
2460	net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA;
2461	efx = netdev_priv(net_dev);
2462	pci_set_drvdata(pci_dev, efx);
2463	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
2464	rc = efx_init_struct(efx, type, pci_dev, net_dev);
2465	if (rc)
2466		goto fail1;
2467
2468	netif_info(efx, probe, efx->net_dev,
2469		   "Solarflare NIC detected\n");
2470
2471	/* Set up basic I/O (BAR mappings etc) */
2472	rc = efx_init_io(efx);
2473	if (rc)
2474		goto fail2;
2475
2476	rc = efx_pci_probe_main(efx);
2477
2478	/* Serialise against efx_reset(). No more resets will be
2479	 * scheduled since efx_stop_all() has been called, and we have
2480	 * not and never have been registered.
2481	 */
2482	cancel_work_sync(&efx->reset_work);
2483
2484	if (rc)
2485		goto fail3;
2486
2487	/* If there was a scheduled reset during probe, the NIC is
2488	 * probably hosed anyway.
2489	 */
2490	if (efx->reset_pending) {
2491		rc = -EIO;
2492		goto fail4;
2493	}
2494
2495	/* Switch to the running state before we expose the device to the OS,
2496	 * so that dev_open()|efx_start_all() will actually start the device */
2497	efx->state = STATE_RUNNING;
2498
2499	rc = efx_register_netdev(efx);
2500	if (rc)
2501		goto fail4;
2502
2503	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
2504
2505	rtnl_lock();
2506	efx_mtd_probe(efx); /* allowed to fail */
2507	rtnl_unlock();
2508	return 0;
2509
2510 fail4:
2511	efx_pci_remove_main(efx);
2512 fail3:
2513	efx_fini_io(efx);
2514 fail2:
2515	efx_fini_struct(efx);
2516 fail1:
2517	WARN_ON(rc > 0);
2518	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
2519	free_netdev(net_dev);
2520	return rc;
2521}
2522
2523static int efx_pm_freeze(struct device *dev)
2524{
2525	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
2526
2527	efx->state = STATE_FINI;
2528
2529	netif_device_detach(efx->net_dev);
2530
2531	efx_stop_all(efx);
2532	efx_fini_channels(efx);
2533
2534	return 0;
2535}
2536
2537static int efx_pm_thaw(struct device *dev)
2538{
2539	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
2540
2541	efx->state = STATE_INIT;
2542
2543	efx_init_channels(efx);
2544
2545	mutex_lock(&efx->mac_lock);
2546	efx->phy_op->reconfigure(efx);
2547	mutex_unlock(&efx->mac_lock);
2548
2549	efx_start_all(efx);
2550
2551	netif_device_attach(efx->net_dev);
2552
2553	efx->state = STATE_RUNNING;
2554
2555	efx->type->resume_wol(efx);
2556
2557	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
2558	queue_work(reset_workqueue, &efx->reset_work);
2559
2560	return 0;
2561}
2562
2563static int efx_pm_poweroff(struct device *dev)
2564{
2565	struct pci_dev *pci_dev = to_pci_dev(dev);
2566	struct efx_nic *efx = pci_get_drvdata(pci_dev);
2567
2568	efx->type->fini(efx);
2569
2570	efx->reset_pending = 0;
2571
2572	pci_save_state(pci_dev);
2573	return pci_set_power_state(pci_dev, PCI_D3hot);
2574}
2575
2576/* Used for both resume and restore */
2577static int efx_pm_resume(struct device *dev)
2578{
2579	struct pci_dev *pci_dev = to_pci_dev(dev);
2580	struct efx_nic *efx = pci_get_drvdata(pci_dev);
2581	int rc;
2582
2583	rc = pci_set_power_state(pci_dev, PCI_D0);
2584	if (rc)
2585		return rc;
2586	pci_restore_state(pci_dev);
2587	rc = pci_enable_device(pci_dev);
2588	if (rc)
2589		return rc;
2590	pci_set_master(efx->pci_dev);
2591	rc = efx->type->reset(efx, RESET_TYPE_ALL);
2592	if (rc)
2593		return rc;
2594	rc = efx->type->init(efx);
2595	if (rc)
2596		return rc;
2597	efx_pm_thaw(dev);
2598	return 0;
2599}
2600
/* PM .suspend callback: freeze, then power off.  If powering off fails
 * the device is resumed again and the power-off error is returned.
 */
static int efx_pm_suspend(struct device *dev)
{
	int rc;

	efx_pm_freeze(dev);

	rc = efx_pm_poweroff(dev);
	if (rc == 0)
		return 0;

	/* Could not power down: bring the device back to life */
	efx_pm_resume(dev);
	return rc;
}
2611
2612static const struct dev_pm_ops efx_pm_ops = {
2613	.suspend	= efx_pm_suspend,
2614	.resume		= efx_pm_resume,
2615	.freeze		= efx_pm_freeze,
2616	.thaw		= efx_pm_thaw,
2617	.poweroff	= efx_pm_poweroff,
2618	.restore	= efx_pm_resume,
2619};
2620
2621static struct pci_driver efx_pci_driver = {
2622	.name		= KBUILD_MODNAME,
2623	.id_table	= efx_pci_table,
2624	.probe		= efx_pci_probe,
2625	.remove		= efx_pci_remove,
2626	.driver.pm	= &efx_pm_ops,
2627};
2628
2629/**************************************************************************
2630 *
2631 * Kernel module interface
2632 *
2633 *************************************************************************/
2634
2635module_param(interrupt_mode, uint, 0444);
2636MODULE_PARM_DESC(interrupt_mode,
2637		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
2638
2639static int __init efx_init_module(void)
2640{
2641	int rc;
2642
2643	printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");
2644
2645	rc = register_netdevice_notifier(&efx_netdev_notifier);
2646	if (rc)
2647		goto err_notifier;
2648
2649	reset_workqueue = create_singlethread_workqueue("sfc_reset");
2650	if (!reset_workqueue) {
2651		rc = -ENOMEM;
2652		goto err_reset;
2653	}
2654
2655	rc = pci_register_driver(&efx_pci_driver);
2656	if (rc < 0)
2657		goto err_pci;
2658
2659	return 0;
2660
2661 err_pci:
2662	destroy_workqueue(reset_workqueue);
2663 err_reset:
2664	unregister_netdevice_notifier(&efx_netdev_notifier);
2665 err_notifier:
2666	return rc;
2667}
2668
2669static void __exit efx_exit_module(void)
2670{
2671	printk(KERN_INFO "Solarflare NET driver unloading\n");
2672
2673	pci_unregister_driver(&efx_pci_driver);
2674	destroy_workqueue(reset_workqueue);
2675	unregister_netdevice_notifier(&efx_netdev_notifier);
2676
2677}
2678
2679module_init(efx_init_module);
2680module_exit(efx_exit_module);
2681
2682MODULE_AUTHOR("Solarflare Communications and "
2683	      "Michael Brown <mbrown@fensystems.co.uk>");
2684MODULE_DESCRIPTION("Solarflare Communications network driver");
2685MODULE_LICENSE("GPL");
2686MODULE_DEVICE_TABLE(pci, efx_pci_table);
2687