cxgb4vf_main.c revision 01789349ee52e4a3faf376f1485303d9723c4f1f
1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/module.h>
37#include <linux/moduleparam.h>
38#include <linux/init.h>
39#include <linux/pci.h>
40#include <linux/dma-mapping.h>
41#include <linux/netdevice.h>
42#include <linux/etherdevice.h>
43#include <linux/debugfs.h>
44#include <linux/ethtool.h>
45
46#include "t4vf_common.h"
47#include "t4vf_defs.h"
48
49#include "../cxgb4/t4_regs.h"
50#include "../cxgb4/t4_msg.h"
51
/*
 * Identification strings for this driver: its version number and a short
 * human-readable description.
 */
#define DRV_VERSION	"1.0.0"
#define DRV_DESC	"Chelsio T4 Virtual Function (VF) Network Driver"
57
58/*
59 * Module Parameters.
60 * ==================
61 */
62
63/*
64 * Default ethtool "message level" for adapters.
65 */
66#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
67			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
68			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
69
70static int dflt_msg_enable = DFLT_MSG_ENABLE;
71
72module_param(dflt_msg_enable, int, 0644);
73MODULE_PARM_DESC(dflt_msg_enable,
74		 "default adapter ethtool message level bitmap");
75
76/*
77 * The driver uses the best interrupt scheme available on a platform in the
78 * order MSI-X then MSI.  This parameter determines which of these schemes the
79 * driver may consider as follows:
80 *
81 *     msi = 2: choose from among MSI-X and MSI
82 *     msi = 1: only consider MSI interrupts
83 *
84 * Note that unlike the Physical Function driver, this Virtual Function driver
85 * does _not_ support legacy INTx interrupts (this limitation is mandated by
86 * the PCI-E SR-IOV standard).
87 */
88#define MSI_MSIX	2
89#define MSI_MSI		1
90#define MSI_DEFAULT	MSI_MSIX
91
92static int msi = MSI_DEFAULT;
93
94module_param(msi, int, 0644);
95MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
96
97/*
98 * Fundamental constants.
99 * ======================
100 */
101
102enum {
103	MAX_TXQ_ENTRIES		= 16384,
104	MAX_RSPQ_ENTRIES	= 16384,
105	MAX_RX_BUFFERS		= 16384,
106
107	MIN_TXQ_ENTRIES		= 32,
108	MIN_RSPQ_ENTRIES	= 128,
109	MIN_FL_ENTRIES		= 16,
110
111	/*
112	 * For purposes of manipulating the Free List size we need to
113	 * recognize that Free Lists are actually Egress Queues (the host
114	 * produces free buffers which the hardware consumes), Egress Queues
115	 * indices are all in units of Egress Context Units bytes, and free
116	 * list entries are 64-bit PCI DMA addresses.  And since the state of
117	 * the Producer Index == the Consumer Index implies an EMPTY list, we
118	 * always have at least one Egress Unit's worth of Free List entries
119	 * unused.  See sge.c for more details ...
120	 */
121	EQ_UNIT = SGE_EQ_IDXSIZE,
122	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
123	MIN_FL_RESID = FL_PER_EQ_UNIT,
124};
125
126/*
127 * Global driver state.
128 * ====================
129 */
130
/* debugfs directory entry shared by the whole driver. */
static struct dentry *cxgb4vf_debugfs_root;
132
133/*
134 * OS "Callback" functions.
135 * ========================
136 */
137
138/*
139 * The link status has changed on the indicated "port" (Virtual Interface).
140 */
141void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
142{
143	struct net_device *dev = adapter->port[pidx];
144
145	/*
146	 * If the port is disabled or the current recorded "link up"
147	 * status matches the new status, just return.
148	 */
149	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
150		return;
151
152	/*
153	 * Tell the OS that the link status has changed and print a short
154	 * informative message on the console about the event.
155	 */
156	if (link_ok) {
157		const char *s;
158		const char *fc;
159		const struct port_info *pi = netdev_priv(dev);
160
161		netif_carrier_on(dev);
162
163		switch (pi->link_cfg.speed) {
164		case SPEED_10000:
165			s = "10Gbps";
166			break;
167
168		case SPEED_1000:
169			s = "1000Mbps";
170			break;
171
172		case SPEED_100:
173			s = "100Mbps";
174			break;
175
176		default:
177			s = "unknown";
178			break;
179		}
180
181		switch (pi->link_cfg.fc) {
182		case PAUSE_RX:
183			fc = "RX";
184			break;
185
186		case PAUSE_TX:
187			fc = "TX";
188			break;
189
190		case PAUSE_RX|PAUSE_TX:
191			fc = "RX/TX";
192			break;
193
194		default:
195			fc = "no";
196			break;
197		}
198
199		printk(KERN_INFO "%s: link up, %s, full-duplex, %s PAUSE\n",
200		       dev->name, s, fc);
201	} else {
202		netif_carrier_off(dev);
203		printk(KERN_INFO "%s: link down\n", dev->name);
204	}
205}
206
207/*
208 * Net device operations.
209 * ======================
210 */
211
212
213
214
215/*
216 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
217 * Interface).
218 */
219static int link_start(struct net_device *dev)
220{
221	int ret;
222	struct port_info *pi = netdev_priv(dev);
223
224	/*
225	 * We do not set address filters and promiscuity here, the stack does
226	 * that step explicitly. Enable vlan accel.
227	 */
228	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
229			      true);
230	if (ret == 0) {
231		ret = t4vf_change_mac(pi->adapter, pi->viid,
232				      pi->xact_addr_filt, dev->dev_addr, true);
233		if (ret >= 0) {
234			pi->xact_addr_filt = ret;
235			ret = 0;
236		}
237	}
238
239	/*
240	 * We don't need to actually "start the link" itself since the
241	 * firmware will do that for us when the first Virtual Interface
242	 * is enabled on a port.
243	 */
244	if (ret == 0)
245		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
246	return ret;
247}
248
249/*
250 * Name the MSI-X interrupts.
251 */
252static void name_msix_vecs(struct adapter *adapter)
253{
254	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
255	int pidx;
256
257	/*
258	 * Firmware events.
259	 */
260	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
261		 "%s-FWeventq", adapter->name);
262	adapter->msix_info[MSIX_FW].desc[namelen] = 0;
263
264	/*
265	 * Ethernet queues.
266	 */
267	for_each_port(adapter, pidx) {
268		struct net_device *dev = adapter->port[pidx];
269		const struct port_info *pi = netdev_priv(dev);
270		int qs, msi;
271
272		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
273			snprintf(adapter->msix_info[msi].desc, namelen,
274				 "%s-%d", dev->name, qs);
275			adapter->msix_info[msi].desc[namelen] = 0;
276		}
277	}
278}
279
280/*
281 * Request all of our MSI-X resources.
282 */
283static int request_msix_queue_irqs(struct adapter *adapter)
284{
285	struct sge *s = &adapter->sge;
286	int rxq, msi, err;
287
288	/*
289	 * Firmware events.
290	 */
291	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
292			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
293	if (err)
294		return err;
295
296	/*
297	 * Ethernet queues.
298	 */
299	msi = MSIX_IQFLINT;
300	for_each_ethrxq(s, rxq) {
301		err = request_irq(adapter->msix_info[msi].vec,
302				  t4vf_sge_intr_msix, 0,
303				  adapter->msix_info[msi].desc,
304				  &s->ethrxq[rxq].rspq);
305		if (err)
306			goto err_free_irqs;
307		msi++;
308	}
309	return 0;
310
311err_free_irqs:
312	while (--rxq >= 0)
313		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
314	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
315	return err;
316}
317
318/*
319 * Free our MSI-X resources.
320 */
321static void free_msix_queue_irqs(struct adapter *adapter)
322{
323	struct sge *s = &adapter->sge;
324	int rxq, msi;
325
326	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
327	msi = MSIX_IQFLINT;
328	for_each_ethrxq(s, rxq)
329		free_irq(adapter->msix_info[msi++].vec,
330			 &s->ethrxq[rxq].rspq);
331}
332
333/*
334 * Turn on NAPI and start up interrupts on a response queue.
335 */
336static void qenable(struct sge_rspq *rspq)
337{
338	napi_enable(&rspq->napi);
339
340	/*
341	 * 0-increment the Going To Sleep register to start the timer and
342	 * enable interrupts.
343	 */
344	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
345		     CIDXINC(0) |
346		     SEINTARM(rspq->intr_params) |
347		     INGRESSQID(rspq->cntxt_id));
348}
349
350/*
351 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
352 */
353static void enable_rx(struct adapter *adapter)
354{
355	int rxq;
356	struct sge *s = &adapter->sge;
357
358	for_each_ethrxq(s, rxq)
359		qenable(&s->ethrxq[rxq].rspq);
360	qenable(&s->fw_evtq);
361
362	/*
363	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
364	 * its Going To Sleep register here to get it started.
365	 */
366	if (adapter->flags & USING_MSI)
367		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
368			     CIDXINC(0) |
369			     SEINTARM(s->intrq.intr_params) |
370			     INGRESSQID(s->intrq.cntxt_id));
371
372}
373
374/*
375 * Wait until all NAPI handlers are descheduled.
376 */
377static void quiesce_rx(struct adapter *adapter)
378{
379	struct sge *s = &adapter->sge;
380	int rxq;
381
382	for_each_ethrxq(s, rxq)
383		napi_disable(&s->ethrxq[rxq].rspq.napi);
384	napi_disable(&s->fw_evtq.napi);
385}
386
387/*
388 * Response queue handler for the firmware event queue.
389 */
390static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
391			  const struct pkt_gl *gl)
392{
393	/*
394	 * Extract response opcode and get pointer to CPL message body.
395	 */
396	struct adapter *adapter = rspq->adapter;
397	u8 opcode = ((const struct rss_header *)rsp)->opcode;
398	void *cpl = (void *)(rsp + 1);
399
400	switch (opcode) {
401	case CPL_FW6_MSG: {
402		/*
403		 * We've received an asynchronous message from the firmware.
404		 */
405		const struct cpl_fw6_msg *fw_msg = cpl;
406		if (fw_msg->type == FW6_TYPE_CMD_RPL)
407			t4vf_handle_fw_rpl(adapter, fw_msg->data);
408		break;
409	}
410
411	case CPL_SGE_EGR_UPDATE: {
412		/*
413		 * We've received an Egress Queue Status Update message.  We
414		 * get these, if the SGE is configured to send these when the
415		 * firmware passes certain points in processing our TX
416		 * Ethernet Queue or if we make an explicit request for one.
417		 * We use these updates to determine when we may need to
418		 * restart a TX Ethernet Queue which was stopped for lack of
419		 * free TX Queue Descriptors ...
420		 */
421		const struct cpl_sge_egr_update *p = (void *)cpl;
422		unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid));
423		struct sge *s = &adapter->sge;
424		struct sge_txq *tq;
425		struct sge_eth_txq *txq;
426		unsigned int eq_idx;
427
428		/*
429		 * Perform sanity checking on the Queue ID to make sure it
430		 * really refers to one of our TX Ethernet Egress Queues which
431		 * is active and matches the queue's ID.  None of these error
432		 * conditions should ever happen so we may want to either make
433		 * them fatal and/or conditionalized under DEBUG.
434		 */
435		eq_idx = EQ_IDX(s, qid);
436		if (unlikely(eq_idx >= MAX_EGRQ)) {
437			dev_err(adapter->pdev_dev,
438				"Egress Update QID %d out of range\n", qid);
439			break;
440		}
441		tq = s->egr_map[eq_idx];
442		if (unlikely(tq == NULL)) {
443			dev_err(adapter->pdev_dev,
444				"Egress Update QID %d TXQ=NULL\n", qid);
445			break;
446		}
447		txq = container_of(tq, struct sge_eth_txq, q);
448		if (unlikely(tq->abs_id != qid)) {
449			dev_err(adapter->pdev_dev,
450				"Egress Update QID %d refers to TXQ %d\n",
451				qid, tq->abs_id);
452			break;
453		}
454
455		/*
456		 * Restart a stopped TX Queue which has less than half of its
457		 * TX ring in use ...
458		 */
459		txq->q.restarts++;
460		netif_tx_wake_queue(txq->txq);
461		break;
462	}
463
464	default:
465		dev_err(adapter->pdev_dev,
466			"unexpected CPL %#x on FW event queue\n", opcode);
467	}
468
469	return 0;
470}
471
472/*
473 * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
474 * to use and initializes them.  We support multiple "Queue Sets" per port if
475 * we have MSI-X, otherwise just one queue set per port.
476 */
477static int setup_sge_queues(struct adapter *adapter)
478{
479	struct sge *s = &adapter->sge;
480	int err, pidx, msix;
481
482	/*
483	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
484	 * state.
485	 */
486	bitmap_zero(s->starving_fl, MAX_EGRQ);
487
488	/*
489	 * If we're using MSI interrupt mode we need to set up a "forwarded
490	 * interrupt" queue which we'll set up with our MSI vector.  The rest
491	 * of the ingress queues will be set up to forward their interrupts to
492	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
493	 * the intrq's queue ID as the interrupt forwarding queue for the
494	 * subsequent calls ...
495	 */
496	if (adapter->flags & USING_MSI) {
497		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
498					 adapter->port[0], 0, NULL, NULL);
499		if (err)
500			goto err_free_queues;
501	}
502
503	/*
504	 * Allocate our ingress queue for asynchronous firmware messages.
505	 */
506	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
507				 MSIX_FW, NULL, fwevtq_handler);
508	if (err)
509		goto err_free_queues;
510
511	/*
512	 * Allocate each "port"'s initial Queue Sets.  These can be changed
513	 * later on ... up to the point where any interface on the adapter is
514	 * brought up at which point lots of things get nailed down
515	 * permanently ...
516	 */
517	msix = MSIX_IQFLINT;
518	for_each_port(adapter, pidx) {
519		struct net_device *dev = adapter->port[pidx];
520		struct port_info *pi = netdev_priv(dev);
521		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
522		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
523		int qs;
524
525		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
526			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
527						 dev, msix++,
528						 &rxq->fl, t4vf_ethrx_handler);
529			if (err)
530				goto err_free_queues;
531
532			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
533					     netdev_get_tx_queue(dev, qs),
534					     s->fw_evtq.cntxt_id);
535			if (err)
536				goto err_free_queues;
537
538			rxq->rspq.idx = qs;
539			memset(&rxq->stats, 0, sizeof(rxq->stats));
540		}
541	}
542
543	/*
544	 * Create the reverse mappings for the queues.
545	 */
546	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
547	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
548	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
549	for_each_port(adapter, pidx) {
550		struct net_device *dev = adapter->port[pidx];
551		struct port_info *pi = netdev_priv(dev);
552		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
553		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
554		int qs;
555
556		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
557			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
558			EQ_MAP(s, txq->q.abs_id) = &txq->q;
559
560			/*
561			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
562			 * for Free Lists but since all of the Egress Queues
563			 * (including Free Lists) have Relative Queue IDs
564			 * which are computed as Absolute - Base Queue ID, we
565			 * can synthesize the Absolute Queue IDs for the Free
566			 * Lists.  This is useful for debugging purposes when
567			 * we want to dump Queue Contexts via the PF Driver.
568			 */
569			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
570			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
571		}
572	}
573	return 0;
574
575err_free_queues:
576	t4vf_free_sge_resources(adapter);
577	return err;
578}
579
580/*
581 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
582 * queues.  We configure the RSS CPU lookup table to distribute to the number
583 * of HW receive queues, and the response queue lookup table to narrow that
584 * down to the response queues actually configured for each "port" (Virtual
585 * Interface).  We always configure the RSS mapping for all ports since the
586 * mapping table has plenty of entries.
587 */
588static int setup_rss(struct adapter *adapter)
589{
590	int pidx;
591
592	for_each_port(adapter, pidx) {
593		struct port_info *pi = adap2pinfo(adapter, pidx);
594		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
595		u16 rss[MAX_PORT_QSETS];
596		int qs, err;
597
598		for (qs = 0; qs < pi->nqsets; qs++)
599			rss[qs] = rxq[qs].rspq.abs_id;
600
601		err = t4vf_config_rss_range(adapter, pi->viid,
602					    0, pi->rss_size, rss, pi->nqsets);
603		if (err)
604			return err;
605
606		/*
607		 * Perform Global RSS Mode-specific initialization.
608		 */
609		switch (adapter->params.rss.mode) {
610		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
611			/*
612			 * If Tunnel All Lookup isn't specified in the global
613			 * RSS Configuration, then we need to specify a
614			 * default Ingress Queue for any ingress packets which
615			 * aren't hashed.  We'll use our first ingress queue
616			 * ...
617			 */
618			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
619				union rss_vi_config config;
620				err = t4vf_read_rss_vi_config(adapter,
621							      pi->viid,
622							      &config);
623				if (err)
624					return err;
625				config.basicvirtual.defaultq =
626					rxq[0].rspq.abs_id;
627				err = t4vf_write_rss_vi_config(adapter,
628							       pi->viid,
629							       &config);
630				if (err)
631					return err;
632			}
633			break;
634		}
635	}
636
637	return 0;
638}
639
640/*
641 * Bring the adapter up.  Called whenever we go from no "ports" open to having
642 * one open.  This function performs the actions necessary to make an adapter
643 * operational, such as completing the initialization of HW modules, and
644 * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
645 * this is called "cxgb_up" in the PF Driver.)
646 */
647static int adapter_up(struct adapter *adapter)
648{
649	int err;
650
651	/*
652	 * If this is the first time we've been called, perform basic
653	 * adapter setup.  Once we've done this, many of our adapter
654	 * parameters can no longer be changed ...
655	 */
656	if ((adapter->flags & FULL_INIT_DONE) == 0) {
657		err = setup_sge_queues(adapter);
658		if (err)
659			return err;
660		err = setup_rss(adapter);
661		if (err) {
662			t4vf_free_sge_resources(adapter);
663			return err;
664		}
665
666		if (adapter->flags & USING_MSIX)
667			name_msix_vecs(adapter);
668		adapter->flags |= FULL_INIT_DONE;
669	}
670
671	/*
672	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
673	 */
674	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
675	if (adapter->flags & USING_MSIX)
676		err = request_msix_queue_irqs(adapter);
677	else
678		err = request_irq(adapter->pdev->irq,
679				  t4vf_intr_handler(adapter), 0,
680				  adapter->name, adapter);
681	if (err) {
682		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
683			err);
684		return err;
685	}
686
687	/*
688	 * Enable NAPI ingress processing and return success.
689	 */
690	enable_rx(adapter);
691	t4vf_sge_start(adapter);
692	return 0;
693}
694
695/*
696 * Bring the adapter down.  Called whenever the last "port" (Virtual
697 * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
698 * Driver.)
699 */
700static void adapter_down(struct adapter *adapter)
701{
702	/*
703	 * Free interrupt resources.
704	 */
705	if (adapter->flags & USING_MSIX)
706		free_msix_queue_irqs(adapter);
707	else
708		free_irq(adapter->pdev->irq, adapter);
709
710	/*
711	 * Wait for NAPI handlers to finish.
712	 */
713	quiesce_rx(adapter);
714}
715
716/*
717 * Start up a net device.
718 */
719static int cxgb4vf_open(struct net_device *dev)
720{
721	int err;
722	struct port_info *pi = netdev_priv(dev);
723	struct adapter *adapter = pi->adapter;
724
725	/*
726	 * If this is the first interface that we're opening on the "adapter",
727	 * bring the "adapter" up now.
728	 */
729	if (adapter->open_device_map == 0) {
730		err = adapter_up(adapter);
731		if (err)
732			return err;
733	}
734
735	/*
736	 * Note that this interface is up and start everything up ...
737	 */
738	netif_set_real_num_tx_queues(dev, pi->nqsets);
739	err = netif_set_real_num_rx_queues(dev, pi->nqsets);
740	if (err)
741		goto err_unwind;
742	err = link_start(dev);
743	if (err)
744		goto err_unwind;
745
746	netif_tx_start_all_queues(dev);
747	set_bit(pi->port_id, &adapter->open_device_map);
748	return 0;
749
750err_unwind:
751	if (adapter->open_device_map == 0)
752		adapter_down(adapter);
753	return err;
754}
755
756/*
757 * Shut down a net device.  This routine is called "cxgb_close" in the PF
758 * Driver ...
759 */
760static int cxgb4vf_stop(struct net_device *dev)
761{
762	struct port_info *pi = netdev_priv(dev);
763	struct adapter *adapter = pi->adapter;
764
765	netif_tx_stop_all_queues(dev);
766	netif_carrier_off(dev);
767	t4vf_enable_vi(adapter, pi->viid, false, false);
768	pi->link_cfg.link_ok = 0;
769
770	clear_bit(pi->port_id, &adapter->open_device_map);
771	if (adapter->open_device_map == 0)
772		adapter_down(adapter);
773	return 0;
774}
775
776/*
777 * Translate our basic statistics into the standard "ifconfig" statistics.
778 */
779static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
780{
781	struct t4vf_port_stats stats;
782	struct port_info *pi = netdev2pinfo(dev);
783	struct adapter *adapter = pi->adapter;
784	struct net_device_stats *ns = &dev->stats;
785	int err;
786
787	spin_lock(&adapter->stats_lock);
788	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
789	spin_unlock(&adapter->stats_lock);
790
791	memset(ns, 0, sizeof(*ns));
792	if (err)
793		return ns;
794
795	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
796			stats.tx_ucast_bytes + stats.tx_offload_bytes);
797	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
798			  stats.tx_ucast_frames + stats.tx_offload_frames);
799	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
800			stats.rx_ucast_bytes);
801	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
802			  stats.rx_ucast_frames);
803	ns->multicast = stats.rx_mcast_frames;
804	ns->tx_errors = stats.tx_drop_frames;
805	ns->rx_errors = stats.rx_err_frames;
806
807	return ns;
808}
809
810/*
811 * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
812 * at a specified offset within the list, into an array of addrss pointers and
813 * return the number collected.
814 */
815static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
816							const u8 **addr,
817							unsigned int offset,
818							unsigned int maxaddrs)
819{
820	unsigned int index = 0;
821	unsigned int naddr = 0;
822	const struct netdev_hw_addr *ha;
823
824	for_each_dev_addr(dev, ha)
825		if (index++ >= offset) {
826			addr[naddr++] = ha->addr;
827			if (naddr >= maxaddrs)
828				break;
829		}
830	return naddr;
831}
832
833/*
834 * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
835 * at a specified offset within the list, into an array of addrss pointers and
836 * return the number collected.
837 */
838static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
839							const u8 **addr,
840							unsigned int offset,
841							unsigned int maxaddrs)
842{
843	unsigned int index = 0;
844	unsigned int naddr = 0;
845	const struct netdev_hw_addr *ha;
846
847	netdev_for_each_mc_addr(ha, dev)
848		if (index++ >= offset) {
849			addr[naddr++] = ha->addr;
850			if (naddr >= maxaddrs)
851				break;
852		}
853	return naddr;
854}
855
856/*
857 * Configure the exact and hash address filters to handle a port's multicast
858 * and secondary unicast MAC addresses.
859 */
860static int set_addr_filters(const struct net_device *dev, bool sleep)
861{
862	u64 mhash = 0;
863	u64 uhash = 0;
864	bool free = true;
865	unsigned int offset, naddr;
866	const u8 *addr[7];
867	int ret;
868	const struct port_info *pi = netdev_priv(dev);
869
870	/* first do the secondary unicast addresses */
871	for (offset = 0; ; offset += naddr) {
872		naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
873						     ARRAY_SIZE(addr));
874		if (naddr == 0)
875			break;
876
877		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
878					  naddr, addr, NULL, &uhash, sleep);
879		if (ret < 0)
880			return ret;
881
882		free = false;
883	}
884
885	/* next set up the multicast addresses */
886	for (offset = 0; ; offset += naddr) {
887		naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
888						     ARRAY_SIZE(addr));
889		if (naddr == 0)
890			break;
891
892		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
893					  naddr, addr, NULL, &mhash, sleep);
894		if (ret < 0)
895			return ret;
896		free = false;
897	}
898
899	return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
900				  uhash | mhash, sleep);
901}
902
903/*
904 * Set RX properties of a port, such as promiscruity, address filters, and MTU.
905 * If @mtu is -1 it is left unchanged.
906 */
907static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
908{
909	int ret;
910	struct port_info *pi = netdev_priv(dev);
911
912	ret = set_addr_filters(dev, sleep_ok);
913	if (ret == 0)
914		ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
915				      (dev->flags & IFF_PROMISC) != 0,
916				      (dev->flags & IFF_ALLMULTI) != 0,
917				      1, -1, sleep_ok);
918	return ret;
919}
920
921/*
922 * Set the current receive modes on the device.
923 */
924static void cxgb4vf_set_rxmode(struct net_device *dev)
925{
926	/* unfortunately we can't return errors to the stack */
927	set_rxmode(dev, -1, false);
928}
929
930/*
931 * Find the entry in the interrupt holdoff timer value array which comes
932 * closest to the specified interrupt holdoff value.
933 */
934static int closest_timer(const struct sge *s, int us)
935{
936	int i, timer_idx = 0, min_delta = INT_MAX;
937
938	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
939		int delta = us - s->timer_val[i];
940		if (delta < 0)
941			delta = -delta;
942		if (delta < min_delta) {
943			min_delta = delta;
944			timer_idx = i;
945		}
946	}
947	return timer_idx;
948}
949
950static int closest_thres(const struct sge *s, int thres)
951{
952	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
953
954	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
955		delta = thres - s->counter_val[i];
956		if (delta < 0)
957			delta = -delta;
958		if (delta < min_delta) {
959			min_delta = delta;
960			pktcnt_idx = i;
961		}
962	}
963	return pktcnt_idx;
964}
965
966/*
967 * Return a queue's interrupt hold-off time in us.  0 means no timer.
968 */
969static unsigned int qtimer_val(const struct adapter *adapter,
970			       const struct sge_rspq *rspq)
971{
972	unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params);
973
974	return timer_idx < SGE_NTIMERS
975		? adapter->sge.timer_val[timer_idx]
976		: 0;
977}
978
979/**
980 *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
981 *	@adapter: the adapter
982 *	@rspq: the RX response queue
983 *	@us: the hold-off time in us, or 0 to disable timer
984 *	@cnt: the hold-off packet count, or 0 to disable counter
985 *
986 *	Sets an RX response queue's interrupt hold-off time and packet count.
987 *	At least one of the two needs to be enabled for the queue to generate
988 *	interrupts.
989 */
990static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
991			       unsigned int us, unsigned int cnt)
992{
993	unsigned int timer_idx;
994
995	/*
996	 * If both the interrupt holdoff timer and count are specified as
997	 * zero, default to a holdoff count of 1 ...
998	 */
999	if ((us | cnt) == 0)
1000		cnt = 1;
1001
1002	/*
1003	 * If an interrupt holdoff count has been specified, then find the
1004	 * closest configured holdoff count and use that.  If the response
1005	 * queue has already been created, then update its queue context
1006	 * parameters ...
1007	 */
1008	if (cnt) {
1009		int err;
1010		u32 v, pktcnt_idx;
1011
1012		pktcnt_idx = closest_thres(&adapter->sge, cnt);
1013		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1014			v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1015			    FW_PARAMS_PARAM_X(
1016					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1017			    FW_PARAMS_PARAM_YZ(rspq->cntxt_id);
1018			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1019			if (err)
1020				return err;
1021		}
1022		rspq->pktcnt_idx = pktcnt_idx;
1023	}
1024
1025	/*
1026	 * Compute the closest holdoff timer index from the supplied holdoff
1027	 * timer value.
1028	 */
1029	timer_idx = (us == 0
1030		     ? SGE_TIMER_RSTRT_CNTR
1031		     : closest_timer(&adapter->sge, us));
1032
1033	/*
1034	 * Update the response queue's interrupt coalescing parameters and
1035	 * return success.
1036	 */
1037	rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
1038			     (cnt > 0 ? QINTR_CNT_EN : 0));
1039	return 0;
1040}
1041
/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 *
 * The result is a constant: chip version 4, revision 0x3f (cxgb4vf).  The
 * @adapter argument is not consulted.
 */
static inline unsigned int mk_adap_vers(const struct adapter *adapter)
{
	const unsigned int chip_version = 4;
	const unsigned int chip_revision = 0x3f;

	return chip_version | (chip_revision << 10);
}
1054
/*
 * Execute the specified ioctl command.
 *
 * The VF Driver doesn't have access to any of the common Ethernet device
 * ioctl()'s (like reading/writing PHY registers, etc.), so every command
 * is rejected with -EOPNOTSUPP.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	return -EOPNOTSUPP;
}
1075
1076/*
1077 * Change the device's MTU.
1078 */
1079static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1080{
1081	int ret;
1082	struct port_info *pi = netdev_priv(dev);
1083
1084	/* accommodate SACK */
1085	if (new_mtu < 81)
1086		return -EINVAL;
1087
1088	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1089			      -1, -1, -1, -1, true);
1090	if (!ret)
1091		dev->mtu = new_mtu;
1092	return ret;
1093}
1094
1095static u32 cxgb4vf_fix_features(struct net_device *dev, u32 features)
1096{
1097	/*
1098	 * Since there is no support for separate rx/tx vlan accel
1099	 * enable/disable make sure tx flag is always in same state as rx.
1100	 */
1101	if (features & NETIF_F_HW_VLAN_RX)
1102		features |= NETIF_F_HW_VLAN_TX;
1103	else
1104		features &= ~NETIF_F_HW_VLAN_TX;
1105
1106	return features;
1107}
1108
1109static int cxgb4vf_set_features(struct net_device *dev, u32 features)
1110{
1111	struct port_info *pi = netdev_priv(dev);
1112	u32 changed = dev->features ^ features;
1113
1114	if (changed & NETIF_F_HW_VLAN_RX)
1115		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1116				features & NETIF_F_HW_VLAN_TX, 0);
1117
1118	return 0;
1119}
1120
1121/*
1122 * Change the devices MAC address.
1123 */
1124static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1125{
1126	int ret;
1127	struct sockaddr *addr = _addr;
1128	struct port_info *pi = netdev_priv(dev);
1129
1130	if (!is_valid_ether_addr(addr->sa_data))
1131		return -EINVAL;
1132
1133	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1134			      addr->sa_data, true);
1135	if (ret < 0)
1136		return ret;
1137
1138	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1139	pi->xact_addr_filt = ret;
1140	return 0;
1141}
1142
1143#ifdef CONFIG_NET_POLL_CONTROLLER
1144/*
1145 * Poll all of our receive queues.  This is called outside of normal interrupt
1146 * context.
1147 */
1148static void cxgb4vf_poll_controller(struct net_device *dev)
1149{
1150	struct port_info *pi = netdev_priv(dev);
1151	struct adapter *adapter = pi->adapter;
1152
1153	if (adapter->flags & USING_MSIX) {
1154		struct sge_eth_rxq *rxq;
1155		int nqsets;
1156
1157		rxq = &adapter->sge.ethrxq[pi->first_qset];
1158		for (nqsets = pi->nqsets; nqsets; nqsets--) {
1159			t4vf_sge_intr_msix(0, &rxq->rspq);
1160			rxq++;
1161		}
1162	} else
1163		t4vf_intr_handler(adapter)(0, adapter);
1164}
1165#endif
1166
1167/*
1168 * Ethtool operations.
1169 * ===================
1170 *
1171 * Note that we don't support any ethtool operations which change the physical
1172 * state of the port to which we're linked.
1173 */
1174
1175/*
1176 * Return current port link settings.
1177 */
1178static int cxgb4vf_get_settings(struct net_device *dev,
1179				struct ethtool_cmd *cmd)
1180{
1181	const struct port_info *pi = netdev_priv(dev);
1182
1183	cmd->supported = pi->link_cfg.supported;
1184	cmd->advertising = pi->link_cfg.advertising;
1185	ethtool_cmd_speed_set(cmd,
1186			      netif_carrier_ok(dev) ? pi->link_cfg.speed : -1);
1187	cmd->duplex = DUPLEX_FULL;
1188
1189	cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
1190	cmd->phy_address = pi->port_id;
1191	cmd->transceiver = XCVR_EXTERNAL;
1192	cmd->autoneg = pi->link_cfg.autoneg;
1193	cmd->maxtxpkt = 0;
1194	cmd->maxrxpkt = 0;
1195	return 0;
1196}
1197
1198/*
1199 * Return our driver information.
1200 */
1201static void cxgb4vf_get_drvinfo(struct net_device *dev,
1202				struct ethtool_drvinfo *drvinfo)
1203{
1204	struct adapter *adapter = netdev2adap(dev);
1205
1206	strcpy(drvinfo->driver, KBUILD_MODNAME);
1207	strcpy(drvinfo->version, DRV_VERSION);
1208	strcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)));
1209	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1210		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1211		 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev),
1212		 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev),
1213		 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev),
1214		 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev),
1215		 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev),
1216		 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev),
1217		 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev),
1218		 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev));
1219}
1220
1221/*
1222 * Return current adapter message level.
1223 */
1224static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1225{
1226	return netdev2adap(dev)->msg_enable;
1227}
1228
1229/*
1230 * Set current adapter message level.
1231 */
1232static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1233{
1234	netdev2adap(dev)->msg_enable = msglevel;
1235}
1236
1237/*
1238 * Return the device's current Queue Set ring size parameters along with the
1239 * allowed maximum values.  Since ethtool doesn't understand the concept of
1240 * multi-queue devices, we just return the current values associated with the
1241 * first Queue Set.
1242 */
1243static void cxgb4vf_get_ringparam(struct net_device *dev,
1244				  struct ethtool_ringparam *rp)
1245{
1246	const struct port_info *pi = netdev_priv(dev);
1247	const struct sge *s = &pi->adapter->sge;
1248
1249	rp->rx_max_pending = MAX_RX_BUFFERS;
1250	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1251	rp->rx_jumbo_max_pending = 0;
1252	rp->tx_max_pending = MAX_TXQ_ENTRIES;
1253
1254	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1255	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1256	rp->rx_jumbo_pending = 0;
1257	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1258}
1259
1260/*
1261 * Set the Queue Set ring size parameters for the device.  Again, since
1262 * ethtool doesn't allow for the concept of multiple queues per device, we'll
1263 * apply these new values across all of the Queue Sets associated with the
1264 * device -- after vetting them of course!
1265 */
1266static int cxgb4vf_set_ringparam(struct net_device *dev,
1267				 struct ethtool_ringparam *rp)
1268{
1269	const struct port_info *pi = netdev_priv(dev);
1270	struct adapter *adapter = pi->adapter;
1271	struct sge *s = &adapter->sge;
1272	int qs;
1273
1274	if (rp->rx_pending > MAX_RX_BUFFERS ||
1275	    rp->rx_jumbo_pending ||
1276	    rp->tx_pending > MAX_TXQ_ENTRIES ||
1277	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1278	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1279	    rp->rx_pending < MIN_FL_ENTRIES ||
1280	    rp->tx_pending < MIN_TXQ_ENTRIES)
1281		return -EINVAL;
1282
1283	if (adapter->flags & FULL_INIT_DONE)
1284		return -EBUSY;
1285
1286	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1287		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1288		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1289		s->ethtxq[qs].q.size = rp->tx_pending;
1290	}
1291	return 0;
1292}
1293
1294/*
1295 * Return the interrupt holdoff timer and count for the first Queue Set on the
1296 * device.  Our extension ioctl() (the cxgbtool interface) allows the
1297 * interrupt holdoff timer to be read on all of the device's Queue Sets.
1298 */
1299static int cxgb4vf_get_coalesce(struct net_device *dev,
1300				struct ethtool_coalesce *coalesce)
1301{
1302	const struct port_info *pi = netdev_priv(dev);
1303	const struct adapter *adapter = pi->adapter;
1304	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1305
1306	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1307	coalesce->rx_max_coalesced_frames =
1308		((rspq->intr_params & QINTR_CNT_EN)
1309		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1310		 : 0);
1311	return 0;
1312}
1313
1314/*
1315 * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1316 * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1317 * the interrupt holdoff timer on any of the device's Queue Sets.
1318 */
1319static int cxgb4vf_set_coalesce(struct net_device *dev,
1320				struct ethtool_coalesce *coalesce)
1321{
1322	const struct port_info *pi = netdev_priv(dev);
1323	struct adapter *adapter = pi->adapter;
1324
1325	return set_rxq_intr_params(adapter,
1326				   &adapter->sge.ethrxq[pi->first_qset].rspq,
1327				   coalesce->rx_coalesce_usecs,
1328				   coalesce->rx_max_coalesced_frames);
1329}
1330
1331/*
1332 * Report current port link pause parameter settings.
1333 */
1334static void cxgb4vf_get_pauseparam(struct net_device *dev,
1335				   struct ethtool_pauseparam *pauseparam)
1336{
1337	struct port_info *pi = netdev_priv(dev);
1338
1339	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1340	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1341	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1342}
1343
1344/*
1345 * Identify the port by blinking the port's LED.
1346 */
1347static int cxgb4vf_phys_id(struct net_device *dev,
1348			   enum ethtool_phys_id_state state)
1349{
1350	unsigned int val;
1351	struct port_info *pi = netdev_priv(dev);
1352
1353	if (state == ETHTOOL_ID_ACTIVE)
1354		val = 0xffff;
1355	else if (state == ETHTOOL_ID_INACTIVE)
1356		val = 0;
1357	else
1358		return -EINVAL;
1359
1360	return t4vf_identify_port(pi->adapter, pi->viid, val);
1361}
1362
1363/*
1364 * Port stats maintained per queue of the port.
1365 */
1366struct queue_port_stats {
1367	u64 tso;
1368	u64 tx_csum;
1369	u64 rx_csum;
1370	u64 vlan_ex;
1371	u64 vlan_ins;
1372	u64 lro_pkts;
1373	u64 lro_merged;
1374};
1375
1376/*
1377 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1378 * these need to match the order of statistics returned by
1379 * t4vf_get_port_stats().
1380 */
1381static const char stats_strings[][ETH_GSTRING_LEN] = {
1382	/*
1383	 * These must match the layout of the t4vf_port_stats structure.
1384	 */
1385	"TxBroadcastBytes  ",
1386	"TxBroadcastFrames ",
1387	"TxMulticastBytes  ",
1388	"TxMulticastFrames ",
1389	"TxUnicastBytes    ",
1390	"TxUnicastFrames   ",
1391	"TxDroppedFrames   ",
1392	"TxOffloadBytes    ",
1393	"TxOffloadFrames   ",
1394	"RxBroadcastBytes  ",
1395	"RxBroadcastFrames ",
1396	"RxMulticastBytes  ",
1397	"RxMulticastFrames ",
1398	"RxUnicastBytes    ",
1399	"RxUnicastFrames   ",
1400	"RxErrorFrames     ",
1401
1402	/*
1403	 * These are accumulated per-queue statistics and must match the
1404	 * order of the fields in the queue_port_stats structure.
1405	 */
1406	"TSO               ",
1407	"TxCsumOffload     ",
1408	"RxCsumGood        ",
1409	"VLANextractions   ",
1410	"VLANinsertions    ",
1411	"GROPackets        ",
1412	"GROMerged         ",
1413};
1414
1415/*
1416 * Return the number of statistics in the specified statistics set.
1417 */
1418static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1419{
1420	switch (sset) {
1421	case ETH_SS_STATS:
1422		return ARRAY_SIZE(stats_strings);
1423	default:
1424		return -EOPNOTSUPP;
1425	}
1426	/*NOTREACHED*/
1427}
1428
1429/*
1430 * Return the strings for the specified statistics set.
1431 */
1432static void cxgb4vf_get_strings(struct net_device *dev,
1433				u32 sset,
1434				u8 *data)
1435{
1436	switch (sset) {
1437	case ETH_SS_STATS:
1438		memcpy(data, stats_strings, sizeof(stats_strings));
1439		break;
1440	}
1441}
1442
1443/*
1444 * Small utility routine to accumulate queue statistics across the queues of
1445 * a "port".
1446 */
1447static void collect_sge_port_stats(const struct adapter *adapter,
1448				   const struct port_info *pi,
1449				   struct queue_port_stats *stats)
1450{
1451	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1452	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1453	int qs;
1454
1455	memset(stats, 0, sizeof(*stats));
1456	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1457		stats->tso += txq->tso;
1458		stats->tx_csum += txq->tx_cso;
1459		stats->rx_csum += rxq->stats.rx_cso;
1460		stats->vlan_ex += rxq->stats.vlan_ex;
1461		stats->vlan_ins += txq->vlan_ins;
1462		stats->lro_pkts += rxq->stats.lro_pkts;
1463		stats->lro_merged += rxq->stats.lro_merged;
1464	}
1465}
1466
1467/*
1468 * Return the ETH_SS_STATS statistics set.
1469 */
1470static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1471				      struct ethtool_stats *stats,
1472				      u64 *data)
1473{
1474	struct port_info *pi = netdev2pinfo(dev);
1475	struct adapter *adapter = pi->adapter;
1476	int err = t4vf_get_port_stats(adapter, pi->pidx,
1477				      (struct t4vf_port_stats *)data);
1478	if (err)
1479		memset(data, 0, sizeof(struct t4vf_port_stats));
1480
1481	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1482	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1483}
1484
1485/*
1486 * Return the size of our register map.
1487 */
1488static int cxgb4vf_get_regs_len(struct net_device *dev)
1489{
1490	return T4VF_REGMAP_SIZE;
1491}
1492
1493/*
1494 * Dump a block of registers, start to end inclusive, into a buffer.
1495 */
1496static void reg_block_dump(struct adapter *adapter, void *regbuf,
1497			   unsigned int start, unsigned int end)
1498{
1499	u32 *bp = regbuf + start - T4VF_REGMAP_START;
1500
1501	for ( ; start <= end; start += sizeof(u32)) {
1502		/*
1503		 * Avoid reading the Mailbox Control register since that
1504		 * can trigger a Mailbox Ownership Arbitration cycle and
1505		 * interfere with communication with the firmware.
1506		 */
1507		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1508			*bp++ = 0xffff;
1509		else
1510			*bp++ = t4_read_reg(adapter, start);
1511	}
1512}
1513
1514/*
1515 * Copy our entire register map into the provided buffer.
1516 */
1517static void cxgb4vf_get_regs(struct net_device *dev,
1518			     struct ethtool_regs *regs,
1519			     void *regbuf)
1520{
1521	struct adapter *adapter = netdev2adap(dev);
1522
1523	regs->version = mk_adap_vers(adapter);
1524
1525	/*
1526	 * Fill in register buffer with our register map.
1527	 */
1528	memset(regbuf, 0, T4VF_REGMAP_SIZE);
1529
1530	reg_block_dump(adapter, regbuf,
1531		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1532		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1533	reg_block_dump(adapter, regbuf,
1534		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1535		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1536	reg_block_dump(adapter, regbuf,
1537		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1538		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_LAST);
1539	reg_block_dump(adapter, regbuf,
1540		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1541		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1542
1543	reg_block_dump(adapter, regbuf,
1544		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1545		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1546}
1547
1548/*
1549 * Report current Wake On LAN settings.
1550 */
1551static void cxgb4vf_get_wol(struct net_device *dev,
1552			    struct ethtool_wolinfo *wol)
1553{
1554	wol->supported = 0;
1555	wol->wolopts = 0;
1556	memset(&wol->sopass, 0, sizeof(wol->sopass));
1557}
1558
1559/*
1560 * TCP Segmentation Offload flags which we support.
1561 */
1562#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1563
1564static struct ethtool_ops cxgb4vf_ethtool_ops = {
1565	.get_settings		= cxgb4vf_get_settings,
1566	.get_drvinfo		= cxgb4vf_get_drvinfo,
1567	.get_msglevel		= cxgb4vf_get_msglevel,
1568	.set_msglevel		= cxgb4vf_set_msglevel,
1569	.get_ringparam		= cxgb4vf_get_ringparam,
1570	.set_ringparam		= cxgb4vf_set_ringparam,
1571	.get_coalesce		= cxgb4vf_get_coalesce,
1572	.set_coalesce		= cxgb4vf_set_coalesce,
1573	.get_pauseparam		= cxgb4vf_get_pauseparam,
1574	.get_link		= ethtool_op_get_link,
1575	.get_strings		= cxgb4vf_get_strings,
1576	.set_phys_id		= cxgb4vf_phys_id,
1577	.get_sset_count		= cxgb4vf_get_sset_count,
1578	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
1579	.get_regs_len		= cxgb4vf_get_regs_len,
1580	.get_regs		= cxgb4vf_get_regs,
1581	.get_wol		= cxgb4vf_get_wol,
1582};
1583
1584/*
1585 * /sys/kernel/debug/cxgb4vf support code and data.
1586 * ================================================
1587 */
1588
1589/*
1590 * Show SGE Queue Set information.  We display QPL Queues Sets per line.
1591 */
1592#define QPL	4
1593
1594static int sge_qinfo_show(struct seq_file *seq, void *v)
1595{
1596	struct adapter *adapter = seq->private;
1597	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1598	int qs, r = (uintptr_t)v - 1;
1599
1600	if (r)
1601		seq_putc(seq, '\n');
1602
1603	#define S3(fmt_spec, s, v) \
1604		do {\
1605			seq_printf(seq, "%-12s", s); \
1606			for (qs = 0; qs < n; ++qs) \
1607				seq_printf(seq, " %16" fmt_spec, v); \
1608			seq_putc(seq, '\n'); \
1609		} while (0)
1610	#define S(s, v)		S3("s", s, v)
1611	#define T(s, v)		S3("u", s, txq[qs].v)
1612	#define R(s, v)		S3("u", s, rxq[qs].v)
1613
1614	if (r < eth_entries) {
1615		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1616		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1617		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1618
1619		S("QType:", "Ethernet");
1620		S("Interface:",
1621		  (rxq[qs].rspq.netdev
1622		   ? rxq[qs].rspq.netdev->name
1623		   : "N/A"));
1624		S3("d", "Port:",
1625		   (rxq[qs].rspq.netdev
1626		    ? ((struct port_info *)
1627		       netdev_priv(rxq[qs].rspq.netdev))->port_id
1628		    : -1));
1629		T("TxQ ID:", q.abs_id);
1630		T("TxQ size:", q.size);
1631		T("TxQ inuse:", q.in_use);
1632		T("TxQ PIdx:", q.pidx);
1633		T("TxQ CIdx:", q.cidx);
1634		R("RspQ ID:", rspq.abs_id);
1635		R("RspQ size:", rspq.size);
1636		R("RspQE size:", rspq.iqe_len);
1637		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1638		S3("u", "Intr pktcnt:",
1639		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1640		R("RspQ CIdx:", rspq.cidx);
1641		R("RspQ Gen:", rspq.gen);
1642		R("FL ID:", fl.abs_id);
1643		R("FL size:", fl.size - MIN_FL_RESID);
1644		R("FL avail:", fl.avail);
1645		R("FL PIdx:", fl.pidx);
1646		R("FL CIdx:", fl.cidx);
1647		return 0;
1648	}
1649
1650	r -= eth_entries;
1651	if (r == 0) {
1652		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1653
1654		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1655		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1656		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1657			   qtimer_val(adapter, evtq));
1658		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1659			   adapter->sge.counter_val[evtq->pktcnt_idx]);
1660		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1661		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1662	} else if (r == 1) {
1663		const struct sge_rspq *intrq = &adapter->sge.intrq;
1664
1665		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1666		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1667		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1668			   qtimer_val(adapter, intrq));
1669		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1670			   adapter->sge.counter_val[intrq->pktcnt_idx]);
1671		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
1672		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1673	}
1674
1675	#undef R
1676	#undef T
1677	#undef S
1678	#undef S3
1679
1680	return 0;
1681}
1682
1683/*
1684 * Return the number of "entries" in our "file".  We group the multi-Queue
1685 * sections with QPL Queue Sets per "entry".  The sections of the output are:
1686 *
1687 *     Ethernet RX/TX Queue Sets
1688 *     Firmware Event Queue
1689 *     Forwarded Interrupt Queue (if in MSI mode)
1690 */
1691static int sge_queue_entries(const struct adapter *adapter)
1692{
1693	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1694		((adapter->flags & USING_MSI) != 0);
1695}
1696
1697static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1698{
1699	int entries = sge_queue_entries(seq->private);
1700
1701	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1702}
1703
/*
 * seq_file "stop" method: the iterator holds no resources, so there is
 * nothing to release.
 */
static void sge_queue_stop(struct seq_file *seq, void *v)
{
	/* nothing to do */
}
1707
1708static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1709{
1710	int entries = sge_queue_entries(seq->private);
1711
1712	++*pos;
1713	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1714}
1715
1716static const struct seq_operations sge_qinfo_seq_ops = {
1717	.start = sge_queue_start,
1718	.next  = sge_queue_next,
1719	.stop  = sge_queue_stop,
1720	.show  = sge_qinfo_show
1721};
1722
1723static int sge_qinfo_open(struct inode *inode, struct file *file)
1724{
1725	int res = seq_open(file, &sge_qinfo_seq_ops);
1726
1727	if (!res) {
1728		struct seq_file *seq = file->private_data;
1729		seq->private = inode->i_private;
1730	}
1731	return res;
1732}
1733
1734static const struct file_operations sge_qinfo_debugfs_fops = {
1735	.owner   = THIS_MODULE,
1736	.open    = sge_qinfo_open,
1737	.read    = seq_read,
1738	.llseek  = seq_lseek,
1739	.release = seq_release,
1740};
1741
1742/*
1743 * Show SGE Queue Set statistics.  We display QPL Queues Sets per line.
1744 */
1745#define QPL	4
1746
1747static int sge_qstats_show(struct seq_file *seq, void *v)
1748{
1749	struct adapter *adapter = seq->private;
1750	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1751	int qs, r = (uintptr_t)v - 1;
1752
1753	if (r)
1754		seq_putc(seq, '\n');
1755
1756	#define S3(fmt, s, v) \
1757		do { \
1758			seq_printf(seq, "%-16s", s); \
1759			for (qs = 0; qs < n; ++qs) \
1760				seq_printf(seq, " %8" fmt, v); \
1761			seq_putc(seq, '\n'); \
1762		} while (0)
1763	#define S(s, v)		S3("s", s, v)
1764
1765	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
1766	#define T(s, v)		T3("lu", s, v)
1767
1768	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
1769	#define R(s, v)		R3("lu", s, v)
1770
1771	if (r < eth_entries) {
1772		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1773		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1774		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1775
1776		S("QType:", "Ethernet");
1777		S("Interface:",
1778		  (rxq[qs].rspq.netdev
1779		   ? rxq[qs].rspq.netdev->name
1780		   : "N/A"));
1781		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
1782		R("RxPackets:", stats.pkts);
1783		R("RxCSO:", stats.rx_cso);
1784		R("VLANxtract:", stats.vlan_ex);
1785		R("LROmerged:", stats.lro_merged);
1786		R("LROpackets:", stats.lro_pkts);
1787		R("RxDrops:", stats.rx_drops);
1788		T("TSO:", tso);
1789		T("TxCSO:", tx_cso);
1790		T("VLANins:", vlan_ins);
1791		T("TxQFull:", q.stops);
1792		T("TxQRestarts:", q.restarts);
1793		T("TxMapErr:", mapping_err);
1794		R("FLAllocErr:", fl.alloc_failed);
1795		R("FLLrgAlcErr:", fl.large_alloc_failed);
1796		R("FLStarving:", fl.starving);
1797		return 0;
1798	}
1799
1800	r -= eth_entries;
1801	if (r == 0) {
1802		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1803
1804		seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
1805		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1806			   evtq->unhandled_irqs);
1807		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
1808		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
1809	} else if (r == 1) {
1810		const struct sge_rspq *intrq = &adapter->sge.intrq;
1811
1812		seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
1813		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1814			   intrq->unhandled_irqs);
1815		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
1816		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
1817	}
1818
1819	#undef R
1820	#undef T
1821	#undef S
1822	#undef R3
1823	#undef T3
1824	#undef S3
1825
1826	return 0;
1827}
1828
1829/*
1830 * Return the number of "entries" in our "file".  We group the multi-Queue
1831 * sections with QPL Queue Sets per "entry".  The sections of the output are:
1832 *
1833 *     Ethernet RX/TX Queue Sets
1834 *     Firmware Event Queue
1835 *     Forwarded Interrupt Queue (if in MSI mode)
1836 */
1837static int sge_qstats_entries(const struct adapter *adapter)
1838{
1839	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1840		((adapter->flags & USING_MSI) != 0);
1841}
1842
1843static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1844{
1845	int entries = sge_qstats_entries(seq->private);
1846
1847	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1848}
1849
/*
 * seq_file "stop" method: the iterator holds no resources, so there is
 * nothing to release.
 */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
	/* nothing to do */
}
1853
1854static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1855{
1856	int entries = sge_qstats_entries(seq->private);
1857
1858	(*pos)++;
1859	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1860}
1861
1862static const struct seq_operations sge_qstats_seq_ops = {
1863	.start = sge_qstats_start,
1864	.next  = sge_qstats_next,
1865	.stop  = sge_qstats_stop,
1866	.show  = sge_qstats_show
1867};
1868
1869static int sge_qstats_open(struct inode *inode, struct file *file)
1870{
1871	int res = seq_open(file, &sge_qstats_seq_ops);
1872
1873	if (res == 0) {
1874		struct seq_file *seq = file->private_data;
1875		seq->private = inode->i_private;
1876	}
1877	return res;
1878}
1879
1880static const struct file_operations sge_qstats_proc_fops = {
1881	.owner   = THIS_MODULE,
1882	.open    = sge_qstats_open,
1883	.read    = seq_read,
1884	.llseek  = seq_lseek,
1885	.release = seq_release,
1886};
1887
1888/*
1889 * Show PCI-E SR-IOV Virtual Function Resource Limits.
1890 */
1891static int resources_show(struct seq_file *seq, void *v)
1892{
1893	struct adapter *adapter = seq->private;
1894	struct vf_resources *vfres = &adapter->params.vfres;
1895
1896	#define S(desc, fmt, var) \
1897		seq_printf(seq, "%-60s " fmt "\n", \
1898			   desc " (" #var "):", vfres->var)
1899
1900	S("Virtual Interfaces", "%d", nvi);
1901	S("Egress Queues", "%d", neq);
1902	S("Ethernet Control", "%d", nethctrl);
1903	S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
1904	S("Ingress Queues", "%d", niq);
1905	S("Traffic Class", "%d", tc);
1906	S("Port Access Rights Mask", "%#x", pmask);
1907	S("MAC Address Filters", "%d", nexactf);
1908	S("Firmware Command Read Capabilities", "%#x", r_caps);
1909	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
1910
1911	#undef S
1912
1913	return 0;
1914}
1915
1916static int resources_open(struct inode *inode, struct file *file)
1917{
1918	return single_open(file, resources_show, inode->i_private);
1919}
1920
1921static const struct file_operations resources_proc_fops = {
1922	.owner   = THIS_MODULE,
1923	.open    = resources_open,
1924	.read    = seq_read,
1925	.llseek  = seq_lseek,
1926	.release = single_release,
1927};
1928
1929/*
1930 * Show Virtual Interfaces.
1931 */
1932static int interfaces_show(struct seq_file *seq, void *v)
1933{
1934	if (v == SEQ_START_TOKEN) {
1935		seq_puts(seq, "Interface  Port   VIID\n");
1936	} else {
1937		struct adapter *adapter = seq->private;
1938		int pidx = (uintptr_t)v - 2;
1939		struct net_device *dev = adapter->port[pidx];
1940		struct port_info *pi = netdev_priv(dev);
1941
1942		seq_printf(seq, "%9s  %4d  %#5x\n",
1943			   dev->name, pi->port_id, pi->viid);
1944	}
1945	return 0;
1946}
1947
1948static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
1949{
1950	return pos <= adapter->params.nports
1951		? (void *)(uintptr_t)(pos + 1)
1952		: NULL;
1953}
1954
1955static void *interfaces_start(struct seq_file *seq, loff_t *pos)
1956{
1957	return *pos
1958		? interfaces_get_idx(seq->private, *pos)
1959		: SEQ_START_TOKEN;
1960}
1961
1962static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
1963{
1964	(*pos)++;
1965	return interfaces_get_idx(seq->private, *pos);
1966}
1967
/*
 * seq_file "stop" method: the iterator holds no resources, so there is
 * nothing to release.
 */
static void interfaces_stop(struct seq_file *seq, void *v)
{
	/* nothing to do */
}
1971
1972static const struct seq_operations interfaces_seq_ops = {
1973	.start = interfaces_start,
1974	.next  = interfaces_next,
1975	.stop  = interfaces_stop,
1976	.show  = interfaces_show
1977};
1978
1979static int interfaces_open(struct inode *inode, struct file *file)
1980{
1981	int res = seq_open(file, &interfaces_seq_ops);
1982
1983	if (res == 0) {
1984		struct seq_file *seq = file->private_data;
1985		seq->private = inode->i_private;
1986	}
1987	return res;
1988}
1989
1990static const struct file_operations interfaces_proc_fops = {
1991	.owner   = THIS_MODULE,
1992	.open    = interfaces_open,
1993	.read    = seq_read,
1994	.llseek  = seq_lseek,
1995	.release = seq_release,
1996};
1997
1998/*
1999 * /sys/kernel/debugfs/cxgb4vf/ files list.
2000 */
2001struct cxgb4vf_debugfs_entry {
2002	const char *name;		/* name of debugfs node */
2003	mode_t mode;			/* file system mode */
2004	const struct file_operations *fops;
2005};
2006
/*
 * The debugfs nodes created for each adapter by setup_debugfs().  All of
 * them are read-only (S_IRUGO).
 */
static struct cxgb4vf_debugfs_entry debugfs_files[] = {
	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
	{ "resources",  S_IRUGO, &resources_proc_fops },
	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
};
2013
2014/*
2015 * Module and device initialization and cleanup code.
2016 * ==================================================
2017 */
2018
2019/*
2020 * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2021 * directory (debugfs_root) has already been set up.
2022 */
2023static int __devinit setup_debugfs(struct adapter *adapter)
2024{
2025	int i;
2026
2027	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2028
2029	/*
2030	 * Debugfs support is best effort.
2031	 */
2032	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2033		(void)debugfs_create_file(debugfs_files[i].name,
2034				  debugfs_files[i].mode,
2035				  adapter->debugfs_root,
2036				  (void *)adapter,
2037				  debugfs_files[i].fops);
2038
2039	return 0;
2040}
2041
2042/*
2043 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2044 * it to our caller to tear down the directory (debugfs_root).
2045 */
2046static void cleanup_debugfs(struct adapter *adapter)
2047{
2048	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2049
2050	/*
2051	 * Unlike our sister routine cleanup_proc(), we don't need to remove
2052	 * individual entries because a call will be made to
2053	 * debugfs_remove_recursive().  We just need to clean up any ancillary
2054	 * persistent state.
2055	 */
2056	/* nothing to do */
2057}
2058
2059/*
2060 * Perform early "adapter" initialization.  This is where we discover what
2061 * adapter parameters we're going to be using and initialize basic adapter
2062 * hardware support.
2063 */
2064static int __devinit adap_init0(struct adapter *adapter)
2065{
2066	struct vf_resources *vfres = &adapter->params.vfres;
2067	struct sge_params *sge_params = &adapter->params.sge;
2068	struct sge *s = &adapter->sge;
2069	unsigned int ethqsets;
2070	int err;
2071
2072	/*
2073	 * Wait for the device to become ready before proceeding ...
2074	 */
2075	err = t4vf_wait_dev_ready(adapter);
2076	if (err) {
2077		dev_err(adapter->pdev_dev, "device didn't become ready:"
2078			" err=%d\n", err);
2079		return err;
2080	}
2081
2082	/*
2083	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2084	 * 2.6.31 and later we can't call pci_reset_function() in order to
	 * issue an FLR because of a self-deadlock on the device semaphore.
2086	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2087	 * cases where they're needed -- for instance, some versions of KVM
2088	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2089	 * use the firmware based reset in order to reset any per function
2090	 * state.
2091	 */
2092	err = t4vf_fw_reset(adapter);
2093	if (err < 0) {
2094		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2095		return err;
2096	}
2097
2098	/*
2099	 * Grab basic operational parameters.  These will predominantly have
2100	 * been set up by the Physical Function Driver or will be hard coded
2101	 * into the adapter.  We just have to live with them ...  Note that
2102	 * we _must_ get our VPD parameters before our SGE parameters because
2103	 * we need to know the adapter's core clock from the VPD in order to
2104	 * properly decode the SGE Timer Values.
2105	 */
2106	err = t4vf_get_dev_params(adapter);
2107	if (err) {
2108		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2109			" device parameters: err=%d\n", err);
2110		return err;
2111	}
2112	err = t4vf_get_vpd_params(adapter);
2113	if (err) {
2114		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2115			" VPD parameters: err=%d\n", err);
2116		return err;
2117	}
2118	err = t4vf_get_sge_params(adapter);
2119	if (err) {
2120		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2121			" SGE parameters: err=%d\n", err);
2122		return err;
2123	}
2124	err = t4vf_get_rss_glb_config(adapter);
2125	if (err) {
2126		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2127			" RSS parameters: err=%d\n", err);
2128		return err;
2129	}
2130	if (adapter->params.rss.mode !=
2131	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2132		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2133			" mode %d\n", adapter->params.rss.mode);
2134		return -EINVAL;
2135	}
2136	err = t4vf_sge_init(adapter);
2137	if (err) {
2138		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2139			" err=%d\n", err);
2140		return err;
2141	}
2142
2143	/*
2144	 * Retrieve our RX interrupt holdoff timer values and counter
2145	 * threshold values from the SGE parameters.
2146	 */
2147	s->timer_val[0] = core_ticks_to_us(adapter,
2148		TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1));
2149	s->timer_val[1] = core_ticks_to_us(adapter,
2150		TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1));
2151	s->timer_val[2] = core_ticks_to_us(adapter,
2152		TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3));
2153	s->timer_val[3] = core_ticks_to_us(adapter,
2154		TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3));
2155	s->timer_val[4] = core_ticks_to_us(adapter,
2156		TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5));
2157	s->timer_val[5] = core_ticks_to_us(adapter,
2158		TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5));
2159
2160	s->counter_val[0] =
2161		THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold);
2162	s->counter_val[1] =
2163		THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold);
2164	s->counter_val[2] =
2165		THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold);
2166	s->counter_val[3] =
2167		THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold);
2168
2169	/*
2170	 * Grab our Virtual Interface resource allocation, extract the
2171	 * features that we're interested in and do a bit of sanity testing on
2172	 * what we discover.
2173	 */
2174	err = t4vf_get_vfres(adapter);
2175	if (err) {
2176		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2177			" resources: err=%d\n", err);
2178		return err;
2179	}
2180
2181	/*
2182	 * The number of "ports" which we support is equal to the number of
2183	 * Virtual Interfaces with which we've been provisioned.
2184	 */
2185	adapter->params.nports = vfres->nvi;
2186	if (adapter->params.nports > MAX_NPORTS) {
2187		dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2188			 " virtual interfaces\n", MAX_NPORTS,
2189			 adapter->params.nports);
2190		adapter->params.nports = MAX_NPORTS;
2191	}
2192
2193	/*
2194	 * We need to reserve a number of the ingress queues with Free List
2195	 * and Interrupt capabilities for special interrupt purposes (like
2196	 * asynchronous firmware messages, or forwarded interrupts if we're
2197	 * using MSI).  The rest of the FL/Intr-capable ingress queues will be
2198	 * matched up one-for-one with Ethernet/Control egress queues in order
	 * to form "Queue Sets" which will be apportioned between the "ports".
2200	 * For each Queue Set, we'll need the ability to allocate two Egress
2201	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2202	 * Ethernet Queue.
2203	 */
2204	ethqsets = vfres->niqflint - INGQ_EXTRAS;
2205	if (vfres->nethctrl != ethqsets) {
2206		dev_warn(adapter->pdev_dev, "unequal number of [available]"
2207			 " ingress/egress queues (%d/%d); using minimum for"
2208			 " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2209		ethqsets = min(vfres->nethctrl, ethqsets);
2210	}
2211	if (vfres->neq < ethqsets*2) {
2212		dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2213			 " to support Queue Sets (%d); reducing allowed Queue"
2214			 " Sets\n", vfres->neq, ethqsets);
2215		ethqsets = vfres->neq/2;
2216	}
2217	if (ethqsets > MAX_ETH_QSETS) {
2218		dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2219			 " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets);
2220		ethqsets = MAX_ETH_QSETS;
2221	}
2222	if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2223		dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2224			 " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2225	}
2226	adapter->sge.max_ethqsets = ethqsets;
2227
2228	/*
2229	 * Check for various parameter sanity issues.  Most checks simply
	 * result in us using fewer resources than our provisioning but we
	 * do need at least one "port" with which to work ...
2232	 */
2233	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2234		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2235			 " virtual interfaces (too few Queue Sets)\n",
2236			 adapter->sge.max_ethqsets, adapter->params.nports);
2237		adapter->params.nports = adapter->sge.max_ethqsets;
2238	}
2239	if (adapter->params.nports == 0) {
2240		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2241			"usable!\n");
2242		return -EINVAL;
2243	}
2244	return 0;
2245}
2246
2247static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2248			     u8 pkt_cnt_idx, unsigned int size,
2249			     unsigned int iqe_size)
2250{
2251	rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
2252			     (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0));
2253	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2254			    ? pkt_cnt_idx
2255			    : 0);
2256	rspq->iqe_len = iqe_size;
2257	rspq->size = size;
2258}
2259
2260/*
2261 * Perform default configuration of DMA queues depending on the number and
2262 * type of ports we found and the number of available CPUs.  Most settings can
2263 * be modified by the admin via ethtool and cxgbtool prior to the adapter
2264 * being brought up for the first time.
2265 */
2266static void __devinit cfg_queues(struct adapter *adapter)
2267{
2268	struct sge *s = &adapter->sge;
2269	int q10g, n10g, qidx, pidx, qs;
2270	size_t iqe_size;
2271
2272	/*
2273	 * We should not be called till we know how many Queue Sets we can
2274	 * support.  In particular, this means that we need to know what kind
2275	 * of interrupts we'll be using ...
2276	 */
2277	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2278
2279	/*
2280	 * Count the number of 10GbE Virtual Interfaces that we have.
2281	 */
2282	n10g = 0;
2283	for_each_port(adapter, pidx)
2284		n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2285
2286	/*
2287	 * We default to 1 queue per non-10G port and up to # of cores queues
2288	 * per 10G port.
2289	 */
2290	if (n10g == 0)
2291		q10g = 0;
2292	else {
2293		int n1g = (adapter->params.nports - n10g);
2294		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2295		if (q10g > num_online_cpus())
2296			q10g = num_online_cpus();
2297	}
2298
2299	/*
2300	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2301	 * The layout will be established in setup_sge_queues() when the
2302	 * adapter is brough up for the first time.
2303	 */
2304	qidx = 0;
2305	for_each_port(adapter, pidx) {
2306		struct port_info *pi = adap2pinfo(adapter, pidx);
2307
2308		pi->first_qset = qidx;
2309		pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
2310		qidx += pi->nqsets;
2311	}
2312	s->ethqsets = qidx;
2313
2314	/*
2315	 * The Ingress Queue Entry Size for our various Response Queues needs
2316	 * to be big enough to accommodate the largest message we can receive
2317	 * from the chip/firmware; which is 64 bytes ...
2318	 */
2319	iqe_size = 64;
2320
2321	/*
2322	 * Set up default Queue Set parameters ...  Start off with the
2323	 * shortest interrupt holdoff timer.
2324	 */
2325	for (qs = 0; qs < s->max_ethqsets; qs++) {
2326		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2327		struct sge_eth_txq *txq = &s->ethtxq[qs];
2328
2329		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2330		rxq->fl.size = 72;
2331		txq->q.size = 1024;
2332	}
2333
2334	/*
2335	 * The firmware event queue is used for link state changes and
2336	 * notifications of TX DMA completions.
2337	 */
2338	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2339
2340	/*
2341	 * The forwarded interrupt queue is used when we're in MSI interrupt
2342	 * mode.  In this mode all interrupts associated with RX queues will
2343	 * be forwarded to a single queue which we'll associate with our MSI
2344	 * interrupt vector.  The messages dropped in the forwarded interrupt
2345	 * queue will indicate which ingress queue needs servicing ...  This
2346	 * queue needs to be large enough to accommodate all of the ingress
2347	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2348	 * from equalling the CIDX if every ingress queue has an outstanding
2349	 * interrupt).  The queue doesn't need to be any larger because no
2350	 * ingress queue will ever have more than one outstanding interrupt at
2351	 * any time ...
2352	 */
2353	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2354		  iqe_size);
2355}
2356
2357/*
2358 * Reduce the number of Ethernet queues across all ports to at most n.
2359 * n provides at least one queue per port.
2360 */
2361static void __devinit reduce_ethqs(struct adapter *adapter, int n)
2362{
2363	int i;
2364	struct port_info *pi;
2365
2366	/*
2367	 * While we have too many active Ether Queue Sets, interate across the
2368	 * "ports" and reduce their individual Queue Set allocations.
2369	 */
2370	BUG_ON(n < adapter->params.nports);
2371	while (n < adapter->sge.ethqsets)
2372		for_each_port(adapter, i) {
2373			pi = adap2pinfo(adapter, i);
2374			if (pi->nqsets > 1) {
2375				pi->nqsets--;
2376				adapter->sge.ethqsets--;
2377				if (adapter->sge.ethqsets <= n)
2378					break;
2379			}
2380		}
2381
2382	/*
2383	 * Reassign the starting Queue Sets for each of the "ports" ...
2384	 */
2385	n = 0;
2386	for_each_port(adapter, i) {
2387		pi = adap2pinfo(adapter, i);
2388		pi->first_qset = n;
2389		n += pi->nqsets;
2390	}
2391}
2392
2393/*
2394 * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2395 * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2396 * need.  Minimally we need one for every Virtual Interface plus those needed
2397 * for our "extras".  Note that this process may lower the maximum number of
2398 * allowed Queue Sets ...
2399 */
2400static int __devinit enable_msix(struct adapter *adapter)
2401{
2402	int i, err, want, need;
2403	struct msix_entry entries[MSIX_ENTRIES];
2404	struct sge *s = &adapter->sge;
2405
2406	for (i = 0; i < MSIX_ENTRIES; ++i)
2407		entries[i].entry = i;
2408
2409	/*
2410	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2411	 * plus those needed for our "extras" (for example, the firmware
2412	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2413	 * Interface plus those needed for our "extras".  So now we get to see
2414	 * if the song is right ...
2415	 */
2416	want = s->max_ethqsets + MSIX_EXTRAS;
2417	need = adapter->params.nports + MSIX_EXTRAS;
2418	while ((err = pci_enable_msix(adapter->pdev, entries, want)) >= need)
2419		want = err;
2420
2421	if (err == 0) {
2422		int nqsets = want - MSIX_EXTRAS;
2423		if (nqsets < s->max_ethqsets) {
2424			dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2425				 " for %d Queue Sets\n", nqsets);
2426			s->max_ethqsets = nqsets;
2427			if (nqsets < s->ethqsets)
2428				reduce_ethqs(adapter, nqsets);
2429		}
2430		for (i = 0; i < want; ++i)
2431			adapter->msix_info[i].vec = entries[i].vector;
2432	} else if (err > 0) {
2433		pci_disable_msix(adapter->pdev);
2434		dev_info(adapter->pdev_dev, "only %d MSI-X vectors left,"
2435			 " not using MSI-X\n", err);
2436	}
2437	return err;
2438}
2439
2440static const struct net_device_ops cxgb4vf_netdev_ops	= {
2441	.ndo_open		= cxgb4vf_open,
2442	.ndo_stop		= cxgb4vf_stop,
2443	.ndo_start_xmit		= t4vf_eth_xmit,
2444	.ndo_get_stats		= cxgb4vf_get_stats,
2445	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
2446	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
2447	.ndo_validate_addr	= eth_validate_addr,
2448	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
2449	.ndo_change_mtu		= cxgb4vf_change_mtu,
2450	.ndo_fix_features	= cxgb4vf_fix_features,
2451	.ndo_set_features	= cxgb4vf_set_features,
2452#ifdef CONFIG_NET_POLL_CONTROLLER
2453	.ndo_poll_controller	= cxgb4vf_poll_controller,
2454#endif
2455};
2456
2457/*
2458 * "Probe" a device: initialize a device and construct all kernel and driver
2459 * state needed to manage the device.  This routine is called "init_one" in
2460 * the PF Driver ...
2461 */
2462static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2463				       const struct pci_device_id *ent)
2464{
2465	static int version_printed;
2466
2467	int pci_using_dac;
2468	int err, pidx;
2469	unsigned int pmask;
2470	struct adapter *adapter;
2471	struct port_info *pi;
2472	struct net_device *netdev;
2473
2474	/*
2475	 * Print our driver banner the first time we're called to initialize a
2476	 * device.
2477	 */
2478	if (version_printed == 0) {
2479		printk(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
2480		version_printed = 1;
2481	}
2482
2483	/*
2484	 * Initialize generic PCI device state.
2485	 */
2486	err = pci_enable_device(pdev);
2487	if (err) {
2488		dev_err(&pdev->dev, "cannot enable PCI device\n");
2489		return err;
2490	}
2491
2492	/*
2493	 * Reserve PCI resources for the device.  If we can't get them some
2494	 * other driver may have already claimed the device ...
2495	 */
2496	err = pci_request_regions(pdev, KBUILD_MODNAME);
2497	if (err) {
2498		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2499		goto err_disable_device;
2500	}
2501
2502	/*
2503	 * Set up our DMA mask: try for 64-bit address masking first and
2504	 * fall back to 32-bit if we can't get 64 bits ...
2505	 */
2506	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2507	if (err == 0) {
2508		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2509		if (err) {
2510			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2511				" coherent allocations\n");
2512			goto err_release_regions;
2513		}
2514		pci_using_dac = 1;
2515	} else {
2516		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2517		if (err != 0) {
2518			dev_err(&pdev->dev, "no usable DMA configuration\n");
2519			goto err_release_regions;
2520		}
2521		pci_using_dac = 0;
2522	}
2523
2524	/*
2525	 * Enable bus mastering for the device ...
2526	 */
2527	pci_set_master(pdev);
2528
2529	/*
2530	 * Allocate our adapter data structure and attach it to the device.
2531	 */
2532	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2533	if (!adapter) {
2534		err = -ENOMEM;
2535		goto err_release_regions;
2536	}
2537	pci_set_drvdata(pdev, adapter);
2538	adapter->pdev = pdev;
2539	adapter->pdev_dev = &pdev->dev;
2540
2541	/*
2542	 * Initialize SMP data synchronization resources.
2543	 */
2544	spin_lock_init(&adapter->stats_lock);
2545
2546	/*
2547	 * Map our I/O registers in BAR0.
2548	 */
2549	adapter->regs = pci_ioremap_bar(pdev, 0);
2550	if (!adapter->regs) {
2551		dev_err(&pdev->dev, "cannot map device registers\n");
2552		err = -ENOMEM;
2553		goto err_free_adapter;
2554	}
2555
2556	/*
2557	 * Initialize adapter level features.
2558	 */
2559	adapter->name = pci_name(pdev);
2560	adapter->msg_enable = dflt_msg_enable;
2561	err = adap_init0(adapter);
2562	if (err)
2563		goto err_unmap_bar;
2564
2565	/*
2566	 * Allocate our "adapter ports" and stitch everything together.
2567	 */
2568	pmask = adapter->params.vfres.pmask;
2569	for_each_port(adapter, pidx) {
2570		int port_id, viid;
2571
2572		/*
2573		 * We simplistically allocate our virtual interfaces
2574		 * sequentially across the port numbers to which we have
2575		 * access rights.  This should be configurable in some manner
2576		 * ...
2577		 */
2578		if (pmask == 0)
2579			break;
2580		port_id = ffs(pmask) - 1;
2581		pmask &= ~(1 << port_id);
2582		viid = t4vf_alloc_vi(adapter, port_id);
2583		if (viid < 0) {
2584			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2585				" err=%d\n", port_id, viid);
2586			err = viid;
2587			goto err_free_dev;
2588		}
2589
2590		/*
2591		 * Allocate our network device and stitch things together.
2592		 */
2593		netdev = alloc_etherdev_mq(sizeof(struct port_info),
2594					   MAX_PORT_QSETS);
2595		if (netdev == NULL) {
2596			dev_err(&pdev->dev, "cannot allocate netdev for"
2597				" port %d\n", port_id);
2598			t4vf_free_vi(adapter, viid);
2599			err = -ENOMEM;
2600			goto err_free_dev;
2601		}
2602		adapter->port[pidx] = netdev;
2603		SET_NETDEV_DEV(netdev, &pdev->dev);
2604		pi = netdev_priv(netdev);
2605		pi->adapter = adapter;
2606		pi->pidx = pidx;
2607		pi->port_id = port_id;
2608		pi->viid = viid;
2609
2610		/*
2611		 * Initialize the starting state of our "port" and register
2612		 * it.
2613		 */
2614		pi->xact_addr_filt = -1;
2615		netif_carrier_off(netdev);
2616		netdev->irq = pdev->irq;
2617
2618		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2619			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2620			NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM;
2621		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2622			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2623			NETIF_F_HIGHDMA;
2624		netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_TX;
2625		if (pci_using_dac)
2626			netdev->features |= NETIF_F_HIGHDMA;
2627
2628		netdev->priv_flags |= IFF_UNICAST_FLT;
2629
2630		netdev->netdev_ops = &cxgb4vf_netdev_ops;
2631		SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
2632
2633		/*
2634		 * Initialize the hardware/software state for the port.
2635		 */
2636		err = t4vf_port_init(adapter, pidx);
2637		if (err) {
2638			dev_err(&pdev->dev, "cannot initialize port %d\n",
2639				pidx);
2640			goto err_free_dev;
2641		}
2642	}
2643
2644	/*
2645	 * The "card" is now ready to go.  If any errors occur during device
2646	 * registration we do not fail the whole "card" but rather proceed
2647	 * only with the ports we manage to register successfully.  However we
2648	 * must register at least one net device.
2649	 */
2650	for_each_port(adapter, pidx) {
2651		netdev = adapter->port[pidx];
2652		if (netdev == NULL)
2653			continue;
2654
2655		err = register_netdev(netdev);
2656		if (err) {
2657			dev_warn(&pdev->dev, "cannot register net device %s,"
2658				 " skipping\n", netdev->name);
2659			continue;
2660		}
2661
2662		set_bit(pidx, &adapter->registered_device_map);
2663	}
2664	if (adapter->registered_device_map == 0) {
2665		dev_err(&pdev->dev, "could not register any net devices\n");
2666		goto err_free_dev;
2667	}
2668
2669	/*
2670	 * Set up our debugfs entries.
2671	 */
2672	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2673		adapter->debugfs_root =
2674			debugfs_create_dir(pci_name(pdev),
2675					   cxgb4vf_debugfs_root);
2676		if (IS_ERR_OR_NULL(adapter->debugfs_root))
2677			dev_warn(&pdev->dev, "could not create debugfs"
2678				 " directory");
2679		else
2680			setup_debugfs(adapter);
2681	}
2682
2683	/*
2684	 * See what interrupts we'll be using.  If we've been configured to
2685	 * use MSI-X interrupts, try to enable them but fall back to using
2686	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2687	 * get MSI interrupts we bail with the error.
2688	 */
2689	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2690		adapter->flags |= USING_MSIX;
2691	else {
2692		err = pci_enable_msi(pdev);
2693		if (err) {
2694			dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2695				" err=%d\n",
2696				msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2697			goto err_free_debugfs;
2698		}
2699		adapter->flags |= USING_MSI;
2700	}
2701
2702	/*
2703	 * Now that we know how many "ports" we have and what their types are,
2704	 * and how many Queue Sets we can support, we can configure our queue
2705	 * resources.
2706	 */
2707	cfg_queues(adapter);
2708
2709	/*
2710	 * Print a short notice on the existence and configuration of the new
2711	 * VF network device ...
2712	 */
2713	for_each_port(adapter, pidx) {
2714		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2715			 adapter->port[pidx]->name,
2716			 (adapter->flags & USING_MSIX) ? "MSI-X" :
2717			 (adapter->flags & USING_MSI)  ? "MSI" : "");
2718	}
2719
2720	/*
2721	 * Return success!
2722	 */
2723	return 0;
2724
2725	/*
2726	 * Error recovery and exit code.  Unwind state that's been created
2727	 * so far and return the error.
2728	 */
2729
2730err_free_debugfs:
2731	if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2732		cleanup_debugfs(adapter);
2733		debugfs_remove_recursive(adapter->debugfs_root);
2734	}
2735
2736err_free_dev:
2737	for_each_port(adapter, pidx) {
2738		netdev = adapter->port[pidx];
2739		if (netdev == NULL)
2740			continue;
2741		pi = netdev_priv(netdev);
2742		t4vf_free_vi(adapter, pi->viid);
2743		if (test_bit(pidx, &adapter->registered_device_map))
2744			unregister_netdev(netdev);
2745		free_netdev(netdev);
2746	}
2747
2748err_unmap_bar:
2749	iounmap(adapter->regs);
2750
2751err_free_adapter:
2752	kfree(adapter);
2753	pci_set_drvdata(pdev, NULL);
2754
2755err_release_regions:
2756	pci_release_regions(pdev);
2757	pci_set_drvdata(pdev, NULL);
2758	pci_clear_master(pdev);
2759
2760err_disable_device:
2761	pci_disable_device(pdev);
2762
2763	return err;
2764}
2765
2766/*
2767 * "Remove" a device: tear down all kernel and driver state created in the
2768 * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
2769 * that this is called "remove_one" in the PF Driver.)
2770 */
2771static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
2772{
2773	struct adapter *adapter = pci_get_drvdata(pdev);
2774
2775	/*
2776	 * Tear down driver state associated with device.
2777	 */
2778	if (adapter) {
2779		int pidx;
2780
2781		/*
2782		 * Stop all of our activity.  Unregister network port,
2783		 * disable interrupts, etc.
2784		 */
2785		for_each_port(adapter, pidx)
2786			if (test_bit(pidx, &adapter->registered_device_map))
2787				unregister_netdev(adapter->port[pidx]);
2788		t4vf_sge_stop(adapter);
2789		if (adapter->flags & USING_MSIX) {
2790			pci_disable_msix(adapter->pdev);
2791			adapter->flags &= ~USING_MSIX;
2792		} else if (adapter->flags & USING_MSI) {
2793			pci_disable_msi(adapter->pdev);
2794			adapter->flags &= ~USING_MSI;
2795		}
2796
2797		/*
2798		 * Tear down our debugfs entries.
2799		 */
2800		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2801			cleanup_debugfs(adapter);
2802			debugfs_remove_recursive(adapter->debugfs_root);
2803		}
2804
2805		/*
2806		 * Free all of the various resources which we've acquired ...
2807		 */
2808		t4vf_free_sge_resources(adapter);
2809		for_each_port(adapter, pidx) {
2810			struct net_device *netdev = adapter->port[pidx];
2811			struct port_info *pi;
2812
2813			if (netdev == NULL)
2814				continue;
2815
2816			pi = netdev_priv(netdev);
2817			t4vf_free_vi(adapter, pi->viid);
2818			free_netdev(netdev);
2819		}
2820		iounmap(adapter->regs);
2821		kfree(adapter);
2822		pci_set_drvdata(pdev, NULL);
2823	}
2824
2825	/*
2826	 * Disable the device and release its PCI resources.
2827	 */
2828	pci_disable_device(pdev);
2829	pci_clear_master(pdev);
2830	pci_release_regions(pdev);
2831}
2832
2833/*
2834 * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
2835 * delivery.
2836 */
2837static void __devexit cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2838{
2839	struct adapter *adapter;
2840	int pidx;
2841
2842	adapter = pci_get_drvdata(pdev);
2843	if (!adapter)
2844		return;
2845
2846	/*
2847	 * Disable all Virtual Interfaces.  This will shut down the
2848	 * delivery of all ingress packets into the chip for these
2849	 * Virtual Interfaces.
2850	 */
2851	for_each_port(adapter, pidx) {
2852		struct net_device *netdev;
2853		struct port_info *pi;
2854
2855		if (!test_bit(pidx, &adapter->registered_device_map))
2856			continue;
2857
2858		netdev = adapter->port[pidx];
2859		if (!netdev)
2860			continue;
2861
2862		pi = netdev_priv(netdev);
2863		t4vf_enable_vi(adapter, pi->viid, false, false);
2864	}
2865
2866	/*
2867	 * Free up all Queues which will prevent further DMA and
2868	 * Interrupts allowing various internal pathways to drain.
2869	 */
2870	t4vf_free_sge_resources(adapter);
2871}
2872
2873/*
2874 * PCI Device registration data structures.
2875 */
2876#define CH_DEVICE(devid, idx) \
2877	{ PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx }
2878
2879static struct pci_device_id cxgb4vf_pci_tbl[] = {
2880	CH_DEVICE(0xb000, 0),	/* PE10K FPGA */
2881	CH_DEVICE(0x4800, 0),	/* T440-dbg */
2882	CH_DEVICE(0x4801, 0),	/* T420-cr */
2883	CH_DEVICE(0x4802, 0),	/* T422-cr */
2884	CH_DEVICE(0x4803, 0),	/* T440-cr */
2885	CH_DEVICE(0x4804, 0),	/* T420-bch */
2886	CH_DEVICE(0x4805, 0),   /* T440-bch */
2887	CH_DEVICE(0x4806, 0),	/* T460-ch */
2888	CH_DEVICE(0x4807, 0),	/* T420-so */
2889	CH_DEVICE(0x4808, 0),	/* T420-cx */
2890	CH_DEVICE(0x4809, 0),	/* T420-bt */
2891	CH_DEVICE(0x480a, 0),   /* T404-bt */
2892	{ 0, }
2893};
2894
/*
 * Module metadata: description, author, license and version of the driver,
 * plus the export of our PCI device table.
 */
MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
2900
2901static struct pci_driver cxgb4vf_driver = {
2902	.name		= KBUILD_MODNAME,
2903	.id_table	= cxgb4vf_pci_tbl,
2904	.probe		= cxgb4vf_pci_probe,
2905	.remove		= __devexit_p(cxgb4vf_pci_remove),
2906	.shutdown	= __devexit_p(cxgb4vf_pci_shutdown),
2907};
2908
2909/*
2910 * Initialize global driver state.
2911 */
2912static int __init cxgb4vf_module_init(void)
2913{
2914	int ret;
2915
2916	/*
2917	 * Vet our module parameters.
2918	 */
2919	if (msi != MSI_MSIX && msi != MSI_MSI) {
2920		printk(KERN_WARNING KBUILD_MODNAME
2921		       ": bad module parameter msi=%d; must be %d"
2922		       " (MSI-X or MSI) or %d (MSI)\n",
2923		       msi, MSI_MSIX, MSI_MSI);
2924		return -EINVAL;
2925	}
2926
2927	/* Debugfs support is optional, just warn if this fails */
2928	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
2929	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2930		printk(KERN_WARNING KBUILD_MODNAME ": could not create"
2931		       " debugfs entry, continuing\n");
2932
2933	ret = pci_register_driver(&cxgb4vf_driver);
2934	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2935		debugfs_remove(cxgb4vf_debugfs_root);
2936	return ret;
2937}
2938
/*
 * Tear down global driver state: unregister our PCI driver and then remove
 * the driver's debugfs directory which was created in
 * cxgb4vf_module_init().
 */
static void __exit cxgb4vf_module_exit(void)
{
	/*
	 * Unregister the driver first; the per-adapter debugfs directories
	 * are created underneath cxgb4vf_debugfs_root by the probe routine.
	 */
	pci_unregister_driver(&cxgb4vf_driver);
	debugfs_remove(cxgb4vf_debugfs_root);
}
2947
/*
 * Register our module load and unload entry points.
 */
module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);
2950