vmxnet3_drv.c revision 8b429468a6b4746e88abbf5649c9e592d7b3e355
1/*
2 * Linux driver for VMware's vmxnet3 ethernet NIC.
3 *
4 * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; version 2 of the License and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 * NON INFRINGEMENT. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * The full GNU General Public License is included in this distribution in
21 * the file called "COPYING".
22 *
23 * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24 *
25 */
26
27#include <linux/module.h>
28#include <net/ip6_checksum.h>
29
30#include "vmxnet3_int.h"
31
32char vmxnet3_driver_name[] = "vmxnet3";
33#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34
35/*
36 * PCI Device ID Table
37 * Last entry must be all 0s
38 */
39static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
40	{PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41	{0}
42};
43
44MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45
46static int enable_mq = 1;
47
48static void
49vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50
51/*
52 *    Enable/Disable the given intr
53 */
54static void
55vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56{
57	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58}
59
60
61static void
62vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63{
64	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65}
66
67
68/*
69 *    Enable/Disable all intrs used by the device
70 */
71static void
72vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73{
74	int i;
75
76	for (i = 0; i < adapter->intr.num_intrs; i++)
77		vmxnet3_enable_intr(adapter, i);
78	adapter->shared->devRead.intrConf.intrCtrl &=
79					cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80}
81
82
83static void
84vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85{
86	int i;
87
88	adapter->shared->devRead.intrConf.intrCtrl |=
89					cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90	for (i = 0; i < adapter->intr.num_intrs; i++)
91		vmxnet3_disable_intr(adapter, i);
92}
93
94
95static void
96vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97{
98	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99}
100
101
102static bool
103vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104{
105	return tq->stopped;
106}
107
108
109static void
110vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111{
112	tq->stopped = false;
113	netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114}
115
116
117static void
118vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119{
120	tq->stopped = false;
121	netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122}
123
124
125static void
126vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127{
128	tq->stopped = true;
129	tq->num_stop++;
130	netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131}
132
133
134/*
135 * Check the link state. This may start or stop the tx queue.
136 */
137static void
138vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139{
140	u32 ret;
141	int i;
142	unsigned long flags;
143
144	spin_lock_irqsave(&adapter->cmd_lock, flags);
145	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148
149	adapter->link_speed = ret >> 16;
150	if (ret & 1) { /* Link is up. */
151		netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152			    adapter->link_speed);
153		netif_carrier_on(adapter->netdev);
154
155		if (affectTxQueue) {
156			for (i = 0; i < adapter->num_tx_queues; i++)
157				vmxnet3_tq_start(&adapter->tx_queue[i],
158						 adapter);
159		}
160	} else {
161		netdev_info(adapter->netdev, "NIC Link is Down\n");
162		netif_carrier_off(adapter->netdev);
163
164		if (affectTxQueue) {
165			for (i = 0; i < adapter->num_tx_queues; i++)
166				vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
167		}
168	}
169}
170
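/*
 * Handle events posted by the device in the shared ECR register: ack them,
 * re-check the link state on a link event, and on a tx/rx queue error read
 * back the queue status, log it and schedule the adapter's work item for
 * recovery.
 */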
171static void
172vmxnet3_process_events(struct vmxnet3_adapter *adapter)
173{
174	int i;
175	unsigned long flags;
176	u32 events = le32_to_cpu(adapter->shared->ecr);
177	if (!events)
178		return;
179
180	vmxnet3_ack_events(adapter, events);
181
182	/* Check if link state has changed */
183	if (events & VMXNET3_ECR_LINK)
184		vmxnet3_check_link(adapter, true);
185
186	/* Check if there is an error on xmit/recv queues */
187	if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
188		spin_lock_irqsave(&adapter->cmd_lock, flags);
189		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
190				       VMXNET3_CMD_GET_QUEUE_STATUS);
191		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
192
193		for (i = 0; i < adapter->num_tx_queues; i++)
194			if (adapter->tqd_start[i].status.stopped)
195				dev_err(&adapter->netdev->dev,
196					"%s: tq[%d] error 0x%x\n",
197					adapter->netdev->name, i, le32_to_cpu(
198					adapter->tqd_start[i].status.error));
199		for (i = 0; i < adapter->num_rx_queues; i++)
200			if (adapter->rqd_start[i].status.stopped)
201				dev_err(&adapter->netdev->dev,
202					"%s: rq[%d] error 0x%x\n",
203					adapter->netdev->name, i,
204					adapter->rqd_start[i].status.error);
205
206		schedule_work(&adapter->work);
207	}
208}
209
210#ifdef __BIG_ENDIAN_BITFIELD
211/*
212 * The device expects the bitfields in the shared structures to be written in
213 * little endian. When the CPU is big endian, the following routines are used
214 * to read from and write to the device ABI correctly.
215 * The general technique used here is: double word bitfields are defined in
216 * the opposite order for big endian architectures. Before the driver reads
217 * them, the complete double word is translated using le32_to_cpu. Similarly,
218 * after the driver writes into the bitfields, cpu_to_le32 is used to translate
219 * the double words into the required format.
220 * To avoid touching bits in a shared structure more than once, temporary
221 * descriptors are used. These are passed as srcDesc to the functions below.
222 */
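/*
 * For example, on big endian builds VMXNET3_TXDESC_GET_GEN below reads the
 * gen bit by handing the dword that contains it to get_bitfield32() rather
 * than dereferencing the bitfield directly.
 */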
223static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
224				struct Vmxnet3_RxDesc *dstDesc)
225{
226	u32 *src = (u32 *)srcDesc + 2;
227	u32 *dst = (u32 *)dstDesc + 2;
228	dstDesc->addr = le64_to_cpu(srcDesc->addr);
229	*dst = le32_to_cpu(*src);
230	dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
231}
232
233static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
234			       struct Vmxnet3_TxDesc *dstDesc)
235{
236	int i;
237	u32 *src = (u32 *)(srcDesc + 1);
238	u32 *dst = (u32 *)(dstDesc + 1);
239
240	/* Working backwards so that the gen bit is set at the end. */
241	for (i = 2; i > 0; i--) {
242		src--;
243		dst--;
244		*dst = cpu_to_le32(*src);
245	}
246}
247
248
249static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
250				struct Vmxnet3_RxCompDesc *dstDesc)
251{
252	int i = 0;
253	u32 *src = (u32 *)srcDesc;
254	u32 *dst = (u32 *)dstDesc;
255	for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
256		*dst = le32_to_cpu(*src);
257		src++;
258		dst++;
259	}
260}
261
262
263/* Used to read bitfield values from double words. */
264static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
265{
266	u32 temp = le32_to_cpu(*bitfield);
267	u32 mask = ((1 << size) - 1) << pos;
268	temp &= mask;
269	temp >>= pos;
270	return temp;
271}
272
273
274
275#endif  /* __BIG_ENDIAN_BITFIELD */
276
277#ifdef __BIG_ENDIAN_BITFIELD
278
279#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
280			txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
281			VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
282#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
283			txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
284			VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
285#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
286			VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
287			VMXNET3_TCD_GEN_SIZE)
288#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
289			VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
290#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
291			(dstrcd) = (tmp); \
292			vmxnet3_RxCompToCPU((rcd), (tmp)); \
293		} while (0)
294#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
295			(dstrxd) = (tmp); \
296			vmxnet3_RxDescToCPU((rxd), (tmp)); \
297		} while (0)
298
299#else
300
301#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
302#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
303#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
304#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
305#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
306#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
307
308#endif /* __BIG_ENDIAN_BITFIELD  */
309
310
311static void
312vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
313		     struct pci_dev *pdev)
314{
315	if (tbi->map_type == VMXNET3_MAP_SINGLE)
316		dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
317				 PCI_DMA_TODEVICE);
318	else if (tbi->map_type == VMXNET3_MAP_PAGE)
319		dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
320			       PCI_DMA_TODEVICE);
321	else
322		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
323
324	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
325}
326
327
328static int
329vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
330		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter)
331{
332	struct sk_buff *skb;
333	int entries = 0;
334
335	/* no out of order completion */
336	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
337	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
338
339	skb = tq->buf_info[eop_idx].skb;
340	BUG_ON(skb == NULL);
341	tq->buf_info[eop_idx].skb = NULL;
342
343	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
344
345	while (tq->tx_ring.next2comp != eop_idx) {
346		vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
347				     pdev);
348
349		/* update next2comp w/o tx_lock. Since we are marking more,
350		 * not fewer, tx ring entries available, the worst case is
351		 * that the tx routine incorrectly re-queues a pkt due to
352		 * insufficient tx ring entries.
353		 */
354		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
355		entries++;
356	}
357
358	dev_kfree_skb_any(skb);
359	return entries;
360}
361
362
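/*
 * Reclaim tx descriptors completed by the device: walk the completion ring
 * while the descriptor gen bit matches the ring's current gen, unmap and
 * free each completed pkt, and wake the queue if enough tx ring entries
 * have become available.
 */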
363static int
364vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
365			struct vmxnet3_adapter *adapter)
366{
367	int completed = 0;
368	union Vmxnet3_GenericDesc *gdesc;
369
370	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
371	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
372		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
373					       &gdesc->tcd), tq, adapter->pdev,
374					       adapter);
375
376		vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
377		gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
378	}
379
380	if (completed) {
381		spin_lock(&tq->tx_lock);
382		if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
383			     vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
384			     VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
385			     netif_carrier_ok(adapter->netdev))) {
386			vmxnet3_tq_wake(tq, adapter);
387		}
388		spin_unlock(&tq->tx_lock);
389	}
390	return completed;
391}
392
393
394static void
395vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
396		   struct vmxnet3_adapter *adapter)
397{
398	int i;
399
400	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
401		struct vmxnet3_tx_buf_info *tbi;
402
403		tbi = tq->buf_info + tq->tx_ring.next2comp;
404
405		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
406		if (tbi->skb) {
407			dev_kfree_skb_any(tbi->skb);
408			tbi->skb = NULL;
409		}
410		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
411	}
412
413	/* sanity check, verify all buffers are indeed unmapped and freed */
414	for (i = 0; i < tq->tx_ring.size; i++) {
415		BUG_ON(tq->buf_info[i].skb != NULL ||
416		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
417	}
418
419	tq->tx_ring.gen = VMXNET3_INIT_GEN;
420	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
421
422	tq->comp_ring.gen = VMXNET3_INIT_GEN;
423	tq->comp_ring.next2proc = 0;
424}
425
426
427static void
428vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
429		   struct vmxnet3_adapter *adapter)
430{
431	if (tq->tx_ring.base) {
432		dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
433				  sizeof(struct Vmxnet3_TxDesc),
434				  tq->tx_ring.base, tq->tx_ring.basePA);
435		tq->tx_ring.base = NULL;
436	}
437	if (tq->data_ring.base) {
438		dma_free_coherent(&adapter->pdev->dev, tq->data_ring.size *
439				  sizeof(struct Vmxnet3_TxDataDesc),
440				  tq->data_ring.base, tq->data_ring.basePA);
441		tq->data_ring.base = NULL;
442	}
443	if (tq->comp_ring.base) {
444		dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
445				  sizeof(struct Vmxnet3_TxCompDesc),
446				  tq->comp_ring.base, tq->comp_ring.basePA);
447		tq->comp_ring.base = NULL;
448	}
449	if (tq->buf_info) {
450		dma_free_coherent(&adapter->pdev->dev,
451				  tq->tx_ring.size * sizeof(tq->buf_info[0]),
452				  tq->buf_info, tq->buf_info_pa);
453		tq->buf_info = NULL;
454	}
455}
456
457
458/* Destroy all tx queues */
459void
460vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
461{
462	int i;
463
464	for (i = 0; i < adapter->num_tx_queues; i++)
465		vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
466}
467
468
469static void
470vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
471		struct vmxnet3_adapter *adapter)
472{
473	int i;
474
475	/* reset the tx ring contents to 0 and reset the tx ring states */
476	memset(tq->tx_ring.base, 0, tq->tx_ring.size *
477	       sizeof(struct Vmxnet3_TxDesc));
478	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
479	tq->tx_ring.gen = VMXNET3_INIT_GEN;
480
481	memset(tq->data_ring.base, 0, tq->data_ring.size *
482	       sizeof(struct Vmxnet3_TxDataDesc));
483
484	/* reset the tx comp ring contents to 0 and reset comp ring states */
485	memset(tq->comp_ring.base, 0, tq->comp_ring.size *
486	       sizeof(struct Vmxnet3_TxCompDesc));
487	tq->comp_ring.next2proc = 0;
488	tq->comp_ring.gen = VMXNET3_INIT_GEN;
489
490	/* reset the bookkeeping data */
491	memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
492	for (i = 0; i < tq->tx_ring.size; i++)
493		tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
494
495	/* stats are not reset */
496}
497
498
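/*
 * Allocate the DMA-coherent tx ring, data ring, comp ring and buf_info
 * array for a tx queue; on failure, everything allocated so far is freed.
 */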
499static int
500vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
501		  struct vmxnet3_adapter *adapter)
502{
503	size_t sz;
504
505	BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
506	       tq->comp_ring.base || tq->buf_info);
507
508	tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
509			tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
510			&tq->tx_ring.basePA, GFP_KERNEL);
511	if (!tq->tx_ring.base) {
512		netdev_err(adapter->netdev, "failed to allocate tx ring\n");
513		goto err;
514	}
515
516	tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
517			tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc),
518			&tq->data_ring.basePA, GFP_KERNEL);
519	if (!tq->data_ring.base) {
520		netdev_err(adapter->netdev, "failed to allocate data ring\n");
521		goto err;
522	}
523
524	tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
525			tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
526			&tq->comp_ring.basePA, GFP_KERNEL);
527	if (!tq->comp_ring.base) {
528		netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
529		goto err;
530	}
531
532	sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
533	tq->buf_info = dma_zalloc_coherent(&adapter->pdev->dev, sz,
534					   &tq->buf_info_pa, GFP_KERNEL);
535	if (!tq->buf_info)
536		goto err;
537
538	return 0;
539
540err:
541	vmxnet3_tq_destroy(tq, adapter);
542	return -ENOMEM;
543}
544
545static void
546vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
547{
548	int i;
549
550	for (i = 0; i < adapter->num_tx_queues; i++)
551		vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
552}
553
554/*
555 *    Starting from ring->next2fill, allocate rx buffers for the given ring
556 *    of the rx queue and update the rx descriptors. Stop after @num_to_alloc
557 *    buffers are allocated or allocation fails.
558 */
559
560static int
561vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
562			int num_to_alloc, struct vmxnet3_adapter *adapter)
563{
564	int num_allocated = 0;
565	struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
566	struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
567	u32 val;
568
569	while (num_allocated <= num_to_alloc) {
570		struct vmxnet3_rx_buf_info *rbi;
571		union Vmxnet3_GenericDesc *gd;
572
573		rbi = rbi_base + ring->next2fill;
574		gd = ring->base + ring->next2fill;
575
576		if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
577			if (rbi->skb == NULL) {
578				rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
579								       rbi->len,
580								       GFP_KERNEL);
581				if (unlikely(rbi->skb == NULL)) {
582					rq->stats.rx_buf_alloc_failure++;
583					break;
584				}
585
586				rbi->dma_addr = dma_map_single(
587						&adapter->pdev->dev,
588						rbi->skb->data, rbi->len,
589						PCI_DMA_FROMDEVICE);
590			} else {
591				/* rx buffer skipped by the device */
592			}
593			val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
594		} else {
595			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
596			       rbi->len  != PAGE_SIZE);
597
598			if (rbi->page == NULL) {
599				rbi->page = alloc_page(GFP_ATOMIC);
600				if (unlikely(rbi->page == NULL)) {
601					rq->stats.rx_buf_alloc_failure++;
602					break;
603				}
604				rbi->dma_addr = dma_map_page(
605						&adapter->pdev->dev,
606						rbi->page, 0, PAGE_SIZE,
607						PCI_DMA_FROMDEVICE);
608			} else {
609				/* rx buffers skipped by the device */
610			}
611			val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
612		}
613
614		BUG_ON(rbi->dma_addr == 0);
615		gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
616		gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
617					   | val | rbi->len);
618
619		/* Fill the last buffer but don't mark it ready, or else the
620		 * device will think that the queue is full */
621		if (num_allocated == num_to_alloc)
622			break;
623
624		gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
625		num_allocated++;
626		vmxnet3_cmd_ring_adv_next2fill(ring);
627	}
628
629	netdev_dbg(adapter->netdev,
630		"alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
631		num_allocated, ring->next2fill, ring->next2comp);
632
633	/* so that the device can distinguish a full ring and an empty ring */
634	BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
635
636	return num_allocated;
637}
638
639
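/*
 * Attach the page of a body rx buffer to the skb under construction as a
 * new page fragment and account for its length.
 */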
640static void
641vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
642		    struct vmxnet3_rx_buf_info *rbi)
643{
644	struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
645		skb_shinfo(skb)->nr_frags;
646
647	BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
648
649	__skb_frag_set_page(frag, rbi->page);
650	frag->page_offset = 0;
651	skb_frag_size_set(frag, rcd->len);
652	skb->data_len += rcd->len;
653	skb->truesize += PAGE_SIZE;
654	skb_shinfo(skb)->nr_frags++;
655}
656
657
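/*
 * Fill tx descriptors for a pkt: the SOP desc points at the header copy in
 * the data ring (when ctx->copy_size != 0), followed by descriptors for the
 * remaining linear part and for each page fragment. The SOP desc keeps the
 * previous gen bit so the device does not see the pkt until the caller
 * flips it.
 */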
658static void
659vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
660		struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
661		struct vmxnet3_adapter *adapter)
662{
663	u32 dw2, len;
664	unsigned long buf_offset;
665	int i;
666	union Vmxnet3_GenericDesc *gdesc;
667	struct vmxnet3_tx_buf_info *tbi = NULL;
668
669	BUG_ON(ctx->copy_size > skb_headlen(skb));
670
671	/* use the previous gen bit for the SOP desc */
672	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
673
674	ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
675	gdesc = ctx->sop_txd; /* both loops below can be skipped */
676
677	/* no need to map the buffer if headers are copied */
678	if (ctx->copy_size) {
679		ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
680					tq->tx_ring.next2fill *
681					sizeof(struct Vmxnet3_TxDataDesc));
682		ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
683		ctx->sop_txd->dword[3] = 0;
684
685		tbi = tq->buf_info + tq->tx_ring.next2fill;
686		tbi->map_type = VMXNET3_MAP_NONE;
687
688		netdev_dbg(adapter->netdev,
689			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
690			tq->tx_ring.next2fill,
691			le64_to_cpu(ctx->sop_txd->txd.addr),
692			ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
693		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
694
695		/* use the right gen for non-SOP desc */
696		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
697	}
698
699	/* linear part can use multiple tx desc if it's big */
700	len = skb_headlen(skb) - ctx->copy_size;
701	buf_offset = ctx->copy_size;
702	while (len) {
703		u32 buf_size;
704
705		if (len < VMXNET3_MAX_TX_BUF_SIZE) {
706			buf_size = len;
707			dw2 |= len;
708		} else {
709			buf_size = VMXNET3_MAX_TX_BUF_SIZE;
710			/* spec says that for TxDesc.len, 0 == 2^14 */
711		}
712
713		tbi = tq->buf_info + tq->tx_ring.next2fill;
714		tbi->map_type = VMXNET3_MAP_SINGLE;
715		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
716				skb->data + buf_offset, buf_size,
717				PCI_DMA_TODEVICE);
718
719		tbi->len = buf_size;
720
721		gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
722		BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
723
724		gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
725		gdesc->dword[2] = cpu_to_le32(dw2);
726		gdesc->dword[3] = 0;
727
728		netdev_dbg(adapter->netdev,
729			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
730			tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
731			le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
732		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
733		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
734
735		len -= buf_size;
736		buf_offset += buf_size;
737	}
738
739	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
740		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
741		u32 buf_size;
742
743		buf_offset = 0;
744		len = skb_frag_size(frag);
745		while (len) {
746			tbi = tq->buf_info + tq->tx_ring.next2fill;
747			if (len < VMXNET3_MAX_TX_BUF_SIZE) {
748				buf_size = len;
749				dw2 |= len;
750			} else {
751				buf_size = VMXNET3_MAX_TX_BUF_SIZE;
752				/* spec says that for TxDesc.len, 0 == 2^14 */
753			}
754			tbi->map_type = VMXNET3_MAP_PAGE;
755			tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
756							 buf_offset, buf_size,
757							 DMA_TO_DEVICE);
758
759			tbi->len = buf_size;
760
761			gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
762			BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
763
764			gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
765			gdesc->dword[2] = cpu_to_le32(dw2);
766			gdesc->dword[3] = 0;
767
768			netdev_dbg(adapter->netdev,
769				"txd[%u]: 0x%llx %u %u\n",
770				tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
771				le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
772			vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
773			dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
774
775			len -= buf_size;
776			buf_offset += buf_size;
777		}
778	}
779
780	ctx->eop_txd = gdesc;
781
782	/* set the last buf_info for the pkt */
783	tbi->skb = skb;
784	tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
785}
786
787
788/* Init all tx queues */
789static void
790vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
791{
792	int i;
793
794	for (i = 0; i < adapter->num_tx_queues; i++)
795		vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
796}
797
798
799/*
800 *    Parse and copy the relevant protocol headers:
801 *      For a TSO pkt, the relevant headers are L2/3/4 including options
802 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
803 *      if it's a TCP/UDP pkt
804 *
805 * Returns:
806 *    -1:  an error occurred during parsing
807 *     0:  protocol headers parsed, but too big to be copied
808 *     1:  protocol headers parsed and copied
809 *
810 * Other effects:
811 *    1. related *ctx fields are updated.
812 *    2. ctx->copy_size is # of bytes copied
813 *    3. the portion copied is guaranteed to be in the linear part
814 *
815 */
816static int
817vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
818			   struct vmxnet3_tx_ctx *ctx,
819			   struct vmxnet3_adapter *adapter)
820{
821	struct Vmxnet3_TxDataDesc *tdd;
822
823	if (ctx->mss) {	/* TSO */
824		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
825		ctx->l4_hdr_size = tcp_hdrlen(skb);
826		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
827	} else {
828		if (skb->ip_summed == CHECKSUM_PARTIAL) {
829			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
830
831			if (ctx->ipv4) {
832				const struct iphdr *iph = ip_hdr(skb);
833
834				if (iph->protocol == IPPROTO_TCP)
835					ctx->l4_hdr_size = tcp_hdrlen(skb);
836				else if (iph->protocol == IPPROTO_UDP)
837					ctx->l4_hdr_size = sizeof(struct udphdr);
838				else
839					ctx->l4_hdr_size = 0;
840			} else {
841				/* for simplicity, don't copy L4 headers */
842				ctx->l4_hdr_size = 0;
843			}
844			ctx->copy_size = min(ctx->eth_ip_hdr_size +
845					 ctx->l4_hdr_size, skb->len);
846		} else {
847			ctx->eth_ip_hdr_size = 0;
848			ctx->l4_hdr_size = 0;
849			/* copy as much as allowed */
850			ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE
851					     , skb_headlen(skb));
852		}
853
854		/* make sure headers are accessible directly */
855		if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
856			goto err;
857	}
858
859	if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
860		tq->stats.oversized_hdr++;
861		ctx->copy_size = 0;
862		return 0;
863	}
864
865	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
866
867	memcpy(tdd->data, skb->data, ctx->copy_size);
868	netdev_dbg(adapter->netdev,
869		"copy %u bytes to dataRing[%u]\n",
870		ctx->copy_size, tq->tx_ring.next2fill);
871	return 1;
872
873err:
874	return -1;
875}
876
877
878static void
879vmxnet3_prepare_tso(struct sk_buff *skb,
880		    struct vmxnet3_tx_ctx *ctx)
881{
882	struct tcphdr *tcph = tcp_hdr(skb);
883
884	if (ctx->ipv4) {
885		struct iphdr *iph = ip_hdr(skb);
886
887		iph->check = 0;
888		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
889						 IPPROTO_TCP, 0);
890	} else {
891		struct ipv6hdr *iph = ipv6_hdr(skb);
892
893		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
894					       IPPROTO_TCP, 0);
895	}
896}
897
898static int txd_estimate(const struct sk_buff *skb)
899{
900	int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
901	int i;
902
903	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
904		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
905
906		count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
907	}
908	return count;
909}
910
911/*
912 * Transmits a pkt through a given tq
913 * Returns:
914 *    NETDEV_TX_OK:      descriptors are set up successfully
915 *    NETDEV_TX_OK:      an error occurred, the pkt is dropped
916 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
917 *
918 * Side-effects:
919 *    1. tx ring may be changed
920 *    2. tq stats may be updated accordingly
921 *    3. shared->txNumDeferred may be updated
922 */
923
924static int
925vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
926		struct vmxnet3_adapter *adapter, struct net_device *netdev)
927{
928	int ret;
929	u32 count;
930	unsigned long flags;
931	struct vmxnet3_tx_ctx ctx;
932	union Vmxnet3_GenericDesc *gdesc;
933#ifdef __BIG_ENDIAN_BITFIELD
934	/* Use temporary descriptor to avoid touching bits multiple times */
935	union Vmxnet3_GenericDesc tempTxDesc;
936#endif
937
938	count = txd_estimate(skb);
939
940	ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
941
942	ctx.mss = skb_shinfo(skb)->gso_size;
943	if (ctx.mss) {
944		if (skb_header_cloned(skb)) {
945			if (unlikely(pskb_expand_head(skb, 0, 0,
946						      GFP_ATOMIC) != 0)) {
947				tq->stats.drop_tso++;
948				goto drop_pkt;
949			}
950			tq->stats.copy_skb_header++;
951		}
952		vmxnet3_prepare_tso(skb, &ctx);
953	} else {
954		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
955
956			/* non-tso pkts must not use more than
957			 * VMXNET3_MAX_TXD_PER_PKT entries
958			 */
959			if (skb_linearize(skb) != 0) {
960				tq->stats.drop_too_many_frags++;
961				goto drop_pkt;
962			}
963			tq->stats.linearized++;
964
965			/* recalculate the # of descriptors to use */
966			count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
967		}
968	}
969
970	spin_lock_irqsave(&tq->tx_lock, flags);
971
972	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
973		tq->stats.tx_ring_full++;
974		netdev_dbg(adapter->netdev,
975			"tx queue stopped on %s, next2comp %u"
976			" next2fill %u\n", adapter->netdev->name,
977			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
978
979		vmxnet3_tq_stop(tq, adapter);
980		spin_unlock_irqrestore(&tq->tx_lock, flags);
981		return NETDEV_TX_BUSY;
982	}
983
984
985	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
986	if (ret >= 0) {
987		BUG_ON(ret <= 0 && ctx.copy_size != 0);
988		/* hdrs parsed, check against other limits */
989		if (ctx.mss) {
990			if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
991				     VMXNET3_MAX_TX_BUF_SIZE)) {
992				goto hdr_too_big;
993			}
994		} else {
995			if (skb->ip_summed == CHECKSUM_PARTIAL) {
996				if (unlikely(ctx.eth_ip_hdr_size +
997					     skb->csum_offset >
998					     VMXNET3_MAX_CSUM_OFFSET)) {
999					goto hdr_too_big;
1000				}
1001			}
1002		}
1003	} else {
1004		tq->stats.drop_hdr_inspect_err++;
1005		goto unlock_drop_pkt;
1006	}
1007
1008	/* fill tx descs related to addr & len */
1009	vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1010
1011	/* setup the EOP desc */
1012	ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1013
1014	/* setup the SOP desc */
1015#ifdef __BIG_ENDIAN_BITFIELD
1016	gdesc = &tempTxDesc;
1017	gdesc->dword[2] = ctx.sop_txd->dword[2];
1018	gdesc->dword[3] = ctx.sop_txd->dword[3];
1019#else
1020	gdesc = ctx.sop_txd;
1021#endif
1022	if (ctx.mss) {
1023		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1024		gdesc->txd.om = VMXNET3_OM_TSO;
1025		gdesc->txd.msscof = ctx.mss;
1026		le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1027			     gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1028	} else {
1029		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1030			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1031			gdesc->txd.om = VMXNET3_OM_CSUM;
1032			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1033					    skb->csum_offset;
1034		} else {
1035			gdesc->txd.om = 0;
1036			gdesc->txd.msscof = 0;
1037		}
1038		le32_add_cpu(&tq->shared->txNumDeferred, 1);
1039	}
1040
1041	if (vlan_tx_tag_present(skb)) {
1042		gdesc->txd.ti = 1;
1043		gdesc->txd.tci = vlan_tx_tag_get(skb);
1044	}
1045
1046	/* finally flips the GEN bit of the SOP desc. */
1047	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1048						  VMXNET3_TXD_GEN);
1049#ifdef __BIG_ENDIAN_BITFIELD
1050	/* Finished updating the bitfields of the Tx Desc, so write them back to
1051	 * their original place.
1052	 */
1053	vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1054			   (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1055	gdesc = ctx.sop_txd;
1056#endif
1057	netdev_dbg(adapter->netdev,
1058		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1059		(u32)(ctx.sop_txd -
1060		tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1061		le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1062
1063	spin_unlock_irqrestore(&tq->tx_lock, flags);
1064
1065	if (le32_to_cpu(tq->shared->txNumDeferred) >=
1066					le32_to_cpu(tq->shared->txThreshold)) {
1067		tq->shared->txNumDeferred = 0;
1068		VMXNET3_WRITE_BAR0_REG(adapter,
1069				       VMXNET3_REG_TXPROD + tq->qid * 8,
1070				       tq->tx_ring.next2fill);
1071	}
1072
1073	return NETDEV_TX_OK;
1074
1075hdr_too_big:
1076	tq->stats.drop_oversized_hdr++;
1077unlock_drop_pkt:
1078	spin_unlock_irqrestore(&tq->tx_lock, flags);
1079drop_pkt:
1080	tq->stats.drop_total++;
1081	dev_kfree_skb_any(skb);
1082	return NETDEV_TX_OK;
1083}
1084
1085
1086static netdev_tx_t
1087vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1088{
1089	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1090
1091	BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1092	return vmxnet3_tq_xmit(skb,
1093			       &adapter->tx_queue[skb->queue_mapping],
1094			       adapter, netdev);
1095}
1096
1097
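/*
 * Propagate the device's checksum result to the skb: when rx csum offload
 * is enabled and the completion desc reports the checksums as verified,
 * mark the skb CHECKSUM_UNNECESSARY; otherwise pass along any partial csum
 * the device reported or leave the skb unchecksummed.
 */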
1098static void
1099vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1100		struct sk_buff *skb,
1101		union Vmxnet3_GenericDesc *gdesc)
1102{
1103	if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1104		/* typical case: TCP/UDP over IP and both csums are correct */
1105		if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1106							VMXNET3_RCD_CSUM_OK) {
1107			skb->ip_summed = CHECKSUM_UNNECESSARY;
1108			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1109			BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1110			BUG_ON(gdesc->rcd.frg);
1111		} else {
1112			if (gdesc->rcd.csum) {
1113				skb->csum = htons(gdesc->rcd.csum);
1114				skb->ip_summed = CHECKSUM_PARTIAL;
1115			} else {
1116				skb_checksum_none_assert(skb);
1117			}
1118		}
1119	} else {
1120		skb_checksum_none_assert(skb);
1121	}
1122}
1123
1124
1125static void
1126vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1127		 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1128{
1129	rq->stats.drop_err++;
1130	if (!rcd->fcs)
1131		rq->stats.drop_fcs++;
1132
1133	rq->stats.drop_total++;
1134
1135	/*
1136	 * We do not unmap and chain the rx buffer to the skb.
1137	 * We basically pretend this buffer is not used and will be recycled
1138	 * by vmxnet3_rq_alloc_rx_buf()
1139	 */
1140
1141	/*
1142	 * ctx->skb may be NULL if this is the first and only
1143	 * desc for the pkt
1144	 */
1145	if (ctx->skb)
1146		dev_kfree_skb_irq(ctx->skb);
1147
1148	ctx->skb = NULL;
1149}
1150
1151
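/*
 * Process up to @quota rx completion descriptors: assemble each pkt from
 * its head (skb) buffer and any body (page) buffers, immediately refill the
 * rx rings with replacement buffers, and hand completed pkts to the stack.
 * Returns the number of completion descriptors processed.
 */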
1152static int
1153vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1154		       struct vmxnet3_adapter *adapter, int quota)
1155{
1156	static const u32 rxprod_reg[2] = {
1157		VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1158	};
1159	u32 num_rxd = 0;
1160	bool skip_page_frags = false;
1161	struct Vmxnet3_RxCompDesc *rcd;
1162	struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1163#ifdef __BIG_ENDIAN_BITFIELD
1164	struct Vmxnet3_RxDesc rxCmdDesc;
1165	struct Vmxnet3_RxCompDesc rxComp;
1166#endif
1167	vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1168			  &rxComp);
1169	while (rcd->gen == rq->comp_ring.gen) {
1170		struct vmxnet3_rx_buf_info *rbi;
1171		struct sk_buff *skb, *new_skb = NULL;
1172		struct page *new_page = NULL;
1173		int num_to_alloc;
1174		struct Vmxnet3_RxDesc *rxd;
1175		u32 idx, ring_idx;
1176		struct vmxnet3_cmd_ring	*ring = NULL;
1177		if (num_rxd >= quota) {
1178			/* we may stop even before we see the EOP desc of
1179			 * the current pkt
1180			 */
1181			break;
1182		}
1183		num_rxd++;
1184		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1185		idx = rcd->rxdIdx;
1186		ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1187		ring = rq->rx_ring + ring_idx;
1188		vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1189				  &rxCmdDesc);
1190		rbi = rq->buf_info[ring_idx] + idx;
1191
1192		BUG_ON(rxd->addr != rbi->dma_addr ||
1193		       rxd->len != rbi->len);
1194
1195		if (unlikely(rcd->eop && rcd->err)) {
1196			vmxnet3_rx_error(rq, rcd, ctx, adapter);
1197			goto rcd_done;
1198		}
1199
1200		if (rcd->sop) { /* first buf of the pkt */
1201			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1202			       rcd->rqID != rq->qid);
1203
1204			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1205			BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1206
1207			if (unlikely(rcd->len == 0)) {
1208				/* Pretend the rx buffer is skipped. */
1209				BUG_ON(!(rcd->sop && rcd->eop));
1210				netdev_dbg(adapter->netdev,
1211					"rxRing[%u][%u] 0 length\n",
1212					ring_idx, idx);
1213				goto rcd_done;
1214			}
1215
1216			skip_page_frags = false;
1217			ctx->skb = rbi->skb;
1218			new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1219							    rbi->len);
1220			if (new_skb == NULL) {
1221				/* Skb allocation failed, do not hand over this
1222				 * skb to the stack. Reuse it. Drop the existing pkt.
1223				 */
1224				rq->stats.rx_buf_alloc_failure++;
1225				ctx->skb = NULL;
1226				rq->stats.drop_total++;
1227				skip_page_frags = true;
1228				goto rcd_done;
1229			}
1230
1231			dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
1232					 rbi->len,
1233					 PCI_DMA_FROMDEVICE);
1234
1235#ifdef VMXNET3_RSS
1236			if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1237			    (adapter->netdev->features & NETIF_F_RXHASH))
1238				skb_set_hash(ctx->skb,
1239					     le32_to_cpu(rcd->rssHash),
1240					     PKT_HASH_TYPE_L3);
1241#endif
1242			skb_put(ctx->skb, rcd->len);
1243
1244			/* Immediate refill */
1245			rbi->skb = new_skb;
1246			rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
1247						       rbi->skb->data, rbi->len,
1248						       PCI_DMA_FROMDEVICE);
1249			rxd->addr = cpu_to_le64(rbi->dma_addr);
1250			rxd->len = rbi->len;
1251
1252		} else {
1253			BUG_ON(ctx->skb == NULL && !skip_page_frags);
1254
1255			/* non SOP buffer must be type 1 in most cases */
1256			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1257			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1258
1259			/* If an sop buffer was dropped, skip all
1260			 * following non-sop fragments. They will be reused.
1261			 */
1262			if (skip_page_frags)
1263				goto rcd_done;
1264
1265			new_page = alloc_page(GFP_ATOMIC);
1266			if (unlikely(new_page == NULL)) {
1267				/* Replacement page frag could not be allocated.
1268				 * Reuse this page. Drop the pkt and free the
1269				 * skb which contained this page as a frag. Skip
1270				 * processing all the following non-sop frags.
1271				 */
1272				rq->stats.rx_buf_alloc_failure++;
1273				dev_kfree_skb(ctx->skb);
1274				ctx->skb = NULL;
1275				skip_page_frags = true;
1276				goto rcd_done;
1277			}
1278
1279			if (rcd->len) {
1280				dma_unmap_page(&adapter->pdev->dev,
1281					       rbi->dma_addr, rbi->len,
1282					       PCI_DMA_FROMDEVICE);
1283
1284				vmxnet3_append_frag(ctx->skb, rcd, rbi);
1285			}
1286
1287			/* Immediate refill */
1288			rbi->page = new_page;
1289			rbi->dma_addr = dma_map_page(&adapter->pdev->dev,
1290						     rbi->page,
1291						     0, PAGE_SIZE,
1292						     PCI_DMA_FROMDEVICE);
1293			rxd->addr = cpu_to_le64(rbi->dma_addr);
1294			rxd->len = rbi->len;
1295		}
1296
1297
1298		skb = ctx->skb;
1299		if (rcd->eop) {
1300			skb->len += skb->data_len;
1301
1302			vmxnet3_rx_csum(adapter, skb,
1303					(union Vmxnet3_GenericDesc *)rcd);
1304			skb->protocol = eth_type_trans(skb, adapter->netdev);
1305
1306			if (unlikely(rcd->ts))
1307				__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1308
1309			if (adapter->netdev->features & NETIF_F_LRO)
1310				netif_receive_skb(skb);
1311			else
1312				napi_gro_receive(&rq->napi, skb);
1313
1314			ctx->skb = NULL;
1315		}
1316
1317rcd_done:
1318		/* device may have skipped some rx descs */
1319		ring->next2comp = idx;
1320		num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1321		ring = rq->rx_ring + ring_idx;
1322		while (num_to_alloc) {
1323			vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1324					  &rxCmdDesc);
1325			BUG_ON(!rxd->addr);
1326
1327			/* Recv desc is ready to be used by the device */
1328			rxd->gen = ring->gen;
1329			vmxnet3_cmd_ring_adv_next2fill(ring);
1330			num_to_alloc--;
1331		}
1332
1333		/* if needed, update the register */
1334		if (unlikely(rq->shared->updateRxProd)) {
1335			VMXNET3_WRITE_BAR0_REG(adapter,
1336					       rxprod_reg[ring_idx] + rq->qid * 8,
1337					       ring->next2fill);
1338		}
1339
1340		vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1341		vmxnet3_getRxComp(rcd,
1342				  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1343	}
1344
1345	return num_rxd;
1346}
1347
1348
1349static void
1350vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1351		   struct vmxnet3_adapter *adapter)
1352{
1353	u32 i, ring_idx;
1354	struct Vmxnet3_RxDesc *rxd;
1355
1356	for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1357		for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1358#ifdef __BIG_ENDIAN_BITFIELD
1359			struct Vmxnet3_RxDesc rxDesc;
1360#endif
1361			vmxnet3_getRxDesc(rxd,
1362				&rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1363
1364			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1365					rq->buf_info[ring_idx][i].skb) {
1366				dma_unmap_single(&adapter->pdev->dev, rxd->addr,
1367						 rxd->len, PCI_DMA_FROMDEVICE);
1368				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1369				rq->buf_info[ring_idx][i].skb = NULL;
1370			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1371					rq->buf_info[ring_idx][i].page) {
1372				dma_unmap_page(&adapter->pdev->dev, rxd->addr,
1373					       rxd->len, PCI_DMA_FROMDEVICE);
1374				put_page(rq->buf_info[ring_idx][i].page);
1375				rq->buf_info[ring_idx][i].page = NULL;
1376			}
1377		}
1378
1379		rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1380		rq->rx_ring[ring_idx].next2fill =
1381					rq->rx_ring[ring_idx].next2comp = 0;
1382	}
1383
1384	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1385	rq->comp_ring.next2proc = 0;
1386}
1387
1388
1389static void
1390vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1391{
1392	int i;
1393
1394	for (i = 0; i < adapter->num_rx_queues; i++)
1395		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1396}
1397
1398
1399static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1400			       struct vmxnet3_adapter *adapter)
1401{
1402	int i;
1403	int j;
1404
1405	/* all rx buffers must have already been freed */
1406	for (i = 0; i < 2; i++) {
1407		if (rq->buf_info[i]) {
1408			for (j = 0; j < rq->rx_ring[i].size; j++)
1409				BUG_ON(rq->buf_info[i][j].page != NULL);
1410		}
1411	}
1412
1413
1414	for (i = 0; i < 2; i++) {
1415		if (rq->rx_ring[i].base) {
1416			dma_free_coherent(&adapter->pdev->dev,
1417					  rq->rx_ring[i].size
1418					  * sizeof(struct Vmxnet3_RxDesc),
1419					  rq->rx_ring[i].base,
1420					  rq->rx_ring[i].basePA);
1421			rq->rx_ring[i].base = NULL;
1422		}
1423		rq->buf_info[i] = NULL;
1424	}
1425
1426	if (rq->comp_ring.base) {
1427		dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
1428				  * sizeof(struct Vmxnet3_RxCompDesc),
1429				  rq->comp_ring.base, rq->comp_ring.basePA);
1430		rq->comp_ring.base = NULL;
1431	}
1432
1433	if (rq->buf_info[0]) {
1434		size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
1435			(rq->rx_ring[0].size + rq->rx_ring[1].size);
1436		dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
1437				  rq->buf_info_pa);
1438	}
1439}
1440
1441
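/*
 * Reset a rx queue: set up buf_info (skb buffers for pkt heads, page
 * buffers for the rest), clear both rx rings and the comp ring, and
 * pre-fill the rx rings with buffers.
 */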
1442static int
1443vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1444		struct vmxnet3_adapter  *adapter)
1445{
1446	int i;
1447
1448	/* initialize buf_info */
1449	for (i = 0; i < rq->rx_ring[0].size; i++) {
1450
1451		/* 1st buf for a pkt is skbuff */
1452		if (i % adapter->rx_buf_per_pkt == 0) {
1453			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1454			rq->buf_info[0][i].len = adapter->skb_buf_size;
1455		} else { /* subsequent bufs for a pkt are frags */
1456			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1457			rq->buf_info[0][i].len = PAGE_SIZE;
1458		}
1459	}
1460	for (i = 0; i < rq->rx_ring[1].size; i++) {
1461		rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1462		rq->buf_info[1][i].len = PAGE_SIZE;
1463	}
1464
1465	/* reset internal state and allocate buffers for both rings */
1466	for (i = 0; i < 2; i++) {
1467		rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1468
1469		memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1470		       sizeof(struct Vmxnet3_RxDesc));
1471		rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1472	}
1473	if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1474				    adapter) == 0) {
1475		/* need at least 1 rx buffer for the 1st ring */
1476		return -ENOMEM;
1477	}
1478	vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1479
1480	/* reset the comp ring */
1481	rq->comp_ring.next2proc = 0;
1482	memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1483	       sizeof(struct Vmxnet3_RxCompDesc));
1484	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1485
1486	/* reset rxctx */
1487	rq->rx_ctx.skb = NULL;
1488
1489	/* stats are not reset */
1490	return 0;
1491}
1492
1493
1494static int
1495vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1496{
1497	int i, err = 0;
1498
1499	for (i = 0; i < adapter->num_rx_queues; i++) {
1500		err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1501		if (unlikely(err)) {
1502			dev_err(&adapter->netdev->dev, "%s: failed to "
1503				"initialize rx queue%i\n",
1504				adapter->netdev->name, i);
1505			break;
1506		}
1507	}
1508	return err;
1509
1510}
1511
1512
1513static int
1514vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1515{
1516	int i;
1517	size_t sz;
1518	struct vmxnet3_rx_buf_info *bi;
1519
1520	for (i = 0; i < 2; i++) {
1521
1522		sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1523		rq->rx_ring[i].base = dma_alloc_coherent(
1524						&adapter->pdev->dev, sz,
1525						&rq->rx_ring[i].basePA,
1526						GFP_KERNEL);
1527		if (!rq->rx_ring[i].base) {
1528			netdev_err(adapter->netdev,
1529				   "failed to allocate rx ring %d\n", i);
1530			goto err;
1531		}
1532	}
1533
1534	sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1535	rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
1536						&rq->comp_ring.basePA,
1537						GFP_KERNEL);
1538	if (!rq->comp_ring.base) {
1539		netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1540		goto err;
1541	}
1542
1543	sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1544						   rq->rx_ring[1].size);
1545	bi = dma_zalloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
1546				 GFP_KERNEL);
1547	if (!bi)
1548		goto err;
1549
1550	rq->buf_info[0] = bi;
1551	rq->buf_info[1] = bi + rq->rx_ring[0].size;
1552
1553	return 0;
1554
1555err:
1556	vmxnet3_rq_destroy(rq, adapter);
1557	return -ENOMEM;
1558}
1559
1560
1561static int
1562vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1563{
1564	int i, err = 0;
1565
1566	for (i = 0; i < adapter->num_rx_queues; i++) {
1567		err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1568		if (unlikely(err)) {
1569			dev_err(&adapter->netdev->dev,
1570				"%s: failed to create rx queue%i\n",
1571				adapter->netdev->name, i);
1572			goto err_out;
1573		}
1574	}
1575	return err;
1576err_out:
1577	vmxnet3_rq_destroy_all(adapter);
1578	return err;
1579
1580}
1581
1582/* Multiple queue aware polling function for tx and rx */
1583
1584static int
1585vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1586{
1587	int rcd_done = 0, i;
1588	if (unlikely(adapter->shared->ecr))
1589		vmxnet3_process_events(adapter);
1590	for (i = 0; i < adapter->num_tx_queues; i++)
1591		vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1592
1593	for (i = 0; i < adapter->num_rx_queues; i++)
1594		rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1595						   adapter, budget);
1596	return rcd_done;
1597}
1598
1599
1600static int
1601vmxnet3_poll(struct napi_struct *napi, int budget)
1602{
1603	struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1604					  struct vmxnet3_rx_queue, napi);
1605	int rxd_done;
1606
1607	rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1608
1609	if (rxd_done < budget) {
1610		napi_complete(napi);
1611		vmxnet3_enable_all_intrs(rx_queue->adapter);
1612	}
1613	return rxd_done;
1614}
1615
1616/*
1617 * NAPI polling function for MSI-X mode with multiple Rx queues
1618 * Returns the # of NAPI credits consumed (# of rx descriptors processed)
1619 */
1620
1621static int
1622vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1623{
1624	struct vmxnet3_rx_queue *rq = container_of(napi,
1625						struct vmxnet3_rx_queue, napi);
1626	struct vmxnet3_adapter *adapter = rq->adapter;
1627	int rxd_done;
1628
1629	/* When sharing interrupt with corresponding tx queue, process
1630	 * tx completions in that queue as well
1631	 */
1632	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1633		struct vmxnet3_tx_queue *tq =
1634				&adapter->tx_queue[rq - adapter->rx_queue];
1635		vmxnet3_tq_tx_complete(tq, adapter);
1636	}
1637
1638	rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1639
1640	if (rxd_done < budget) {
1641		napi_complete(napi);
1642		vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1643	}
1644	return rxd_done;
1645}
1646
1647
1648#ifdef CONFIG_PCI_MSI
1649
1650/*
1651 * Handle completion interrupts on tx queues
1652 * Returns whether or not the intr is handled
1653 */
1654
1655static irqreturn_t
1656vmxnet3_msix_tx(int irq, void *data)
1657{
1658	struct vmxnet3_tx_queue *tq = data;
1659	struct vmxnet3_adapter *adapter = tq->adapter;
1660
1661	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1662		vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1663
1664	/* Handle the case where only one irq is allocated for all tx queues */
1665	if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1666		int i;
1667		for (i = 0; i < adapter->num_tx_queues; i++) {
1668			struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1669			vmxnet3_tq_tx_complete(txq, adapter);
1670		}
1671	} else {
1672		vmxnet3_tq_tx_complete(tq, adapter);
1673	}
1674	vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1675
1676	return IRQ_HANDLED;
1677}
1678
1679
1680/*
1681 * Handle completion interrupts on rx queues. Returns whether or not the
1682 * intr is handled
1683 */
1684
1685static irqreturn_t
1686vmxnet3_msix_rx(int irq, void *data)
1687{
1688	struct vmxnet3_rx_queue *rq = data;
1689	struct vmxnet3_adapter *adapter = rq->adapter;
1690
1691	/* disable intr if needed */
1692	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1693		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1694	napi_schedule(&rq->napi);
1695
1696	return IRQ_HANDLED;
1697}
1698
1699/*
1700 * vmxnet3 msix event intr handler
1701 * Returns whether or not the intr is handled
1702 */
1711
1712static irqreturn_t
1713vmxnet3_msix_event(int irq, void *data)
1714{
1715	struct net_device *dev = data;
1716	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1717
1718	/* disable intr if needed */
1719	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1720		vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1721
1722	if (adapter->shared->ecr)
1723		vmxnet3_process_events(adapter);
1724
1725	vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1726
1727	return IRQ_HANDLED;
1728}
1729
1730#endif /* CONFIG_PCI_MSI  */
1731
1732
1733/* Interrupt handler for vmxnet3  */
1734static irqreturn_t
1735vmxnet3_intr(int irq, void *dev_id)
1736{
1737	struct net_device *dev = dev_id;
1738	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1739
1740	if (adapter->intr.type == VMXNET3_IT_INTX) {
1741		u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1742		if (unlikely(icr == 0))
1743			/* not ours */
1744			return IRQ_NONE;
1745	}
1746
1747
1748	/* disable intr if needed */
1749	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1750		vmxnet3_disable_all_intrs(adapter);
1751
1752	napi_schedule(&adapter->rx_queue[0].napi);
1753
1754	return IRQ_HANDLED;
1755}
1756
1757#ifdef CONFIG_NET_POLL_CONTROLLER
1758
1759/* netpoll callback. */
1760static void
1761vmxnet3_netpoll(struct net_device *netdev)
1762{
1763	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1764
1765	switch (adapter->intr.type) {
1766#ifdef CONFIG_PCI_MSI
1767	case VMXNET3_IT_MSIX: {
1768		int i;
1769		for (i = 0; i < adapter->num_rx_queues; i++)
1770			vmxnet3_msix_rx(0, &adapter->rx_queue[i]);
1771		break;
1772	}
1773#endif
1774	case VMXNET3_IT_MSI:
1775	default:
1776		vmxnet3_intr(0, adapter->netdev);
1777		break;
1778	}
1779
1780}
1781#endif	/* CONFIG_NET_POLL_CONTROLLER */
1782
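/*
 * Request irqs according to the configured interrupt type: per-queue (or
 * shared, depending on share_intr) MSI-X vectors plus an event vector, or
 * a single MSI/INTx interrupt, then record the vector assigned to each
 * completion ring.
 */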
1783static int
1784vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1785{
1786	struct vmxnet3_intr *intr = &adapter->intr;
1787	int err = 0, i;
1788	int vector = 0;
1789
1790#ifdef CONFIG_PCI_MSI
1791	if (adapter->intr.type == VMXNET3_IT_MSIX) {
1792		for (i = 0; i < adapter->num_tx_queues; i++) {
1793			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1794				sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1795					adapter->netdev->name, vector);
1796				err = request_irq(
1797					      intr->msix_entries[vector].vector,
1798					      vmxnet3_msix_tx, 0,
1799					      adapter->tx_queue[i].name,
1800					      &adapter->tx_queue[i]);
1801			} else {
1802				sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1803					adapter->netdev->name, vector);
1804			}
1805			if (err) {
1806				dev_err(&adapter->netdev->dev,
1807					"Failed to request irq for MSIX, %s, "
1808					"error %d\n",
1809					adapter->tx_queue[i].name, err);
1810				return err;
1811			}
1812
1813			/* Handle the case where only 1 MSIx was allocated for
1814			 * all tx queues */
1815			if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1816				for (; i < adapter->num_tx_queues; i++)
1817					adapter->tx_queue[i].comp_ring.intr_idx
1818								= vector;
1819				vector++;
1820				break;
1821			} else {
1822				adapter->tx_queue[i].comp_ring.intr_idx
1823								= vector++;
1824			}
1825		}
1826		if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1827			vector = 0;
1828
1829		for (i = 0; i < adapter->num_rx_queues; i++) {
1830			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1831				sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1832					adapter->netdev->name, vector);
1833			else
1834				sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1835					adapter->netdev->name, vector);
1836			err = request_irq(intr->msix_entries[vector].vector,
1837					  vmxnet3_msix_rx, 0,
1838					  adapter->rx_queue[i].name,
1839					  &(adapter->rx_queue[i]));
1840			if (err) {
1841				netdev_err(adapter->netdev,
1842					   "Failed to request irq for MSIX, "
1843					   "%s, error %d\n",
1844					   adapter->rx_queue[i].name, err);
1845				return err;
1846			}
1847
1848			adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1849		}
1850
1851		sprintf(intr->event_msi_vector_name, "%s-event-%d",
1852			adapter->netdev->name, vector);
1853		err = request_irq(intr->msix_entries[vector].vector,
1854				  vmxnet3_msix_event, 0,
1855				  intr->event_msi_vector_name, adapter->netdev);
1856		intr->event_intr_idx = vector;
1857
1858	} else if (intr->type == VMXNET3_IT_MSI) {
1859		adapter->num_rx_queues = 1;
1860		err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1861				  adapter->netdev->name, adapter->netdev);
1862	} else {
1863#endif
1864		adapter->num_rx_queues = 1;
1865		err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1866				  IRQF_SHARED, adapter->netdev->name,
1867				  adapter->netdev);
1868#ifdef CONFIG_PCI_MSI
1869	}
1870#endif
1871	intr->num_intrs = vector + 1;
1872	if (err) {
1873		netdev_err(adapter->netdev,
1874			   "Failed to request irq (intr type:%d), error %d\n",
1875			   intr->type, err);
1876	} else {
1877		/* Number of rx queues will not change after this */
1878		for (i = 0; i < adapter->num_rx_queues; i++) {
1879			struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1880			rq->qid = i;
1881			rq->qid2 = i + adapter->num_rx_queues;
1882		}
1883
1884
1885
1886		/* init our intr settings */
1887		for (i = 0; i < intr->num_intrs; i++)
1888			intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1889		if (adapter->intr.type != VMXNET3_IT_MSIX) {
1890			adapter->intr.event_intr_idx = 0;
1891			for (i = 0; i < adapter->num_tx_queues; i++)
1892				adapter->tx_queue[i].comp_ring.intr_idx = 0;
1893			adapter->rx_queue[0].comp_ring.intr_idx = 0;
1894		}
1895
1896		netdev_info(adapter->netdev,
1897			    "intr type %u, mode %u, %u vectors allocated\n",
1898			    intr->type, intr->mask_mode, intr->num_intrs);
1899	}
1900
1901	return err;
1902}
1903
1904
1905static void
1906vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1907{
1908	struct vmxnet3_intr *intr = &adapter->intr;
1909	BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1910
1911	switch (intr->type) {
1912#ifdef CONFIG_PCI_MSI
1913	case VMXNET3_IT_MSIX:
1914	{
1915		int i, vector = 0;
1916
1917		if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1918			for (i = 0; i < adapter->num_tx_queues; i++) {
1919				free_irq(intr->msix_entries[vector++].vector,
1920					 &(adapter->tx_queue[i]));
1921				if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1922					break;
1923			}
1924		}
1925
1926		for (i = 0; i < adapter->num_rx_queues; i++) {
1927			free_irq(intr->msix_entries[vector++].vector,
1928				 &(adapter->rx_queue[i]));
1929		}
1930
1931		free_irq(intr->msix_entries[vector].vector,
1932			 adapter->netdev);
1933		BUG_ON(vector >= intr->num_intrs);
1934		break;
1935	}
1936#endif
1937	case VMXNET3_IT_MSI:
1938		free_irq(adapter->pdev->irq, adapter->netdev);
1939		break;
1940	case VMXNET3_IT_INTX:
1941		free_irq(adapter->pdev->irq, adapter->netdev);
1942		break;
1943	default:
1944		BUG();
1945	}
1946}
1947
1948
1949static void
1950vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1951{
1952	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1953	u16 vid;
1954
1955	/* allow untagged pkts */
1956	VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1957
1958	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1959		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1960}
1961
1962
1963static int
1964vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1965{
1966	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1967
1968	if (!(netdev->flags & IFF_PROMISC)) {
1969		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1970		unsigned long flags;
1971
1972		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1973		spin_lock_irqsave(&adapter->cmd_lock, flags);
1974		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1975				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1976		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1977	}
1978
1979	set_bit(vid, adapter->active_vlans);
1980
1981	return 0;
1982}
1983
1984
1985static int
1986vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1987{
1988	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1989
1990	if (!(netdev->flags & IFF_PROMISC)) {
1991		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1992		unsigned long flags;
1993
1994		VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1995		spin_lock_irqsave(&adapter->cmd_lock, flags);
1996		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1997				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1998		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1999	}
2000
2001	clear_bit(vid, adapter->active_vlans);
2002
2003	return 0;
2004}
2005
2006
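/*
 * Copy the netdev multicast list into a flat buffer that can be handed to
 * the device. Returns NULL if the list is too large or allocation fails.
 */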
2007static u8 *
2008vmxnet3_copy_mc(struct net_device *netdev)
2009{
2010	u8 *buf = NULL;
2011	u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
2012
2013	/* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
2014	if (sz <= 0xffff) {
2015		/* We may be called with BH disabled */
2016		buf = kmalloc(sz, GFP_ATOMIC);
2017		if (buf) {
2018			struct netdev_hw_addr *ha;
2019			int i = 0;
2020
2021			netdev_for_each_mc_addr(ha, netdev)
2022				memcpy(buf + i++ * ETH_ALEN, ha->addr,
2023				       ETH_ALEN);
2024		}
2025	}
2026	return buf;
2027}
2028
2029
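/*
 * ndo_set_rx_mode handler: program unicast/broadcast/multicast/promiscuous
 * filtering into the shared area and tell the device to pick it up.
 */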
2030static void
2031vmxnet3_set_mc(struct net_device *netdev)
2032{
2033	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2034	unsigned long flags;
2035	struct Vmxnet3_RxFilterConf *rxConf =
2036					&adapter->shared->devRead.rxFilterConf;
2037	u8 *new_table = NULL;
2038	dma_addr_t new_table_pa = 0;
2039	u32 new_mode = VMXNET3_RXM_UCAST;
2040
2041	if (netdev->flags & IFF_PROMISC) {
2042		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2043		memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2044
2045		new_mode |= VMXNET3_RXM_PROMISC;
2046	} else {
2047		vmxnet3_restore_vlan(adapter);
2048	}
2049
2050	if (netdev->flags & IFF_BROADCAST)
2051		new_mode |= VMXNET3_RXM_BCAST;
2052
2053	if (netdev->flags & IFF_ALLMULTI)
2054		new_mode |= VMXNET3_RXM_ALL_MULTI;
2055	else
2056		if (!netdev_mc_empty(netdev)) {
2057			new_table = vmxnet3_copy_mc(netdev);
2058			if (new_table) {
2059				new_mode |= VMXNET3_RXM_MCAST;
2060				rxConf->mfTableLen = cpu_to_le16(
2061					netdev_mc_count(netdev) * ETH_ALEN);
2062				new_table_pa = dma_map_single(
2063							&adapter->pdev->dev,
2064							new_table,
2065							rxConf->mfTableLen,
2066							PCI_DMA_TODEVICE);
2067				rxConf->mfTablePA = cpu_to_le64(new_table_pa);
2068			} else {
2069				netdev_info(netdev, "failed to copy mcast list"
2070					    ", setting ALL_MULTI\n");
2071				new_mode |= VMXNET3_RXM_ALL_MULTI;
2072			}
2073		}
2074
2075
2076	if (!(new_mode & VMXNET3_RXM_MCAST)) {
2077		rxConf->mfTableLen = 0;
2078		rxConf->mfTablePA = 0;
2079	}
2080
2081	spin_lock_irqsave(&adapter->cmd_lock, flags);
2082	if (new_mode != rxConf->rxMode) {
2083		rxConf->rxMode = cpu_to_le32(new_mode);
2084		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2085				       VMXNET3_CMD_UPDATE_RX_MODE);
2086		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2087				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2088	}
2089
2090	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2091			       VMXNET3_CMD_UPDATE_MAC_FILTERS);
2092	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2093
2094	if (new_table) {
2095		dma_unmap_single(&adapter->pdev->dev, new_table_pa,
2096				 rxConf->mfTableLen, PCI_DMA_TODEVICE);
2097		kfree(new_table);
2098	}
2099}
2100
2101void
2102vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2103{
2104	int i;
2105
2106	for (i = 0; i < adapter->num_rx_queues; i++)
2107		vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2108}
2109
2110
2111/*
2112 *   Set up driver_shared based on settings in adapter.
2113 */
2114
2115static void
2116vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2117{
2118	struct Vmxnet3_DriverShared *shared = adapter->shared;
2119	struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2120	struct Vmxnet3_TxQueueConf *tqc;
2121	struct Vmxnet3_RxQueueConf *rqc;
2122	int i;
2123
2124	memset(shared, 0, sizeof(*shared));
2125
2126	/* driver settings */
2127	shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2128	devRead->misc.driverInfo.version = cpu_to_le32(
2129						VMXNET3_DRIVER_VERSION_NUM);
2130	devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2131				VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2132	devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
2133	*((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2134				*((u32 *)&devRead->misc.driverInfo.gos));
2135	devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2136	devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2137
2138	devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
2139	devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2140
2141	/* set up feature flags */
2142	if (adapter->netdev->features & NETIF_F_RXCSUM)
2143		devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2144
2145	if (adapter->netdev->features & NETIF_F_LRO) {
2146		devRead->misc.uptFeatures |= UPT1_F_LRO;
2147		devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2148	}
2149	if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2150		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2151
2152	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2153	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2154	devRead->misc.queueDescLen = cpu_to_le32(
2155		adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2156		adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2157
2158	/* tx queue settings */
2159	devRead->misc.numTxQueues =  adapter->num_tx_queues;
2160	for (i = 0; i < adapter->num_tx_queues; i++) {
2161		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2162		BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2163		tqc = &adapter->tqd_start[i].conf;
2164		tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2165		tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2166		tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2167		tqc->ddPA           = cpu_to_le64(tq->buf_info_pa);
2168		tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2169		tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2170		tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2171		tqc->ddLen          = cpu_to_le32(
2172					sizeof(struct vmxnet3_tx_buf_info) *
2173					tqc->txRingSize);
2174		tqc->intrIdx        = tq->comp_ring.intr_idx;
2175	}
2176
2177	/* rx queue settings */
2178	devRead->misc.numRxQueues = adapter->num_rx_queues;
2179	for (i = 0; i < adapter->num_rx_queues; i++) {
2180		struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[i];
2181		rqc = &adapter->rqd_start[i].conf;
2182		rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2183		rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2184		rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2185		rqc->ddPA            = cpu_to_le64(rq->buf_info_pa);
2186		rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2187		rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2188		rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2189		rqc->ddLen           = cpu_to_le32(
2190					sizeof(struct vmxnet3_rx_buf_info) *
2191					(rqc->rxRingSize[0] +
2192					 rqc->rxRingSize[1]));
2193		rqc->intrIdx         = rq->comp_ring.intr_idx;
2194	}
2195
2196#ifdef VMXNET3_RSS
2197	memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2198
2199	if (adapter->rss) {
2200		struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2201		static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2202			0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2203			0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2204			0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2205			0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2206			0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2207		};
2208
2209		devRead->misc.uptFeatures |= UPT1_F_RSS;
2210		devRead->misc.numRxQueues = adapter->num_rx_queues;
2211		rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2212				    UPT1_RSS_HASH_TYPE_IPV4 |
2213				    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2214				    UPT1_RSS_HASH_TYPE_IPV6;
2215		rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2216		rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2217		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2218		memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2219
2220		for (i = 0; i < rssConf->indTableSize; i++)
2221			rssConf->indTable[i] = ethtool_rxfh_indir_default(
2222				i, adapter->num_rx_queues);
2223
2224		devRead->rssConfDesc.confVer = 1;
2225		devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2226		devRead->rssConfDesc.confPA =
2227			cpu_to_le64(adapter->rss_conf_pa);
2228	}
2229
2230#endif /* VMXNET3_RSS */
2231
2232	/* intr settings */
2233	devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2234				     VMXNET3_IMM_AUTO;
2235	devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2236	for (i = 0; i < adapter->intr.num_intrs; i++)
2237		devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2238
2239	devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2240	devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2241
2242	/* rx filter settings */
2243	devRead->rxFilterConf.rxMode = 0;
2244	vmxnet3_restore_vlan(adapter);
2245	vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2246
2247	/* the rest are already zeroed */
2248}
2249
2250
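/*
 * Bring the device up: init the queues, request irqs, publish driver_shared,
 * issue ACTIVATE_DEV and prime the rx rings before enabling interrupts.
 */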
2251int
2252vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2253{
2254	int err, i;
2255	u32 ret;
2256	unsigned long flags;
2257
2258	netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2259		" ring sizes %u %u %u\n", adapter->netdev->name,
2260		adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2261		adapter->tx_queue[0].tx_ring.size,
2262		adapter->rx_queue[0].rx_ring[0].size,
2263		adapter->rx_queue[0].rx_ring[1].size);
2264
2265	vmxnet3_tq_init_all(adapter);
2266	err = vmxnet3_rq_init_all(adapter);
2267	if (err) {
2268		netdev_err(adapter->netdev,
2269			   "Failed to init rx queue: error %d\n", err);
2270		goto rq_err;
2271	}
2272
2273	err = vmxnet3_request_irqs(adapter);
2274	if (err) {
2275		netdev_err(adapter->netdev,
2276			   "Failed to set up irqs: error %d\n", err);
2277		goto irq_err;
2278	}
2279
2280	vmxnet3_setup_driver_shared(adapter);
2281
2282	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2283			       adapter->shared_pa));
2284	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2285			       adapter->shared_pa));
2286	spin_lock_irqsave(&adapter->cmd_lock, flags);
2287	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2288			       VMXNET3_CMD_ACTIVATE_DEV);
2289	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2290	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2291
2292	if (ret != 0) {
2293		netdev_err(adapter->netdev,
2294			   "Failed to activate dev: error %u\n", ret);
2295		err = -EINVAL;
2296		goto activate_err;
2297	}
2298
2299	for (i = 0; i < adapter->num_rx_queues; i++) {
2300		VMXNET3_WRITE_BAR0_REG(adapter,
2301				VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2302				adapter->rx_queue[i].rx_ring[0].next2fill);
2303		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2304				(i * VMXNET3_REG_ALIGN)),
2305				adapter->rx_queue[i].rx_ring[1].next2fill);
2306	}
2307
2308	/* Apply the rx filter settings last. */
2309	vmxnet3_set_mc(adapter->netdev);
2310
2311	/*
2312	 * Check link state when first activating device. It will start the
2313	 * tx queue if the link is up.
2314	 */
2315	vmxnet3_check_link(adapter, true);
2316	for (i = 0; i < adapter->num_rx_queues; i++)
2317		napi_enable(&adapter->rx_queue[i].napi);
2318	vmxnet3_enable_all_intrs(adapter);
2319	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2320	return 0;
2321
2322activate_err:
2323	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2324	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2325	vmxnet3_free_irqs(adapter);
2326irq_err:
2327rq_err:
2328	/* free up buffers we allocated */
2329	vmxnet3_rq_cleanup_all(adapter);
2330	return err;
2331}
2332
2333
2334void
2335vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2336{
2337	unsigned long flags;
2338	spin_lock_irqsave(&adapter->cmd_lock, flags);
2339	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2340	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2341}
2342
2343
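/*
 * Quiesce the device: stop tx/rx, disable interrupts and free the irqs.
 * Calling it again while already quiesced is a no-op.
 */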
2344int
2345vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2346{
2347	int i;
2348	unsigned long flags;
2349	if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2350		return 0;
2351
2352
2353	spin_lock_irqsave(&adapter->cmd_lock, flags);
2354	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2355			       VMXNET3_CMD_QUIESCE_DEV);
2356	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2357	vmxnet3_disable_all_intrs(adapter);
2358
2359	for (i = 0; i < adapter->num_rx_queues; i++)
2360		napi_disable(&adapter->rx_queue[i].napi);
2361	netif_tx_disable(adapter->netdev);
2362	adapter->link_speed = 0;
2363	netif_carrier_off(adapter->netdev);
2364
2365	vmxnet3_tq_cleanup_all(adapter);
2366	vmxnet3_rq_cleanup_all(adapter);
2367	vmxnet3_free_irqs(adapter);
2368	return 0;
2369}
2370
2371
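/* Write the MAC address to the device: low 4 bytes to MACL, high 2 to MACH. */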
2372static void
2373vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2374{
2375	u32 tmp;
2376
2377	tmp = *(u32 *)mac;
2378	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2379
2380	tmp = (mac[5] << 8) | mac[4];
2381	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2382}
2383
2384
2385static int
2386vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2387{
2388	struct sockaddr *addr = p;
2389	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2390
2391	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2392	vmxnet3_write_mac_addr(adapter, addr->sa_data);
2393
2394	return 0;
2395}
2396
2397
2398/* ==================== initialization and cleanup routines ============ */
2399
2400static int
2401vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2402{
2403	int err;
2404	unsigned long mmio_start, mmio_len;
2405	struct pci_dev *pdev = adapter->pdev;
2406
2407	err = pci_enable_device(pdev);
2408	if (err) {
2409		dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2410		return err;
2411	}
2412
2413	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2414		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2415			dev_err(&pdev->dev,
2416				"pci_set_consistent_dma_mask failed\n");
2417			err = -EIO;
2418			goto err_set_mask;
2419		}
2420		*dma64 = true;
2421	} else {
2422		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2423			dev_err(&pdev->dev,
2424				"pci_set_dma_mask failed\n");
2425			err = -EIO;
2426			goto err_set_mask;
2427		}
2428		*dma64 = false;
2429	}
2430
2431	err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2432					   vmxnet3_driver_name);
2433	if (err) {
2434		dev_err(&pdev->dev,
2435			"Failed to request region for adapter: error %d\n", err);
2436		goto err_set_mask;
2437	}
2438
2439	pci_set_master(pdev);
2440
2441	mmio_start = pci_resource_start(pdev, 0);
2442	mmio_len = pci_resource_len(pdev, 0);
2443	adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2444	if (!adapter->hw_addr0) {
2445		dev_err(&pdev->dev, "Failed to map bar0\n");
2446		err = -EIO;
2447		goto err_ioremap;
2448	}
2449
2450	mmio_start = pci_resource_start(pdev, 1);
2451	mmio_len = pci_resource_len(pdev, 1);
2452	adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2453	if (!adapter->hw_addr1) {
2454		dev_err(&pdev->dev, "Failed to map bar1\n");
2455		err = -EIO;
2456		goto err_bar1;
2457	}
2458	return 0;
2459
2460err_bar1:
2461	iounmap(adapter->hw_addr0);
2462err_ioremap:
2463	pci_release_selected_regions(pdev, (1 << 2) - 1);
2464err_set_mask:
2465	pci_disable_device(pdev);
2466	return err;
2467}
2468
2469
2470static void
2471vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2472{
2473	BUG_ON(!adapter->pdev);
2474
2475	iounmap(adapter->hw_addr0);
2476	iounmap(adapter->hw_addr1);
2477	pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2478	pci_disable_device(adapter->pdev);
2479}
2480
2481
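/*
 * Pick the rx buffer size and buffers-per-packet from the MTU, then round
 * ring0's size to a multiple of rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN.
 */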
2482static void
2483vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2484{
2485	size_t sz, i, ring0_size, ring1_size, comp_size;
2486	struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[0];
2487
2488
2489	if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2490				    VMXNET3_MAX_ETH_HDR_SIZE) {
2491		adapter->skb_buf_size = adapter->netdev->mtu +
2492					VMXNET3_MAX_ETH_HDR_SIZE;
2493		if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2494			adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2495
2496		adapter->rx_buf_per_pkt = 1;
2497	} else {
2498		adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2499		sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2500					    VMXNET3_MAX_ETH_HDR_SIZE;
2501		adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2502	}
2503
2504	/*
2505	 * for simplicity, force the ring0 size to be a multiple of
2506	 * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2507	 */
2508	sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2509	ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2510	ring0_size = (ring0_size + sz - 1) / sz * sz;
2511	ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2512			   sz * sz);
2513	ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2514	comp_size = ring0_size + ring1_size;
2515
2516	for (i = 0; i < adapter->num_rx_queues; i++) {
2517		rq = &adapter->rx_queue[i];
2518		rq->rx_ring[0].size = ring0_size;
2519		rq->rx_ring[1].size = ring1_size;
2520		rq->comp_ring.size = comp_size;
2521	}
2522}
2523
2524
2525int
2526vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2527		      u32 rx_ring_size, u32 rx_ring2_size)
2528{
2529	int err = 0, i;
2530
2531	for (i = 0; i < adapter->num_tx_queues; i++) {
2532		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2533		tq->tx_ring.size   = tx_ring_size;
2534		tq->data_ring.size = tx_ring_size;
2535		tq->comp_ring.size = tx_ring_size;
2536		tq->shared = &adapter->tqd_start[i].ctrl;
2537		tq->stopped = true;
2538		tq->adapter = adapter;
2539		tq->qid = i;
2540		err = vmxnet3_tq_create(tq, adapter);
2541		/*
2542		 * Too late to change num_tx_queues. We cannot make do with
2543		 * fewer queues than we asked for
2544		 */
2545		if (err)
2546			goto queue_err;
2547	}
2548
2549	adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2550	adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2551	vmxnet3_adjust_rx_ring_size(adapter);
2552	for (i = 0; i < adapter->num_rx_queues; i++) {
2553		struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2554		/* qid and qid2 for rx queues will be assigned later, once the
2555		 * number of rx queues is finalized after allocating intrs */
2556		rq->shared = &adapter->rqd_start[i].ctrl;
2557		rq->adapter = adapter;
2558		err = vmxnet3_rq_create(rq, adapter);
2559		if (err) {
2560			if (i == 0) {
2561				netdev_err(adapter->netdev,
2562					   "Could not allocate any rx queues. "
2563					   "Aborting.\n");
2564				goto queue_err;
2565			} else {
2566				netdev_info(adapter->netdev,
2567					    "Number of rx queues changed "
2568					    "to %d.\n", i);
2569				adapter->num_rx_queues = i;
2570				err = 0;
2571				break;
2572			}
2573		}
2574	}
2575	return err;
2576queue_err:
2577	vmxnet3_tq_destroy_all(adapter);
2578	return err;
2579}
2580
2581static int
2582vmxnet3_open(struct net_device *netdev)
2583{
2584	struct vmxnet3_adapter *adapter;
2585	int err, i;
2586
2587	adapter = netdev_priv(netdev);
2588
2589	for (i = 0; i < adapter->num_tx_queues; i++)
2590		spin_lock_init(&adapter->tx_queue[i].tx_lock);
2591
2592	err = vmxnet3_create_queues(adapter, adapter->tx_ring_size,
2593				    adapter->rx_ring_size,
2594				    VMXNET3_DEF_RX_RING_SIZE);
2595	if (err)
2596		goto queue_err;
2597
2598	err = vmxnet3_activate_dev(adapter);
2599	if (err)
2600		goto activate_err;
2601
2602	return 0;
2603
2604activate_err:
2605	vmxnet3_rq_destroy_all(adapter);
2606	vmxnet3_tq_destroy_all(adapter);
2607queue_err:
2608	return err;
2609}
2610
2611
2612static int
2613vmxnet3_close(struct net_device *netdev)
2614{
2615	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2616
2617	/*
2618	 * Reset_work may be in the middle of resetting the device, wait for its
2619	 * completion.
2620	 */
2621	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2622		msleep(1);
2623
2624	vmxnet3_quiesce_dev(adapter);
2625
2626	vmxnet3_rq_destroy_all(adapter);
2627	vmxnet3_tq_destroy_all(adapter);
2628
2629	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2630
2631
2632	return 0;
2633}
2634
2635
2636void
2637vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2638{
2639	int i;
2640
2641	/*
2642	 * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2643	 * vmxnet3_close() will deadlock.
2644	 */
2645	BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2646
2647	/* we need to enable NAPI, otherwise dev_close will deadlock */
2648	for (i = 0; i < adapter->num_rx_queues; i++)
2649		napi_enable(&adapter->rx_queue[i].napi);
2650	dev_close(adapter->netdev);
2651}
2652
2653
2654static int
2655vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2656{
2657	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2658	int err = 0;
2659
2660	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2661		return -EINVAL;
2662
2663	netdev->mtu = new_mtu;
2664
2665	/*
2666	 * Reset_work may be in the middle of resetting the device, wait for its
2667	 * completion.
2668	 */
2669	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2670		msleep(1);
2671
2672	if (netif_running(netdev)) {
2673		vmxnet3_quiesce_dev(adapter);
2674		vmxnet3_reset_dev(adapter);
2675
2676		/* we need to re-create the rx queue based on the new mtu */
2677		vmxnet3_rq_destroy_all(adapter);
2678		vmxnet3_adjust_rx_ring_size(adapter);
2679		err = vmxnet3_rq_create_all(adapter);
2680		if (err) {
2681			netdev_err(netdev,
2682				   "failed to re-create rx queues, "
2683				   "error %d. Closing it.\n", err);
2684			goto out;
2685		}
2686
2687		err = vmxnet3_activate_dev(adapter);
2688		if (err) {
2689			netdev_err(netdev,
2690				   "failed to re-activate, error %d. "
2691				   "Closing it\n", err);
2692			goto out;
2693		}
2694	}
2695
2696out:
2697	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2698	if (err)
2699		vmxnet3_force_close(adapter);
2700
2701	return err;
2702}
2703
2704
2705static void
2706vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2707{
2708	struct net_device *netdev = adapter->netdev;
2709
2710	netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2711		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2712		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2713		NETIF_F_LRO;
2714	if (dma64)
2715		netdev->hw_features |= NETIF_F_HIGHDMA;
2716	netdev->vlan_features = netdev->hw_features &
2717				~(NETIF_F_HW_VLAN_CTAG_TX |
2718				  NETIF_F_HW_VLAN_CTAG_RX);
2719	netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2720}
2721
2722
2723static void
2724vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2725{
2726	u32 tmp;
2727
2728	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2729	*(u32 *)mac = tmp;
2730
2731	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2732	mac[4] = tmp & 0xff;
2733	mac[5] = (tmp >> 8) & 0xff;
2734}
2735
2736#ifdef CONFIG_PCI_MSI
2737
2738/*
2739 * Enable MSI-X vectors.
2740 * Returns:
2741 *	the number of vectors enabled, which is at least
2742 *	 VMXNET3_LINUX_MIN_MSIX_VECT, on success, or
2743 *	a negative error code if neither the requested number of vectors
2744 *	 nor the minimum fallback could be enabled.
2745 */
2746
2747static int
2748vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, int nvec)
2749{
2750	int ret = pci_enable_msix_range(adapter->pdev,
2751					adapter->intr.msix_entries, nvec, nvec);
2752
2753	if (ret == -ENOSPC && nvec > VMXNET3_LINUX_MIN_MSIX_VECT) {
2754		dev_err(&adapter->netdev->dev,
2755			"Failed to enable %d MSI-X, trying %d\n",
2756			nvec, VMXNET3_LINUX_MIN_MSIX_VECT);
2757
2758		ret = pci_enable_msix_range(adapter->pdev,
2759					    adapter->intr.msix_entries,
2760					    VMXNET3_LINUX_MIN_MSIX_VECT,
2761					    VMXNET3_LINUX_MIN_MSIX_VECT);
2762	}
2763
2764	if (ret < 0) {
2765		dev_err(&adapter->netdev->dev,
2766			"Failed to enable MSI-X, error: %d\n", ret);
2767	}
2768
2769	return ret;
2770}
2771
2772
2773#endif /* CONFIG_PCI_MSI */
2774
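/*
 * Query the interrupt configuration preferred by the device and fall back
 * from MSI-X to MSI to INTx as vectors become unavailable.
 */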
2775static void
2776vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2777{
2778	u32 cfg;
2779	unsigned long flags;
2780
2781	/* intr settings */
2782	spin_lock_irqsave(&adapter->cmd_lock, flags);
2783	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2784			       VMXNET3_CMD_GET_CONF_INTR);
2785	cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2786	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2787	adapter->intr.type = cfg & 0x3;
2788	adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2789
2790	if (adapter->intr.type == VMXNET3_IT_AUTO) {
2791		adapter->intr.type = VMXNET3_IT_MSIX;
2792	}
2793
2794#ifdef CONFIG_PCI_MSI
2795	if (adapter->intr.type == VMXNET3_IT_MSIX) {
2796		int i, nvec;
2797
2798		nvec  = adapter->share_intr == VMXNET3_INTR_TXSHARE ?
2799			1 : adapter->num_tx_queues;
2800		nvec += adapter->share_intr == VMXNET3_INTR_BUDDYSHARE ?
2801			0 : adapter->num_rx_queues;
2802		nvec += 1;	/* for link event */
2803		nvec = nvec > VMXNET3_LINUX_MIN_MSIX_VECT ?
2804		       nvec : VMXNET3_LINUX_MIN_MSIX_VECT;
2805
2806		for (i = 0; i < nvec; i++)
2807			adapter->intr.msix_entries[i].entry = i;
2808
2809		nvec = vmxnet3_acquire_msix_vectors(adapter, nvec);
2810		if (nvec < 0)
2811			goto msix_err;
2812
2813		/* If we cannot allocate one MSIx vector per queue
2814		 * then limit the number of rx queues to 1
2815		 */
2816		if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) {
2817			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2818			    || adapter->num_rx_queues != 1) {
2819				adapter->share_intr = VMXNET3_INTR_TXSHARE;
2820				netdev_err(adapter->netdev,
2821					   "Number of rx queues : 1\n");
2822				adapter->num_rx_queues = 1;
2823			}
2824		}
2825
2826		adapter->intr.num_intrs = nvec;
2827		return;
2828
2829msix_err:
2830		/* If we cannot allocate MSIx vectors use only one rx queue */
2831		dev_info(&adapter->pdev->dev,
2832			 "Failed to enable MSI-X, error %d. "
2833			 "Limiting #rx queues to 1, try MSI.\n", nvec);
2834
2835		adapter->intr.type = VMXNET3_IT_MSI;
2836	}
2837
2838	if (adapter->intr.type == VMXNET3_IT_MSI) {
2839		if (!pci_enable_msi(adapter->pdev)) {
2840			adapter->num_rx_queues = 1;
2841			adapter->intr.num_intrs = 1;
2842			return;
2843		}
2844	}
2845#endif /* CONFIG_PCI_MSI */
2846
2847	adapter->num_rx_queues = 1;
2848	dev_info(&adapter->netdev->dev,
2849		 "Using INTx interrupt, #Rx queues: 1.\n");
2850	adapter->intr.type = VMXNET3_IT_INTX;
2851
2852	/* INT-X related setting */
2853	adapter->intr.num_intrs = 1;
2854}
2855
2856
2857static void
2858vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2859{
2860	if (adapter->intr.type == VMXNET3_IT_MSIX)
2861		pci_disable_msix(adapter->pdev);
2862	else if (adapter->intr.type == VMXNET3_IT_MSI)
2863		pci_disable_msi(adapter->pdev);
2864	else
2865		BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2866}
2867
2868
2869static void
2870vmxnet3_tx_timeout(struct net_device *netdev)
2871{
2872	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2873	adapter->tx_timeout_count++;
2874
2875	netdev_err(adapter->netdev, "tx hang\n");
2876	schedule_work(&adapter->work);
2877	netif_wake_queue(adapter->netdev);
2878}
2879
2880
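/*
 * Work item (scheduled e.g. from the tx timeout handler) that quiesces,
 * resets and re-activates the device if it is still running.
 */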
2881static void
2882vmxnet3_reset_work(struct work_struct *data)
2883{
2884	struct vmxnet3_adapter *adapter;
2885
2886	adapter = container_of(data, struct vmxnet3_adapter, work);
2887
2888	/* if another thread is resetting the device, no need to proceed */
2889	if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2890		return;
2891
2892	/* if the device is closed, we must leave it alone */
2893	rtnl_lock();
2894	if (netif_running(adapter->netdev)) {
2895		netdev_notice(adapter->netdev, "resetting\n");
2896		vmxnet3_quiesce_dev(adapter);
2897		vmxnet3_reset_dev(adapter);
2898		vmxnet3_activate_dev(adapter);
2899	} else {
2900		netdev_info(adapter->netdev, "already closed\n");
2901	}
2902	rtnl_unlock();
2903
2904	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2905}
2906
2907
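/*
 * PCI probe: allocate the netdev and DMA-shared areas, check the device and
 * UPT versions, set up interrupts and register the net device.
 */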
2908static int
2909vmxnet3_probe_device(struct pci_dev *pdev,
2910		     const struct pci_device_id *id)
2911{
2912	static const struct net_device_ops vmxnet3_netdev_ops = {
2913		.ndo_open = vmxnet3_open,
2914		.ndo_stop = vmxnet3_close,
2915		.ndo_start_xmit = vmxnet3_xmit_frame,
2916		.ndo_set_mac_address = vmxnet3_set_mac_addr,
2917		.ndo_change_mtu = vmxnet3_change_mtu,
2918		.ndo_set_features = vmxnet3_set_features,
2919		.ndo_get_stats64 = vmxnet3_get_stats64,
2920		.ndo_tx_timeout = vmxnet3_tx_timeout,
2921		.ndo_set_rx_mode = vmxnet3_set_mc,
2922		.ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2923		.ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2924#ifdef CONFIG_NET_POLL_CONTROLLER
2925		.ndo_poll_controller = vmxnet3_netpoll,
2926#endif
2927	};
2928	int err;
2929	bool dma64 = false; /* quiet a spurious gcc warning */
2930	u32 ver;
2931	struct net_device *netdev;
2932	struct vmxnet3_adapter *adapter;
2933	u8 mac[ETH_ALEN];
2934	int size;
2935	int num_tx_queues;
2936	int num_rx_queues;
2937
2938	if (!pci_msi_enabled())
2939		enable_mq = 0;
2940
2941#ifdef VMXNET3_RSS
2942	if (enable_mq)
2943		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2944				    (int)num_online_cpus());
2945	else
2946#endif
2947		num_rx_queues = 1;
2948	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2949
2950	if (enable_mq)
2951		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2952				    (int)num_online_cpus());
2953	else
2954		num_tx_queues = 1;
2955
2956	num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2957	netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2958				   max(num_tx_queues, num_rx_queues));
2959	dev_info(&pdev->dev,
2960		 "# of Tx queues : %d, # of Rx queues : %d\n",
2961		 num_tx_queues, num_rx_queues);
2962
2963	if (!netdev)
2964		return -ENOMEM;
2965
2966	pci_set_drvdata(pdev, netdev);
2967	adapter = netdev_priv(netdev);
2968	adapter->netdev = netdev;
2969	adapter->pdev = pdev;
2970
2971	adapter->tx_ring_size = VMXNET3_DEF_TX_RING_SIZE;
2972	adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
2973
2974	spin_lock_init(&adapter->cmd_lock);
2975	adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
2976					     sizeof(struct vmxnet3_adapter),
2977					     PCI_DMA_TODEVICE);
2978	adapter->shared = dma_alloc_coherent(
2979				&adapter->pdev->dev,
2980				sizeof(struct Vmxnet3_DriverShared),
2981				&adapter->shared_pa, GFP_KERNEL);
2982	if (!adapter->shared) {
2983		dev_err(&pdev->dev, "Failed to allocate memory\n");
2984		err = -ENOMEM;
2985		goto err_alloc_shared;
2986	}
2987
2988	adapter->num_rx_queues = num_rx_queues;
2989	adapter->num_tx_queues = num_tx_queues;
2990	adapter->rx_buf_per_pkt = 1;
2991
2992	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2993	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2994	adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
2995						&adapter->queue_desc_pa,
2996						GFP_KERNEL);
2997
2998	if (!adapter->tqd_start) {
2999		dev_err(&pdev->dev, "Failed to allocate memory\n");
3000		err = -ENOMEM;
3001		goto err_alloc_queue_desc;
3002	}
3003	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
3004							    adapter->num_tx_queues);
3005
3006	adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
3007					      sizeof(struct Vmxnet3_PMConf),
3008					      &adapter->pm_conf_pa,
3009					      GFP_KERNEL);
3010	if (adapter->pm_conf == NULL) {
3011		err = -ENOMEM;
3012		goto err_alloc_pm;
3013	}
3014
3015#ifdef VMXNET3_RSS
3016
3017	adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
3018					       sizeof(struct UPT1_RSSConf),
3019					       &adapter->rss_conf_pa,
3020					       GFP_KERNEL);
3021	if (adapter->rss_conf == NULL) {
3022		err = -ENOMEM;
3023		goto err_alloc_rss;
3024	}
3025#endif /* VMXNET3_RSS */
3026
3027	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
3028	if (err < 0)
3029		goto err_alloc_pci;
3030
3031	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3032	if (ver & 1) {
3033		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
3034	} else {
3035		dev_err(&pdev->dev,
3036			"Incompatible h/w version (0x%x) for adapter\n", ver);
3037		err = -EBUSY;
3038		goto err_ver;
3039	}
3040
3041	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3042	if (ver & 1) {
3043		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3044	} else {
3045		dev_err(&pdev->dev,
3046			"Incompatible upt version (0x%x) for adapter\n", ver);
3047		err = -EBUSY;
3048		goto err_ver;
3049	}
3050
3051	SET_NETDEV_DEV(netdev, &pdev->dev);
3052	vmxnet3_declare_features(adapter, dma64);
3053
3054	if (adapter->num_tx_queues == adapter->num_rx_queues)
3055		adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3056	else
3057		adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3058
3059	vmxnet3_alloc_intr_resources(adapter);
3060
3061#ifdef VMXNET3_RSS
3062	if (adapter->num_rx_queues > 1 &&
3063	    adapter->intr.type == VMXNET3_IT_MSIX) {
3064		adapter->rss = true;
3065		netdev->hw_features |= NETIF_F_RXHASH;
3066		netdev->features |= NETIF_F_RXHASH;
3067		dev_dbg(&pdev->dev, "RSS is enabled.\n");
3068	} else {
3069		adapter->rss = false;
3070	}
3071#endif
3072
3073	vmxnet3_read_mac_addr(adapter, mac);
3074	memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3075
3076	netdev->netdev_ops = &vmxnet3_netdev_ops;
3077	vmxnet3_set_ethtool_ops(netdev);
3078	netdev->watchdog_timeo = 5 * HZ;
3079
3080	INIT_WORK(&adapter->work, vmxnet3_reset_work);
3081	set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3082
3083	if (adapter->intr.type == VMXNET3_IT_MSIX) {
3084		int i;
3085		for (i = 0; i < adapter->num_rx_queues; i++) {
3086			netif_napi_add(adapter->netdev,
3087				       &adapter->rx_queue[i].napi,
3088				       vmxnet3_poll_rx_only, 64);
3089		}
3090	} else {
3091		netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3092			       vmxnet3_poll, 64);
3093	}
3094
3095	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3096	netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3097
3098	netif_carrier_off(netdev);
3099	err = register_netdev(netdev);
3100
3101	if (err) {
3102		dev_err(&pdev->dev, "Failed to register adapter\n");
3103		goto err_register;
3104	}
3105
3106	vmxnet3_check_link(adapter, false);
3107	return 0;
3108
3109err_register:
3110	vmxnet3_free_intr_resources(adapter);
3111err_ver:
3112	vmxnet3_free_pci_resources(adapter);
3113err_alloc_pci:
3114#ifdef VMXNET3_RSS
3115	dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3116			  adapter->rss_conf, adapter->rss_conf_pa);
3117err_alloc_rss:
3118#endif
3119	dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3120			  adapter->pm_conf, adapter->pm_conf_pa);
3121err_alloc_pm:
3122	dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3123			  adapter->queue_desc_pa);
3124err_alloc_queue_desc:
3125	dma_free_coherent(&adapter->pdev->dev,
3126			  sizeof(struct Vmxnet3_DriverShared),
3127			  adapter->shared, adapter->shared_pa);
3128err_alloc_shared:
3129	dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3130			 sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3131	free_netdev(netdev);
3132	return err;
3133}
3134
3135
3136static void
3137vmxnet3_remove_device(struct pci_dev *pdev)
3138{
3139	struct net_device *netdev = pci_get_drvdata(pdev);
3140	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3141	int size = 0;
3142	int num_rx_queues;
3143
3144#ifdef VMXNET3_RSS
3145	if (enable_mq)
3146		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3147				    (int)num_online_cpus());
3148	else
3149#endif
3150		num_rx_queues = 1;
3151	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3152
3153	cancel_work_sync(&adapter->work);
3154
3155	unregister_netdev(netdev);
3156
3157	vmxnet3_free_intr_resources(adapter);
3158	vmxnet3_free_pci_resources(adapter);
3159#ifdef VMXNET3_RSS
3160	dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3161			  adapter->rss_conf, adapter->rss_conf_pa);
3162#endif
3163	dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3164			  adapter->pm_conf, adapter->pm_conf_pa);
3165
3166	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3167	size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3168	dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3169			  adapter->queue_desc_pa);
3170	dma_free_coherent(&adapter->pdev->dev,
3171			  sizeof(struct Vmxnet3_DriverShared),
3172			  adapter->shared, adapter->shared_pa);
3173	dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3174			 sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3175	free_netdev(netdev);
3176}
3177
3178
3179#ifdef CONFIG_PM
3180
3181static int
3182vmxnet3_suspend(struct device *device)
3183{
3184	struct pci_dev *pdev = to_pci_dev(device);
3185	struct net_device *netdev = pci_get_drvdata(pdev);
3186	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3187	struct Vmxnet3_PMConf *pmConf;
3188	struct ethhdr *ehdr;
3189	struct arphdr *ahdr;
3190	u8 *arpreq;
3191	struct in_device *in_dev;
3192	struct in_ifaddr *ifa;
3193	unsigned long flags;
3194	int i = 0;
3195
3196	if (!netif_running(netdev))
3197		return 0;
3198
3199	for (i = 0; i < adapter->num_rx_queues; i++)
3200		napi_disable(&adapter->rx_queue[i].napi);
3201
3202	vmxnet3_disable_all_intrs(adapter);
3203	vmxnet3_free_irqs(adapter);
3204	vmxnet3_free_intr_resources(adapter);
3205
3206	netif_device_detach(netdev);
3207	netif_tx_stop_all_queues(netdev);
3208
3209	/* Create wake-up filters. */
3210	pmConf = adapter->pm_conf;
3211	memset(pmConf, 0, sizeof(*pmConf));
3212
3213	if (adapter->wol & WAKE_UCAST) {
3214		pmConf->filters[i].patternSize = ETH_ALEN;
3215		pmConf->filters[i].maskSize = 1;
3216		memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3217		pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3218
3219		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3220		i++;
3221	}
3222
3223	if (adapter->wol & WAKE_ARP) {
3224		in_dev = in_dev_get(netdev);
3225		if (!in_dev)
3226			goto skip_arp;
3227
3228		ifa = (struct in_ifaddr *)in_dev->ifa_list;
3229		if (!ifa)
3230			goto skip_arp;
3231
3232		pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3233			sizeof(struct arphdr) +		/* ARP header */
3234			2 * ETH_ALEN +		/* 2 Ethernet addresses*/
3235			2 * sizeof(u32);	/*2 IPv4 addresses */
3236		pmConf->filters[i].maskSize =
3237			(pmConf->filters[i].patternSize - 1) / 8 + 1;
3238
3239		/* ETH_P_ARP in Ethernet header. */
3240		ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3241		ehdr->h_proto = htons(ETH_P_ARP);
3242
3243		/* ARPOP_REQUEST in ARP header. */
3244		ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3245		ahdr->ar_op = htons(ARPOP_REQUEST);
3246		arpreq = (u8 *)(ahdr + 1);
3247
3248		/* The Unicast IPv4 address in 'tip' field. */
3249		arpreq += 2 * ETH_ALEN + sizeof(u32);
3250		*(u32 *)arpreq = ifa->ifa_address;
3251
3252		/* The mask for the relevant bits. */
3253		pmConf->filters[i].mask[0] = 0x00;
3254		pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3255		pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3256		pmConf->filters[i].mask[3] = 0x00;
3257		pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3258		pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3259		in_dev_put(in_dev);
3260
3261		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3262		i++;
3263	}
3264
3265skip_arp:
3266	if (adapter->wol & WAKE_MAGIC)
3267		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3268
3269	pmConf->numFilters = i;
3270
3271	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3272	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3273								  *pmConf));
3274	adapter->shared->devRead.pmConfDesc.confPA =
3275		cpu_to_le64(adapter->pm_conf_pa);
3276
3277	spin_lock_irqsave(&adapter->cmd_lock, flags);
3278	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3279			       VMXNET3_CMD_UPDATE_PMCFG);
3280	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3281
3282	pci_save_state(pdev);
3283	pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3284			adapter->wol);
3285	pci_disable_device(pdev);
3286	pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3287
3288	return 0;
3289}
3290
3291
3292static int
3293vmxnet3_resume(struct device *device)
3294{
3295	int err, i = 0;
3296	unsigned long flags;
3297	struct pci_dev *pdev = to_pci_dev(device);
3298	struct net_device *netdev = pci_get_drvdata(pdev);
3299	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3300	struct Vmxnet3_PMConf *pmConf;
3301
3302	if (!netif_running(netdev))
3303		return 0;
3304
3305	/* Destroy wake-up filters. */
3306	pmConf = adapter->pm_conf;
3307	memset(pmConf, 0, sizeof(*pmConf));
3308
3309	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3310	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3311								  *pmConf));
3312	adapter->shared->devRead.pmConfDesc.confPA =
3313		cpu_to_le64(adapter->pm_conf_pa);
3314
3315	netif_device_attach(netdev);
3316	pci_set_power_state(pdev, PCI_D0);
3317	pci_restore_state(pdev);
3318	err = pci_enable_device_mem(pdev);
3319	if (err != 0)
3320		return err;
3321
3322	pci_enable_wake(pdev, PCI_D0, 0);
3323
3324	spin_lock_irqsave(&adapter->cmd_lock, flags);
3325	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3326			       VMXNET3_CMD_UPDATE_PMCFG);
3327	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3328	vmxnet3_alloc_intr_resources(adapter);
3329	vmxnet3_request_irqs(adapter);
3330	for (i = 0; i < adapter->num_rx_queues; i++)
3331		napi_enable(&adapter->rx_queue[i].napi);
3332	vmxnet3_enable_all_intrs(adapter);
3333
3334	return 0;
3335}
3336
3337static const struct dev_pm_ops vmxnet3_pm_ops = {
3338	.suspend = vmxnet3_suspend,
3339	.resume = vmxnet3_resume,
3340};
3341#endif
3342
3343static struct pci_driver vmxnet3_driver = {
3344	.name		= vmxnet3_driver_name,
3345	.id_table	= vmxnet3_pciid_table,
3346	.probe		= vmxnet3_probe_device,
3347	.remove		= vmxnet3_remove_device,
3348#ifdef CONFIG_PM
3349	.driver.pm	= &vmxnet3_pm_ops,
3350#endif
3351};
3352
3353
3354static int __init
3355vmxnet3_init_module(void)
3356{
3357	pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3358		VMXNET3_DRIVER_VERSION_REPORT);
3359	return pci_register_driver(&vmxnet3_driver);
3360}
3361
3362module_init(vmxnet3_init_module);
3363
3364
3365static void
3366vmxnet3_exit_module(void)
3367{
3368	pci_unregister_driver(&vmxnet3_driver);
3369}
3370
3371module_exit(vmxnet3_exit_module);
3372
3373MODULE_AUTHOR("VMware, Inc.");
3374MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3375MODULE_LICENSE("GPL v2");
3376MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3377