vmxnet3_drv.c revision b60b869d5f9f0987cf4e3fee22fb88786a281de7
1/*
2 * Linux driver for VMware's vmxnet3 ethernet NIC.
3 *
4 * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; version 2 of the License and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 * NON INFRINGEMENT. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * The full GNU General Public License is included in this distribution in
21 * the file called "COPYING".
22 *
23 * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24 *
25 */
26
27#include <linux/module.h>
28#include <net/ip6_checksum.h>
29
30#include "vmxnet3_int.h"
31
32char vmxnet3_driver_name[] = "vmxnet3";
33#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34
35/*
36 * PCI Device ID Table
37 * Last entry must be all 0s
38 */
39static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
40	{PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41	{0}
42};
43
44MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45
46static int enable_mq = 1;
47
48static void
49vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50
51/*
52 *    Enable/Disable the given intr
53 */
54static void
55vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56{
57	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58}
59
60
61static void
62vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63{
64	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65}
66
67
68/*
69 *    Enable/Disable all intrs used by the device
70 */
71static void
72vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73{
74	int i;
75
76	for (i = 0; i < adapter->intr.num_intrs; i++)
77		vmxnet3_enable_intr(adapter, i);
78	adapter->shared->devRead.intrConf.intrCtrl &=
79					cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80}
81
82
83static void
84vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85{
86	int i;
87
88	adapter->shared->devRead.intrConf.intrCtrl |=
89					cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90	for (i = 0; i < adapter->intr.num_intrs; i++)
91		vmxnet3_disable_intr(adapter, i);
92}
93
94
95static void
96vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97{
98	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99}
100
101
102static bool
103vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104{
105	return tq->stopped;
106}
107
108
109static void
110vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111{
112	tq->stopped = false;
113	netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114}
115
116
117static void
118vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119{
120	tq->stopped = false;
121	netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122}
123
124
125static void
126vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127{
128	tq->stopped = true;
129	tq->num_stop++;
130	netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131}
132
133
134/*
135 * Check the link state. This may start or stop the tx queue.
136 */
137static void
138vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139{
140	u32 ret;
141	int i;
142	unsigned long flags;
143
144	spin_lock_irqsave(&adapter->cmd_lock, flags);
145	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148
149	adapter->link_speed = ret >> 16;
150	if (ret & 1) { /* Link is up. */
151		netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152			    adapter->link_speed);
153		netif_carrier_on(adapter->netdev);
154
155		if (affectTxQueue) {
156			for (i = 0; i < adapter->num_tx_queues; i++)
157				vmxnet3_tq_start(&adapter->tx_queue[i],
158						 adapter);
159		}
160	} else {
161		netdev_info(adapter->netdev, "NIC Link is Down\n");
162		netif_carrier_off(adapter->netdev);
163
164		if (affectTxQueue) {
165			for (i = 0; i < adapter->num_tx_queues; i++)
166				vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
167		}
168	}
169}
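
/*
 * Illustrative example (not part of the original source): the GET_LINK
 * result packs the link speed in Mbps into the upper 16 bits and the link
 * state into bit 0.  A return value of (10000 << 16) | 1 == 0x27100001
 * therefore decodes to a 10 Gbps link that is up, and with affectTxQueue
 * set the loop above restarts every tx queue.
 */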
170
171static void
172vmxnet3_process_events(struct vmxnet3_adapter *adapter)
173{
174	int i;
175	unsigned long flags;
176	u32 events = le32_to_cpu(adapter->shared->ecr);
177	if (!events)
178		return;
179
180	vmxnet3_ack_events(adapter, events);
181
182	/* Check if link state has changed */
183	if (events & VMXNET3_ECR_LINK)
184		vmxnet3_check_link(adapter, true);
185
186	/* Check if there is an error on xmit/recv queues */
187	if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
188		spin_lock_irqsave(&adapter->cmd_lock, flags);
189		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
190				       VMXNET3_CMD_GET_QUEUE_STATUS);
191		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
192
193		for (i = 0; i < adapter->num_tx_queues; i++)
194			if (adapter->tqd_start[i].status.stopped)
195				dev_err(&adapter->netdev->dev,
196					"%s: tq[%d] error 0x%x\n",
197					adapter->netdev->name, i, le32_to_cpu(
198					adapter->tqd_start[i].status.error));
199		for (i = 0; i < adapter->num_rx_queues; i++)
200			if (adapter->rqd_start[i].status.stopped)
201				dev_err(&adapter->netdev->dev,
202					"%s: rq[%d] error 0x%x\n",
203					adapter->netdev->name, i,
204					adapter->rqd_start[i].status.error);
205
206		schedule_work(&adapter->work);
207	}
208}
209
210#ifdef __BIG_ENDIAN_BITFIELD
211/*
212 * The device expects the bitfields in shared structures to be written in
213 * little endian. When the CPU is big endian, the following routines are used
214 * to read from and write to the device ABI correctly.
215 * The general technique used here is: double-word bitfields are defined in
216 * the opposite order for big endian architectures. Before the driver reads
217 * them, the complete double word is translated using le32_to_cpu. Similarly,
218 * after the driver writes into the bitfields, cpu_to_le32 is used to translate
219 * the double words into the required format.
220 * To avoid touching bits in the shared structure more than once, temporary
221 * descriptors are used; these are passed as srcDesc to the functions below.
222 */
223static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
224				struct Vmxnet3_RxDesc *dstDesc)
225{
226	u32 *src = (u32 *)srcDesc + 2;
227	u32 *dst = (u32 *)dstDesc + 2;
228	dstDesc->addr = le64_to_cpu(srcDesc->addr);
229	*dst = le32_to_cpu(*src);
230	dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
231}
232
233static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
234			       struct Vmxnet3_TxDesc *dstDesc)
235{
236	int i;
237	u32 *src = (u32 *)(srcDesc + 1);
238	u32 *dst = (u32 *)(dstDesc + 1);
239
240	/* Working backwards so that the gen bit is set at the end. */
241	for (i = 2; i > 0; i--) {
242		src--;
243		dst--;
244		*dst = cpu_to_le32(*src);
245	}
246}
247
248
249static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
250				struct Vmxnet3_RxCompDesc *dstDesc)
251{
252	int i = 0;
253	u32 *src = (u32 *)srcDesc;
254	u32 *dst = (u32 *)dstDesc;
255	for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
256		*dst = le32_to_cpu(*src);
257		src++;
258		dst++;
259	}
260}
261
262
263/* Used to read bitfield values from double words. */
264static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
265{
266	u32 temp = le32_to_cpu(*bitfield);
267	u32 mask = ((1 << size) - 1) << pos;
268	temp &= mask;
269	temp >>= pos;
270	return temp;
271}
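
/*
 * Usage sketch (illustrative only): the wrapper macros below extract single
 * descriptor bits by pointing get_bitfield32() at the double word that
 * holds them, e.g.
 *
 *	u32 gen = get_bitfield32(((const __le32 *)txdesc) +
 *				 VMXNET3_TXD_GEN_DWORD_SHIFT,
 *				 VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE);
 *
 * For a one-bit field this reduces to
 * (le32_to_cpu(*dword) >> VMXNET3_TXD_GEN_SHIFT) & 1.
 */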
272
273
274
275#endif  /* __BIG_ENDIAN_BITFIELD */
276
277#ifdef __BIG_ENDIAN_BITFIELD
278
279#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
280			txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
281			VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
282#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
283			txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
284			VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
285#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
286			VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
287			VMXNET3_TCD_GEN_SIZE)
288#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
289			VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
290#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
291			(dstrcd) = (tmp); \
292			vmxnet3_RxCompToCPU((rcd), (tmp)); \
293		} while (0)
294#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
295			(dstrxd) = (tmp); \
296			vmxnet3_RxDescToCPU((rxd), (tmp)); \
297		} while (0)
298
299#else
300
301#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
302#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
303#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
304#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
305#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
306#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
307
308#endif /* __BIG_ENDIAN_BITFIELD  */
309
310
311static void
312vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
313		     struct pci_dev *pdev)
314{
315	if (tbi->map_type == VMXNET3_MAP_SINGLE)
316		dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
317				 PCI_DMA_TODEVICE);
318	else if (tbi->map_type == VMXNET3_MAP_PAGE)
319		dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
320			       PCI_DMA_TODEVICE);
321	else
322		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
323
324	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
325}
326
327
328static int
329vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
330		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter)
331{
332	struct sk_buff *skb;
333	int entries = 0;
334
335	/* no out of order completion */
336	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
337	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
338
339	skb = tq->buf_info[eop_idx].skb;
340	BUG_ON(skb == NULL);
341	tq->buf_info[eop_idx].skb = NULL;
342
343	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
344
345	while (tq->tx_ring.next2comp != eop_idx) {
346		vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
347				     pdev);
348
349		/* update next2comp w/o tx_lock. Since we are marking more,
350		 * not fewer, tx ring entries as available, the worst case is
351		 * that the tx routine incorrectly re-queues a pkt due to
352		 * insufficient tx ring entries.
353		 */
354		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
355		entries++;
356	}
357
358	dev_kfree_skb_any(skb);
359	return entries;
360}
361
362
363static int
364vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
365			struct vmxnet3_adapter *adapter)
366{
367	int completed = 0;
368	union Vmxnet3_GenericDesc *gdesc;
369
370	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
371	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
372		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
373					       &gdesc->tcd), tq, adapter->pdev,
374					       adapter);
375
376		vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
377		gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
378	}
379
380	if (completed) {
381		spin_lock(&tq->tx_lock);
382		if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
383			     vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
384			     VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
385			     netif_carrier_ok(adapter->netdev))) {
386			vmxnet3_tq_wake(tq, adapter);
387		}
388		spin_unlock(&tq->tx_lock);
389	}
390	return completed;
391}
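
/*
 * Illustrative note (not from the original source, assuming the usual
 * vmxnet3 ring helpers flip the gen bit on wrap-around): ownership of a
 * completion entry is tracked with the gen bit rather than a counter.
 * Completions written on the first pass over the ring carry the initial
 * generation; once next2proc wraps, the driver expects the opposite value,
 * so the loop above stops at the first entry the device has not yet
 * (re)written.
 */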
392
393
394static void
395vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
396		   struct vmxnet3_adapter *adapter)
397{
398	int i;
399
400	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
401		struct vmxnet3_tx_buf_info *tbi;
402
403		tbi = tq->buf_info + tq->tx_ring.next2comp;
404
405		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
406		if (tbi->skb) {
407			dev_kfree_skb_any(tbi->skb);
408			tbi->skb = NULL;
409		}
410		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
411	}
412
413	/* sanity check, verify all buffers are indeed unmapped and freed */
414	for (i = 0; i < tq->tx_ring.size; i++) {
415		BUG_ON(tq->buf_info[i].skb != NULL ||
416		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
417	}
418
419	tq->tx_ring.gen = VMXNET3_INIT_GEN;
420	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
421
422	tq->comp_ring.gen = VMXNET3_INIT_GEN;
423	tq->comp_ring.next2proc = 0;
424}
425
426
427static void
428vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
429		   struct vmxnet3_adapter *adapter)
430{
431	if (tq->tx_ring.base) {
432		dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
433				  sizeof(struct Vmxnet3_TxDesc),
434				  tq->tx_ring.base, tq->tx_ring.basePA);
435		tq->tx_ring.base = NULL;
436	}
437	if (tq->data_ring.base) {
438		dma_free_coherent(&adapter->pdev->dev, tq->data_ring.size *
439				  sizeof(struct Vmxnet3_TxDataDesc),
440				  tq->data_ring.base, tq->data_ring.basePA);
441		tq->data_ring.base = NULL;
442	}
443	if (tq->comp_ring.base) {
444		dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
445				  sizeof(struct Vmxnet3_TxCompDesc),
446				  tq->comp_ring.base, tq->comp_ring.basePA);
447		tq->comp_ring.base = NULL;
448	}
449	if (tq->buf_info) {
450		dma_free_coherent(&adapter->pdev->dev,
451				  tq->tx_ring.size * sizeof(tq->buf_info[0]),
452				  tq->buf_info, tq->buf_info_pa);
453		tq->buf_info = NULL;
454	}
455}
456
457
458/* Destroy all tx queues */
459void
460vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
461{
462	int i;
463
464	for (i = 0; i < adapter->num_tx_queues; i++)
465		vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
466}
467
468
469static void
470vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
471		struct vmxnet3_adapter *adapter)
472{
473	int i;
474
475	/* reset the tx ring contents to 0 and reset the tx ring states */
476	memset(tq->tx_ring.base, 0, tq->tx_ring.size *
477	       sizeof(struct Vmxnet3_TxDesc));
478	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
479	tq->tx_ring.gen = VMXNET3_INIT_GEN;
480
481	memset(tq->data_ring.base, 0, tq->data_ring.size *
482	       sizeof(struct Vmxnet3_TxDataDesc));
483
484	/* reset the tx comp ring contents to 0 and reset comp ring states */
485	memset(tq->comp_ring.base, 0, tq->comp_ring.size *
486	       sizeof(struct Vmxnet3_TxCompDesc));
487	tq->comp_ring.next2proc = 0;
488	tq->comp_ring.gen = VMXNET3_INIT_GEN;
489
490	/* reset the bookkeeping data */
491	memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
492	for (i = 0; i < tq->tx_ring.size; i++)
493		tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
494
495	/* stats are not reset */
496}
497
498
499static int
500vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
501		  struct vmxnet3_adapter *adapter)
502{
503	size_t sz;
504
505	BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
506	       tq->comp_ring.base || tq->buf_info);
507
508	tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
509			tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
510			&tq->tx_ring.basePA, GFP_KERNEL);
511	if (!tq->tx_ring.base) {
512		netdev_err(adapter->netdev, "failed to allocate tx ring\n");
513		goto err;
514	}
515
516	tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
517			tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc),
518			&tq->data_ring.basePA, GFP_KERNEL);
519	if (!tq->data_ring.base) {
520		netdev_err(adapter->netdev, "failed to allocate data ring\n");
521		goto err;
522	}
523
524	tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
525			tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
526			&tq->comp_ring.basePA, GFP_KERNEL);
527	if (!tq->comp_ring.base) {
528		netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
529		goto err;
530	}
531
532	sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
533	tq->buf_info = dma_zalloc_coherent(&adapter->pdev->dev, sz,
534					   &tq->buf_info_pa, GFP_KERNEL);
535	if (!tq->buf_info)
536		goto err;
537
538	return 0;
539
540err:
541	vmxnet3_tq_destroy(tq, adapter);
542	return -ENOMEM;
543}
544
545static void
546vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
547{
548	int i;
549
550	for (i = 0; i < adapter->num_tx_queues; i++)
551		vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
552}
553
554/*
555 *    Starting from ring->next2fill, allocate rx buffers for the given ring
556 *    of the rx queue and update the rx desc. Stop after @num_to_alloc buffers
557 *    are allocated or allocation fails.
558 */
559
560static int
561vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
562			int num_to_alloc, struct vmxnet3_adapter *adapter)
563{
564	int num_allocated = 0;
565	struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
566	struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
567	u32 val;
568
569	while (num_allocated <= num_to_alloc) {
570		struct vmxnet3_rx_buf_info *rbi;
571		union Vmxnet3_GenericDesc *gd;
572
573		rbi = rbi_base + ring->next2fill;
574		gd = ring->base + ring->next2fill;
575
576		if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
577			if (rbi->skb == NULL) {
578				rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
579								       rbi->len,
580								       GFP_KERNEL);
581				if (unlikely(rbi->skb == NULL)) {
582					rq->stats.rx_buf_alloc_failure++;
583					break;
584				}
585
586				rbi->dma_addr = dma_map_single(
587						&adapter->pdev->dev,
588						rbi->skb->data, rbi->len,
589						PCI_DMA_FROMDEVICE);
590			} else {
591				/* rx buffer skipped by the device */
592			}
593			val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
594		} else {
595			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
596			       rbi->len  != PAGE_SIZE);
597
598			if (rbi->page == NULL) {
599				rbi->page = alloc_page(GFP_ATOMIC);
600				if (unlikely(rbi->page == NULL)) {
601					rq->stats.rx_buf_alloc_failure++;
602					break;
603				}
604				rbi->dma_addr = dma_map_page(
605						&adapter->pdev->dev,
606						rbi->page, 0, PAGE_SIZE,
607						PCI_DMA_FROMDEVICE);
608			} else {
609				/* rx buffers skipped by the device */
610			}
611			val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
612		}
613
614		BUG_ON(rbi->dma_addr == 0);
615		gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
616		gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
617					   | val | rbi->len);
618
619		/* Fill the last buffer but don't mark it ready, or else the
620		 * device will think that the queue is full */
621		if (num_allocated == num_to_alloc)
622			break;
623
624		gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
625		num_allocated++;
626		vmxnet3_cmd_ring_adv_next2fill(ring);
627	}
628
629	netdev_dbg(adapter->netdev,
630		"alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
631		num_allocated, ring->next2fill, ring->next2comp);
632
633	/* so that the device can distinguish a full ring and an empty ring */
634	BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
635
636	return num_allocated;
637}
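
/*
 * Worked example (illustrative only): vmxnet3_rq_init() calls this with
 * num_to_alloc == ring->size - 1.  For a 4-entry ring that marks three
 * descriptors ready and fills the fourth without setting its gen bit.  If
 * all four were handed to the device, next2fill would wrap onto next2comp,
 * which is also the empty-ring condition, so neither side could tell a
 * full ring from an empty one -- hence the BUG_ON() above.
 */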
638
639
640static void
641vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
642		    struct vmxnet3_rx_buf_info *rbi)
643{
644	struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
645		skb_shinfo(skb)->nr_frags;
646
647	BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
648
649	__skb_frag_set_page(frag, rbi->page);
650	frag->page_offset = 0;
651	skb_frag_size_set(frag, rcd->len);
652	skb->data_len += rcd->len;
653	skb->truesize += PAGE_SIZE;
654	skb_shinfo(skb)->nr_frags++;
655}
656
657
658static void
659vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
660		struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
661		struct vmxnet3_adapter *adapter)
662{
663	u32 dw2, len;
664	unsigned long buf_offset;
665	int i;
666	union Vmxnet3_GenericDesc *gdesc;
667	struct vmxnet3_tx_buf_info *tbi = NULL;
668
669	BUG_ON(ctx->copy_size > skb_headlen(skb));
670
671	/* use the previous gen bit for the SOP desc */
672	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
673
674	ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
675	gdesc = ctx->sop_txd; /* both loops below can be skipped */
676
677	/* no need to map the buffer if headers are copied */
678	if (ctx->copy_size) {
679		ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
680					tq->tx_ring.next2fill *
681					sizeof(struct Vmxnet3_TxDataDesc));
682		ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
683		ctx->sop_txd->dword[3] = 0;
684
685		tbi = tq->buf_info + tq->tx_ring.next2fill;
686		tbi->map_type = VMXNET3_MAP_NONE;
687
688		netdev_dbg(adapter->netdev,
689			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
690			tq->tx_ring.next2fill,
691			le64_to_cpu(ctx->sop_txd->txd.addr),
692			ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
693		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
694
695		/* use the right gen for non-SOP desc */
696		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
697	}
698
699	/* linear part can use multiple tx desc if it's big */
700	len = skb_headlen(skb) - ctx->copy_size;
701	buf_offset = ctx->copy_size;
702	while (len) {
703		u32 buf_size;
704
705		if (len < VMXNET3_MAX_TX_BUF_SIZE) {
706			buf_size = len;
707			dw2 |= len;
708		} else {
709			buf_size = VMXNET3_MAX_TX_BUF_SIZE;
710			/* spec says that for TxDesc.len, 0 == 2^14 */
711		}
712
713		tbi = tq->buf_info + tq->tx_ring.next2fill;
714		tbi->map_type = VMXNET3_MAP_SINGLE;
715		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
716				skb->data + buf_offset, buf_size,
717				PCI_DMA_TODEVICE);
718
719		tbi->len = buf_size;
720
721		gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
722		BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
723
724		gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
725		gdesc->dword[2] = cpu_to_le32(dw2);
726		gdesc->dword[3] = 0;
727
728		netdev_dbg(adapter->netdev,
729			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
730			tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
731			le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
732		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
733		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
734
735		len -= buf_size;
736		buf_offset += buf_size;
737	}
738
739	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
740		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
741		u32 buf_size;
742
743		buf_offset = 0;
744		len = skb_frag_size(frag);
745		while (len) {
746			tbi = tq->buf_info + tq->tx_ring.next2fill;
747			if (len < VMXNET3_MAX_TX_BUF_SIZE) {
748				buf_size = len;
749				dw2 |= len;
750			} else {
751				buf_size = VMXNET3_MAX_TX_BUF_SIZE;
752				/* spec says that for TxDesc.len, 0 == 2^14 */
753			}
754			tbi->map_type = VMXNET3_MAP_PAGE;
755			tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
756							 buf_offset, buf_size,
757							 DMA_TO_DEVICE);
758
759			tbi->len = buf_size;
760
761			gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
762			BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
763
764			gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
765			gdesc->dword[2] = cpu_to_le32(dw2);
766			gdesc->dword[3] = 0;
767
768			netdev_dbg(adapter->netdev,
769				"txd[%u]: 0x%llx %u %u\n",
770				tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
771				le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
772			vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
773			dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
774
775			len -= buf_size;
776			buf_offset += buf_size;
777		}
778	}
779
780	ctx->eop_txd = gdesc;
781
782	/* set the last buf_info for the pkt */
783	tbi->skb = skb;
784	tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
785}
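
/*
 * Illustrative note (not from the original source): the SOP descriptor is
 * deliberately written with the inverted generation (tx_ring.gen ^ 0x1),
 * so the device ignores the whole chain while the remaining descriptors
 * are filled in.  Only when vmxnet3_tq_xmit() later flips the SOP gen bit
 * does the packet become visible; a 3-descriptor packet is thus published
 * atomically by that single bit flip rather than descriptor by descriptor.
 */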
786
787
788/* Init all tx queues */
789static void
790vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
791{
792	int i;
793
794	for (i = 0; i < adapter->num_tx_queues; i++)
795		vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
796}
797
798
799/*
800 *    parse and copy relevant protocol headers:
801 *      For a tso pkt, relevant headers are L2/3/4 including options
802 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
803 *      if it's a TCP/UDP pkt
804 *
805 * Returns:
806 *    -1:  an error occurred during parsing
807 *     0:  protocol headers parsed, but too big to be copied
808 *     1:  protocol headers parsed and copied
809 *
810 * Other effects:
811 *    1. related *ctx fields are updated.
812 *    2. ctx->copy_size is # of bytes copied
813 *    3. the portion copied is guaranteed to be in the linear part
814 *
815 */
816static int
817vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
818			   struct vmxnet3_tx_ctx *ctx,
819			   struct vmxnet3_adapter *adapter)
820{
821	struct Vmxnet3_TxDataDesc *tdd;
822
823	if (ctx->mss) {	/* TSO */
824		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
825		ctx->l4_hdr_size = tcp_hdrlen(skb);
826		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
827	} else {
828		if (skb->ip_summed == CHECKSUM_PARTIAL) {
829			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
830
831			if (ctx->ipv4) {
832				const struct iphdr *iph = ip_hdr(skb);
833
834				if (iph->protocol == IPPROTO_TCP)
835					ctx->l4_hdr_size = tcp_hdrlen(skb);
836				else if (iph->protocol == IPPROTO_UDP)
837					ctx->l4_hdr_size = sizeof(struct udphdr);
838				else
839					ctx->l4_hdr_size = 0;
840			} else {
841				/* for simplicity, don't copy L4 headers */
842				ctx->l4_hdr_size = 0;
843			}
844			ctx->copy_size = min(ctx->eth_ip_hdr_size +
845					 ctx->l4_hdr_size, skb->len);
846		} else {
847			ctx->eth_ip_hdr_size = 0;
848			ctx->l4_hdr_size = 0;
849			/* copy as much as allowed */
850			ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
851					     skb_headlen(skb));
852		}
853
854		/* make sure headers are accessible directly */
855		if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
856			goto err;
857	}
858
859	if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
860		tq->stats.oversized_hdr++;
861		ctx->copy_size = 0;
862		return 0;
863	}
864
865	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
866
867	memcpy(tdd->data, skb->data, ctx->copy_size);
868	netdev_dbg(adapter->netdev,
869		"copy %u bytes to dataRing[%u]\n",
870		ctx->copy_size, tq->tx_ring.next2fill);
871	return 1;
872
873err:
874	return -1;
875}
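
/*
 * Worked example (illustrative only): for a TSO packet with a 14-byte
 * Ethernet header, a 20-byte IPv4 header and a 20-byte TCP header without
 * options, eth_ip_hdr_size == 34 and l4_hdr_size == 20, so copy_size is 54
 * and those 54 bytes are copied into the data ring entry picked above.  A
 * UDP packet requesting csum offload would copy 34 + 8 == 42 bytes instead.
 */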
876
877
878static void
879vmxnet3_prepare_tso(struct sk_buff *skb,
880		    struct vmxnet3_tx_ctx *ctx)
881{
882	struct tcphdr *tcph = tcp_hdr(skb);
883
884	if (ctx->ipv4) {
885		struct iphdr *iph = ip_hdr(skb);
886
887		iph->check = 0;
888		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
889						 IPPROTO_TCP, 0);
890	} else {
891		struct ipv6hdr *iph = ipv6_hdr(skb);
892
893		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
894					       IPPROTO_TCP, 0);
895	}
896}
897
898static int txd_estimate(const struct sk_buff *skb)
899{
900	int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
901	int i;
902
903	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
904		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
905
906		count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
907	}
908	return count;
909}
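
/*
 * Worked example (illustrative only, assuming VMXNET3_TXD_NEEDED() rounds
 * a length up to multiples of VMXNET3_MAX_TX_BUF_SIZE, i.e. 2^14 bytes per
 * the spec comment in vmxnet3_map_pkt()): a TSO skb with a 64KB linear
 * area and one 4KB page fragment estimates 4 + 1 descriptors for the
 * linear part (the +1 covers the copied-header descriptor) plus 1 for the
 * fragment, i.e. 6 descriptors in total.
 */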
910
911/*
912 * Transmits a pkt through a given tq
913 * Returns:
914 *    NETDEV_TX_OK:      descriptors are set up successfully
915 *    NETDEV_TX_OK:      an error occurred and the pkt was dropped
916 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
917 *
918 * Side-effects:
919 *    1. tx ring may be changed
920 *    2. tq stats may be updated accordingly
921 *    3. shared->txNumDeferred may be updated
922 */
923
924static int
925vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
926		struct vmxnet3_adapter *adapter, struct net_device *netdev)
927{
928	int ret;
929	u32 count;
930	unsigned long flags;
931	struct vmxnet3_tx_ctx ctx;
932	union Vmxnet3_GenericDesc *gdesc;
933#ifdef __BIG_ENDIAN_BITFIELD
934	/* Use temporary descriptor to avoid touching bits multiple times */
935	union Vmxnet3_GenericDesc tempTxDesc;
936#endif
937
938	count = txd_estimate(skb);
939
940	ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
941
942	ctx.mss = skb_shinfo(skb)->gso_size;
943	if (ctx.mss) {
944		if (skb_header_cloned(skb)) {
945			if (unlikely(pskb_expand_head(skb, 0, 0,
946						      GFP_ATOMIC) != 0)) {
947				tq->stats.drop_tso++;
948				goto drop_pkt;
949			}
950			tq->stats.copy_skb_header++;
951		}
952		vmxnet3_prepare_tso(skb, &ctx);
953	} else {
954		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
955
956			/* non-tso pkts must not use more than
957			 * VMXNET3_MAX_TXD_PER_PKT entries
958			 */
959			if (skb_linearize(skb) != 0) {
960				tq->stats.drop_too_many_frags++;
961				goto drop_pkt;
962			}
963			tq->stats.linearized++;
964
965			/* recalculate the # of descriptors to use */
966			count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
967		}
968	}
969
970	spin_lock_irqsave(&tq->tx_lock, flags);
971
972	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
973		tq->stats.tx_ring_full++;
974		netdev_dbg(adapter->netdev,
975			"tx queue stopped on %s, next2comp %u"
976			" next2fill %u\n", adapter->netdev->name,
977			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
978
979		vmxnet3_tq_stop(tq, adapter);
980		spin_unlock_irqrestore(&tq->tx_lock, flags);
981		return NETDEV_TX_BUSY;
982	}
983
984
985	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
986	if (ret >= 0) {
987		BUG_ON(ret <= 0 && ctx.copy_size != 0);
988		/* hdrs parsed, check against other limits */
989		if (ctx.mss) {
990			if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
991				     VMXNET3_MAX_TX_BUF_SIZE)) {
992				goto hdr_too_big;
993			}
994		} else {
995			if (skb->ip_summed == CHECKSUM_PARTIAL) {
996				if (unlikely(ctx.eth_ip_hdr_size +
997					     skb->csum_offset >
998					     VMXNET3_MAX_CSUM_OFFSET)) {
999					goto hdr_too_big;
1000				}
1001			}
1002		}
1003	} else {
1004		tq->stats.drop_hdr_inspect_err++;
1005		goto unlock_drop_pkt;
1006	}
1007
1008	/* fill tx descs related to addr & len */
1009	vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1010
1011	/* setup the EOP desc */
1012	ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1013
1014	/* setup the SOP desc */
1015#ifdef __BIG_ENDIAN_BITFIELD
1016	gdesc = &tempTxDesc;
1017	gdesc->dword[2] = ctx.sop_txd->dword[2];
1018	gdesc->dword[3] = ctx.sop_txd->dword[3];
1019#else
1020	gdesc = ctx.sop_txd;
1021#endif
1022	if (ctx.mss) {
1023		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1024		gdesc->txd.om = VMXNET3_OM_TSO;
1025		gdesc->txd.msscof = ctx.mss;
1026		le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1027			     gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1028	} else {
1029		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1030			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1031			gdesc->txd.om = VMXNET3_OM_CSUM;
1032			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1033					    skb->csum_offset;
1034		} else {
1035			gdesc->txd.om = 0;
1036			gdesc->txd.msscof = 0;
1037		}
1038		le32_add_cpu(&tq->shared->txNumDeferred, 1);
1039	}
1040
1041	if (vlan_tx_tag_present(skb)) {
1042		gdesc->txd.ti = 1;
1043		gdesc->txd.tci = vlan_tx_tag_get(skb);
1044	}
1045
1046	/* finally, flip the GEN bit of the SOP desc. */
1047	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1048						  VMXNET3_TXD_GEN);
1049#ifdef __BIG_ENDIAN_BITFIELD
1050	/* Finished updating the bitfields of the Tx Desc, so write them back
1051	 * to their original place.
1052	 */
1053	vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1054			   (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1055	gdesc = ctx.sop_txd;
1056#endif
1057	netdev_dbg(adapter->netdev,
1058		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1059		(u32)(ctx.sop_txd -
1060		tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1061		le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1062
1063	spin_unlock_irqrestore(&tq->tx_lock, flags);
1064
1065	if (le32_to_cpu(tq->shared->txNumDeferred) >=
1066					le32_to_cpu(tq->shared->txThreshold)) {
1067		tq->shared->txNumDeferred = 0;
1068		VMXNET3_WRITE_BAR0_REG(adapter,
1069				       VMXNET3_REG_TXPROD + tq->qid * 8,
1070				       tq->tx_ring.next2fill);
1071	}
1072
1073	return NETDEV_TX_OK;
1074
1075hdr_too_big:
1076	tq->stats.drop_oversized_hdr++;
1077unlock_drop_pkt:
1078	spin_unlock_irqrestore(&tq->tx_lock, flags);
1079drop_pkt:
1080	tq->stats.drop_total++;
1081	dev_kfree_skb(skb);
1082	return NETDEV_TX_OK;
1083}
1084
1085
1086static netdev_tx_t
1087vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1088{
1089	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1090
1091	BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1092	return vmxnet3_tq_xmit(skb,
1093			       &adapter->tx_queue[skb->queue_mapping],
1094			       adapter, netdev);
1095}
1096
1097
1098static void
1099vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1100		struct sk_buff *skb,
1101		union Vmxnet3_GenericDesc *gdesc)
1102{
1103	if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1104		/* typical case: TCP/UDP over IP and both csums are correct */
1105		if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1106							VMXNET3_RCD_CSUM_OK) {
1107			skb->ip_summed = CHECKSUM_UNNECESSARY;
1108			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1109			BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1110			BUG_ON(gdesc->rcd.frg);
1111		} else {
1112			if (gdesc->rcd.csum) {
1113				skb->csum = htons(gdesc->rcd.csum);
1114				skb->ip_summed = CHECKSUM_PARTIAL;
1115			} else {
1116				skb_checksum_none_assert(skb);
1117			}
1118		}
1119	} else {
1120		skb_checksum_none_assert(skb);
1121	}
1122}
1123
1124
1125static void
1126vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1127		 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1128{
1129	rq->stats.drop_err++;
1130	if (!rcd->fcs)
1131		rq->stats.drop_fcs++;
1132
1133	rq->stats.drop_total++;
1134
1135	/*
1136	 * We do not unmap and chain the rx buffer to the skb.
1137	 * We basically pretend this buffer is not used and will be recycled
1138	 * by vmxnet3_rq_alloc_rx_buf()
1139	 */
1140
1141	/*
1142	 * ctx->skb may be NULL if this is the first and the only one
1143	 * desc for the pkt
1144	 */
1145	if (ctx->skb)
1146		dev_kfree_skb_irq(ctx->skb);
1147
1148	ctx->skb = NULL;
1149}
1150
1151
1152static int
1153vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1154		       struct vmxnet3_adapter *adapter, int quota)
1155{
1156	static const u32 rxprod_reg[2] = {
1157		VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1158	};
1159	u32 num_rxd = 0;
1160	bool skip_page_frags = false;
1161	struct Vmxnet3_RxCompDesc *rcd;
1162	struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1163#ifdef __BIG_ENDIAN_BITFIELD
1164	struct Vmxnet3_RxDesc rxCmdDesc;
1165	struct Vmxnet3_RxCompDesc rxComp;
1166#endif
1167	vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1168			  &rxComp);
1169	while (rcd->gen == rq->comp_ring.gen) {
1170		struct vmxnet3_rx_buf_info *rbi;
1171		struct sk_buff *skb, *new_skb = NULL;
1172		struct page *new_page = NULL;
1173		int num_to_alloc;
1174		struct Vmxnet3_RxDesc *rxd;
1175		u32 idx, ring_idx;
1176		struct vmxnet3_cmd_ring	*ring = NULL;
1177		if (num_rxd >= quota) {
1178			/* we may stop even before we see the EOP desc of
1179			 * the current pkt
1180			 */
1181			break;
1182		}
1183		num_rxd++;
1184		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1185		idx = rcd->rxdIdx;
1186		ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1187		ring = rq->rx_ring + ring_idx;
1188		vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1189				  &rxCmdDesc);
1190		rbi = rq->buf_info[ring_idx] + idx;
1191
1192		BUG_ON(rxd->addr != rbi->dma_addr ||
1193		       rxd->len != rbi->len);
1194
1195		if (unlikely(rcd->eop && rcd->err)) {
1196			vmxnet3_rx_error(rq, rcd, ctx, adapter);
1197			goto rcd_done;
1198		}
1199
1200		if (rcd->sop) { /* first buf of the pkt */
1201			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1202			       rcd->rqID != rq->qid);
1203
1204			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1205			BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1206
1207			if (unlikely(rcd->len == 0)) {
1208				/* Pretend the rx buffer is skipped. */
1209				BUG_ON(!(rcd->sop && rcd->eop));
1210				netdev_dbg(adapter->netdev,
1211					"rxRing[%u][%u] 0 length\n",
1212					ring_idx, idx);
1213				goto rcd_done;
1214			}
1215
1216			skip_page_frags = false;
1217			ctx->skb = rbi->skb;
1218			new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1219							    rbi->len);
1220			if (new_skb == NULL) {
1221				/* Skb allocation failed, do not hand over this
1222				 * skb to the stack. Reuse it. Drop the existing pkt.
1223				 */
1224				rq->stats.rx_buf_alloc_failure++;
1225				ctx->skb = NULL;
1226				rq->stats.drop_total++;
1227				skip_page_frags = true;
1228				goto rcd_done;
1229			}
1230
1231			dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
1232					 rbi->len,
1233					 PCI_DMA_FROMDEVICE);
1234
1235#ifdef VMXNET3_RSS
1236			if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1237			    (adapter->netdev->features & NETIF_F_RXHASH))
1238				skb_set_hash(ctx->skb,
1239					     le32_to_cpu(rcd->rssHash),
1240					     PKT_HASH_TYPE_L3);
1241#endif
1242			skb_put(ctx->skb, rcd->len);
1243
1244			/* Immediate refill */
1245			rbi->skb = new_skb;
1246			rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
1247						       rbi->skb->data, rbi->len,
1248						       PCI_DMA_FROMDEVICE);
1249			rxd->addr = cpu_to_le64(rbi->dma_addr);
1250			rxd->len = rbi->len;
1251
1252		} else {
1253			BUG_ON(ctx->skb == NULL && !skip_page_frags);
1254
1255			/* non SOP buffer must be type 1 in most cases */
1256			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1257			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1258
1259			/* If an sop buffer was dropped, skip all
1260			 * following non-sop fragments. They will be reused.
1261			 */
1262			if (skip_page_frags)
1263				goto rcd_done;
1264
1265			new_page = alloc_page(GFP_ATOMIC);
1266			if (unlikely(new_page == NULL)) {
1267				/* Replacement page frag could not be allocated.
1268				 * Reuse this page. Drop the pkt and free the
1269				 * skb which contained this page as a frag. Skip
1270				 * processing all the following non-sop frags.
1271				 */
1272				rq->stats.rx_buf_alloc_failure++;
1273				dev_kfree_skb(ctx->skb);
1274				ctx->skb = NULL;
1275				skip_page_frags = true;
1276				goto rcd_done;
1277			}
1278
1279			if (rcd->len) {
1280				dma_unmap_page(&adapter->pdev->dev,
1281					       rbi->dma_addr, rbi->len,
1282					       PCI_DMA_FROMDEVICE);
1283
1284				vmxnet3_append_frag(ctx->skb, rcd, rbi);
1285			}
1286
1287			/* Immediate refill */
1288			rbi->page = new_page;
1289			rbi->dma_addr = dma_map_page(&adapter->pdev->dev,
1290						     rbi->page,
1291						     0, PAGE_SIZE,
1292						     PCI_DMA_FROMDEVICE);
1293			rxd->addr = cpu_to_le64(rbi->dma_addr);
1294			rxd->len = rbi->len;
1295		}
1296
1297
1298		skb = ctx->skb;
1299		if (rcd->eop) {
1300			skb->len += skb->data_len;
1301
1302			vmxnet3_rx_csum(adapter, skb,
1303					(union Vmxnet3_GenericDesc *)rcd);
1304			skb->protocol = eth_type_trans(skb, adapter->netdev);
1305
1306			if (unlikely(rcd->ts))
1307				__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1308
1309			if (adapter->netdev->features & NETIF_F_LRO)
1310				netif_receive_skb(skb);
1311			else
1312				napi_gro_receive(&rq->napi, skb);
1313
1314			ctx->skb = NULL;
1315		}
1316
1317rcd_done:
1318		/* device may have skipped some rx descs */
1319		ring->next2comp = idx;
1320		num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1321		ring = rq->rx_ring + ring_idx;
1322		while (num_to_alloc) {
1323			vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1324					  &rxCmdDesc);
1325			BUG_ON(!rxd->addr);
1326
1327			/* Recv desc is ready to be used by the device */
1328			rxd->gen = ring->gen;
1329			vmxnet3_cmd_ring_adv_next2fill(ring);
1330			num_to_alloc--;
1331		}
1332
1333		/* if needed, update the register */
1334		if (unlikely(rq->shared->updateRxProd)) {
1335			VMXNET3_WRITE_BAR0_REG(adapter,
1336					       rxprod_reg[ring_idx] + rq->qid * 8,
1337					       ring->next2fill);
1338		}
1339
1340		vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1341		vmxnet3_getRxComp(rcd,
1342				  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1343	}
1344
1345	return num_rxd;
1346}
1347
1348
1349static void
1350vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1351		   struct vmxnet3_adapter *adapter)
1352{
1353	u32 i, ring_idx;
1354	struct Vmxnet3_RxDesc *rxd;
1355
1356	for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1357		for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1358#ifdef __BIG_ENDIAN_BITFIELD
1359			struct Vmxnet3_RxDesc rxDesc;
1360#endif
1361			vmxnet3_getRxDesc(rxd,
1362				&rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1363
1364			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1365					rq->buf_info[ring_idx][i].skb) {
1366				dma_unmap_single(&adapter->pdev->dev, rxd->addr,
1367						 rxd->len, PCI_DMA_FROMDEVICE);
1368				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1369				rq->buf_info[ring_idx][i].skb = NULL;
1370			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1371					rq->buf_info[ring_idx][i].page) {
1372				dma_unmap_page(&adapter->pdev->dev, rxd->addr,
1373					       rxd->len, PCI_DMA_FROMDEVICE);
1374				put_page(rq->buf_info[ring_idx][i].page);
1375				rq->buf_info[ring_idx][i].page = NULL;
1376			}
1377		}
1378
1379		rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1380		rq->rx_ring[ring_idx].next2fill =
1381					rq->rx_ring[ring_idx].next2comp = 0;
1382	}
1383
1384	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1385	rq->comp_ring.next2proc = 0;
1386}
1387
1388
1389static void
1390vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1391{
1392	int i;
1393
1394	for (i = 0; i < adapter->num_rx_queues; i++)
1395		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1396}
1397
1398
1399static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1400			       struct vmxnet3_adapter *adapter)
1401{
1402	int i;
1403	int j;
1404
1405	/* all rx buffers must have already been freed */
1406	for (i = 0; i < 2; i++) {
1407		if (rq->buf_info[i]) {
1408			for (j = 0; j < rq->rx_ring[i].size; j++)
1409				BUG_ON(rq->buf_info[i][j].page != NULL);
1410		}
1411	}
1412
1413
1414	for (i = 0; i < 2; i++) {
1415		if (rq->rx_ring[i].base) {
1416			dma_free_coherent(&adapter->pdev->dev,
1417					  rq->rx_ring[i].size
1418					  * sizeof(struct Vmxnet3_RxDesc),
1419					  rq->rx_ring[i].base,
1420					  rq->rx_ring[i].basePA);
1421			rq->rx_ring[i].base = NULL;
1422		}
1423		rq->buf_info[i] = NULL;
1424	}
1425
1426	if (rq->comp_ring.base) {
1427		dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
1428				  * sizeof(struct Vmxnet3_RxCompDesc),
1429				  rq->comp_ring.base, rq->comp_ring.basePA);
1430		rq->comp_ring.base = NULL;
1431	}
1432
1433	if (rq->buf_info[0]) {
1434		size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
1435			(rq->rx_ring[0].size + rq->rx_ring[1].size);
1436		dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
1437				  rq->buf_info_pa);
1438	}
1439}
1440
1441
1442static int
1443vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1444		struct vmxnet3_adapter  *adapter)
1445{
1446	int i;
1447
1448	/* initialize buf_info */
1449	for (i = 0; i < rq->rx_ring[0].size; i++) {
1450
1451		/* 1st buf for a pkt is skbuff */
1452		if (i % adapter->rx_buf_per_pkt == 0) {
1453			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1454			rq->buf_info[0][i].len = adapter->skb_buf_size;
1455		} else { /* subsequent bufs for a pkt are frags */
1456			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1457			rq->buf_info[0][i].len = PAGE_SIZE;
1458		}
1459	}
1460	for (i = 0; i < rq->rx_ring[1].size; i++) {
1461		rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1462		rq->buf_info[1][i].len = PAGE_SIZE;
1463	}
1464
1465	/* reset internal state and allocate buffers for both rings */
1466	for (i = 0; i < 2; i++) {
1467		rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1468
1469		memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1470		       sizeof(struct Vmxnet3_RxDesc));
1471		rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1472	}
1473	if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1474				    adapter) == 0) {
1475		/* at least has 1 rx buffer for the 1st ring */
1476		return -ENOMEM;
1477	}
1478	vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1479
1480	/* reset the comp ring */
1481	rq->comp_ring.next2proc = 0;
1482	memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1483	       sizeof(struct Vmxnet3_RxCompDesc));
1484	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1485
1486	/* reset rxctx */
1487	rq->rx_ctx.skb = NULL;
1488
1489	/* stats are not reset */
1490	return 0;
1491}
1492
1493
1494static int
1495vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1496{
1497	int i, err = 0;
1498
1499	for (i = 0; i < adapter->num_rx_queues; i++) {
1500		err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1501		if (unlikely(err)) {
1502			dev_err(&adapter->netdev->dev, "%s: failed to "
1503				"initialize rx queue%i\n",
1504				adapter->netdev->name, i);
1505			break;
1506		}
1507	}
1508	return err;
1509
1510}
1511
1512
1513static int
1514vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1515{
1516	int i;
1517	size_t sz;
1518	struct vmxnet3_rx_buf_info *bi;
1519
1520	for (i = 0; i < 2; i++) {
1521
1522		sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1523		rq->rx_ring[i].base = dma_alloc_coherent(
1524						&adapter->pdev->dev, sz,
1525						&rq->rx_ring[i].basePA,
1526						GFP_KERNEL);
1527		if (!rq->rx_ring[i].base) {
1528			netdev_err(adapter->netdev,
1529				   "failed to allocate rx ring %d\n", i);
1530			goto err;
1531		}
1532	}
1533
1534	sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1535	rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
1536						&rq->comp_ring.basePA,
1537						GFP_KERNEL);
1538	if (!rq->comp_ring.base) {
1539		netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1540		goto err;
1541	}
1542
1543	sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1544						   rq->rx_ring[1].size);
1545	bi = dma_zalloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
1546				 GFP_KERNEL);
1547	if (!bi)
1548		goto err;
1549
1550	rq->buf_info[0] = bi;
1551	rq->buf_info[1] = bi + rq->rx_ring[0].size;
1552
1553	return 0;
1554
1555err:
1556	vmxnet3_rq_destroy(rq, adapter);
1557	return -ENOMEM;
1558}
1559
1560
1561static int
1562vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1563{
1564	int i, err = 0;
1565
1566	for (i = 0; i < adapter->num_rx_queues; i++) {
1567		err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1568		if (unlikely(err)) {
1569			dev_err(&adapter->netdev->dev,
1570				"%s: failed to create rx queue%i\n",
1571				adapter->netdev->name, i);
1572			goto err_out;
1573		}
1574	}
1575	return err;
1576err_out:
1577	vmxnet3_rq_destroy_all(adapter);
1578	return err;
1579
1580}
1581
1582/* Multiple queue aware polling function for tx and rx */
1583
1584static int
1585vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1586{
1587	int rcd_done = 0, i;
1588	if (unlikely(adapter->shared->ecr))
1589		vmxnet3_process_events(adapter);
1590	for (i = 0; i < adapter->num_tx_queues; i++)
1591		vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1592
1593	for (i = 0; i < adapter->num_rx_queues; i++)
1594		rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1595						   adapter, budget);
1596	return rcd_done;
1597}
1598
1599
1600static int
1601vmxnet3_poll(struct napi_struct *napi, int budget)
1602{
1603	struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1604					  struct vmxnet3_rx_queue, napi);
1605	int rxd_done;
1606
1607	rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1608
1609	if (rxd_done < budget) {
1610		napi_complete(napi);
1611		vmxnet3_enable_all_intrs(rx_queue->adapter);
1612	}
1613	return rxd_done;
1614}
1615
1616/*
1617 * NAPI polling function for MSI-X mode with multiple Rx queues
1618 * Returns the # of NAPI credits consumed (# of rx descriptors processed)
1619 */
1620
1621static int
1622vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1623{
1624	struct vmxnet3_rx_queue *rq = container_of(napi,
1625						struct vmxnet3_rx_queue, napi);
1626	struct vmxnet3_adapter *adapter = rq->adapter;
1627	int rxd_done;
1628
1629	/* When sharing interrupt with corresponding tx queue, process
1630	 * tx completions in that queue as well
1631	 */
1632	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1633		struct vmxnet3_tx_queue *tq =
1634				&adapter->tx_queue[rq - adapter->rx_queue];
1635		vmxnet3_tq_tx_complete(tq, adapter);
1636	}
1637
1638	rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1639
1640	if (rxd_done < budget) {
1641		napi_complete(napi);
1642		vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1643	}
1644	return rxd_done;
1645}
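
/*
 * Illustrative note (not from the original source): with
 * VMXNET3_INTR_BUDDYSHARE, rx queue i and tx queue i share one MSI-X
 * vector, so the pointer arithmetic rq - adapter->rx_queue recovers i and
 * selects the buddy tx queue; an interrupt on rx_queue[2], for example,
 * also reaps completions of tx_queue[2] before the rx ring is polled.
 */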
1646
1647
1648#ifdef CONFIG_PCI_MSI
1649
1650/*
1651 * Handle completion interrupts on tx queues
1652 * Returns whether or not the intr is handled
1653 */
1654
1655static irqreturn_t
1656vmxnet3_msix_tx(int irq, void *data)
1657{
1658	struct vmxnet3_tx_queue *tq = data;
1659	struct vmxnet3_adapter *adapter = tq->adapter;
1660
1661	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1662		vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1663
1664	/* Handle the case where only one irq is allocated for all tx queues */
1665	if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1666		int i;
1667		for (i = 0; i < adapter->num_tx_queues; i++) {
1668			struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1669			vmxnet3_tq_tx_complete(txq, adapter);
1670		}
1671	} else {
1672		vmxnet3_tq_tx_complete(tq, adapter);
1673	}
1674	vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1675
1676	return IRQ_HANDLED;
1677}
1678
1679
1680/*
1681 * Handle completion interrupts on rx queues. Returns whether or not the
1682 * intr is handled
1683 */
1684
1685static irqreturn_t
1686vmxnet3_msix_rx(int irq, void *data)
1687{
1688	struct vmxnet3_rx_queue *rq = data;
1689	struct vmxnet3_adapter *adapter = rq->adapter;
1690
1691	/* disable intr if needed */
1692	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1693		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1694	napi_schedule(&rq->napi);
1695
1696	return IRQ_HANDLED;
1697}
1698
1699/*
1700 *----------------------------------------------------------------------------
1701 *
1702 * vmxnet3_msix_event --
1703 *
1704 *    vmxnet3 msix event intr handler
1705 *
1706 * Result:
1707 *    whether or not the intr is handled
1708 *
1709 *----------------------------------------------------------------------------
1710 */
1711
1712static irqreturn_t
1713vmxnet3_msix_event(int irq, void *data)
1714{
1715	struct net_device *dev = data;
1716	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1717
1718	/* disable intr if needed */
1719	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1720		vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1721
1722	if (adapter->shared->ecr)
1723		vmxnet3_process_events(adapter);
1724
1725	vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1726
1727	return IRQ_HANDLED;
1728}
1729
1730#endif /* CONFIG_PCI_MSI  */
1731
1732
1733/* Interrupt handler for vmxnet3  */
1734static irqreturn_t
1735vmxnet3_intr(int irq, void *dev_id)
1736{
1737	struct net_device *dev = dev_id;
1738	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1739
1740	if (adapter->intr.type == VMXNET3_IT_INTX) {
1741		u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1742		if (unlikely(icr == 0))
1743			/* not ours */
1744			return IRQ_NONE;
1745	}
1746
1747
1748	/* disable intr if needed */
1749	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1750		vmxnet3_disable_all_intrs(adapter);
1751
1752	napi_schedule(&adapter->rx_queue[0].napi);
1753
1754	return IRQ_HANDLED;
1755}
1756
1757#ifdef CONFIG_NET_POLL_CONTROLLER
1758
1759/* netpoll callback. */
1760static void
1761vmxnet3_netpoll(struct net_device *netdev)
1762{
1763	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1764
1765	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1766		vmxnet3_disable_all_intrs(adapter);
1767
1768	vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1769	vmxnet3_enable_all_intrs(adapter);
1770
1771}
1772#endif	/* CONFIG_NET_POLL_CONTROLLER */
1773
1774static int
1775vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1776{
1777	struct vmxnet3_intr *intr = &adapter->intr;
1778	int err = 0, i;
1779	int vector = 0;
1780
1781#ifdef CONFIG_PCI_MSI
1782	if (adapter->intr.type == VMXNET3_IT_MSIX) {
1783		for (i = 0; i < adapter->num_tx_queues; i++) {
1784			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1785				sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1786					adapter->netdev->name, vector);
1787				err = request_irq(
1788					      intr->msix_entries[vector].vector,
1789					      vmxnet3_msix_tx, 0,
1790					      adapter->tx_queue[i].name,
1791					      &adapter->tx_queue[i]);
1792			} else {
1793				sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1794					adapter->netdev->name, vector);
1795			}
1796			if (err) {
1797				dev_err(&adapter->netdev->dev,
1798					"Failed to request irq for MSIX, %s, "
1799					"error %d\n",
1800					adapter->tx_queue[i].name, err);
1801				return err;
1802			}
1803
1804			/* Handle the case where only 1 MSIx was allocated for
1805			 * all tx queues */
1806			if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1807				for (; i < adapter->num_tx_queues; i++)
1808					adapter->tx_queue[i].comp_ring.intr_idx
1809								= vector;
1810				vector++;
1811				break;
1812			} else {
1813				adapter->tx_queue[i].comp_ring.intr_idx
1814								= vector++;
1815			}
1816		}
1817		if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1818			vector = 0;
1819
1820		for (i = 0; i < adapter->num_rx_queues; i++) {
1821			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1822				sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1823					adapter->netdev->name, vector);
1824			else
1825				sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1826					adapter->netdev->name, vector);
1827			err = request_irq(intr->msix_entries[vector].vector,
1828					  vmxnet3_msix_rx, 0,
1829					  adapter->rx_queue[i].name,
1830					  &(adapter->rx_queue[i]));
1831			if (err) {
1832				netdev_err(adapter->netdev,
1833					   "Failed to request irq for MSIX, "
1834					   "%s, error %d\n",
1835					   adapter->rx_queue[i].name, err);
1836				return err;
1837			}
1838
1839			adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1840		}
1841
1842		sprintf(intr->event_msi_vector_name, "%s-event-%d",
1843			adapter->netdev->name, vector);
1844		err = request_irq(intr->msix_entries[vector].vector,
1845				  vmxnet3_msix_event, 0,
1846				  intr->event_msi_vector_name, adapter->netdev);
1847		intr->event_intr_idx = vector;
1848
1849	} else if (intr->type == VMXNET3_IT_MSI) {
1850		adapter->num_rx_queues = 1;
1851		err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1852				  adapter->netdev->name, adapter->netdev);
1853	} else {
1854#endif
1855		adapter->num_rx_queues = 1;
1856		err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1857				  IRQF_SHARED, adapter->netdev->name,
1858				  adapter->netdev);
1859#ifdef CONFIG_PCI_MSI
1860	}
1861#endif
1862	intr->num_intrs = vector + 1;
1863	if (err) {
1864		netdev_err(adapter->netdev,
1865			   "Failed to request irq (intr type:%d), error %d\n",
1866			   intr->type, err);
1867	} else {
1868		/* Number of rx queues will not change after this */
1869		for (i = 0; i < adapter->num_rx_queues; i++) {
1870			struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1871			rq->qid = i;
1872			rq->qid2 = i + adapter->num_rx_queues;
1873		}
1874
1875
1876
1877		/* init our intr settings */
1878		for (i = 0; i < intr->num_intrs; i++)
1879			intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1880		if (adapter->intr.type != VMXNET3_IT_MSIX) {
1881			adapter->intr.event_intr_idx = 0;
1882			for (i = 0; i < adapter->num_tx_queues; i++)
1883				adapter->tx_queue[i].comp_ring.intr_idx = 0;
1884			adapter->rx_queue[0].comp_ring.intr_idx = 0;
1885		}
1886
1887		netdev_info(adapter->netdev,
1888			    "intr type %u, mode %u, %u vectors allocated\n",
1889			    intr->type, intr->mask_mode, intr->num_intrs);
1890	}
1891
1892	return err;
1893}
1894
1895
1896static void
1897vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1898{
1899	struct vmxnet3_intr *intr = &adapter->intr;
1900	BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1901
1902	switch (intr->type) {
1903#ifdef CONFIG_PCI_MSI
1904	case VMXNET3_IT_MSIX:
1905	{
1906		int i, vector = 0;
1907
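		/* With BUDDYSHARE the tx queues never requested vectors of
		 * their own (they share the rx queue vectors), so only the
		 * rx and event vectors need to be freed.
		 */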
1908		if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1909			for (i = 0; i < adapter->num_tx_queues; i++) {
1910				free_irq(intr->msix_entries[vector++].vector,
1911					 &(adapter->tx_queue[i]));
1912				if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1913					break;
1914			}
1915		}
1916
1917		for (i = 0; i < adapter->num_rx_queues; i++) {
1918			free_irq(intr->msix_entries[vector++].vector,
1919				 &(adapter->rx_queue[i]));
1920		}
1921
1922		free_irq(intr->msix_entries[vector].vector,
1923			 adapter->netdev);
1924		BUG_ON(vector >= intr->num_intrs);
1925		break;
1926	}
1927#endif
1928	case VMXNET3_IT_MSI:
1929		free_irq(adapter->pdev->irq, adapter->netdev);
1930		break;
1931	case VMXNET3_IT_INTX:
1932		free_irq(adapter->pdev->irq, adapter->netdev);
1933		break;
1934	default:
1935		BUG();
1936	}
1937}
1938
1939
1940static void
1941vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1942{
1943	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1944	u16 vid;
1945
1946	/* allow untagged pkts */
1947	VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1948
1949	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1950		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1951}
1952
1953
1954static int
1955vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1956{
1957	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1958
1959	if (!(netdev->flags & IFF_PROMISC)) {
1960		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1961		unsigned long flags;
1962
1963		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1964		spin_lock_irqsave(&adapter->cmd_lock, flags);
1965		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1966				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1967		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1968	}
1969
1970	set_bit(vid, adapter->active_vlans);
1971
1972	return 0;
1973}
1974
1975
1976static int
1977vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1978{
1979	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1980
1981	if (!(netdev->flags & IFF_PROMISC)) {
1982		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1983		unsigned long flags;
1984
1985		VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1986		spin_lock_irqsave(&adapter->cmd_lock, flags);
1987		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1988				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1989		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1990	}
1991
1992	clear_bit(vid, adapter->active_vlans);
1993
1994	return 0;
1995}
1996
1997
1998static u8 *
1999vmxnet3_copy_mc(struct net_device *netdev)
2000{
2001	u8 *buf = NULL;
2002	u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
2003
2004	/* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
2005	if (sz <= 0xffff) {
2006		/* We may be called with BH disabled */
2007		buf = kmalloc(sz, GFP_ATOMIC);
2008		if (buf) {
2009			struct netdev_hw_addr *ha;
2010			int i = 0;
2011
2012			netdev_for_each_mc_addr(ha, netdev)
2013				memcpy(buf + i++ * ETH_ALEN, ha->addr,
2014				       ETH_ALEN);
2015		}
2016	}
2017	return buf;
2018}
2019
2020
2021static void
2022vmxnet3_set_mc(struct net_device *netdev)
2023{
2024	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2025	unsigned long flags;
2026	struct Vmxnet3_RxFilterConf *rxConf =
2027					&adapter->shared->devRead.rxFilterConf;
2028	u8 *new_table = NULL;
2029	dma_addr_t new_table_pa = 0;
2030	u32 new_mode = VMXNET3_RXM_UCAST;
2031
2032	if (netdev->flags & IFF_PROMISC) {
2033		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2034		memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2035
2036		new_mode |= VMXNET3_RXM_PROMISC;
2037	} else {
2038		vmxnet3_restore_vlan(adapter);
2039	}
2040
2041	if (netdev->flags & IFF_BROADCAST)
2042		new_mode |= VMXNET3_RXM_BCAST;
2043
2044	if (netdev->flags & IFF_ALLMULTI)
2045		new_mode |= VMXNET3_RXM_ALL_MULTI;
2046	else
2047		if (!netdev_mc_empty(netdev)) {
2048			new_table = vmxnet3_copy_mc(netdev);
2049			if (new_table) {
2050				new_mode |= VMXNET3_RXM_MCAST;
2051				rxConf->mfTableLen = cpu_to_le16(
2052					netdev_mc_count(netdev) * ETH_ALEN);
2053				new_table_pa = dma_map_single(
2054							&adapter->pdev->dev,
2055							new_table,
2056							rxConf->mfTableLen,
2057							PCI_DMA_TODEVICE);
2058				rxConf->mfTablePA = cpu_to_le64(new_table_pa);
2059			} else {
2060				netdev_info(netdev, "failed to copy mcast list"
2061					    ", setting ALL_MULTI\n");
2062				new_mode |= VMXNET3_RXM_ALL_MULTI;
2063			}
2064		}
2065
2066
2067	if (!(new_mode & VMXNET3_RXM_MCAST)) {
2068		rxConf->mfTableLen = 0;
2069		rxConf->mfTablePA = 0;
2070	}
2071
2072	spin_lock_irqsave(&adapter->cmd_lock, flags);
2073	if (new_mode != rxConf->rxMode) {
2074		rxConf->rxMode = cpu_to_le32(new_mode);
2075		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2076				       VMXNET3_CMD_UPDATE_RX_MODE);
2077		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2078				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2079	}
2080
2081	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2082			       VMXNET3_CMD_UPDATE_MAC_FILTERS);
2083	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2084
2085	if (new_table) {
2086		dma_unmap_single(&adapter->pdev->dev, new_table_pa,
2087				 rxConf->mfTableLen, PCI_DMA_TODEVICE);
2088		kfree(new_table);
2089	}
2090}
2091
2092void
2093vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2094{
2095	int i;
2096
2097	for (i = 0; i < adapter->num_rx_queues; i++)
2098		vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2099}
2100
2101
2102/*
2103 *   Set up driver_shared based on settings in adapter.
2104 */
2105
2106static void
2107vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2108{
2109	struct Vmxnet3_DriverShared *shared = adapter->shared;
2110	struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2111	struct Vmxnet3_TxQueueConf *tqc;
2112	struct Vmxnet3_RxQueueConf *rqc;
2113	int i;
2114
2115	memset(shared, 0, sizeof(*shared));
2116
2117	/* driver settings */
2118	shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2119	devRead->misc.driverInfo.version = cpu_to_le32(
2120						VMXNET3_DRIVER_VERSION_NUM);
2121	devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2122				VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2123	devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
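	/* The gos bit-fields above were filled in host byte order; convert
	 * the 32-bit word containing them to little endian in place, as
	 * expected by the device.
	 */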
2124	*((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2125				*((u32 *)&devRead->misc.driverInfo.gos));
2126	devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2127	devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2128
2129	devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
2130	devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2131
2132	/* set up feature flags */
2133	if (adapter->netdev->features & NETIF_F_RXCSUM)
2134		devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2135
2136	if (adapter->netdev->features & NETIF_F_LRO) {
2137		devRead->misc.uptFeatures |= UPT1_F_LRO;
2138		devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2139	}
2140	if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2141		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2142
2143	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2144	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2145	devRead->misc.queueDescLen = cpu_to_le32(
2146		adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2147		adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2148
2149	/* tx queue settings */
2150	devRead->misc.numTxQueues =  adapter->num_tx_queues;
2151	for (i = 0; i < adapter->num_tx_queues; i++) {
2152		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2153		BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2154		tqc = &adapter->tqd_start[i].conf;
2155		tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2156		tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2157		tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2158		tqc->ddPA           = cpu_to_le64(tq->buf_info_pa);
2159		tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2160		tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2161		tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2162		tqc->ddLen          = cpu_to_le32(
2163					sizeof(struct vmxnet3_tx_buf_info) *
2164					tqc->txRingSize);
2165		tqc->intrIdx        = tq->comp_ring.intr_idx;
2166	}
2167
2168	/* rx queue settings */
2169	devRead->misc.numRxQueues = adapter->num_rx_queues;
2170	for (i = 0; i < adapter->num_rx_queues; i++) {
2171		struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[i];
2172		rqc = &adapter->rqd_start[i].conf;
2173		rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2174		rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2175		rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2176		rqc->ddPA            = cpu_to_le64(rq->buf_info_pa);
2177		rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2178		rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2179		rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2180		rqc->ddLen           = cpu_to_le32(
2181					sizeof(struct vmxnet3_rx_buf_info) *
2182					(rqc->rxRingSize[0] +
2183					 rqc->rxRingSize[1]));
2184		rqc->intrIdx         = rq->comp_ring.intr_idx;
2185	}
2186
2187#ifdef VMXNET3_RSS
2188	memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2189
2190	if (adapter->rss) {
2191		struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2192		static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2193			0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2194			0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2195			0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2196			0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2197			0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2198		};
2199
2200		devRead->misc.uptFeatures |= UPT1_F_RSS;
2201		devRead->misc.numRxQueues = adapter->num_rx_queues;
2202		rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2203				    UPT1_RSS_HASH_TYPE_IPV4 |
2204				    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2205				    UPT1_RSS_HASH_TYPE_IPV6;
2206		rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2207		rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2208		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2209		memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2210
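		/* Fill the RSS indirection table with the default layout,
		 * spreading flows evenly across the rx queues.
		 */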
2211		for (i = 0; i < rssConf->indTableSize; i++)
2212			rssConf->indTable[i] = ethtool_rxfh_indir_default(
2213				i, adapter->num_rx_queues);
2214
2215		devRead->rssConfDesc.confVer = 1;
2216		devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2217		devRead->rssConfDesc.confPA =
2218			cpu_to_le64(adapter->rss_conf_pa);
2219	}
2220
2221#endif /* VMXNET3_RSS */
2222
2223	/* intr settings */
2224	devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2225				     VMXNET3_IMM_AUTO;
2226	devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2227	for (i = 0; i < adapter->intr.num_intrs; i++)
2228		devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2229
2230	devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
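	/* Start with all interrupts disabled; vmxnet3_enable_all_intrs()
	 * enables them once the device has been activated.
	 */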
2231	devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2232
2233	/* rx filter settings */
2234	devRead->rxFilterConf.rxMode = 0;
2235	vmxnet3_restore_vlan(adapter);
2236	vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2237
2238	/* the rest are already zeroed */
2239}
2240
2241
2242int
2243vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2244{
2245	int err, i;
2246	u32 ret;
2247	unsigned long flags;
2248
2249	netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2250		" ring sizes %u %u %u\n", adapter->netdev->name,
2251		adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2252		adapter->tx_queue[0].tx_ring.size,
2253		adapter->rx_queue[0].rx_ring[0].size,
2254		adapter->rx_queue[0].rx_ring[1].size);
2255
2256	vmxnet3_tq_init_all(adapter);
2257	err = vmxnet3_rq_init_all(adapter);
2258	if (err) {
2259		netdev_err(adapter->netdev,
2260			   "Failed to init rx queue, error %d\n", err);
2261		goto rq_err;
2262	}
2263
2264	err = vmxnet3_request_irqs(adapter);
2265	if (err) {
2266		netdev_err(adapter->netdev,
2267			   "Failed to set up irqs, error %d\n", err);
2268		goto irq_err;
2269	}
2270
2271	vmxnet3_setup_driver_shared(adapter);
2272
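	/* Hand the physical address of the driver_shared area to the device
	 * (low and high halves) before issuing the activate command.
	 */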
2273	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2274			       adapter->shared_pa));
2275	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2276			       adapter->shared_pa));
2277	spin_lock_irqsave(&adapter->cmd_lock, flags);
2278	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2279			       VMXNET3_CMD_ACTIVATE_DEV);
2280	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2281	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2282
2283	if (ret != 0) {
2284		netdev_err(adapter->netdev,
2285			   "Failed to activate dev: error %u\n", ret);
2286		err = -EINVAL;
2287		goto activate_err;
2288	}
2289
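	/* Publish the initial fill level of both rx rings so the device can
	 * start using the pre-allocated receive buffers.
	 */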
2290	for (i = 0; i < adapter->num_rx_queues; i++) {
2291		VMXNET3_WRITE_BAR0_REG(adapter,
2292				VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2293				adapter->rx_queue[i].rx_ring[0].next2fill);
2294		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2295				(i * VMXNET3_REG_ALIGN)),
2296				adapter->rx_queue[i].rx_ring[1].next2fill);
2297	}
2298
2299	/* Apply the rx filter settings last. */
2300	vmxnet3_set_mc(adapter->netdev);
2301
2302	/*
2303	 * Check link state when first activating device. It will start the
2304	 * tx queue if the link is up.
2305	 */
2306	vmxnet3_check_link(adapter, true);
2307	for (i = 0; i < adapter->num_rx_queues; i++)
2308		napi_enable(&adapter->rx_queue[i].napi);
2309	vmxnet3_enable_all_intrs(adapter);
2310	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2311	return 0;
2312
2313activate_err:
2314	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2315	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2316	vmxnet3_free_irqs(adapter);
2317irq_err:
2318rq_err:
2319	/* free up buffers we allocated */
2320	vmxnet3_rq_cleanup_all(adapter);
2321	return err;
2322}
2323
2324
2325void
2326vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2327{
2328	unsigned long flags;
2329	spin_lock_irqsave(&adapter->cmd_lock, flags);
2330	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2331	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2332}
2333
2334
2335int
2336vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2337{
2338	int i;
2339	unsigned long flags;
2340	if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2341		return 0;
2342
2343
2344	spin_lock_irqsave(&adapter->cmd_lock, flags);
2345	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2346			       VMXNET3_CMD_QUIESCE_DEV);
2347	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2348	vmxnet3_disable_all_intrs(adapter);
2349
2350	for (i = 0; i < adapter->num_rx_queues; i++)
2351		napi_disable(&adapter->rx_queue[i].napi);
2352	netif_tx_disable(adapter->netdev);
2353	adapter->link_speed = 0;
2354	netif_carrier_off(adapter->netdev);
2355
2356	vmxnet3_tq_cleanup_all(adapter);
2357	vmxnet3_rq_cleanup_all(adapter);
2358	vmxnet3_free_irqs(adapter);
2359	return 0;
2360}
2361
2362
2363static void
2364vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2365{
2366	u32 tmp;
2367
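	/* MACL holds the first four bytes of the MAC address, MACH the
	 * remaining two.
	 */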
2368	tmp = *(u32 *)mac;
2369	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2370
2371	tmp = (mac[5] << 8) | mac[4];
2372	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2373}
2374
2375
2376static int
2377vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2378{
2379	struct sockaddr *addr = p;
2380	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2381
2382	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2383	vmxnet3_write_mac_addr(adapter, addr->sa_data);
2384
2385	return 0;
2386}
2387
2388
2389/* ==================== initialization and cleanup routines ============ */
2390
2391static int
2392vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2393{
2394	int err;
2395	unsigned long mmio_start, mmio_len;
2396	struct pci_dev *pdev = adapter->pdev;
2397
2398	err = pci_enable_device(pdev);
2399	if (err) {
2400		dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2401		return err;
2402	}
2403
2404	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2405		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2406			dev_err(&pdev->dev,
2407				"pci_set_consistent_dma_mask failed\n");
2408			err = -EIO;
2409			goto err_set_mask;
2410		}
2411		*dma64 = true;
2412	} else {
2413		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2414			dev_err(&pdev->dev,
2415				"pci_set_dma_mask failed\n");
2416			err = -EIO;
2417			goto err_set_mask;
2418		}
2419		*dma64 = false;
2420	}
2421
2422	err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2423					   vmxnet3_driver_name);
2424	if (err) {
2425		dev_err(&pdev->dev,
2426			"Failed to request region for adapter: error %d\n", err);
2427		goto err_set_mask;
2428	}
2429
2430	pci_set_master(pdev);
2431
2432	mmio_start = pci_resource_start(pdev, 0);
2433	mmio_len = pci_resource_len(pdev, 0);
2434	adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2435	if (!adapter->hw_addr0) {
2436		dev_err(&pdev->dev, "Failed to map bar0\n");
2437		err = -EIO;
2438		goto err_ioremap;
2439	}
2440
2441	mmio_start = pci_resource_start(pdev, 1);
2442	mmio_len = pci_resource_len(pdev, 1);
2443	adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2444	if (!adapter->hw_addr1) {
2445		dev_err(&pdev->dev, "Failed to map bar1\n");
2446		err = -EIO;
2447		goto err_bar1;
2448	}
2449	return 0;
2450
2451err_bar1:
2452	iounmap(adapter->hw_addr0);
2453err_ioremap:
2454	pci_release_selected_regions(pdev, (1 << 2) - 1);
2455err_set_mask:
2456	pci_disable_device(pdev);
2457	return err;
2458}
2459
2460
2461static void
2462vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2463{
2464	BUG_ON(!adapter->pdev);
2465
2466	iounmap(adapter->hw_addr0);
2467	iounmap(adapter->hw_addr1);
2468	pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2469	pci_disable_device(adapter->pdev);
2470}
2471
2472
2473static void
2474vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2475{
2476	size_t sz, i, ring0_size, ring1_size, comp_size;
2477	struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[0];
2478
2479
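	/* If the MTU plus the ethernet header fits into a single skb buffer,
	 * each packet needs only one rx buffer; otherwise it needs one skb
	 * buffer plus enough page-sized buffers to cover the remainder.
	 */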
2480	if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2481				    VMXNET3_MAX_ETH_HDR_SIZE) {
2482		adapter->skb_buf_size = adapter->netdev->mtu +
2483					VMXNET3_MAX_ETH_HDR_SIZE;
2484		if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2485			adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2486
2487		adapter->rx_buf_per_pkt = 1;
2488	} else {
2489		adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2490		sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2491					    VMXNET3_MAX_ETH_HDR_SIZE;
2492		adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2493	}
2494
2495	/*
2496	 * for simplicity, force the ring0 size to be a multiple of
2497	 * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2498	 */
2499	sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2500	ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2501	ring0_size = (ring0_size + sz - 1) / sz * sz;
2502	ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2503			   sz * sz);
2504	ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2505	comp_size = ring0_size + ring1_size;
2506
2507	for (i = 0; i < adapter->num_rx_queues; i++) {
2508		rq = &adapter->rx_queue[i];
2509		rq->rx_ring[0].size = ring0_size;
2510		rq->rx_ring[1].size = ring1_size;
2511		rq->comp_ring.size = comp_size;
2512	}
2513}
2514
2515
2516int
2517vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2518		      u32 rx_ring_size, u32 rx_ring2_size)
2519{
2520	int err = 0, i;
2521
2522	for (i = 0; i < adapter->num_tx_queues; i++) {
2523		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2524		tq->tx_ring.size   = tx_ring_size;
2525		tq->data_ring.size = tx_ring_size;
2526		tq->comp_ring.size = tx_ring_size;
2527		tq->shared = &adapter->tqd_start[i].ctrl;
2528		tq->stopped = true;
2529		tq->adapter = adapter;
2530		tq->qid = i;
2531		err = vmxnet3_tq_create(tq, adapter);
2532		/*
2533		 * Too late to change num_tx_queues. We cannot make do with
2534		 * fewer queues than we asked for
2535		 */
2536		if (err)
2537			goto queue_err;
2538	}
2539
2540	adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2541	adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2542	vmxnet3_adjust_rx_ring_size(adapter);
2543	for (i = 0; i < adapter->num_rx_queues; i++) {
2544		struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2545		/* qid and qid2 for rx queues will be assigned later, once the
2546		 * number of rx queues is finalized after allocating intrs */
2547		rq->shared = &adapter->rqd_start[i].ctrl;
2548		rq->adapter = adapter;
2549		err = vmxnet3_rq_create(rq, adapter);
2550		if (err) {
2551			if (i == 0) {
2552				netdev_err(adapter->netdev,
2553					   "Could not allocate any rx queues. "
2554					   "Aborting.\n");
2555				goto queue_err;
2556			} else {
2557				netdev_info(adapter->netdev,
2558					    "Number of rx queues changed "
2559				    "to %d.\n", i);
2560				adapter->num_rx_queues = i;
2561				err = 0;
2562				break;
2563			}
2564		}
2565	}
2566	return err;
2567queue_err:
2568	vmxnet3_tq_destroy_all(adapter);
2569	return err;
2570}
2571
2572static int
2573vmxnet3_open(struct net_device *netdev)
2574{
2575	struct vmxnet3_adapter *adapter;
2576	int err, i;
2577
2578	adapter = netdev_priv(netdev);
2579
2580	for (i = 0; i < adapter->num_tx_queues; i++)
2581		spin_lock_init(&adapter->tx_queue[i].tx_lock);
2582
2583	err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2584				    VMXNET3_DEF_RX_RING_SIZE,
2585				    VMXNET3_DEF_RX_RING_SIZE);
2586	if (err)
2587		goto queue_err;
2588
2589	err = vmxnet3_activate_dev(adapter);
2590	if (err)
2591		goto activate_err;
2592
2593	return 0;
2594
2595activate_err:
2596	vmxnet3_rq_destroy_all(adapter);
2597	vmxnet3_tq_destroy_all(adapter);
2598queue_err:
2599	return err;
2600}
2601
2602
2603static int
2604vmxnet3_close(struct net_device *netdev)
2605{
2606	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2607
2608	/*
2609	 * Reset_work may be in the middle of resetting the device, wait for its
2610	 * completion.
2611	 */
2612	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2613		msleep(1);
2614
2615	vmxnet3_quiesce_dev(adapter);
2616
2617	vmxnet3_rq_destroy_all(adapter);
2618	vmxnet3_tq_destroy_all(adapter);
2619
2620	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2621
2622
2623	return 0;
2624}
2625
2626
2627void
2628vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2629{
2630	int i;
2631
2632	/*
2633	 * the caller must have cleared VMXNET3_STATE_BIT_RESETTING already,
2634	 * otherwise vmxnet3_close(), called via dev_close() below, will deadlock.
2635	 */
2636	BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2637
2638	/* we need to enable NAPI, otherwise dev_close will deadlock */
2639	for (i = 0; i < adapter->num_rx_queues; i++)
2640		napi_enable(&adapter->rx_queue[i].napi);
2641	dev_close(adapter->netdev);
2642}
2643
2644
2645static int
2646vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2647{
2648	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2649	int err = 0;
2650
2651	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2652		return -EINVAL;
2653
2654	netdev->mtu = new_mtu;
2655
2656	/*
2657	 * Reset_work may be in the middle of resetting the device, wait for its
2658	 * completion.
2659	 */
2660	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2661		msleep(1);
2662
2663	if (netif_running(netdev)) {
2664		vmxnet3_quiesce_dev(adapter);
2665		vmxnet3_reset_dev(adapter);
2666
2667		/* we need to re-create the rx queue based on the new mtu */
2668		vmxnet3_rq_destroy_all(adapter);
2669		vmxnet3_adjust_rx_ring_size(adapter);
2670		err = vmxnet3_rq_create_all(adapter);
2671		if (err) {
2672			netdev_err(netdev,
2673				   "failed to re-create rx queues, "
2674				   "error %d. Closing it.\n", err);
2675			goto out;
2676		}
2677
2678		err = vmxnet3_activate_dev(adapter);
2679		if (err) {
2680			netdev_err(netdev,
2681				   "failed to re-activate, error %d. "
2682				   "Closing it\n", err);
2683			goto out;
2684		}
2685	}
2686
2687out:
2688	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2689	if (err)
2690		vmxnet3_force_close(adapter);
2691
2692	return err;
2693}
2694
2695
2696static void
2697vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2698{
2699	struct net_device *netdev = adapter->netdev;
2700
2701	netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2702		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2703		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2704		NETIF_F_LRO;
2705	if (dma64)
2706		netdev->hw_features |= NETIF_F_HIGHDMA;
2707	netdev->vlan_features = netdev->hw_features &
2708				~(NETIF_F_HW_VLAN_CTAG_TX |
2709				  NETIF_F_HW_VLAN_CTAG_RX);
2710	netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2711}
2712
2713
2714static void
2715vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2716{
2717	u32 tmp;
2718
2719	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2720	*(u32 *)mac = tmp;
2721
2722	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2723	mac[4] = tmp & 0xff;
2724	mac[5] = (tmp >> 8) & 0xff;
2725}
2726
2727#ifdef CONFIG_PCI_MSI
2728
2729/*
2730 * Enable MSI-X vectors.
2731 * Returns:
2732 *	the number of vectors which were enabled on success (at least
2733 *	 VMXNET3_LINUX_MIN_MSIX_VECT), or
2734 *	a negative error code if not even the minimum number of vectors
2735 *	 could be enabled.
2736 */
2737
2738static int
2739vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, int nvec)
2740{
2741	do {
2742		int err = pci_enable_msix(adapter->pdev,
2743					  adapter->intr.msix_entries, nvec);
2744		if (!err) {
2745			return nvec;
2746		} else if (err < 0) {
2747			dev_err(&adapter->netdev->dev,
2748				"Failed to enable MSI-X, error: %d\n", err);
2749			return err;
2750		} else if (err < VMXNET3_LINUX_MIN_MSIX_VECT) {
2751			dev_info(&adapter->pdev->dev,
2752				 "Number of MSI-X vectors which can be allocated "
2753				 "is lower than the minimum required.\n");
2754			return -ENOSPC;
2755		} else {
2756			/* If we fail to enable the required number of MSI-X
2757			 * vectors, try enabling the minimum number required.
2758			 */
2759			dev_err(&adapter->netdev->dev,
2760				"Failed to enable %d MSI-X, trying %d\n",
2761				nvec, VMXNET3_LINUX_MIN_MSIX_VECT);
2762			nvec = VMXNET3_LINUX_MIN_MSIX_VECT;
2763		}
2764	} while (nvec >= VMXNET3_LINUX_MIN_MSIX_VECT);
2765
2766	/*
2767	 * Should never get here
2768	 */
2769	return -ENOSPC;
2770}
2771
2772
2773#endif /* CONFIG_PCI_MSI */
2774
2775static void
2776vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2777{
2778	u32 cfg;
2779	unsigned long flags;
2780
2781	/* intr settings */
2782	spin_lock_irqsave(&adapter->cmd_lock, flags);
2783	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2784			       VMXNET3_CMD_GET_CONF_INTR);
2785	cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2786	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2787	adapter->intr.type = cfg & 0x3;
2788	adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2789
2790	if (adapter->intr.type == VMXNET3_IT_AUTO) {
2791		adapter->intr.type = VMXNET3_IT_MSIX;
2792	}
2793
2794#ifdef CONFIG_PCI_MSI
2795	if (adapter->intr.type == VMXNET3_IT_MSIX) {
2796		int i, nvec;
2797
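		/* Vector budget: one per tx queue (or a single shared one
		 * with TXSHARE), one per rx queue (none extra with
		 * BUDDYSHARE, where rx queues ride on the tx vectors), plus
		 * one for the event interrupt; never request fewer than
		 * VMXNET3_LINUX_MIN_MSIX_VECT.
		 */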
2798		nvec  = adapter->share_intr == VMXNET3_INTR_TXSHARE ?
2799			1 : adapter->num_tx_queues;
2800		nvec += adapter->share_intr == VMXNET3_INTR_BUDDYSHARE ?
2801			0 : adapter->num_rx_queues;
2802		nvec += 1;	/* for link event */
2803		nvec = nvec > VMXNET3_LINUX_MIN_MSIX_VECT ?
2804		       nvec : VMXNET3_LINUX_MIN_MSIX_VECT;
2805
2806		for (i = 0; i < nvec; i++)
2807			adapter->intr.msix_entries[i].entry = i;
2808
2809		nvec = vmxnet3_acquire_msix_vectors(adapter, nvec);
2810		if (nvec < 0)
2811			goto msix_err;
2812
2813		/* If we cannot allocate one MSI-X vector per queue,
2814		 * limit the number of rx queues to 1
2815		 */
2816		if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) {
2817			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2818			    || adapter->num_rx_queues != 1) {
2819				adapter->share_intr = VMXNET3_INTR_TXSHARE;
2820				netdev_err(adapter->netdev,
2821					   "Number of rx queues : 1\n");
2822				adapter->num_rx_queues = 1;
2823			}
2824		}
2825
2826		adapter->intr.num_intrs = nvec;
2827		return;
2828
2829msix_err:
2830		/* If we cannot allocate MSI-X vectors, use only one rx queue */
2831		dev_info(&adapter->pdev->dev,
2832			 "Failed to enable MSI-X, error %d. "
2833			 "Limiting #rx queues to 1, trying MSI.\n", nvec);
2834
2835		adapter->intr.type = VMXNET3_IT_MSI;
2836	}
2837
2838	if (adapter->intr.type == VMXNET3_IT_MSI) {
2839		if (!pci_enable_msi(adapter->pdev)) {
2840			adapter->num_rx_queues = 1;
2841			adapter->intr.num_intrs = 1;
2842			return;
2843		}
2844	}
2845#endif /* CONFIG_PCI_MSI */
2846
2847	adapter->num_rx_queues = 1;
2848	dev_info(&adapter->netdev->dev,
2849		 "Using INTx interrupt, #Rx queues: 1.\n");
2850	adapter->intr.type = VMXNET3_IT_INTX;
2851
2852	/* INT-X related setting */
2853	adapter->intr.num_intrs = 1;
2854}
2855
2856
2857static void
2858vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2859{
2860	if (adapter->intr.type == VMXNET3_IT_MSIX)
2861		pci_disable_msix(adapter->pdev);
2862	else if (adapter->intr.type == VMXNET3_IT_MSI)
2863		pci_disable_msi(adapter->pdev);
2864	else
2865		BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2866}
2867
2868
2869static void
2870vmxnet3_tx_timeout(struct net_device *netdev)
2871{
2872	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2873	adapter->tx_timeout_count++;
2874
2875	netdev_err(adapter->netdev, "tx hang\n");
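	/* Kick the reset worker, which will quiesce and re-activate the
	 * device (see vmxnet3_reset_work()).
	 */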
2876	schedule_work(&adapter->work);
2877	netif_wake_queue(adapter->netdev);
2878}
2879
2880
2881static void
2882vmxnet3_reset_work(struct work_struct *data)
2883{
2884	struct vmxnet3_adapter *adapter;
2885
2886	adapter = container_of(data, struct vmxnet3_adapter, work);
2887
2888	/* if another thread is resetting the device, no need to proceed */
2889	if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2890		return;
2891
2892	/* if the device is closed, we must leave it alone */
2893	rtnl_lock();
2894	if (netif_running(adapter->netdev)) {
2895		netdev_notice(adapter->netdev, "resetting\n");
2896		vmxnet3_quiesce_dev(adapter);
2897		vmxnet3_reset_dev(adapter);
2898		vmxnet3_activate_dev(adapter);
2899	} else {
2900		netdev_info(adapter->netdev, "already closed\n");
2901	}
2902	rtnl_unlock();
2903
2904	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2905}
2906
2907
2908static int
2909vmxnet3_probe_device(struct pci_dev *pdev,
2910		     const struct pci_device_id *id)
2911{
2912	static const struct net_device_ops vmxnet3_netdev_ops = {
2913		.ndo_open = vmxnet3_open,
2914		.ndo_stop = vmxnet3_close,
2915		.ndo_start_xmit = vmxnet3_xmit_frame,
2916		.ndo_set_mac_address = vmxnet3_set_mac_addr,
2917		.ndo_change_mtu = vmxnet3_change_mtu,
2918		.ndo_set_features = vmxnet3_set_features,
2919		.ndo_get_stats64 = vmxnet3_get_stats64,
2920		.ndo_tx_timeout = vmxnet3_tx_timeout,
2921		.ndo_set_rx_mode = vmxnet3_set_mc,
2922		.ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2923		.ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2924#ifdef CONFIG_NET_POLL_CONTROLLER
2925		.ndo_poll_controller = vmxnet3_netpoll,
2926#endif
2927	};
2928	int err;
2929	bool dma64 = false; /* initialized only to silence a gcc warning */
2930	u32 ver;
2931	struct net_device *netdev;
2932	struct vmxnet3_adapter *adapter;
2933	u8 mac[ETH_ALEN];
2934	int size;
2935	int num_tx_queues;
2936	int num_rx_queues;
2937
2938	if (!pci_msi_enabled())
2939		enable_mq = 0;
2940
2941#ifdef VMXNET3_RSS
2942	if (enable_mq)
2943		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2944				    (int)num_online_cpus());
2945	else
2946#endif
2947		num_rx_queues = 1;
2948	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2949
2950	if (enable_mq)
2951		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2952				    (int)num_online_cpus());
2953	else
2954		num_tx_queues = 1;
2955
2956	num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2957	netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2958				   max(num_tx_queues, num_rx_queues));
2959	dev_info(&pdev->dev,
2960		 "# of Tx queues : %d, # of Rx queues : %d\n",
2961		 num_tx_queues, num_rx_queues);
2962
2963	if (!netdev)
2964		return -ENOMEM;
2965
2966	pci_set_drvdata(pdev, netdev);
2967	adapter = netdev_priv(netdev);
2968	adapter->netdev = netdev;
2969	adapter->pdev = pdev;
2970
2971	spin_lock_init(&adapter->cmd_lock);
2972	adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
2973					     sizeof(struct vmxnet3_adapter),
2974					     PCI_DMA_TODEVICE);
2975	adapter->shared = dma_alloc_coherent(
2976				&adapter->pdev->dev,
2977				sizeof(struct Vmxnet3_DriverShared),
2978				&adapter->shared_pa, GFP_KERNEL);
2979	if (!adapter->shared) {
2980		dev_err(&pdev->dev, "Failed to allocate memory\n");
2981		err = -ENOMEM;
2982		goto err_alloc_shared;
2983	}
2984
2985	adapter->num_rx_queues = num_rx_queues;
2986	adapter->num_tx_queues = num_tx_queues;
2987	adapter->rx_buf_per_pkt = 1;
2988
2989	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2990	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2991	adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
2992						&adapter->queue_desc_pa,
2993						GFP_KERNEL);
2994
2995	if (!adapter->tqd_start) {
2996		dev_err(&pdev->dev, "Failed to allocate memory\n");
2997		err = -ENOMEM;
2998		goto err_alloc_queue_desc;
2999	}
3000	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
3001							    adapter->num_tx_queues);
3002
3003	adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
3004					      sizeof(struct Vmxnet3_PMConf),
3005					      &adapter->pm_conf_pa,
3006					      GFP_KERNEL);
3007	if (adapter->pm_conf == NULL) {
3008		err = -ENOMEM;
3009		goto err_alloc_pm;
3010	}
3011
3012#ifdef VMXNET3_RSS
3013
3014	adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
3015					       sizeof(struct UPT1_RSSConf),
3016					       &adapter->rss_conf_pa,
3017					       GFP_KERNEL);
3018	if (adapter->rss_conf == NULL) {
3019		err = -ENOMEM;
3020		goto err_alloc_rss;
3021	}
3022#endif /* VMXNET3_RSS */
3023
3024	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
3025	if (err < 0)
3026		goto err_alloc_pci;
3027
3028	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3029	if (ver & 1) {
3030		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
3031	} else {
3032		dev_err(&pdev->dev,
3033			"Incompatible h/w version (0x%x) for adapter\n", ver);
3034		err = -EBUSY;
3035		goto err_ver;
3036	}
3037
3038	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3039	if (ver & 1) {
3040		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3041	} else {
3042		dev_err(&pdev->dev,
3043			"Incompatible upt version (0x%x) for adapter\n", ver);
3044		err = -EBUSY;
3045		goto err_ver;
3046	}
3047
3048	SET_NETDEV_DEV(netdev, &pdev->dev);
3049	vmxnet3_declare_features(adapter, dma64);
3050
3051	if (adapter->num_tx_queues == adapter->num_rx_queues)
3052		adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3053	else
3054		adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3055
3056	vmxnet3_alloc_intr_resources(adapter);
3057
3058#ifdef VMXNET3_RSS
3059	if (adapter->num_rx_queues > 1 &&
3060	    adapter->intr.type == VMXNET3_IT_MSIX) {
3061		adapter->rss = true;
3062		netdev->hw_features |= NETIF_F_RXHASH;
3063		netdev->features |= NETIF_F_RXHASH;
3064		dev_dbg(&pdev->dev, "RSS is enabled.\n");
3065	} else {
3066		adapter->rss = false;
3067	}
3068#endif
3069
3070	vmxnet3_read_mac_addr(adapter, mac);
3071	memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3072
3073	netdev->netdev_ops = &vmxnet3_netdev_ops;
3074	vmxnet3_set_ethtool_ops(netdev);
3075	netdev->watchdog_timeo = 5 * HZ;
3076
3077	INIT_WORK(&adapter->work, vmxnet3_reset_work);
3078	set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3079
3080	if (adapter->intr.type == VMXNET3_IT_MSIX) {
3081		int i;
3082		for (i = 0; i < adapter->num_rx_queues; i++) {
3083			netif_napi_add(adapter->netdev,
3084				       &adapter->rx_queue[i].napi,
3085				       vmxnet3_poll_rx_only, 64);
3086		}
3087	} else {
3088		netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3089			       vmxnet3_poll, 64);
3090	}
3091
3092	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3093	netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3094
3095	netif_carrier_off(netdev);
3096	err = register_netdev(netdev);
3097
3098	if (err) {
3099		dev_err(&pdev->dev, "Failed to register adapter\n");
3100		goto err_register;
3101	}
3102
3103	vmxnet3_check_link(adapter, false);
3104	return 0;
3105
3106err_register:
3107	vmxnet3_free_intr_resources(adapter);
3108err_ver:
3109	vmxnet3_free_pci_resources(adapter);
3110err_alloc_pci:
3111#ifdef VMXNET3_RSS
3112	dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3113			  adapter->rss_conf, adapter->rss_conf_pa);
3114err_alloc_rss:
3115#endif
3116	dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3117			  adapter->pm_conf, adapter->pm_conf_pa);
3118err_alloc_pm:
3119	dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3120			  adapter->queue_desc_pa);
3121err_alloc_queue_desc:
3122	dma_free_coherent(&adapter->pdev->dev,
3123			  sizeof(struct Vmxnet3_DriverShared),
3124			  adapter->shared, adapter->shared_pa);
3125err_alloc_shared:
3126	dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3127			 sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3128	free_netdev(netdev);
3129	return err;
3130}
3131
3132
3133static void
3134vmxnet3_remove_device(struct pci_dev *pdev)
3135{
3136	struct net_device *netdev = pci_get_drvdata(pdev);
3137	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3138	int size = 0;
3139	int num_rx_queues;
3140
3141#ifdef VMXNET3_RSS
3142	if (enable_mq)
3143		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3144				    (int)num_online_cpus());
3145	else
3146#endif
3147		num_rx_queues = 1;
3148	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3149
3150	cancel_work_sync(&adapter->work);
3151
3152	unregister_netdev(netdev);
3153
3154	vmxnet3_free_intr_resources(adapter);
3155	vmxnet3_free_pci_resources(adapter);
3156#ifdef VMXNET3_RSS
3157	dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3158			  adapter->rss_conf, adapter->rss_conf_pa);
3159#endif
3160	dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3161			  adapter->pm_conf, adapter->pm_conf_pa);
3162
3163	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3164	size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3165	dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3166			  adapter->queue_desc_pa);
3167	dma_free_coherent(&adapter->pdev->dev,
3168			  sizeof(struct Vmxnet3_DriverShared),
3169			  adapter->shared, adapter->shared_pa);
3170	dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3171			 sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3172	free_netdev(netdev);
3173}
3174
3175
3176#ifdef CONFIG_PM
3177
3178static int
3179vmxnet3_suspend(struct device *device)
3180{
3181	struct pci_dev *pdev = to_pci_dev(device);
3182	struct net_device *netdev = pci_get_drvdata(pdev);
3183	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3184	struct Vmxnet3_PMConf *pmConf;
3185	struct ethhdr *ehdr;
3186	struct arphdr *ahdr;
3187	u8 *arpreq;
3188	struct in_device *in_dev;
3189	struct in_ifaddr *ifa;
3190	unsigned long flags;
3191	int i = 0;
3192
3193	if (!netif_running(netdev))
3194		return 0;
3195
3196	for (i = 0; i < adapter->num_rx_queues; i++)
3197		napi_disable(&adapter->rx_queue[i].napi);
3198
3199	vmxnet3_disable_all_intrs(adapter);
3200	vmxnet3_free_irqs(adapter);
3201	vmxnet3_free_intr_resources(adapter);
3202
3203	netif_device_detach(netdev);
3204	netif_tx_stop_all_queues(netdev);
3205
3206	/* Create wake-up filters. */
3207	pmConf = adapter->pm_conf;
3208	memset(pmConf, 0, sizeof(*pmConf));
3209
3210	if (adapter->wol & WAKE_UCAST) {
3211		pmConf->filters[i].patternSize = ETH_ALEN;
3212		pmConf->filters[i].maskSize = 1;
3213		memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3214		pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3215
3216		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3217		i++;
3218	}
3219
3220	if (adapter->wol & WAKE_ARP) {
3221		in_dev = in_dev_get(netdev);
3222		if (!in_dev)
3223			goto skip_arp;
3224
3225		ifa = (struct in_ifaddr *)in_dev->ifa_list;
3226		if (!ifa)
3227			goto skip_arp;
3228
3229		pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3230			sizeof(struct arphdr) +		/* ARP header */
3231			2 * ETH_ALEN +		/* 2 Ethernet addresses*/
3232			2 * sizeof(u32);	/*2 IPv4 addresses */
3233		pmConf->filters[i].maskSize =
3234			(pmConf->filters[i].patternSize - 1) / 8 + 1;
3235
3236		/* ETH_P_ARP in Ethernet header. */
3237		ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3238		ehdr->h_proto = htons(ETH_P_ARP);
3239
3240		/* ARPOP_REQUEST in ARP header. */
3241		ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3242		ahdr->ar_op = htons(ARPOP_REQUEST);
3243		arpreq = (u8 *)(ahdr + 1);
3244
3245		/* The Unicast IPv4 address in 'tip' field. */
3246		arpreq += 2 * ETH_ALEN + sizeof(u32);
3247		*(u32 *)arpreq = ifa->ifa_address;
3248
3249		/* The mask for the relevant bits. */
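		/* Each mask bit selects one byte of the pattern: bytes 12-13
		 * (EtherType), 20-21 (ARP opcode) and 38-41 (target IP).
		 */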
3250		pmConf->filters[i].mask[0] = 0x00;
3251		pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3252		pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3253		pmConf->filters[i].mask[3] = 0x00;
3254		pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3255		pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3256		in_dev_put(in_dev);
3257
3258		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3259		i++;
3260	}
3261
3262skip_arp:
3263	if (adapter->wol & WAKE_MAGIC)
3264		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3265
3266	pmConf->numFilters = i;
3267
3268	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3269	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3270								  *pmConf));
3271	adapter->shared->devRead.pmConfDesc.confPA =
3272		cpu_to_le64(adapter->pm_conf_pa);
3273
3274	spin_lock_irqsave(&adapter->cmd_lock, flags);
3275	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3276			       VMXNET3_CMD_UPDATE_PMCFG);
3277	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3278
3279	pci_save_state(pdev);
3280	pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3281			adapter->wol);
3282	pci_disable_device(pdev);
3283	pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3284
3285	return 0;
3286}
3287
3288
3289static int
3290vmxnet3_resume(struct device *device)
3291{
3292	int err, i = 0;
3293	unsigned long flags;
3294	struct pci_dev *pdev = to_pci_dev(device);
3295	struct net_device *netdev = pci_get_drvdata(pdev);
3296	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3297	struct Vmxnet3_PMConf *pmConf;
3298
3299	if (!netif_running(netdev))
3300		return 0;
3301
3302	/* Destroy wake-up filters. */
3303	pmConf = adapter->pm_conf;
3304	memset(pmConf, 0, sizeof(*pmConf));
3305
3306	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3307	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3308								  *pmConf));
3309	adapter->shared->devRead.pmConfDesc.confPA =
3310		cpu_to_le64(adapter->pm_conf_pa);
3311
3312	netif_device_attach(netdev);
3313	pci_set_power_state(pdev, PCI_D0);
3314	pci_restore_state(pdev);
3315	err = pci_enable_device_mem(pdev);
3316	if (err != 0)
3317		return err;
3318
3319	pci_enable_wake(pdev, PCI_D0, 0);
3320
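	/* Push the cleared (empty) wake-up filter configuration back to the
	 * device now that we are resuming.
	 */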
3321	spin_lock_irqsave(&adapter->cmd_lock, flags);
3322	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3323			       VMXNET3_CMD_UPDATE_PMCFG);
3324	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3325	vmxnet3_alloc_intr_resources(adapter);
3326	vmxnet3_request_irqs(adapter);
3327	for (i = 0; i < adapter->num_rx_queues; i++)
3328		napi_enable(&adapter->rx_queue[i].napi);
3329	vmxnet3_enable_all_intrs(adapter);
3330
3331	return 0;
3332}
3333
3334static const struct dev_pm_ops vmxnet3_pm_ops = {
3335	.suspend = vmxnet3_suspend,
3336	.resume = vmxnet3_resume,
3337};
3338#endif
3339
3340static struct pci_driver vmxnet3_driver = {
3341	.name		= vmxnet3_driver_name,
3342	.id_table	= vmxnet3_pciid_table,
3343	.probe		= vmxnet3_probe_device,
3344	.remove		= vmxnet3_remove_device,
3345#ifdef CONFIG_PM
3346	.driver.pm	= &vmxnet3_pm_ops,
3347#endif
3348};
3349
3350
3351static int __init
3352vmxnet3_init_module(void)
3353{
3354	pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3355		VMXNET3_DRIVER_VERSION_REPORT);
3356	return pci_register_driver(&vmxnet3_driver);
3357}
3358
3359module_init(vmxnet3_init_module);
3360
3361
3362static void
3363vmxnet3_exit_module(void)
3364{
3365	pci_unregister_driver(&vmxnet3_driver);
3366}
3367
3368module_exit(vmxnet3_exit_module);
3369
3370MODULE_AUTHOR("VMware, Inc.");
3371MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3372MODULE_LICENSE("GPL v2");
3373MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3374