1/*
2 * Linux driver for VMware's vmxnet3 ethernet NIC.
3 *
4 * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; version 2 of the License and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 * NON INFRINGEMENT. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * The full GNU General Public License is included in this distribution in
21 * the file called "COPYING".
22 *
23 * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24 *
25 */
26
27#include <linux/module.h>
28#include <net/ip6_checksum.h>
29
30#include "vmxnet3_int.h"
31
32char vmxnet3_driver_name[] = "vmxnet3";
33#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34
35/*
36 * PCI Device ID Table
37 * Last entry must be all 0s
38 */
39static const struct pci_device_id vmxnet3_pciid_table[] = {
40	{PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41	{0}
42};
43
44MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45
46static int enable_mq = 1;
47
48static void
49vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50
51/*
52 *    Enable/Disable the given intr
53 */
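/*
 * The device exposes one interrupt mask register per vector, spaced 8 bytes
 * apart starting at VMXNET3_REG_IMR; writing 0 unmasks (enables) the vector
 * and writing 1 masks (disables) it, which is what the two helpers below do.
 */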
54static void
55vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56{
57	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58}
59
60
61static void
62vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63{
64	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65}
66
67
68/*
69 *    Enable/Disable all intrs used by the device
70 */
71static void
72vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73{
74	int i;
75
76	for (i = 0; i < adapter->intr.num_intrs; i++)
77		vmxnet3_enable_intr(adapter, i);
78	adapter->shared->devRead.intrConf.intrCtrl &=
79					cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80}
81
82
83static void
84vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85{
86	int i;
87
88	adapter->shared->devRead.intrConf.intrCtrl |=
89					cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90	for (i = 0; i < adapter->intr.num_intrs; i++)
91		vmxnet3_disable_intr(adapter, i);
92}
93
94
95static void
96vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97{
98	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99}
100
101
102static bool
103vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104{
105	return tq->stopped;
106}
107
108
109static void
110vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111{
112	tq->stopped = false;
113	netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114}
115
116
117static void
118vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119{
120	tq->stopped = false;
121	netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122}
123
124
125static void
126vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127{
128	tq->stopped = true;
129	tq->num_stop++;
130	netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131}
132
133
134/*
135 * Check the link state. This may start or stop the tx queue.
136 */
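/*
 * VMXNET3_CMD_GET_LINK reports the link state through the CMD register:
 * bit 0 is the link-up flag and the upper 16 bits carry the speed in Mbps,
 * which is how the return value is decoded below.
 */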
137static void
138vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139{
140	u32 ret;
141	int i;
142	unsigned long flags;
143
144	spin_lock_irqsave(&adapter->cmd_lock, flags);
145	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148
149	adapter->link_speed = ret >> 16;
150	if (ret & 1) { /* Link is up. */
151		netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152			    adapter->link_speed);
153		netif_carrier_on(adapter->netdev);
154
155		if (affectTxQueue) {
156			for (i = 0; i < adapter->num_tx_queues; i++)
157				vmxnet3_tq_start(&adapter->tx_queue[i],
158						 adapter);
159		}
160	} else {
161		netdev_info(adapter->netdev, "NIC Link is Down\n");
162		netif_carrier_off(adapter->netdev);
163
164		if (affectTxQueue) {
165			for (i = 0; i < adapter->num_tx_queues; i++)
166				vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
167		}
168	}
169}
170
171static void
172vmxnet3_process_events(struct vmxnet3_adapter *adapter)
173{
174	int i;
175	unsigned long flags;
176	u32 events = le32_to_cpu(adapter->shared->ecr);
177	if (!events)
178		return;
179
180	vmxnet3_ack_events(adapter, events);
181
182	/* Check if link state has changed */
183	if (events & VMXNET3_ECR_LINK)
184		vmxnet3_check_link(adapter, true);
185
186	/* Check if there is an error on xmit/recv queues */
187	if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
188		spin_lock_irqsave(&adapter->cmd_lock, flags);
189		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
190				       VMXNET3_CMD_GET_QUEUE_STATUS);
191		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
192
193		for (i = 0; i < adapter->num_tx_queues; i++)
194			if (adapter->tqd_start[i].status.stopped)
195				dev_err(&adapter->netdev->dev,
196					"%s: tq[%d] error 0x%x\n",
197					adapter->netdev->name, i, le32_to_cpu(
198					adapter->tqd_start[i].status.error));
199		for (i = 0; i < adapter->num_rx_queues; i++)
200			if (adapter->rqd_start[i].status.stopped)
201				dev_err(&adapter->netdev->dev,
202					"%s: rq[%d] error 0x%x\n",
203					adapter->netdev->name, i,
204					adapter->rqd_start[i].status.error);
205
206		schedule_work(&adapter->work);
207	}
208}
209
210#ifdef __BIG_ENDIAN_BITFIELD
211/*
 * The device expects the bitfields in shared structures to be written in
 * little endian. When the CPU is big endian, the following routines are used
 * to correctly read from and write to the shared ABI.
 * The general technique used here is: double word bitfields are defined in
 * the opposite order for big endian architectures. Before the driver reads
 * them, the complete double word is translated using le32_to_cpu. Similarly,
 * after the driver writes into the bitfields, cpu_to_le32 is used to translate
 * the double words back into the required format.
 * To avoid touching bits in the shared structures more than once, temporary
 * descriptors are used. These are passed as srcDesc to the following functions.
222 */
223static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
224				struct Vmxnet3_RxDesc *dstDesc)
225{
226	u32 *src = (u32 *)srcDesc + 2;
227	u32 *dst = (u32 *)dstDesc + 2;
228	dstDesc->addr = le64_to_cpu(srcDesc->addr);
229	*dst = le32_to_cpu(*src);
230	dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
231}
232
233static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
234			       struct Vmxnet3_TxDesc *dstDesc)
235{
236	int i;
237	u32 *src = (u32 *)(srcDesc + 1);
238	u32 *dst = (u32 *)(dstDesc + 1);
239
240	/* Working backwards so that the gen bit is set at the end. */
241	for (i = 2; i > 0; i--) {
242		src--;
243		dst--;
244		*dst = cpu_to_le32(*src);
245	}
246}
247
248
249static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
250				struct Vmxnet3_RxCompDesc *dstDesc)
251{
252	int i = 0;
253	u32 *src = (u32 *)srcDesc;
254	u32 *dst = (u32 *)dstDesc;
255	for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
256		*dst = le32_to_cpu(*src);
257		src++;
258		dst++;
259	}
260}
261
262
263/* Used to read bitfield values from double words. */
264static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
265{
266	u32 temp = le32_to_cpu(*bitfield);
267	u32 mask = ((1 << size) - 1) << pos;
268	temp &= mask;
269	temp >>= pos;
270	return temp;
271}
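/*
 * Illustrative use (this is what the VMXNET3_TXDESC_GET_GEN() macro below
 * expands to):
 *
 *	gen = get_bitfield32((const __le32 *)txdesc +
 *			     VMXNET3_TXD_GEN_DWORD_SHIFT,
 *			     VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE);
 */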
272
273
274
275#endif  /* __BIG_ENDIAN_BITFIELD */
276
277#ifdef __BIG_ENDIAN_BITFIELD
278
279#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
280			txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
281			VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
282#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
283			txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
284			VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
285#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
286			VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
287			VMXNET3_TCD_GEN_SIZE)
288#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
289			VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
290#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
291			(dstrcd) = (tmp); \
292			vmxnet3_RxCompToCPU((rcd), (tmp)); \
293		} while (0)
294#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
295			(dstrxd) = (tmp); \
296			vmxnet3_RxDescToCPU((rxd), (tmp)); \
297		} while (0)
298
299#else
300
301#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
302#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
303#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
304#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
305#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
306#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
307
308#endif /* __BIG_ENDIAN_BITFIELD  */
309
310
311static void
312vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
313		     struct pci_dev *pdev)
314{
315	if (tbi->map_type == VMXNET3_MAP_SINGLE)
316		dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
317				 PCI_DMA_TODEVICE);
318	else if (tbi->map_type == VMXNET3_MAP_PAGE)
319		dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
320			       PCI_DMA_TODEVICE);
321	else
322		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
323
324	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
325}
326
327
328static int
329vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
330		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter)
331{
332	struct sk_buff *skb;
333	int entries = 0;
334
335	/* no out of order completion */
336	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
337	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
338
339	skb = tq->buf_info[eop_idx].skb;
340	BUG_ON(skb == NULL);
341	tq->buf_info[eop_idx].skb = NULL;
342
343	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
344
345	while (tq->tx_ring.next2comp != eop_idx) {
346		vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
347				     pdev);
348
		/* update next2comp w/o tx_lock. Since we are marking more,
		 * not fewer, tx ring entries available, the worst case is
351		 * that the tx routine incorrectly re-queues a pkt due to
352		 * insufficient tx ring entries.
353		 */
354		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
355		entries++;
356	}
357
358	dev_kfree_skb_any(skb);
359	return entries;
360}
361
362
363static int
364vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
365			struct vmxnet3_adapter *adapter)
366{
367	int completed = 0;
368	union Vmxnet3_GenericDesc *gdesc;
369
370	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
371	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
372		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
373					       &gdesc->tcd), tq, adapter->pdev,
374					       adapter);
375
376		vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
377		gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
378	}
379
380	if (completed) {
381		spin_lock(&tq->tx_lock);
382		if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
383			     vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
384			     VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
385			     netif_carrier_ok(adapter->netdev))) {
386			vmxnet3_tq_wake(tq, adapter);
387		}
388		spin_unlock(&tq->tx_lock);
389	}
390	return completed;
391}
392
393
394static void
395vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
396		   struct vmxnet3_adapter *adapter)
397{
398	int i;
399
400	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
401		struct vmxnet3_tx_buf_info *tbi;
402
403		tbi = tq->buf_info + tq->tx_ring.next2comp;
404
405		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
406		if (tbi->skb) {
407			dev_kfree_skb_any(tbi->skb);
408			tbi->skb = NULL;
409		}
410		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
411	}
412
413	/* sanity check, verify all buffers are indeed unmapped and freed */
414	for (i = 0; i < tq->tx_ring.size; i++) {
415		BUG_ON(tq->buf_info[i].skb != NULL ||
416		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
417	}
418
419	tq->tx_ring.gen = VMXNET3_INIT_GEN;
420	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
421
422	tq->comp_ring.gen = VMXNET3_INIT_GEN;
423	tq->comp_ring.next2proc = 0;
424}
425
426
427static void
428vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
429		   struct vmxnet3_adapter *adapter)
430{
431	if (tq->tx_ring.base) {
432		dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
433				  sizeof(struct Vmxnet3_TxDesc),
434				  tq->tx_ring.base, tq->tx_ring.basePA);
435		tq->tx_ring.base = NULL;
436	}
437	if (tq->data_ring.base) {
438		dma_free_coherent(&adapter->pdev->dev, tq->data_ring.size *
439				  sizeof(struct Vmxnet3_TxDataDesc),
440				  tq->data_ring.base, tq->data_ring.basePA);
441		tq->data_ring.base = NULL;
442	}
443	if (tq->comp_ring.base) {
444		dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
445				  sizeof(struct Vmxnet3_TxCompDesc),
446				  tq->comp_ring.base, tq->comp_ring.basePA);
447		tq->comp_ring.base = NULL;
448	}
449	if (tq->buf_info) {
450		dma_free_coherent(&adapter->pdev->dev,
451				  tq->tx_ring.size * sizeof(tq->buf_info[0]),
452				  tq->buf_info, tq->buf_info_pa);
453		tq->buf_info = NULL;
454	}
455}
456
457
458/* Destroy all tx queues */
459void
460vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
461{
462	int i;
463
464	for (i = 0; i < adapter->num_tx_queues; i++)
465		vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
466}
467
468
469static void
470vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
471		struct vmxnet3_adapter *adapter)
472{
473	int i;
474
475	/* reset the tx ring contents to 0 and reset the tx ring states */
476	memset(tq->tx_ring.base, 0, tq->tx_ring.size *
477	       sizeof(struct Vmxnet3_TxDesc));
478	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
479	tq->tx_ring.gen = VMXNET3_INIT_GEN;
480
481	memset(tq->data_ring.base, 0, tq->data_ring.size *
482	       sizeof(struct Vmxnet3_TxDataDesc));
483
484	/* reset the tx comp ring contents to 0 and reset comp ring states */
485	memset(tq->comp_ring.base, 0, tq->comp_ring.size *
486	       sizeof(struct Vmxnet3_TxCompDesc));
487	tq->comp_ring.next2proc = 0;
488	tq->comp_ring.gen = VMXNET3_INIT_GEN;
489
490	/* reset the bookkeeping data */
491	memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
492	for (i = 0; i < tq->tx_ring.size; i++)
493		tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
494
495	/* stats are not reset */
496}
497
498
499static int
500vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
501		  struct vmxnet3_adapter *adapter)
502{
503	size_t sz;
504
505	BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
506	       tq->comp_ring.base || tq->buf_info);
507
508	tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
509			tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
510			&tq->tx_ring.basePA, GFP_KERNEL);
511	if (!tq->tx_ring.base) {
512		netdev_err(adapter->netdev, "failed to allocate tx ring\n");
513		goto err;
514	}
515
516	tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
517			tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc),
518			&tq->data_ring.basePA, GFP_KERNEL);
519	if (!tq->data_ring.base) {
520		netdev_err(adapter->netdev, "failed to allocate data ring\n");
521		goto err;
522	}
523
524	tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
525			tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
526			&tq->comp_ring.basePA, GFP_KERNEL);
527	if (!tq->comp_ring.base) {
528		netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
529		goto err;
530	}
531
532	sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
533	tq->buf_info = dma_zalloc_coherent(&adapter->pdev->dev, sz,
534					   &tq->buf_info_pa, GFP_KERNEL);
535	if (!tq->buf_info)
536		goto err;
537
538	return 0;
539
540err:
541	vmxnet3_tq_destroy(tq, adapter);
542	return -ENOMEM;
543}
544
545static void
546vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
547{
548	int i;
549
550	for (i = 0; i < adapter->num_tx_queues; i++)
551		vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
552}
553
554/*
555 *    starting from ring->next2fill, allocate rx buffers for the given ring
556 *    of the rx queue and update the rx desc. stop after @num_to_alloc buffers
557 *    are allocated or allocation fails
558 */
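/*
 * Ring 0 holds the buffers that receive the head of each pkt
 * (VMXNET3_RX_BUF_SKB every rx_buf_per_pkt entries, VMXNET3_RX_BUF_PAGE
 * otherwise, see vmxnet3_rq_init()); ring 1 holds only page buffers used
 * for the remaining fragments of pkts that do not fit in the head buffer.
 */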
559
560static int
561vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
562			int num_to_alloc, struct vmxnet3_adapter *adapter)
563{
564	int num_allocated = 0;
565	struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
566	struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
567	u32 val;
568
569	while (num_allocated <= num_to_alloc) {
570		struct vmxnet3_rx_buf_info *rbi;
571		union Vmxnet3_GenericDesc *gd;
572
573		rbi = rbi_base + ring->next2fill;
574		gd = ring->base + ring->next2fill;
575
576		if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
577			if (rbi->skb == NULL) {
578				rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
579								       rbi->len,
580								       GFP_KERNEL);
581				if (unlikely(rbi->skb == NULL)) {
582					rq->stats.rx_buf_alloc_failure++;
583					break;
584				}
585
586				rbi->dma_addr = dma_map_single(
587						&adapter->pdev->dev,
588						rbi->skb->data, rbi->len,
589						PCI_DMA_FROMDEVICE);
590			} else {
591				/* rx buffer skipped by the device */
592			}
593			val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
594		} else {
595			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
596			       rbi->len  != PAGE_SIZE);
597
598			if (rbi->page == NULL) {
599				rbi->page = alloc_page(GFP_ATOMIC);
600				if (unlikely(rbi->page == NULL)) {
601					rq->stats.rx_buf_alloc_failure++;
602					break;
603				}
604				rbi->dma_addr = dma_map_page(
605						&adapter->pdev->dev,
606						rbi->page, 0, PAGE_SIZE,
607						PCI_DMA_FROMDEVICE);
608			} else {
609				/* rx buffers skipped by the device */
610			}
611			val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
612		}
613
614		BUG_ON(rbi->dma_addr == 0);
615		gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
616		gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
617					   | val | rbi->len);
618
		/* Fill the last buffer but don't mark it ready, or else the
620		 * device will think that the queue is full */
621		if (num_allocated == num_to_alloc)
622			break;
623
624		gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
625		num_allocated++;
626		vmxnet3_cmd_ring_adv_next2fill(ring);
627	}
628
629	netdev_dbg(adapter->netdev,
630		"alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
631		num_allocated, ring->next2fill, ring->next2comp);
632
	/* so that the device can distinguish a full ring from an empty ring */
634	BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
635
636	return num_allocated;
637}
638
639
640static void
641vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
642		    struct vmxnet3_rx_buf_info *rbi)
643{
644	struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
645		skb_shinfo(skb)->nr_frags;
646
647	BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
648
649	__skb_frag_set_page(frag, rbi->page);
650	frag->page_offset = 0;
651	skb_frag_size_set(frag, rcd->len);
652	skb->data_len += rcd->len;
653	skb->truesize += PAGE_SIZE;
654	skb_shinfo(skb)->nr_frags++;
655}
656
657
658static void
659vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
660		struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
661		struct vmxnet3_adapter *adapter)
662{
663	u32 dw2, len;
664	unsigned long buf_offset;
665	int i;
666	union Vmxnet3_GenericDesc *gdesc;
667	struct vmxnet3_tx_buf_info *tbi = NULL;
668
669	BUG_ON(ctx->copy_size > skb_headlen(skb));
670
671	/* use the previous gen bit for the SOP desc */
672	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
673
674	ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
675	gdesc = ctx->sop_txd; /* both loops below can be skipped */
676
677	/* no need to map the buffer if headers are copied */
678	if (ctx->copy_size) {
679		ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
680					tq->tx_ring.next2fill *
681					sizeof(struct Vmxnet3_TxDataDesc));
682		ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
683		ctx->sop_txd->dword[3] = 0;
684
685		tbi = tq->buf_info + tq->tx_ring.next2fill;
686		tbi->map_type = VMXNET3_MAP_NONE;
687
688		netdev_dbg(adapter->netdev,
689			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
690			tq->tx_ring.next2fill,
691			le64_to_cpu(ctx->sop_txd->txd.addr),
692			ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
693		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
694
695		/* use the right gen for non-SOP desc */
696		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
697	}
698
699	/* linear part can use multiple tx desc if it's big */
700	len = skb_headlen(skb) - ctx->copy_size;
701	buf_offset = ctx->copy_size;
702	while (len) {
703		u32 buf_size;
704
705		if (len < VMXNET3_MAX_TX_BUF_SIZE) {
706			buf_size = len;
707			dw2 |= len;
708		} else {
709			buf_size = VMXNET3_MAX_TX_BUF_SIZE;
710			/* spec says that for TxDesc.len, 0 == 2^14 */
711		}
712
713		tbi = tq->buf_info + tq->tx_ring.next2fill;
714		tbi->map_type = VMXNET3_MAP_SINGLE;
715		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
716				skb->data + buf_offset, buf_size,
717				PCI_DMA_TODEVICE);
718
719		tbi->len = buf_size;
720
721		gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
722		BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
723
724		gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
725		gdesc->dword[2] = cpu_to_le32(dw2);
726		gdesc->dword[3] = 0;
727
728		netdev_dbg(adapter->netdev,
729			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
730			tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
731			le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
732		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
733		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
734
735		len -= buf_size;
736		buf_offset += buf_size;
737	}
738
739	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
740		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
741		u32 buf_size;
742
743		buf_offset = 0;
744		len = skb_frag_size(frag);
745		while (len) {
746			tbi = tq->buf_info + tq->tx_ring.next2fill;
747			if (len < VMXNET3_MAX_TX_BUF_SIZE) {
748				buf_size = len;
749				dw2 |= len;
750			} else {
751				buf_size = VMXNET3_MAX_TX_BUF_SIZE;
752				/* spec says that for TxDesc.len, 0 == 2^14 */
753			}
754			tbi->map_type = VMXNET3_MAP_PAGE;
755			tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
756							 buf_offset, buf_size,
757							 DMA_TO_DEVICE);
758
759			tbi->len = buf_size;
760
761			gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
762			BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
763
764			gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
765			gdesc->dword[2] = cpu_to_le32(dw2);
766			gdesc->dword[3] = 0;
767
768			netdev_dbg(adapter->netdev,
769				"txd[%u]: 0x%llx %u %u\n",
770				tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
771				le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
772			vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
773			dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
774
775			len -= buf_size;
776			buf_offset += buf_size;
777		}
778	}
779
780	ctx->eop_txd = gdesc;
781
782	/* set the last buf_info for the pkt */
783	tbi->skb = skb;
784	tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
785}
786
787
788/* Init all tx queues */
789static void
790vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
791{
792	int i;
793
794	for (i = 0; i < adapter->num_tx_queues; i++)
795		vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
796}
797
798
799/*
800 *    parse and copy relevant protocol headers:
 *      For a TSO pkt, the relevant headers are L2/3/4 including options
802 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
803 *      if it's a TCP/UDP pkt
804 *
805 * Returns:
 *    -1:  an error occurred during parsing
807 *     0:  protocol headers parsed, but too big to be copied
808 *     1:  protocol headers parsed and copied
809 *
810 * Other effects:
811 *    1. related *ctx fields are updated.
812 *    2. ctx->copy_size is # of bytes copied
813 *    3. the portion copied is guaranteed to be in the linear part
814 *
815 */
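/*
 * Example (illustrative numbers): for a TSO TCP/IPv4 pkt with a 14 byte
 * Ethernet header, a 20 byte IP header and a 32 byte TCP header (including
 * options), eth_ip_hdr_size = 34, l4_hdr_size = 32 and copy_size = 66, all
 * of which is copied from the linear part of the skb into the data ring.
 */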
816static int
817vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
818			   struct vmxnet3_tx_ctx *ctx,
819			   struct vmxnet3_adapter *adapter)
820{
821	struct Vmxnet3_TxDataDesc *tdd;
822
823	if (ctx->mss) {	/* TSO */
824		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
825		ctx->l4_hdr_size = tcp_hdrlen(skb);
826		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
827	} else {
828		if (skb->ip_summed == CHECKSUM_PARTIAL) {
829			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
830
831			if (ctx->ipv4) {
832				const struct iphdr *iph = ip_hdr(skb);
833
834				if (iph->protocol == IPPROTO_TCP)
835					ctx->l4_hdr_size = tcp_hdrlen(skb);
836				else if (iph->protocol == IPPROTO_UDP)
837					ctx->l4_hdr_size = sizeof(struct udphdr);
838				else
839					ctx->l4_hdr_size = 0;
840			} else {
841				/* for simplicity, don't copy L4 headers */
842				ctx->l4_hdr_size = 0;
843			}
844			ctx->copy_size = min(ctx->eth_ip_hdr_size +
845					 ctx->l4_hdr_size, skb->len);
846		} else {
847			ctx->eth_ip_hdr_size = 0;
848			ctx->l4_hdr_size = 0;
849			/* copy as much as allowed */
850			ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE
851					     , skb_headlen(skb));
852		}
853
854		/* make sure headers are accessible directly */
855		if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
856			goto err;
857	}
858
859	if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
860		tq->stats.oversized_hdr++;
861		ctx->copy_size = 0;
862		return 0;
863	}
864
865	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
866
867	memcpy(tdd->data, skb->data, ctx->copy_size);
868	netdev_dbg(adapter->netdev,
869		"copy %u bytes to dataRing[%u]\n",
870		ctx->copy_size, tq->tx_ring.next2fill);
871	return 1;
872
873err:
874	return -1;
875}
876
877
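/*
 * For TSO the device fills in the final TCP checksum, but it expects
 * tcph->check to be pre-seeded with the pseudo-header checksum computed over
 * a zero length, which is what csum_tcpudp_magic()/csum_ipv6_magic() with a
 * len of 0 produce below.
 */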
878static void
879vmxnet3_prepare_tso(struct sk_buff *skb,
880		    struct vmxnet3_tx_ctx *ctx)
881{
882	struct tcphdr *tcph = tcp_hdr(skb);
883
884	if (ctx->ipv4) {
885		struct iphdr *iph = ip_hdr(skb);
886
887		iph->check = 0;
888		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
889						 IPPROTO_TCP, 0);
890	} else {
891		struct ipv6hdr *iph = ipv6_hdr(skb);
892
893		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
894					       IPPROTO_TCP, 0);
895	}
896}
897
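/*
 * Conservative estimate of the number of tx descriptors a skb will consume:
 * one or more for the linear part, one per page fragment (more if a fragment
 * exceeds VMXNET3_MAX_TX_BUF_SIZE), plus one for the headers that may be
 * copied into the data ring by vmxnet3_parse_and_copy_hdr().
 */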
898static int txd_estimate(const struct sk_buff *skb)
899{
900	int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
901	int i;
902
903	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
904		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
905
906		count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
907	}
908	return count;
909}
910
911/*
 * Transmits a pkt through a given tq
 * Returns:
 *    NETDEV_TX_OK:      descriptors are set up successfully
915 *    NETDEV_TX_OK:      error occurred, the pkt is dropped
916 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
917 *
918 * Side-effects:
919 *    1. tx ring may be changed
920 *    2. tq stats may be updated accordingly
921 *    3. shared->txNumDeferred may be updated
922 */
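/*
 * Note on doorbells: rather than writing VMXNET3_REG_TXPROD for every pkt,
 * the number of descriptors queued so far is accumulated in
 * shared->txNumDeferred and the register is only written once that count
 * reaches shared->txThreshold, batching register writes (each of which may
 * trap to the hypervisor).
 */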
923
924static int
925vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
926		struct vmxnet3_adapter *adapter, struct net_device *netdev)
927{
928	int ret;
929	u32 count;
930	unsigned long flags;
931	struct vmxnet3_tx_ctx ctx;
932	union Vmxnet3_GenericDesc *gdesc;
933#ifdef __BIG_ENDIAN_BITFIELD
934	/* Use temporary descriptor to avoid touching bits multiple times */
935	union Vmxnet3_GenericDesc tempTxDesc;
936#endif
937
938	count = txd_estimate(skb);
939
940	ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
941
942	ctx.mss = skb_shinfo(skb)->gso_size;
943	if (ctx.mss) {
944		if (skb_header_cloned(skb)) {
945			if (unlikely(pskb_expand_head(skb, 0, 0,
946						      GFP_ATOMIC) != 0)) {
947				tq->stats.drop_tso++;
948				goto drop_pkt;
949			}
950			tq->stats.copy_skb_header++;
951		}
952		vmxnet3_prepare_tso(skb, &ctx);
953	} else {
954		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
955
956			/* non-tso pkts must not use more than
957			 * VMXNET3_MAX_TXD_PER_PKT entries
958			 */
959			if (skb_linearize(skb) != 0) {
960				tq->stats.drop_too_many_frags++;
961				goto drop_pkt;
962			}
963			tq->stats.linearized++;
964
965			/* recalculate the # of descriptors to use */
966			count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
967		}
968	}
969
970	spin_lock_irqsave(&tq->tx_lock, flags);
971
972	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
973		tq->stats.tx_ring_full++;
974		netdev_dbg(adapter->netdev,
975			"tx queue stopped on %s, next2comp %u"
976			" next2fill %u\n", adapter->netdev->name,
977			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
978
979		vmxnet3_tq_stop(tq, adapter);
980		spin_unlock_irqrestore(&tq->tx_lock, flags);
981		return NETDEV_TX_BUSY;
982	}
983
984
985	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
986	if (ret >= 0) {
987		BUG_ON(ret <= 0 && ctx.copy_size != 0);
988		/* hdrs parsed, check against other limits */
989		if (ctx.mss) {
990			if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
991				     VMXNET3_MAX_TX_BUF_SIZE)) {
992				goto hdr_too_big;
993			}
994		} else {
995			if (skb->ip_summed == CHECKSUM_PARTIAL) {
996				if (unlikely(ctx.eth_ip_hdr_size +
997					     skb->csum_offset >
998					     VMXNET3_MAX_CSUM_OFFSET)) {
999					goto hdr_too_big;
1000				}
1001			}
1002		}
1003	} else {
1004		tq->stats.drop_hdr_inspect_err++;
1005		goto unlock_drop_pkt;
1006	}
1007
1008	/* fill tx descs related to addr & len */
1009	vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1010
1011	/* setup the EOP desc */
1012	ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1013
1014	/* setup the SOP desc */
1015#ifdef __BIG_ENDIAN_BITFIELD
1016	gdesc = &tempTxDesc;
1017	gdesc->dword[2] = ctx.sop_txd->dword[2];
1018	gdesc->dword[3] = ctx.sop_txd->dword[3];
1019#else
1020	gdesc = ctx.sop_txd;
1021#endif
1022	if (ctx.mss) {
1023		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1024		gdesc->txd.om = VMXNET3_OM_TSO;
1025		gdesc->txd.msscof = ctx.mss;
1026		le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1027			     gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1028	} else {
1029		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1030			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1031			gdesc->txd.om = VMXNET3_OM_CSUM;
1032			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1033					    skb->csum_offset;
1034		} else {
1035			gdesc->txd.om = 0;
1036			gdesc->txd.msscof = 0;
1037		}
1038		le32_add_cpu(&tq->shared->txNumDeferred, 1);
1039	}
1040
1041	if (vlan_tx_tag_present(skb)) {
1042		gdesc->txd.ti = 1;
1043		gdesc->txd.tci = vlan_tx_tag_get(skb);
1044	}
1045
1046	/* finally flips the GEN bit of the SOP desc. */
1047	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1048						  VMXNET3_TXD_GEN);
1049#ifdef __BIG_ENDIAN_BITFIELD
1050	/* Finished updating in bitfields of Tx Desc, so write them in original
1051	 * place.
1052	 */
1053	vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1054			   (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1055	gdesc = ctx.sop_txd;
1056#endif
1057	netdev_dbg(adapter->netdev,
1058		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1059		(u32)(ctx.sop_txd -
1060		tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1061		le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1062
1063	spin_unlock_irqrestore(&tq->tx_lock, flags);
1064
1065	if (le32_to_cpu(tq->shared->txNumDeferred) >=
1066					le32_to_cpu(tq->shared->txThreshold)) {
1067		tq->shared->txNumDeferred = 0;
1068		VMXNET3_WRITE_BAR0_REG(adapter,
1069				       VMXNET3_REG_TXPROD + tq->qid * 8,
1070				       tq->tx_ring.next2fill);
1071	}
1072
1073	return NETDEV_TX_OK;
1074
1075hdr_too_big:
1076	tq->stats.drop_oversized_hdr++;
1077unlock_drop_pkt:
1078	spin_unlock_irqrestore(&tq->tx_lock, flags);
1079drop_pkt:
1080	tq->stats.drop_total++;
1081	dev_kfree_skb_any(skb);
1082	return NETDEV_TX_OK;
1083}
1084
1085
1086static netdev_tx_t
1087vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1088{
1089	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1090
	BUG_ON(skb->queue_mapping >= adapter->num_tx_queues);
1092	return vmxnet3_tq_xmit(skb,
1093			       &adapter->tx_queue[skb->queue_mapping],
1094			       adapter, netdev);
1095}
1096
1097
1098static void
1099vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1100		struct sk_buff *skb,
1101		union Vmxnet3_GenericDesc *gdesc)
1102{
1103	if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1104		/* typical case: TCP/UDP over IP and both csums are correct */
1105		if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1106							VMXNET3_RCD_CSUM_OK) {
1107			skb->ip_summed = CHECKSUM_UNNECESSARY;
1108			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1109			BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1110			BUG_ON(gdesc->rcd.frg);
1111		} else {
1112			if (gdesc->rcd.csum) {
1113				skb->csum = htons(gdesc->rcd.csum);
1114				skb->ip_summed = CHECKSUM_PARTIAL;
1115			} else {
1116				skb_checksum_none_assert(skb);
1117			}
1118		}
1119	} else {
1120		skb_checksum_none_assert(skb);
1121	}
1122}
1123
1124
1125static void
1126vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1127		 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1128{
1129	rq->stats.drop_err++;
1130	if (!rcd->fcs)
1131		rq->stats.drop_fcs++;
1132
1133	rq->stats.drop_total++;
1134
1135	/*
1136	 * We do not unmap and chain the rx buffer to the skb.
1137	 * We basically pretend this buffer is not used and will be recycled
1138	 * by vmxnet3_rq_alloc_rx_buf()
1139	 */
1140
1141	/*
1142	 * ctx->skb may be NULL if this is the first and the only one
1143	 * desc for the pkt
1144	 */
1145	if (ctx->skb)
1146		dev_kfree_skb_irq(ctx->skb);
1147
1148	ctx->skb = NULL;
1149}
1150
1151
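/*
 * Process completed rx descriptors from the rx completion ring, up to @quota
 * of them. Completions for the first (head) ring carry rqID == rq->qid and
 * completions for the second (body) ring carry rqID == rq->qid2, which is how
 * ring_idx is derived below; pkts spanning several buffers are reassembled
 * into rq->rx_ctx.skb until the EOP descriptor is seen.
 */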
1152static int
1153vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1154		       struct vmxnet3_adapter *adapter, int quota)
1155{
1156	static const u32 rxprod_reg[2] = {
1157		VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1158	};
1159	u32 num_rxd = 0;
1160	bool skip_page_frags = false;
1161	struct Vmxnet3_RxCompDesc *rcd;
1162	struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1163#ifdef __BIG_ENDIAN_BITFIELD
1164	struct Vmxnet3_RxDesc rxCmdDesc;
1165	struct Vmxnet3_RxCompDesc rxComp;
1166#endif
1167	vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1168			  &rxComp);
1169	while (rcd->gen == rq->comp_ring.gen) {
1170		struct vmxnet3_rx_buf_info *rbi;
1171		struct sk_buff *skb, *new_skb = NULL;
1172		struct page *new_page = NULL;
1173		int num_to_alloc;
1174		struct Vmxnet3_RxDesc *rxd;
1175		u32 idx, ring_idx;
1176		struct vmxnet3_cmd_ring	*ring = NULL;
1177		if (num_rxd >= quota) {
1178			/* we may stop even before we see the EOP desc of
1179			 * the current pkt
1180			 */
1181			break;
1182		}
1183		num_rxd++;
1184		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1185		idx = rcd->rxdIdx;
1186		ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1187		ring = rq->rx_ring + ring_idx;
1188		vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1189				  &rxCmdDesc);
1190		rbi = rq->buf_info[ring_idx] + idx;
1191
1192		BUG_ON(rxd->addr != rbi->dma_addr ||
1193		       rxd->len != rbi->len);
1194
1195		if (unlikely(rcd->eop && rcd->err)) {
1196			vmxnet3_rx_error(rq, rcd, ctx, adapter);
1197			goto rcd_done;
1198		}
1199
1200		if (rcd->sop) { /* first buf of the pkt */
1201			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1202			       rcd->rqID != rq->qid);
1203
1204			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1205			BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1206
1207			if (unlikely(rcd->len == 0)) {
1208				/* Pretend the rx buffer is skipped. */
1209				BUG_ON(!(rcd->sop && rcd->eop));
1210				netdev_dbg(adapter->netdev,
1211					"rxRing[%u][%u] 0 length\n",
1212					ring_idx, idx);
1213				goto rcd_done;
1214			}
1215
1216			skip_page_frags = false;
1217			ctx->skb = rbi->skb;
1218			new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1219							    rbi->len);
1220			if (new_skb == NULL) {
				/* Skb allocation failed, do not hand over this
				 * skb to the stack. Reuse it. Drop the existing pkt
1223				 */
1224				rq->stats.rx_buf_alloc_failure++;
1225				ctx->skb = NULL;
1226				rq->stats.drop_total++;
1227				skip_page_frags = true;
1228				goto rcd_done;
1229			}
1230
1231			dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
1232					 rbi->len,
1233					 PCI_DMA_FROMDEVICE);
1234
1235#ifdef VMXNET3_RSS
1236			if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1237			    (adapter->netdev->features & NETIF_F_RXHASH))
1238				skb_set_hash(ctx->skb,
1239					     le32_to_cpu(rcd->rssHash),
1240					     PKT_HASH_TYPE_L3);
1241#endif
1242			skb_put(ctx->skb, rcd->len);
1243
1244			/* Immediate refill */
1245			rbi->skb = new_skb;
1246			rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
1247						       rbi->skb->data, rbi->len,
1248						       PCI_DMA_FROMDEVICE);
1249			rxd->addr = cpu_to_le64(rbi->dma_addr);
1250			rxd->len = rbi->len;
1251
1252		} else {
1253			BUG_ON(ctx->skb == NULL && !skip_page_frags);
1254
1255			/* non SOP buffer must be type 1 in most cases */
1256			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1257			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1258
1259			/* If an sop buffer was dropped, skip all
1260			 * following non-sop fragments. They will be reused.
1261			 */
1262			if (skip_page_frags)
1263				goto rcd_done;
1264
1265			new_page = alloc_page(GFP_ATOMIC);
1266			if (unlikely(new_page == NULL)) {
1267				/* Replacement page frag could not be allocated.
1268				 * Reuse this page. Drop the pkt and free the
1269				 * skb which contained this page as a frag. Skip
1270				 * processing all the following non-sop frags.
1271				 */
1272				rq->stats.rx_buf_alloc_failure++;
1273				dev_kfree_skb(ctx->skb);
1274				ctx->skb = NULL;
1275				skip_page_frags = true;
1276				goto rcd_done;
1277			}
1278
1279			if (rcd->len) {
1280				dma_unmap_page(&adapter->pdev->dev,
1281					       rbi->dma_addr, rbi->len,
1282					       PCI_DMA_FROMDEVICE);
1283
1284				vmxnet3_append_frag(ctx->skb, rcd, rbi);
1285			}
1286
1287			/* Immediate refill */
1288			rbi->page = new_page;
1289			rbi->dma_addr = dma_map_page(&adapter->pdev->dev,
1290						     rbi->page,
1291						     0, PAGE_SIZE,
1292						     PCI_DMA_FROMDEVICE);
1293			rxd->addr = cpu_to_le64(rbi->dma_addr);
1294			rxd->len = rbi->len;
1295		}
1296
1297
1298		skb = ctx->skb;
1299		if (rcd->eop) {
1300			skb->len += skb->data_len;
1301
1302			vmxnet3_rx_csum(adapter, skb,
1303					(union Vmxnet3_GenericDesc *)rcd);
1304			skb->protocol = eth_type_trans(skb, adapter->netdev);
1305
1306			if (unlikely(rcd->ts))
1307				__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1308
1309			if (adapter->netdev->features & NETIF_F_LRO)
1310				netif_receive_skb(skb);
1311			else
1312				napi_gro_receive(&rq->napi, skb);
1313
1314			ctx->skb = NULL;
1315		}
1316
1317rcd_done:
1318		/* device may have skipped some rx descs */
1319		ring->next2comp = idx;
1320		num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1321		ring = rq->rx_ring + ring_idx;
1322		while (num_to_alloc) {
1323			vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1324					  &rxCmdDesc);
1325			BUG_ON(!rxd->addr);
1326
1327			/* Recv desc is ready to be used by the device */
1328			rxd->gen = ring->gen;
1329			vmxnet3_cmd_ring_adv_next2fill(ring);
1330			num_to_alloc--;
1331		}
1332
1333		/* if needed, update the register */
1334		if (unlikely(rq->shared->updateRxProd)) {
1335			VMXNET3_WRITE_BAR0_REG(adapter,
1336					       rxprod_reg[ring_idx] + rq->qid * 8,
1337					       ring->next2fill);
1338		}
1339
1340		vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1341		vmxnet3_getRxComp(rcd,
1342				  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1343	}
1344
1345	return num_rxd;
1346}
1347
1348
1349static void
1350vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1351		   struct vmxnet3_adapter *adapter)
1352{
1353	u32 i, ring_idx;
1354	struct Vmxnet3_RxDesc *rxd;
1355
1356	for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1357		for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1358#ifdef __BIG_ENDIAN_BITFIELD
1359			struct Vmxnet3_RxDesc rxDesc;
1360#endif
1361			vmxnet3_getRxDesc(rxd,
1362				&rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1363
1364			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1365					rq->buf_info[ring_idx][i].skb) {
1366				dma_unmap_single(&adapter->pdev->dev, rxd->addr,
1367						 rxd->len, PCI_DMA_FROMDEVICE);
1368				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1369				rq->buf_info[ring_idx][i].skb = NULL;
1370			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1371					rq->buf_info[ring_idx][i].page) {
1372				dma_unmap_page(&adapter->pdev->dev, rxd->addr,
1373					       rxd->len, PCI_DMA_FROMDEVICE);
1374				put_page(rq->buf_info[ring_idx][i].page);
1375				rq->buf_info[ring_idx][i].page = NULL;
1376			}
1377		}
1378
1379		rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1380		rq->rx_ring[ring_idx].next2fill =
1381					rq->rx_ring[ring_idx].next2comp = 0;
1382	}
1383
1384	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1385	rq->comp_ring.next2proc = 0;
1386}
1387
1388
1389static void
1390vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1391{
1392	int i;
1393
1394	for (i = 0; i < adapter->num_rx_queues; i++)
1395		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1396}
1397
1398
1399static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1400			       struct vmxnet3_adapter *adapter)
1401{
1402	int i;
1403	int j;
1404
1405	/* all rx buffers must have already been freed */
1406	for (i = 0; i < 2; i++) {
1407		if (rq->buf_info[i]) {
1408			for (j = 0; j < rq->rx_ring[i].size; j++)
1409				BUG_ON(rq->buf_info[i][j].page != NULL);
1410		}
1411	}
1412
1413
1414	for (i = 0; i < 2; i++) {
1415		if (rq->rx_ring[i].base) {
1416			dma_free_coherent(&adapter->pdev->dev,
1417					  rq->rx_ring[i].size
1418					  * sizeof(struct Vmxnet3_RxDesc),
1419					  rq->rx_ring[i].base,
1420					  rq->rx_ring[i].basePA);
1421			rq->rx_ring[i].base = NULL;
1422		}
1423		rq->buf_info[i] = NULL;
1424	}
1425
1426	if (rq->comp_ring.base) {
1427		dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
1428				  * sizeof(struct Vmxnet3_RxCompDesc),
1429				  rq->comp_ring.base, rq->comp_ring.basePA);
1430		rq->comp_ring.base = NULL;
1431	}
1432
1433	if (rq->buf_info[0]) {
1434		size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
1435			(rq->rx_ring[0].size + rq->rx_ring[1].size);
1436		dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
1437				  rq->buf_info_pa);
1438	}
1439}
1440
1441
1442static int
1443vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1444		struct vmxnet3_adapter  *adapter)
1445{
1446	int i;
1447
1448	/* initialize buf_info */
1449	for (i = 0; i < rq->rx_ring[0].size; i++) {
1450
1451		/* 1st buf for a pkt is skbuff */
1452		if (i % adapter->rx_buf_per_pkt == 0) {
1453			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1454			rq->buf_info[0][i].len = adapter->skb_buf_size;
		} else { /* subsequent bufs for a pkt are frags */
1456			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1457			rq->buf_info[0][i].len = PAGE_SIZE;
1458		}
1459	}
1460	for (i = 0; i < rq->rx_ring[1].size; i++) {
1461		rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1462		rq->buf_info[1][i].len = PAGE_SIZE;
1463	}
1464
1465	/* reset internal state and allocate buffers for both rings */
1466	for (i = 0; i < 2; i++) {
1467		rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1468
1469		memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1470		       sizeof(struct Vmxnet3_RxDesc));
1471		rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1472	}
1473	if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1474				    adapter) == 0) {
		/* we need at least 1 rx buffer for the 1st ring */
1476		return -ENOMEM;
1477	}
1478	vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1479
1480	/* reset the comp ring */
1481	rq->comp_ring.next2proc = 0;
1482	memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1483	       sizeof(struct Vmxnet3_RxCompDesc));
1484	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1485
1486	/* reset rxctx */
1487	rq->rx_ctx.skb = NULL;
1488
1489	/* stats are not reset */
1490	return 0;
1491}
1492
1493
1494static int
1495vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1496{
1497	int i, err = 0;
1498
1499	for (i = 0; i < adapter->num_rx_queues; i++) {
1500		err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1501		if (unlikely(err)) {
1502			dev_err(&adapter->netdev->dev, "%s: failed to "
1503				"initialize rx queue%i\n",
1504				adapter->netdev->name, i);
1505			break;
1506		}
1507	}
1508	return err;
1509
1510}
1511
1512
1513static int
1514vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1515{
1516	int i;
1517	size_t sz;
1518	struct vmxnet3_rx_buf_info *bi;
1519
1520	for (i = 0; i < 2; i++) {
1521
1522		sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1523		rq->rx_ring[i].base = dma_alloc_coherent(
1524						&adapter->pdev->dev, sz,
1525						&rq->rx_ring[i].basePA,
1526						GFP_KERNEL);
1527		if (!rq->rx_ring[i].base) {
1528			netdev_err(adapter->netdev,
1529				   "failed to allocate rx ring %d\n", i);
1530			goto err;
1531		}
1532	}
1533
1534	sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1535	rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
1536						&rq->comp_ring.basePA,
1537						GFP_KERNEL);
1538	if (!rq->comp_ring.base) {
1539		netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1540		goto err;
1541	}
1542
1543	sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1544						   rq->rx_ring[1].size);
1545	bi = dma_zalloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
1546				 GFP_KERNEL);
1547	if (!bi)
1548		goto err;
1549
1550	rq->buf_info[0] = bi;
1551	rq->buf_info[1] = bi + rq->rx_ring[0].size;
1552
1553	return 0;
1554
1555err:
1556	vmxnet3_rq_destroy(rq, adapter);
1557	return -ENOMEM;
1558}
1559
1560
1561static int
1562vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1563{
1564	int i, err = 0;
1565
1566	for (i = 0; i < adapter->num_rx_queues; i++) {
1567		err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1568		if (unlikely(err)) {
1569			dev_err(&adapter->netdev->dev,
1570				"%s: failed to create rx queue%i\n",
1571				adapter->netdev->name, i);
1572			goto err_out;
1573		}
1574	}
1575	return err;
1576err_out:
1577	vmxnet3_rq_destroy_all(adapter);
1578	return err;
1579
1580}
1581
1582/* Multiple queue aware polling function for tx and rx */
1583
1584static int
1585vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1586{
1587	int rcd_done = 0, i;
1588	if (unlikely(adapter->shared->ecr))
1589		vmxnet3_process_events(adapter);
1590	for (i = 0; i < adapter->num_tx_queues; i++)
1591		vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1592
1593	for (i = 0; i < adapter->num_rx_queues; i++)
1594		rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1595						   adapter, budget);
1596	return rcd_done;
1597}
1598
1599
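/*
 * NAPI polling function used when the device has a single interrupt for all
 * queues (see vmxnet3_intr(), which schedules rx_queue[0]'s napi): one napi
 * instance services events, all tx completion rings and all rx rings via
 * vmxnet3_do_poll() above.
 */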
1600static int
1601vmxnet3_poll(struct napi_struct *napi, int budget)
1602{
1603	struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1604					  struct vmxnet3_rx_queue, napi);
1605	int rxd_done;
1606
1607	rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1608
1609	if (rxd_done < budget) {
1610		napi_complete(napi);
1611		vmxnet3_enable_all_intrs(rx_queue->adapter);
1612	}
1613	return rxd_done;
1614}
1615
1616/*
1617 * NAPI polling function for MSI-X mode with multiple Rx queues
 * Returns the # of NAPI credits consumed (# of rx descriptors processed)
1619 */
1620
1621static int
1622vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1623{
1624	struct vmxnet3_rx_queue *rq = container_of(napi,
1625						struct vmxnet3_rx_queue, napi);
1626	struct vmxnet3_adapter *adapter = rq->adapter;
1627	int rxd_done;
1628
1629	/* When sharing interrupt with corresponding tx queue, process
1630	 * tx completions in that queue as well
1631	 */
1632	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1633		struct vmxnet3_tx_queue *tq =
1634				&adapter->tx_queue[rq - adapter->rx_queue];
1635		vmxnet3_tq_tx_complete(tq, adapter);
1636	}
1637
1638	rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1639
1640	if (rxd_done < budget) {
1641		napi_complete(napi);
1642		vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1643	}
1644	return rxd_done;
1645}
1646
1647
1648#ifdef CONFIG_PCI_MSI
1649
1650/*
1651 * Handle completion interrupts on tx queues
1652 * Returns whether or not the intr is handled
1653 */
1654
1655static irqreturn_t
1656vmxnet3_msix_tx(int irq, void *data)
1657{
1658	struct vmxnet3_tx_queue *tq = data;
1659	struct vmxnet3_adapter *adapter = tq->adapter;
1660
1661	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1662		vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1663
	/* Handle the case where only one irq is allocated for all tx queues */
1665	if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1666		int i;
1667		for (i = 0; i < adapter->num_tx_queues; i++) {
1668			struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1669			vmxnet3_tq_tx_complete(txq, adapter);
1670		}
1671	} else {
1672		vmxnet3_tq_tx_complete(tq, adapter);
1673	}
1674	vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1675
1676	return IRQ_HANDLED;
1677}
1678
1679
1680/*
1681 * Handle completion interrupts on rx queues. Returns whether or not the
1682 * intr is handled
1683 */
1684
1685static irqreturn_t
1686vmxnet3_msix_rx(int irq, void *data)
1687{
1688	struct vmxnet3_rx_queue *rq = data;
1689	struct vmxnet3_adapter *adapter = rq->adapter;
1690
1691	/* disable intr if needed */
1692	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1693		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1694	napi_schedule(&rq->napi);
1695
1696	return IRQ_HANDLED;
1697}
1698
1699/*
1700 *----------------------------------------------------------------------------
1701 *
1702 * vmxnet3_msix_event --
1703 *
1704 *    vmxnet3 msix event intr handler
1705 *
1706 * Result:
1707 *    whether or not the intr is handled
1708 *
1709 *----------------------------------------------------------------------------
1710 */
1711
1712static irqreturn_t
1713vmxnet3_msix_event(int irq, void *data)
1714{
1715	struct net_device *dev = data;
1716	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1717
1718	/* disable intr if needed */
1719	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1720		vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1721
1722	if (adapter->shared->ecr)
1723		vmxnet3_process_events(adapter);
1724
1725	vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1726
1727	return IRQ_HANDLED;
1728}
1729
1730#endif /* CONFIG_PCI_MSI  */
1731
1732
1733/* Interrupt handler for vmxnet3  */
1734static irqreturn_t
1735vmxnet3_intr(int irq, void *dev_id)
1736{
1737	struct net_device *dev = dev_id;
1738	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1739
1740	if (adapter->intr.type == VMXNET3_IT_INTX) {
1741		u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1742		if (unlikely(icr == 0))
1743			/* not ours */
1744			return IRQ_NONE;
1745	}
1746
1747
1748	/* disable intr if needed */
1749	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1750		vmxnet3_disable_all_intrs(adapter);
1751
1752	napi_schedule(&adapter->rx_queue[0].napi);
1753
1754	return IRQ_HANDLED;
1755}
1756
1757#ifdef CONFIG_NET_POLL_CONTROLLER
1758
1759/* netpoll callback. */
1760static void
1761vmxnet3_netpoll(struct net_device *netdev)
1762{
1763	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1764
1765	switch (adapter->intr.type) {
1766#ifdef CONFIG_PCI_MSI
1767	case VMXNET3_IT_MSIX: {
1768		int i;
1769		for (i = 0; i < adapter->num_rx_queues; i++)
1770			vmxnet3_msix_rx(0, &adapter->rx_queue[i]);
1771		break;
1772	}
1773#endif
1774	case VMXNET3_IT_MSI:
1775	default:
1776		vmxnet3_intr(0, adapter->netdev);
1777		break;
1778	}
1779
1780}
1781#endif	/* CONFIG_NET_POLL_CONTROLLER */
1782
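/*
 * Request one irq per interrupt vector. With MSI-X the vector layout depends
 * on adapter->share_intr: VMXNET3_INTR_TXSHARE gives all tx queues a single
 * "-tx-" vector, VMXNET3_INTR_BUDDYSHARE makes tx queue i share the "-rxtx-"
 * vector of rx queue i, and otherwise every tx and rx queue gets its own
 * vector; the vector after the rx vectors is used for events.
 */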
1783static int
1784vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1785{
1786	struct vmxnet3_intr *intr = &adapter->intr;
1787	int err = 0, i;
1788	int vector = 0;
1789
1790#ifdef CONFIG_PCI_MSI
1791	if (adapter->intr.type == VMXNET3_IT_MSIX) {
1792		for (i = 0; i < adapter->num_tx_queues; i++) {
1793			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1794				sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1795					adapter->netdev->name, vector);
1796				err = request_irq(
1797					      intr->msix_entries[vector].vector,
1798					      vmxnet3_msix_tx, 0,
1799					      adapter->tx_queue[i].name,
1800					      &adapter->tx_queue[i]);
1801			} else {
1802				sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1803					adapter->netdev->name, vector);
1804			}
1805			if (err) {
1806				dev_err(&adapter->netdev->dev,
1807					"Failed to request irq for MSIX, %s, "
1808					"error %d\n",
1809					adapter->tx_queue[i].name, err);
1810				return err;
1811			}
1812
			/* Handle the case where only 1 MSI-X vector was allocated for
1814			 * all tx queues */
1815			if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1816				for (; i < adapter->num_tx_queues; i++)
1817					adapter->tx_queue[i].comp_ring.intr_idx
1818								= vector;
1819				vector++;
1820				break;
1821			} else {
1822				adapter->tx_queue[i].comp_ring.intr_idx
1823								= vector++;
1824			}
1825		}
1826		if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1827			vector = 0;
1828
1829		for (i = 0; i < adapter->num_rx_queues; i++) {
1830			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1831				sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1832					adapter->netdev->name, vector);
1833			else
1834				sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1835					adapter->netdev->name, vector);
1836			err = request_irq(intr->msix_entries[vector].vector,
1837					  vmxnet3_msix_rx, 0,
1838					  adapter->rx_queue[i].name,
1839					  &(adapter->rx_queue[i]));
1840			if (err) {
1841				netdev_err(adapter->netdev,
1842					   "Failed to request irq for MSIX, "
1843					   "%s, error %d\n",
1844					   adapter->rx_queue[i].name, err);
1845				return err;
1846			}
1847
1848			adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1849		}
1850
1851		sprintf(intr->event_msi_vector_name, "%s-event-%d",
1852			adapter->netdev->name, vector);
1853		err = request_irq(intr->msix_entries[vector].vector,
1854				  vmxnet3_msix_event, 0,
1855				  intr->event_msi_vector_name, adapter->netdev);
1856		intr->event_intr_idx = vector;
1857
1858	} else if (intr->type == VMXNET3_IT_MSI) {
1859		adapter->num_rx_queues = 1;
1860		err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1861				  adapter->netdev->name, adapter->netdev);
1862	} else {
1863#endif
1864		adapter->num_rx_queues = 1;
1865		err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1866				  IRQF_SHARED, adapter->netdev->name,
1867				  adapter->netdev);
1868#ifdef CONFIG_PCI_MSI
1869	}
1870#endif
1871	intr->num_intrs = vector + 1;
1872	if (err) {
1873		netdev_err(adapter->netdev,
1874			   "Failed to request irq (intr type:%d), error %d\n",
1875			   intr->type, err);
1876	} else {
1877		/* Number of rx queues will not change after this */
1878		for (i = 0; i < adapter->num_rx_queues; i++) {
1879			struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1880			rq->qid = i;
1881			rq->qid2 = i + adapter->num_rx_queues;
1882		}
1883
1884
1885
1886		/* init our intr settings */
1887		for (i = 0; i < intr->num_intrs; i++)
1888			intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1889		if (adapter->intr.type != VMXNET3_IT_MSIX) {
1890			adapter->intr.event_intr_idx = 0;
1891			for (i = 0; i < adapter->num_tx_queues; i++)
1892				adapter->tx_queue[i].comp_ring.intr_idx = 0;
1893			adapter->rx_queue[0].comp_ring.intr_idx = 0;
1894		}
1895
1896		netdev_info(adapter->netdev,
1897			    "intr type %u, mode %u, %u vectors allocated\n",
1898			    intr->type, intr->mask_mode, intr->num_intrs);
1899	}
1900
1901	return err;
1902}
1903
1904
1905static void
1906vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1907{
1908	struct vmxnet3_intr *intr = &adapter->intr;
1909	BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1910
1911	switch (intr->type) {
1912#ifdef CONFIG_PCI_MSI
1913	case VMXNET3_IT_MSIX:
1914	{
1915		int i, vector = 0;
1916
1917		if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1918			for (i = 0; i < adapter->num_tx_queues; i++) {
1919				free_irq(intr->msix_entries[vector++].vector,
1920					 &(adapter->tx_queue[i]));
1921				if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1922					break;
1923			}
1924		}
1925
1926		for (i = 0; i < adapter->num_rx_queues; i++) {
1927			free_irq(intr->msix_entries[vector++].vector,
1928				 &(adapter->rx_queue[i]));
1929		}
1930
1931		free_irq(intr->msix_entries[vector].vector,
1932			 adapter->netdev);
1933		BUG_ON(vector >= intr->num_intrs);
1934		break;
1935	}
1936#endif
1937	case VMXNET3_IT_MSI:
1938		free_irq(adapter->pdev->irq, adapter->netdev);
1939		break;
1940	case VMXNET3_IT_INTX:
1941		free_irq(adapter->pdev->irq, adapter->netdev);
1942		break;
1943	default:
1944		BUG();
1945	}
1946}
1947
1948
1949static void
1950vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1951{
1952	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1953	u16 vid;
1954
1955	/* allow untagged pkts */
1956	VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1957
1958	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1959		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1960}
1961
1962
1963static int
1964vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1965{
1966	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1967
1968	if (!(netdev->flags & IFF_PROMISC)) {
1969		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1970		unsigned long flags;
1971
1972		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1973		spin_lock_irqsave(&adapter->cmd_lock, flags);
1974		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1975				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1976		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1977	}
1978
1979	set_bit(vid, adapter->active_vlans);
1980
1981	return 0;
1982}
1983
1984
1985static int
1986vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1987{
1988	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1989
1990	if (!(netdev->flags & IFF_PROMISC)) {
1991		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1992		unsigned long flags;
1993
1994		VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1995		spin_lock_irqsave(&adapter->cmd_lock, flags);
1996		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1997				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1998		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1999	}
2000
2001	clear_bit(vid, adapter->active_vlans);
2002
2003	return 0;
2004}
2005
2006
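/*
 * Flatten the netdev multicast list into a contiguous array of ETH_ALEN-byte
 * addresses. The caller DMA-maps the buffer, points mfTablePA/mfTableLen at
 * it, and frees it once the device has been told to update its MAC filters
 * (see vmxnet3_set_mc() below).
 */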
2007static u8 *
2008vmxnet3_copy_mc(struct net_device *netdev)
2009{
2010	u8 *buf = NULL;
2011	u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
2012
2013	/* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
2014	if (sz <= 0xffff) {
2015		/* We may be called with BH disabled */
2016		buf = kmalloc(sz, GFP_ATOMIC);
2017		if (buf) {
2018			struct netdev_hw_addr *ha;
2019			int i = 0;
2020
2021			netdev_for_each_mc_addr(ha, netdev)
2022				memcpy(buf + i++ * ETH_ALEN, ha->addr,
2023				       ETH_ALEN);
2024		}
2025	}
2026	return buf;
2027}
2028
2029
2030static void
2031vmxnet3_set_mc(struct net_device *netdev)
2032{
2033	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2034	unsigned long flags;
2035	struct Vmxnet3_RxFilterConf *rxConf =
2036					&adapter->shared->devRead.rxFilterConf;
2037	u8 *new_table = NULL;
2038	dma_addr_t new_table_pa = 0;
2039	u32 new_mode = VMXNET3_RXM_UCAST;
2040
2041	if (netdev->flags & IFF_PROMISC) {
2042		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2043		memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2044
2045		new_mode |= VMXNET3_RXM_PROMISC;
2046	} else {
2047		vmxnet3_restore_vlan(adapter);
2048	}
2049
2050	if (netdev->flags & IFF_BROADCAST)
2051		new_mode |= VMXNET3_RXM_BCAST;
2052
2053	if (netdev->flags & IFF_ALLMULTI)
2054		new_mode |= VMXNET3_RXM_ALL_MULTI;
2055	else
2056		if (!netdev_mc_empty(netdev)) {
2057			new_table = vmxnet3_copy_mc(netdev);
2058			if (new_table) {
2059				rxConf->mfTableLen = cpu_to_le16(
2060					netdev_mc_count(netdev) * ETH_ALEN);
2061				new_table_pa = dma_map_single(
2062							&adapter->pdev->dev,
2063							new_table,
2064							rxConf->mfTableLen,
2065							PCI_DMA_TODEVICE);
2066			}
2067
2068			if (new_table_pa) {
2069				new_mode |= VMXNET3_RXM_MCAST;
2070				rxConf->mfTablePA = cpu_to_le64(new_table_pa);
2071			} else {
2072				netdev_info(netdev,
2073					    "failed to copy mcast list, setting ALL_MULTI\n");
2074				new_mode |= VMXNET3_RXM_ALL_MULTI;
2075			}
2076		}
2077
2078	if (!(new_mode & VMXNET3_RXM_MCAST)) {
2079		rxConf->mfTableLen = 0;
2080		rxConf->mfTablePA = 0;
2081	}
2082
2083	spin_lock_irqsave(&adapter->cmd_lock, flags);
2084	if (new_mode != rxConf->rxMode) {
2085		rxConf->rxMode = cpu_to_le32(new_mode);
2086		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2087				       VMXNET3_CMD_UPDATE_RX_MODE);
2088		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2089				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2090	}
2091
2092	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2093			       VMXNET3_CMD_UPDATE_MAC_FILTERS);
2094	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2095
2096	if (new_table_pa)
2097		dma_unmap_single(&adapter->pdev->dev, new_table_pa,
2098				 rxConf->mfTableLen, PCI_DMA_TODEVICE);
2099	kfree(new_table);
2100}
2101
2102void
2103vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2104{
2105	int i;
2106
2107	for (i = 0; i < adapter->num_rx_queues; i++)
2108		vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2109}
2110
2111
2112/*
2113 *   Set up driver_shared based on settings in adapter.
2114 */
2115
2116static void
2117vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2118{
2119	struct Vmxnet3_DriverShared *shared = adapter->shared;
2120	struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2121	struct Vmxnet3_TxQueueConf *tqc;
2122	struct Vmxnet3_RxQueueConf *rqc;
2123	int i;
2124
2125	memset(shared, 0, sizeof(*shared));
2126
2127	/* driver settings */
2128	shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2129	devRead->misc.driverInfo.version = cpu_to_le32(
2130						VMXNET3_DRIVER_VERSION_NUM);
2131	devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2132				VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2133	devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
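	/* The gos bit-fields share a single 32-bit word; byte-swap it as a
	 * whole so the device reads it as little-endian.
	 */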
2134	*((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2135				*((u32 *)&devRead->misc.driverInfo.gos));
2136	devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2137	devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2138
2139	devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
2140	devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2141
2142	/* set up feature flags */
2143	if (adapter->netdev->features & NETIF_F_RXCSUM)
2144		devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2145
2146	if (adapter->netdev->features & NETIF_F_LRO) {
2147		devRead->misc.uptFeatures |= UPT1_F_LRO;
2148		devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2149	}
2150	if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2151		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2152
2153	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2154	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2155	devRead->misc.queueDescLen = cpu_to_le32(
2156		adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2157		adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2158
2159	/* tx queue settings */
2160	devRead->misc.numTxQueues =  adapter->num_tx_queues;
2161	for (i = 0; i < adapter->num_tx_queues; i++) {
2162		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2163		BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2164		tqc = &adapter->tqd_start[i].conf;
2165		tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2166		tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2167		tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2168		tqc->ddPA           = cpu_to_le64(tq->buf_info_pa);
2169		tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2170		tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2171		tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2172		tqc->ddLen          = cpu_to_le32(
2173					sizeof(struct vmxnet3_tx_buf_info) *
2174					tqc->txRingSize);
2175		tqc->intrIdx        = tq->comp_ring.intr_idx;
2176	}
2177
2178	/* rx queue settings */
2179	devRead->misc.numRxQueues = adapter->num_rx_queues;
2180	for (i = 0; i < adapter->num_rx_queues; i++) {
2181		struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[i];
2182		rqc = &adapter->rqd_start[i].conf;
2183		rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2184		rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2185		rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2186		rqc->ddPA            = cpu_to_le64(rq->buf_info_pa);
2187		rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2188		rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2189		rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2190		rqc->ddLen           = cpu_to_le32(
2191					sizeof(struct vmxnet3_rx_buf_info) *
2192					(rqc->rxRingSize[0] +
2193					 rqc->rxRingSize[1]));
2194		rqc->intrIdx         = rq->comp_ring.intr_idx;
2195	}
2196
2197#ifdef VMXNET3_RSS
2198	memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2199
2200	if (adapter->rss) {
2201		struct UPT1_RSSConf *rssConf = adapter->rss_conf;
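		/* Fixed Toeplitz hash key; a constant key keeps RSS queue
		 * selection deterministic across driver loads.
		 */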
2202		static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2203			0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2204			0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2205			0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2206			0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2207			0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2208		};
2209
2210		devRead->misc.uptFeatures |= UPT1_F_RSS;
2211		devRead->misc.numRxQueues = adapter->num_rx_queues;
2212		rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2213				    UPT1_RSS_HASH_TYPE_IPV4 |
2214				    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2215				    UPT1_RSS_HASH_TYPE_IPV6;
2216		rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2217		rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2218		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2219		memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2220
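		/* Spread flows across the rx queues using the kernel's
		 * default (round-robin) indirection layout.
		 */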
2221		for (i = 0; i < rssConf->indTableSize; i++)
2222			rssConf->indTable[i] = ethtool_rxfh_indir_default(
2223				i, adapter->num_rx_queues);
2224
2225		devRead->rssConfDesc.confVer = 1;
2226		devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2227		devRead->rssConfDesc.confPA =
2228			cpu_to_le64(adapter->rss_conf_pa);
2229	}
2230
2231#endif /* VMXNET3_RSS */
2232
2233	/* intr settings */
2234	devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2235				     VMXNET3_IMM_AUTO;
2236	devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2237	for (i = 0; i < adapter->intr.num_intrs; i++)
2238		devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2239
2240	devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
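	/* Leave interrupts masked in the shared area; they are enabled via
	 * vmxnet3_enable_all_intrs() once the device has been activated.
	 */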
2241	devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2242
2243	/* rx filter settings */
2244	devRead->rxFilterConf.rxMode = 0;
2245	vmxnet3_restore_vlan(adapter);
2246	vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2247
2248	/* the rest are already zeroed */
2249}
2250
2251
2252int
2253vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2254{
2255	int err, i;
2256	u32 ret;
2257	unsigned long flags;
2258
2259	netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2260		" ring sizes %u %u %u\n", adapter->netdev->name,
2261		adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2262		adapter->tx_queue[0].tx_ring.size,
2263		adapter->rx_queue[0].rx_ring[0].size,
2264		adapter->rx_queue[0].rx_ring[1].size);
2265
2266	vmxnet3_tq_init_all(adapter);
2267	err = vmxnet3_rq_init_all(adapter);
2268	if (err) {
2269		netdev_err(adapter->netdev,
			   "Failed to init rx queues, error %d\n", err);
2271		goto rq_err;
2272	}
2273
2274	err = vmxnet3_request_irqs(adapter);
2275	if (err) {
2276		netdev_err(adapter->netdev,
			   "Failed to set up irqs, error %d\n", err);
2278		goto irq_err;
2279	}
2280
2281	vmxnet3_setup_driver_shared(adapter);
2282
2283	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2284			       adapter->shared_pa));
2285	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2286			       adapter->shared_pa));
2287	spin_lock_irqsave(&adapter->cmd_lock, flags);
2288	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2289			       VMXNET3_CMD_ACTIVATE_DEV);
2290	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2291	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2292
2293	if (ret != 0) {
2294		netdev_err(adapter->netdev,
2295			   "Failed to activate dev: error %u\n", ret);
2296		err = -EINVAL;
2297		goto activate_err;
2298	}
2299
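	/* Publish how far each rx ring has been pre-filled by writing its
	 * fill index to the per-queue RXPROD/RXPROD2 register.
	 */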
2300	for (i = 0; i < adapter->num_rx_queues; i++) {
2301		VMXNET3_WRITE_BAR0_REG(adapter,
2302				VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2303				adapter->rx_queue[i].rx_ring[0].next2fill);
2304		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2305				(i * VMXNET3_REG_ALIGN)),
2306				adapter->rx_queue[i].rx_ring[1].next2fill);
2307	}
2308
	/* Apply the rx filter settings last. */
2310	vmxnet3_set_mc(adapter->netdev);
2311
2312	/*
2313	 * Check link state when first activating device. It will start the
2314	 * tx queue if the link is up.
2315	 */
2316	vmxnet3_check_link(adapter, true);
2317	for (i = 0; i < adapter->num_rx_queues; i++)
2318		napi_enable(&adapter->rx_queue[i].napi);
2319	vmxnet3_enable_all_intrs(adapter);
2320	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2321	return 0;
2322
2323activate_err:
2324	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2325	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2326	vmxnet3_free_irqs(adapter);
2327irq_err:
2328rq_err:
2329	/* free up buffers we allocated */
2330	vmxnet3_rq_cleanup_all(adapter);
2331	return err;
2332}
2333
2334
2335void
2336vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2337{
2338	unsigned long flags;
2339	spin_lock_irqsave(&adapter->cmd_lock, flags);
2340	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2341	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2342}
2343
2344
2345int
2346vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2347{
2348	int i;
2349	unsigned long flags;
2350	if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2351		return 0;
2352
2354	spin_lock_irqsave(&adapter->cmd_lock, flags);
2355	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2356			       VMXNET3_CMD_QUIESCE_DEV);
2357	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2358	vmxnet3_disable_all_intrs(adapter);
2359
2360	for (i = 0; i < adapter->num_rx_queues; i++)
2361		napi_disable(&adapter->rx_queue[i].napi);
2362	netif_tx_disable(adapter->netdev);
2363	adapter->link_speed = 0;
2364	netif_carrier_off(adapter->netdev);
2365
2366	vmxnet3_tq_cleanup_all(adapter);
2367	vmxnet3_rq_cleanup_all(adapter);
2368	vmxnet3_free_irqs(adapter);
2369	return 0;
2370}
2371
2372
2373static void
2374vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2375{
2376	u32 tmp;
2377
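	/* MACL takes the first four bytes of the address, MACH the remaining
	 * two in its low 16 bits.
	 */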
2378	tmp = *(u32 *)mac;
2379	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2380
2381	tmp = (mac[5] << 8) | mac[4];
2382	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2383}
2384
2385
2386static int
2387vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2388{
2389	struct sockaddr *addr = p;
2390	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2391
2392	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2393	vmxnet3_write_mac_addr(adapter, addr->sa_data);
2394
2395	return 0;
2396}
2397
2398
2399/* ==================== initialization and cleanup routines ============ */
2400
2401static int
2402vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2403{
2404	int err;
2405	unsigned long mmio_start, mmio_len;
2406	struct pci_dev *pdev = adapter->pdev;
2407
2408	err = pci_enable_device(pdev);
2409	if (err) {
2410		dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2411		return err;
2412	}
2413
2414	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2415		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2416			dev_err(&pdev->dev,
2417				"pci_set_consistent_dma_mask failed\n");
2418			err = -EIO;
2419			goto err_set_mask;
2420		}
2421		*dma64 = true;
2422	} else {
2423		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2424			dev_err(&pdev->dev,
2425				"pci_set_dma_mask failed\n");
2426			err = -EIO;
2427			goto err_set_mask;
2428		}
2429		*dma64 = false;
2430	}
2431
2432	err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2433					   vmxnet3_driver_name);
2434	if (err) {
2435		dev_err(&pdev->dev,
2436			"Failed to request region for adapter: error %d\n", err);
2437		goto err_set_mask;
2438	}
2439
2440	pci_set_master(pdev);
2441
2442	mmio_start = pci_resource_start(pdev, 0);
2443	mmio_len = pci_resource_len(pdev, 0);
2444	adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2445	if (!adapter->hw_addr0) {
2446		dev_err(&pdev->dev, "Failed to map bar0\n");
2447		err = -EIO;
2448		goto err_ioremap;
2449	}
2450
2451	mmio_start = pci_resource_start(pdev, 1);
2452	mmio_len = pci_resource_len(pdev, 1);
2453	adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2454	if (!adapter->hw_addr1) {
2455		dev_err(&pdev->dev, "Failed to map bar1\n");
2456		err = -EIO;
2457		goto err_bar1;
2458	}
2459	return 0;
2460
2461err_bar1:
2462	iounmap(adapter->hw_addr0);
2463err_ioremap:
2464	pci_release_selected_regions(pdev, (1 << 2) - 1);
2465err_set_mask:
2466	pci_disable_device(pdev);
2467	return err;
2468}
2469
2470
2471static void
2472vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2473{
2474	BUG_ON(!adapter->pdev);
2475
2476	iounmap(adapter->hw_addr0);
2477	iounmap(adapter->hw_addr1);
2478	pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2479	pci_disable_device(adapter->pdev);
2480}
2481
2482
2483static void
2484vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2485{
2486	size_t sz, i, ring0_size, ring1_size, comp_size;
2487	struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[0];
2488
2489
2490	if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2491				    VMXNET3_MAX_ETH_HDR_SIZE) {
2492		adapter->skb_buf_size = adapter->netdev->mtu +
2493					VMXNET3_MAX_ETH_HDR_SIZE;
2494		if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2495			adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2496
2497		adapter->rx_buf_per_pkt = 1;
2498	} else {
2499		adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2500		sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2501					    VMXNET3_MAX_ETH_HDR_SIZE;
2502		adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2503	}
2504
2505	/*
2506	 * for simplicity, force the ring0 size to be a multiple of
2507	 * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2508	 */
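	/* For example (hypothetical numbers): with rx_buf_per_pkt = 3 and a
	 * ring alignment of 32, sz is 96, so a requested ring0 size of 256 is
	 * rounded up to 288 and then clamped to the largest multiple of 96
	 * that does not exceed VMXNET3_RX_RING_MAX_SIZE.
	 */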
2509	sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2510	ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2511	ring0_size = (ring0_size + sz - 1) / sz * sz;
2512	ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2513			   sz * sz);
2514	ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2515	comp_size = ring0_size + ring1_size;
2516
2517	for (i = 0; i < adapter->num_rx_queues; i++) {
2518		rq = &adapter->rx_queue[i];
2519		rq->rx_ring[0].size = ring0_size;
2520		rq->rx_ring[1].size = ring1_size;
2521		rq->comp_ring.size = comp_size;
2522	}
2523}
2524
2525
2526int
2527vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2528		      u32 rx_ring_size, u32 rx_ring2_size)
2529{
2530	int err = 0, i;
2531
2532	for (i = 0; i < adapter->num_tx_queues; i++) {
2533		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2534		tq->tx_ring.size   = tx_ring_size;
2535		tq->data_ring.size = tx_ring_size;
2536		tq->comp_ring.size = tx_ring_size;
2537		tq->shared = &adapter->tqd_start[i].ctrl;
2538		tq->stopped = true;
2539		tq->adapter = adapter;
2540		tq->qid = i;
2541		err = vmxnet3_tq_create(tq, adapter);
		/*
		 * Too late to change num_tx_queues. We cannot make do with
		 * fewer tx queues than we asked for, so fail the setup.
		 */
2546		if (err)
2547			goto queue_err;
2548	}
2549
2550	adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2551	adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2552	vmxnet3_adjust_rx_ring_size(adapter);
2553	for (i = 0; i < adapter->num_rx_queues; i++) {
2554		struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
		/* qid and qid2 for rx queues will be assigned later, when the
		 * number of rx queues is finalized after allocating intrs */
2557		rq->shared = &adapter->rqd_start[i].ctrl;
2558		rq->adapter = adapter;
2559		err = vmxnet3_rq_create(rq, adapter);
2560		if (err) {
2561			if (i == 0) {
2562				netdev_err(adapter->netdev,
2563					   "Could not allocate any rx queues. "
2564					   "Aborting.\n");
2565				goto queue_err;
2566			} else {
2567				netdev_info(adapter->netdev,
					    "Number of rx queues changed to %d\n",
					    i);
2570				adapter->num_rx_queues = i;
2571				err = 0;
2572				break;
2573			}
2574		}
2575	}
2576	return err;
2577queue_err:
2578	vmxnet3_tq_destroy_all(adapter);
2579	return err;
2580}
2581
2582static int
2583vmxnet3_open(struct net_device *netdev)
2584{
2585	struct vmxnet3_adapter *adapter;
2586	int err, i;
2587
2588	adapter = netdev_priv(netdev);
2589
2590	for (i = 0; i < adapter->num_tx_queues; i++)
2591		spin_lock_init(&adapter->tx_queue[i].tx_lock);
2592
2593	err = vmxnet3_create_queues(adapter, adapter->tx_ring_size,
2594				    adapter->rx_ring_size,
2595				    VMXNET3_DEF_RX_RING_SIZE);
2596	if (err)
2597		goto queue_err;
2598
2599	err = vmxnet3_activate_dev(adapter);
2600	if (err)
2601		goto activate_err;
2602
2603	return 0;
2604
2605activate_err:
2606	vmxnet3_rq_destroy_all(adapter);
2607	vmxnet3_tq_destroy_all(adapter);
2608queue_err:
2609	return err;
2610}
2611
2612
2613static int
2614vmxnet3_close(struct net_device *netdev)
2615{
2616	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2617
2618	/*
2619	 * Reset_work may be in the middle of resetting the device, wait for its
2620	 * completion.
2621	 */
2622	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2623		msleep(1);
2624
2625	vmxnet3_quiesce_dev(adapter);
2626
2627	vmxnet3_rq_destroy_all(adapter);
2628	vmxnet3_tq_destroy_all(adapter);
2629
2630	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2631
2633	return 0;
2634}
2635
2636
2637void
2638vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2639{
2640	int i;
2641
2642	/*
2643	 * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2644	 * vmxnet3_close() will deadlock.
2645	 */
2646	BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2647
2648	/* we need to enable NAPI, otherwise dev_close will deadlock */
2649	for (i = 0; i < adapter->num_rx_queues; i++)
2650		napi_enable(&adapter->rx_queue[i].napi);
2651	dev_close(adapter->netdev);
2652}
2653
2654
2655static int
2656vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2657{
2658	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2659	int err = 0;
2660
2661	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2662		return -EINVAL;
2663
2664	netdev->mtu = new_mtu;
2665
2666	/*
2667	 * Reset_work may be in the middle of resetting the device, wait for its
2668	 * completion.
2669	 */
2670	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2671		msleep(1);
2672
2673	if (netif_running(netdev)) {
2674		vmxnet3_quiesce_dev(adapter);
2675		vmxnet3_reset_dev(adapter);
2676
2677		/* we need to re-create the rx queue based on the new mtu */
2678		vmxnet3_rq_destroy_all(adapter);
2679		vmxnet3_adjust_rx_ring_size(adapter);
2680		err = vmxnet3_rq_create_all(adapter);
2681		if (err) {
2682			netdev_err(netdev,
				   "failed to re-create rx queues, "
				   "error %d. Closing it.\n", err);
2685			goto out;
2686		}
2687
2688		err = vmxnet3_activate_dev(adapter);
2689		if (err) {
2690			netdev_err(netdev,
2691				   "failed to re-activate, error %d. "
2692				   "Closing it\n", err);
2693			goto out;
2694		}
2695	}
2696
2697out:
2698	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2699	if (err)
2700		vmxnet3_force_close(adapter);
2701
2702	return err;
2703}
2704
2705
2706static void
2707vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2708{
2709	struct net_device *netdev = adapter->netdev;
2710
2711	netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2712		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2713		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2714		NETIF_F_LRO;
2715	if (dma64)
2716		netdev->hw_features |= NETIF_F_HIGHDMA;
2717	netdev->vlan_features = netdev->hw_features &
2718				~(NETIF_F_HW_VLAN_CTAG_TX |
2719				  NETIF_F_HW_VLAN_CTAG_RX);
2720	netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2721}
2722
2723
2724static void
2725vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2726{
2727	u32 tmp;
2728
2729	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2730	*(u32 *)mac = tmp;
2731
2732	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2733	mac[4] = tmp & 0xff;
2734	mac[5] = (tmp >> 8) & 0xff;
2735}
2736
2737#ifdef CONFIG_PCI_MSI
2738
/*
 * Enable MSIx vectors.
 * Returns :
 *	VMXNET3_LINUX_MIN_MSIX_VECT when only the minimum number of required
 *	 vectors could be enabled,
 *	the number of vectors which were enabled otherwise (this number is
 *	 greater than VMXNET3_LINUX_MIN_MSIX_VECT), or
 *	a negative error code if MSI-X could not be enabled at all.
 */
2747
2748static int
2749vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, int nvec)
2750{
2751	int ret = pci_enable_msix_range(adapter->pdev,
2752					adapter->intr.msix_entries, nvec, nvec);
2753
2754	if (ret == -ENOSPC && nvec > VMXNET3_LINUX_MIN_MSIX_VECT) {
2755		dev_err(&adapter->netdev->dev,
2756			"Failed to enable %d MSI-X, trying %d\n",
2757			nvec, VMXNET3_LINUX_MIN_MSIX_VECT);
2758
2759		ret = pci_enable_msix_range(adapter->pdev,
2760					    adapter->intr.msix_entries,
2761					    VMXNET3_LINUX_MIN_MSIX_VECT,
2762					    VMXNET3_LINUX_MIN_MSIX_VECT);
2763	}
2764
2765	if (ret < 0) {
2766		dev_err(&adapter->netdev->dev,
2767			"Failed to enable MSI-X, error: %d\n", ret);
2768	}
2769
2770	return ret;
2771}
2772
2773
2774#endif /* CONFIG_PCI_MSI */
2775
2776static void
2777vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2778{
2779	u32 cfg;
2780	unsigned long flags;
2781
2782	/* intr settings */
2783	spin_lock_irqsave(&adapter->cmd_lock, flags);
2784	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2785			       VMXNET3_CMD_GET_CONF_INTR);
2786	cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2787	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2788	adapter->intr.type = cfg & 0x3;
2789	adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2790
2791	if (adapter->intr.type == VMXNET3_IT_AUTO) {
2792		adapter->intr.type = VMXNET3_IT_MSIX;
2793	}
2794
2795#ifdef CONFIG_PCI_MSI
2796	if (adapter->intr.type == VMXNET3_IT_MSIX) {
2797		int i, nvec;
2798
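		/* One vector per tx queue (or a single shared tx vector), one
		 * per rx queue unless tx and rx are buddy-shared, plus one
		 * for events.
		 */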
2799		nvec  = adapter->share_intr == VMXNET3_INTR_TXSHARE ?
2800			1 : adapter->num_tx_queues;
2801		nvec += adapter->share_intr == VMXNET3_INTR_BUDDYSHARE ?
2802			0 : adapter->num_rx_queues;
2803		nvec += 1;	/* for link event */
2804		nvec = nvec > VMXNET3_LINUX_MIN_MSIX_VECT ?
2805		       nvec : VMXNET3_LINUX_MIN_MSIX_VECT;
2806
2807		for (i = 0; i < nvec; i++)
2808			adapter->intr.msix_entries[i].entry = i;
2809
2810		nvec = vmxnet3_acquire_msix_vectors(adapter, nvec);
2811		if (nvec < 0)
2812			goto msix_err;
2813
2814		/* If we cannot allocate one MSIx vector per queue
2815		 * then limit the number of rx queues to 1
2816		 */
2817		if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) {
2818			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2819			    || adapter->num_rx_queues != 1) {
2820				adapter->share_intr = VMXNET3_INTR_TXSHARE;
				netdev_err(adapter->netdev,
					   "Limiting number of rx queues to 1\n");
2823				adapter->num_rx_queues = 1;
2824			}
2825		}
2826
2827		adapter->intr.num_intrs = nvec;
2828		return;
2829
2830msix_err:
2831		/* If we cannot allocate MSIx vectors use only one rx queue */
2832		dev_info(&adapter->pdev->dev,
2833			 "Failed to enable MSI-X, error %d. "
2834			 "Limiting #rx queues to 1, try MSI.\n", nvec);
2835
2836		adapter->intr.type = VMXNET3_IT_MSI;
2837	}
2838
2839	if (adapter->intr.type == VMXNET3_IT_MSI) {
2840		if (!pci_enable_msi(adapter->pdev)) {
2841			adapter->num_rx_queues = 1;
2842			adapter->intr.num_intrs = 1;
2843			return;
2844		}
2845	}
2846#endif /* CONFIG_PCI_MSI */
2847
2848	adapter->num_rx_queues = 1;
2849	dev_info(&adapter->netdev->dev,
2850		 "Using INTx interrupt, #Rx queues: 1.\n");
2851	adapter->intr.type = VMXNET3_IT_INTX;
2852
2853	/* INT-X related setting */
2854	adapter->intr.num_intrs = 1;
2855}
2856
2857
2858static void
2859vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2860{
2861	if (adapter->intr.type == VMXNET3_IT_MSIX)
2862		pci_disable_msix(adapter->pdev);
2863	else if (adapter->intr.type == VMXNET3_IT_MSI)
2864		pci_disable_msi(adapter->pdev);
2865	else
2866		BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2867}
2868
2869
2870static void
2871vmxnet3_tx_timeout(struct net_device *netdev)
2872{
	struct vmxnet3_adapter *adapter = netdev_priv(netdev);

	adapter->tx_timeout_count++;
2875
2876	netdev_err(adapter->netdev, "tx hang\n");
2877	schedule_work(&adapter->work);
2878	netif_wake_queue(adapter->netdev);
2879}
2880
2881
2882static void
2883vmxnet3_reset_work(struct work_struct *data)
2884{
2885	struct vmxnet3_adapter *adapter;
2886
2887	adapter = container_of(data, struct vmxnet3_adapter, work);
2888
2889	/* if another thread is resetting the device, no need to proceed */
2890	if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2891		return;
2892
2893	/* if the device is closed, we must leave it alone */
2894	rtnl_lock();
2895	if (netif_running(adapter->netdev)) {
2896		netdev_notice(adapter->netdev, "resetting\n");
2897		vmxnet3_quiesce_dev(adapter);
2898		vmxnet3_reset_dev(adapter);
2899		vmxnet3_activate_dev(adapter);
2900	} else {
2901		netdev_info(adapter->netdev, "already closed\n");
2902	}
2903	rtnl_unlock();
2904
2905	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2906}
2907
2908
2909static int
2910vmxnet3_probe_device(struct pci_dev *pdev,
2911		     const struct pci_device_id *id)
2912{
2913	static const struct net_device_ops vmxnet3_netdev_ops = {
2914		.ndo_open = vmxnet3_open,
2915		.ndo_stop = vmxnet3_close,
2916		.ndo_start_xmit = vmxnet3_xmit_frame,
2917		.ndo_set_mac_address = vmxnet3_set_mac_addr,
2918		.ndo_change_mtu = vmxnet3_change_mtu,
2919		.ndo_set_features = vmxnet3_set_features,
2920		.ndo_get_stats64 = vmxnet3_get_stats64,
2921		.ndo_tx_timeout = vmxnet3_tx_timeout,
2922		.ndo_set_rx_mode = vmxnet3_set_mc,
2923		.ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2924		.ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2925#ifdef CONFIG_NET_POLL_CONTROLLER
2926		.ndo_poll_controller = vmxnet3_netpoll,
2927#endif
2928	};
2929	int err;
	bool dma64 = false; /* initialized only to quiet gcc's uninitialized-use warning */
2931	u32 ver;
2932	struct net_device *netdev;
2933	struct vmxnet3_adapter *adapter;
2934	u8 mac[ETH_ALEN];
2935	int size;
2936	int num_tx_queues;
2937	int num_rx_queues;
2938
2939	if (!pci_msi_enabled())
2940		enable_mq = 0;
2941
2942#ifdef VMXNET3_RSS
2943	if (enable_mq)
2944		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2945				    (int)num_online_cpus());
2946	else
2947#endif
2948		num_rx_queues = 1;
2949	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2950
2951	if (enable_mq)
2952		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2953				    (int)num_online_cpus());
2954	else
2955		num_tx_queues = 1;
2956
2957	num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2958	netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2959				   max(num_tx_queues, num_rx_queues));
2960	dev_info(&pdev->dev,
2961		 "# of Tx queues : %d, # of Rx queues : %d\n",
2962		 num_tx_queues, num_rx_queues);
2963
2964	if (!netdev)
2965		return -ENOMEM;
2966
2967	pci_set_drvdata(pdev, netdev);
2968	adapter = netdev_priv(netdev);
2969	adapter->netdev = netdev;
2970	adapter->pdev = pdev;
2971
2972	adapter->tx_ring_size = VMXNET3_DEF_TX_RING_SIZE;
2973	adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
2974
2975	spin_lock_init(&adapter->cmd_lock);
2976	adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
2977					     sizeof(struct vmxnet3_adapter),
2978					     PCI_DMA_TODEVICE);
2979	adapter->shared = dma_alloc_coherent(
2980				&adapter->pdev->dev,
2981				sizeof(struct Vmxnet3_DriverShared),
2982				&adapter->shared_pa, GFP_KERNEL);
2983	if (!adapter->shared) {
2984		dev_err(&pdev->dev, "Failed to allocate memory\n");
2985		err = -ENOMEM;
2986		goto err_alloc_shared;
2987	}
2988
2989	adapter->num_rx_queues = num_rx_queues;
2990	adapter->num_tx_queues = num_tx_queues;
2991	adapter->rx_buf_per_pkt = 1;
2992
2993	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2994	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2995	adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
2996						&adapter->queue_desc_pa,
2997						GFP_KERNEL);
2998
2999	if (!adapter->tqd_start) {
3000		dev_err(&pdev->dev, "Failed to allocate memory\n");
3001		err = -ENOMEM;
3002		goto err_alloc_queue_desc;
3003	}
3004	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
3005							    adapter->num_tx_queues);
3006
3007	adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
3008					      sizeof(struct Vmxnet3_PMConf),
3009					      &adapter->pm_conf_pa,
3010					      GFP_KERNEL);
3011	if (adapter->pm_conf == NULL) {
3012		err = -ENOMEM;
3013		goto err_alloc_pm;
3014	}
3015
3016#ifdef VMXNET3_RSS
3017
3018	adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
3019					       sizeof(struct UPT1_RSSConf),
3020					       &adapter->rss_conf_pa,
3021					       GFP_KERNEL);
3022	if (adapter->rss_conf == NULL) {
3023		err = -ENOMEM;
3024		goto err_alloc_rss;
3025	}
3026#endif /* VMXNET3_RSS */
3027
3028	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
3029	if (err < 0)
3030		goto err_alloc_pci;
3031
3032	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3033	if (ver & 1) {
3034		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
3035	} else {
3036		dev_err(&pdev->dev,
3037			"Incompatible h/w version (0x%x) for adapter\n", ver);
3038		err = -EBUSY;
3039		goto err_ver;
3040	}
3041
3042	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3043	if (ver & 1) {
3044		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3045	} else {
3046		dev_err(&pdev->dev,
3047			"Incompatible upt version (0x%x) for adapter\n", ver);
3048		err = -EBUSY;
3049		goto err_ver;
3050	}
3051
3052	SET_NETDEV_DEV(netdev, &pdev->dev);
3053	vmxnet3_declare_features(adapter, dma64);
3054
3055	if (adapter->num_tx_queues == adapter->num_rx_queues)
3056		adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3057	else
3058		adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3059
3060	vmxnet3_alloc_intr_resources(adapter);
3061
3062#ifdef VMXNET3_RSS
3063	if (adapter->num_rx_queues > 1 &&
3064	    adapter->intr.type == VMXNET3_IT_MSIX) {
3065		adapter->rss = true;
3066		netdev->hw_features |= NETIF_F_RXHASH;
3067		netdev->features |= NETIF_F_RXHASH;
3068		dev_dbg(&pdev->dev, "RSS is enabled.\n");
3069	} else {
3070		adapter->rss = false;
3071	}
3072#endif
3073
3074	vmxnet3_read_mac_addr(adapter, mac);
3075	memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3076
3077	netdev->netdev_ops = &vmxnet3_netdev_ops;
3078	vmxnet3_set_ethtool_ops(netdev);
3079	netdev->watchdog_timeo = 5 * HZ;
3080
3081	INIT_WORK(&adapter->work, vmxnet3_reset_work);
3082	set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3083
3084	if (adapter->intr.type == VMXNET3_IT_MSIX) {
3085		int i;
3086		for (i = 0; i < adapter->num_rx_queues; i++) {
3087			netif_napi_add(adapter->netdev,
3088				       &adapter->rx_queue[i].napi,
3089				       vmxnet3_poll_rx_only, 64);
3090		}
3091	} else {
3092		netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3093			       vmxnet3_poll, 64);
3094	}
3095
3096	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3097	netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3098
3099	netif_carrier_off(netdev);
3100	err = register_netdev(netdev);
3101
3102	if (err) {
3103		dev_err(&pdev->dev, "Failed to register adapter\n");
3104		goto err_register;
3105	}
3106
3107	vmxnet3_check_link(adapter, false);
3108	return 0;
3109
3110err_register:
3111	vmxnet3_free_intr_resources(adapter);
3112err_ver:
3113	vmxnet3_free_pci_resources(adapter);
3114err_alloc_pci:
3115#ifdef VMXNET3_RSS
3116	dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3117			  adapter->rss_conf, adapter->rss_conf_pa);
3118err_alloc_rss:
3119#endif
3120	dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3121			  adapter->pm_conf, adapter->pm_conf_pa);
3122err_alloc_pm:
3123	dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3124			  adapter->queue_desc_pa);
3125err_alloc_queue_desc:
3126	dma_free_coherent(&adapter->pdev->dev,
3127			  sizeof(struct Vmxnet3_DriverShared),
3128			  adapter->shared, adapter->shared_pa);
3129err_alloc_shared:
3130	dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3131			 sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3132	free_netdev(netdev);
3133	return err;
3134}
3135
3136
3137static void
3138vmxnet3_remove_device(struct pci_dev *pdev)
3139{
3140	struct net_device *netdev = pci_get_drvdata(pdev);
3141	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3142	int size = 0;
3143	int num_rx_queues;
3144
3145#ifdef VMXNET3_RSS
3146	if (enable_mq)
3147		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3148				    (int)num_online_cpus());
3149	else
3150#endif
3151		num_rx_queues = 1;
3152	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3153
3154	cancel_work_sync(&adapter->work);
3155
3156	unregister_netdev(netdev);
3157
3158	vmxnet3_free_intr_resources(adapter);
3159	vmxnet3_free_pci_resources(adapter);
3160#ifdef VMXNET3_RSS
3161	dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3162			  adapter->rss_conf, adapter->rss_conf_pa);
3163#endif
3164	dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3165			  adapter->pm_conf, adapter->pm_conf_pa);
3166
3167	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3168	size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3169	dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3170			  adapter->queue_desc_pa);
3171	dma_free_coherent(&adapter->pdev->dev,
3172			  sizeof(struct Vmxnet3_DriverShared),
3173			  adapter->shared, adapter->shared_pa);
3174	dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3175			 sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3176	free_netdev(netdev);
3177}
3178
3179
3180#ifdef CONFIG_PM
3181
3182static int
3183vmxnet3_suspend(struct device *device)
3184{
3185	struct pci_dev *pdev = to_pci_dev(device);
3186	struct net_device *netdev = pci_get_drvdata(pdev);
3187	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3188	struct Vmxnet3_PMConf *pmConf;
3189	struct ethhdr *ehdr;
3190	struct arphdr *ahdr;
3191	u8 *arpreq;
3192	struct in_device *in_dev;
3193	struct in_ifaddr *ifa;
3194	unsigned long flags;
3195	int i = 0;
3196
3197	if (!netif_running(netdev))
3198		return 0;
3199
3200	for (i = 0; i < adapter->num_rx_queues; i++)
3201		napi_disable(&adapter->rx_queue[i].napi);
3202
3203	vmxnet3_disable_all_intrs(adapter);
3204	vmxnet3_free_irqs(adapter);
3205	vmxnet3_free_intr_resources(adapter);
3206
3207	netif_device_detach(netdev);
3208	netif_tx_stop_all_queues(netdev);
3209
3210	/* Create wake-up filters. */
3211	pmConf = adapter->pm_conf;
3212	memset(pmConf, 0, sizeof(*pmConf));
3213
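	/* Each bit of filters[i].mask[] appears to cover one byte of
	 * filters[i].pattern (bit k of mask[n] maps to pattern byte n * 8 + k),
	 * which is why maskSize is computed as (patternSize - 1) / 8 + 1.
	 */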
3214	if (adapter->wol & WAKE_UCAST) {
3215		pmConf->filters[i].patternSize = ETH_ALEN;
3216		pmConf->filters[i].maskSize = 1;
3217		memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3218		pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3219
3220		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3221		i++;
3222	}
3223
3224	if (adapter->wol & WAKE_ARP) {
3225		in_dev = in_dev_get(netdev);
3226		if (!in_dev)
3227			goto skip_arp;
3228
3229		ifa = (struct in_ifaddr *)in_dev->ifa_list;
3230		if (!ifa)
3231			goto skip_arp;
3232
3233		pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3234			sizeof(struct arphdr) +		/* ARP header */
3235			2 * ETH_ALEN +		/* 2 Ethernet addresses*/
3236			2 * sizeof(u32);	/*2 IPv4 addresses */
3237		pmConf->filters[i].maskSize =
3238			(pmConf->filters[i].patternSize - 1) / 8 + 1;
3239
3240		/* ETH_P_ARP in Ethernet header. */
3241		ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3242		ehdr->h_proto = htons(ETH_P_ARP);
3243
3244		/* ARPOP_REQUEST in ARP header. */
3245		ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3246		ahdr->ar_op = htons(ARPOP_REQUEST);
3247		arpreq = (u8 *)(ahdr + 1);
3248
3249		/* The Unicast IPv4 address in 'tip' field. */
3250		arpreq += 2 * ETH_ALEN + sizeof(u32);
3251		*(u32 *)arpreq = ifa->ifa_address;
3252
3253		/* The mask for the relevant bits. */
3254		pmConf->filters[i].mask[0] = 0x00;
3255		pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3256		pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3257		pmConf->filters[i].mask[3] = 0x00;
3258		pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3259		pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3260		in_dev_put(in_dev);
3261
3262		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3263		i++;
3264	}
3265
3266skip_arp:
3267	if (adapter->wol & WAKE_MAGIC)
3268		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3269
3270	pmConf->numFilters = i;
3271
3272	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3273	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3274								  *pmConf));
3275	adapter->shared->devRead.pmConfDesc.confPA =
3276		cpu_to_le64(adapter->pm_conf_pa);
3277
3278	spin_lock_irqsave(&adapter->cmd_lock, flags);
3279	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3280			       VMXNET3_CMD_UPDATE_PMCFG);
3281	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3282
3283	pci_save_state(pdev);
3284	pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3285			adapter->wol);
3286	pci_disable_device(pdev);
3287	pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3288
3289	return 0;
3290}
3291
3292
3293static int
3294vmxnet3_resume(struct device *device)
3295{
3296	int err, i = 0;
3297	unsigned long flags;
3298	struct pci_dev *pdev = to_pci_dev(device);
3299	struct net_device *netdev = pci_get_drvdata(pdev);
3300	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3301	struct Vmxnet3_PMConf *pmConf;
3302
3303	if (!netif_running(netdev))
3304		return 0;
3305
3306	/* Destroy wake-up filters. */
3307	pmConf = adapter->pm_conf;
3308	memset(pmConf, 0, sizeof(*pmConf));
3309
3310	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3311	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3312								  *pmConf));
3313	adapter->shared->devRead.pmConfDesc.confPA =
3314		cpu_to_le64(adapter->pm_conf_pa);
3315
3316	netif_device_attach(netdev);
3317	pci_set_power_state(pdev, PCI_D0);
3318	pci_restore_state(pdev);
3319	err = pci_enable_device_mem(pdev);
3320	if (err != 0)
3321		return err;
3322
3323	pci_enable_wake(pdev, PCI_D0, 0);
3324
3325	spin_lock_irqsave(&adapter->cmd_lock, flags);
3326	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3327			       VMXNET3_CMD_UPDATE_PMCFG);
3328	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3329	vmxnet3_alloc_intr_resources(adapter);
3330	vmxnet3_request_irqs(adapter);
3331	for (i = 0; i < adapter->num_rx_queues; i++)
3332		napi_enable(&adapter->rx_queue[i].napi);
3333	vmxnet3_enable_all_intrs(adapter);
3334
3335	return 0;
3336}
3337
3338static const struct dev_pm_ops vmxnet3_pm_ops = {
3339	.suspend = vmxnet3_suspend,
3340	.resume = vmxnet3_resume,
3341};
3342#endif
3343
3344static struct pci_driver vmxnet3_driver = {
3345	.name		= vmxnet3_driver_name,
3346	.id_table	= vmxnet3_pciid_table,
3347	.probe		= vmxnet3_probe_device,
3348	.remove		= vmxnet3_remove_device,
3349#ifdef CONFIG_PM
3350	.driver.pm	= &vmxnet3_pm_ops,
3351#endif
3352};
3353
3354
3355static int __init
3356vmxnet3_init_module(void)
3357{
3358	pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3359		VMXNET3_DRIVER_VERSION_REPORT);
3360	return pci_register_driver(&vmxnet3_driver);
3361}
3362
3363module_init(vmxnet3_init_module);
3364
3365
3366static void
3367vmxnet3_exit_module(void)
3368{
3369	pci_unregister_driver(&vmxnet3_driver);
3370}
3371
3372module_exit(vmxnet3_exit_module);
3373
3374MODULE_AUTHOR("VMware, Inc.");
3375MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3376MODULE_LICENSE("GPL v2");
3377MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3378