vmxnet3_drv.c revision 86a9bad3ab6b6f858fd4443b48738cabbb6d094c
1/*
2 * Linux driver for VMware's vmxnet3 ethernet NIC.
3 *
4 * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; version 2 of the License and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 * NON INFRINGEMENT. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * The full GNU General Public License is included in this distribution in
21 * the file called "COPYING".
22 *
23 * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24 *
25 */
26
27#include <linux/module.h>
28#include <net/ip6_checksum.h>
29
30#include "vmxnet3_int.h"
31
32char vmxnet3_driver_name[] = "vmxnet3";
33#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34
35/*
36 * PCI Device ID Table
37 * Last entry must be all 0s
38 */
39static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
40	{PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41	{0}
42};
43
44MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45
46static int enable_mq = 1;
47
48static void
49vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50
51/*
52 *    Enable/Disable the given intr
53 */
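/*
 * Each intr vector has its own IMR register in BAR0, spaced 8 bytes apart
 * starting at VMXNET3_REG_IMR; writing 0 unmasks (enables) the vector and
 * writing 1 masks (disables) it.
 */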
54static void
55vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56{
57	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58}
59
60
61static void
62vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63{
64	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65}
66
67
68/*
69 *    Enable/Disable all intrs used by the device
70 */
71static void
72vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73{
74	int i;
75
76	for (i = 0; i < adapter->intr.num_intrs; i++)
77		vmxnet3_enable_intr(adapter, i);
78	adapter->shared->devRead.intrConf.intrCtrl &=
79					cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80}
81
82
83static void
84vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85{
86	int i;
87
88	adapter->shared->devRead.intrConf.intrCtrl |=
89					cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90	for (i = 0; i < adapter->intr.num_intrs; i++)
91		vmxnet3_disable_intr(adapter, i);
92}
93
94
95static void
96vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97{
98	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99}
100
101
102static bool
103vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104{
105	return tq->stopped;
106}
107
108
109static void
110vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111{
112	tq->stopped = false;
113	netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114}
115
116
117static void
118vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119{
120	tq->stopped = false;
121	netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122}
123
124
125static void
126vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127{
128	tq->stopped = true;
129	tq->num_stop++;
130	netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131}
132
133
134/*
135 * Check the link state. This may start or stop the tx queue.
136 */
137static void
138vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139{
140	u32 ret;
141	int i;
142	unsigned long flags;
143
144	spin_lock_irqsave(&adapter->cmd_lock, flags);
145	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148
149	adapter->link_speed = ret >> 16;
150	if (ret & 1) { /* Link is up. */
151		netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152			    adapter->link_speed);
153		netif_carrier_on(adapter->netdev);
154
155		if (affectTxQueue) {
156			for (i = 0; i < adapter->num_tx_queues; i++)
157				vmxnet3_tq_start(&adapter->tx_queue[i],
158						 adapter);
159		}
160	} else {
161		netdev_info(adapter->netdev, "NIC Link is Down\n");
162		netif_carrier_off(adapter->netdev);
163
164		if (affectTxQueue) {
165			for (i = 0; i < adapter->num_tx_queues; i++)
166				vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
167		}
168	}
169}
170
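/*
 * Ack the events posted in the shared area and handle them: a link event
 * triggers a link-state check, and tx/rx queue errors are logged before
 * adapter->work is scheduled so they can be handled outside irq context.
 */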
171static void
172vmxnet3_process_events(struct vmxnet3_adapter *adapter)
173{
174	int i;
175	unsigned long flags;
176	u32 events = le32_to_cpu(adapter->shared->ecr);
177	if (!events)
178		return;
179
180	vmxnet3_ack_events(adapter, events);
181
182	/* Check if link state has changed */
183	if (events & VMXNET3_ECR_LINK)
184		vmxnet3_check_link(adapter, true);
185
186	/* Check if there is an error on xmit/recv queues */
187	if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
188		spin_lock_irqsave(&adapter->cmd_lock, flags);
189		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
190				       VMXNET3_CMD_GET_QUEUE_STATUS);
191		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
192
193		for (i = 0; i < adapter->num_tx_queues; i++)
194			if (adapter->tqd_start[i].status.stopped)
195				dev_err(&adapter->netdev->dev,
196					"%s: tq[%d] error 0x%x\n",
197					adapter->netdev->name, i, le32_to_cpu(
198					adapter->tqd_start[i].status.error));
199		for (i = 0; i < adapter->num_rx_queues; i++)
200			if (adapter->rqd_start[i].status.stopped)
201				dev_err(&adapter->netdev->dev,
202					"%s: rq[%d] error 0x%x\n",
203					adapter->netdev->name, i,
204					adapter->rqd_start[i].status.error);
205
206		schedule_work(&adapter->work);
207	}
208}
209
210#ifdef __BIG_ENDIAN_BITFIELD
211/*
212 * The device expects the bitfields in shared structures to be written in
213 * little endian. When the CPU is big endian, the following routines are used
214 * to read from and write to the ABI correctly.
215 * The general technique used here is: double-word bitfields are defined in
216 * the opposite order for big-endian architectures. Before the driver reads
217 * them, the complete double word is translated using le32_to_cpu. Similarly,
218 * after the driver writes into the bitfields, cpu_to_le32 is used to translate
219 * the double words into the required format.
220 * To avoid touching bits in the shared structure more than once, temporary
221 * descriptors are used. These are passed as srcDesc to the functions below.
222 */
223static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
224				struct Vmxnet3_RxDesc *dstDesc)
225{
226	u32 *src = (u32 *)srcDesc + 2;
227	u32 *dst = (u32 *)dstDesc + 2;
228	dstDesc->addr = le64_to_cpu(srcDesc->addr);
229	*dst = le32_to_cpu(*src);
230	dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
231}
232
233static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
234			       struct Vmxnet3_TxDesc *dstDesc)
235{
236	int i;
237	u32 *src = (u32 *)(srcDesc + 1);
238	u32 *dst = (u32 *)(dstDesc + 1);
239
240	/* Working backwards so that the gen bit is set at the end. */
241	for (i = 2; i > 0; i--) {
242		src--;
243		dst--;
244		*dst = cpu_to_le32(*src);
245	}
246}
247
248
249static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
250				struct Vmxnet3_RxCompDesc *dstDesc)
251{
252	int i = 0;
253	u32 *src = (u32 *)srcDesc;
254	u32 *dst = (u32 *)dstDesc;
255	for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
256		*dst = le32_to_cpu(*src);
257		src++;
258		dst++;
259	}
260}
261
262
263/* Used to read bitfield values from double words. */
264static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
265{
266	u32 temp = le32_to_cpu(*bitfield);
267	u32 mask = ((1 << size) - 1) << pos;
268	temp &= mask;
269	temp >>= pos;
270	return temp;
271}
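/*
 * For example, the big-endian accessor macros below extract the Tx descriptor
 * generation bit as
 *	get_bitfield32((const __le32 *)txdesc + VMXNET3_TXD_GEN_DWORD_SHIFT,
 *		       VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE);
 */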
272
273
274
275#endif  /* __BIG_ENDIAN_BITFIELD */
276
277#ifdef __BIG_ENDIAN_BITFIELD
278
279#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
280			txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
281			VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
282#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
283			txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
284			VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
285#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
286			VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
287			VMXNET3_TCD_GEN_SIZE)
288#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
289			VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
290#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
291			(dstrcd) = (tmp); \
292			vmxnet3_RxCompToCPU((rcd), (tmp)); \
293		} while (0)
294#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
295			(dstrxd) = (tmp); \
296			vmxnet3_RxDescToCPU((rxd), (tmp)); \
297		} while (0)
298
299#else
300
301#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
302#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
303#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
304#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
305#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
306#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
307
308#endif /* __BIG_ENDIAN_BITFIELD  */
309
310
311static void
312vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
313		     struct pci_dev *pdev)
314{
315	if (tbi->map_type == VMXNET3_MAP_SINGLE)
316		pci_unmap_single(pdev, tbi->dma_addr, tbi->len,
317				 PCI_DMA_TODEVICE);
318	else if (tbi->map_type == VMXNET3_MAP_PAGE)
319		pci_unmap_page(pdev, tbi->dma_addr, tbi->len,
320			       PCI_DMA_TODEVICE);
321	else
322		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
323
324	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
325}
326
327
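/*
 * Reclaim all tx ring entries of the completed packet whose EOP descriptor
 * sits at eop_idx: unmap every buffer from the packet's SOP through its EOP,
 * free the skb stored at the EOP entry and advance next2comp past the packet.
 * Returns the number of ring entries reclaimed.
 */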
328static int
329vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
330		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter)
331{
332	struct sk_buff *skb;
333	int entries = 0;
334
335	/* no out of order completion */
336	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
337	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
338
339	skb = tq->buf_info[eop_idx].skb;
340	BUG_ON(skb == NULL);
341	tq->buf_info[eop_idx].skb = NULL;
342
343	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
344
345	while (tq->tx_ring.next2comp != eop_idx) {
346		vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
347				     pdev);
348
349		/* update next2comp w/o tx_lock. Since we are marking more,
350		 * not fewer, tx ring entries as available, the worst case is
351		 * that the tx routine incorrectly re-queues a pkt due to
352		 * insufficient tx ring entries.
353		 */
354		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
355		entries++;
356	}
357
358	dev_kfree_skb_any(skb);
359	return entries;
360}
361
362
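/*
 * Walk the tx completion ring and reclaim the descriptors of every completed
 * packet. If the queue was stopped and more than VMXNET3_WAKE_QUEUE_THRESHOLD
 * entries are available again (and the carrier is up), wake the subqueue.
 */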
363static int
364vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
365			struct vmxnet3_adapter *adapter)
366{
367	int completed = 0;
368	union Vmxnet3_GenericDesc *gdesc;
369
370	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
371	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
372		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
373					       &gdesc->tcd), tq, adapter->pdev,
374					       adapter);
375
376		vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
377		gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
378	}
379
380	if (completed) {
381		spin_lock(&tq->tx_lock);
382		if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
383			     vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
384			     VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
385			     netif_carrier_ok(adapter->netdev))) {
386			vmxnet3_tq_wake(tq, adapter);
387		}
388		spin_unlock(&tq->tx_lock);
389	}
390	return completed;
391}
392
393
394static void
395vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
396		   struct vmxnet3_adapter *adapter)
397{
398	int i;
399
400	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
401		struct vmxnet3_tx_buf_info *tbi;
402
403		tbi = tq->buf_info + tq->tx_ring.next2comp;
404
405		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
406		if (tbi->skb) {
407			dev_kfree_skb_any(tbi->skb);
408			tbi->skb = NULL;
409		}
410		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
411	}
412
413	/* sanity check, verify all buffers are indeed unmapped and freed */
414	for (i = 0; i < tq->tx_ring.size; i++) {
415		BUG_ON(tq->buf_info[i].skb != NULL ||
416		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
417	}
418
419	tq->tx_ring.gen = VMXNET3_INIT_GEN;
420	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
421
422	tq->comp_ring.gen = VMXNET3_INIT_GEN;
423	tq->comp_ring.next2proc = 0;
424}
425
426
427static void
428vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
429		   struct vmxnet3_adapter *adapter)
430{
431	if (tq->tx_ring.base) {
432		pci_free_consistent(adapter->pdev, tq->tx_ring.size *
433				    sizeof(struct Vmxnet3_TxDesc),
434				    tq->tx_ring.base, tq->tx_ring.basePA);
435		tq->tx_ring.base = NULL;
436	}
437	if (tq->data_ring.base) {
438		pci_free_consistent(adapter->pdev, tq->data_ring.size *
439				    sizeof(struct Vmxnet3_TxDataDesc),
440				    tq->data_ring.base, tq->data_ring.basePA);
441		tq->data_ring.base = NULL;
442	}
443	if (tq->comp_ring.base) {
444		pci_free_consistent(adapter->pdev, tq->comp_ring.size *
445				    sizeof(struct Vmxnet3_TxCompDesc),
446				    tq->comp_ring.base, tq->comp_ring.basePA);
447		tq->comp_ring.base = NULL;
448	}
449	kfree(tq->buf_info);
450	tq->buf_info = NULL;
451}
452
453
454/* Destroy all tx queues */
455void
456vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
457{
458	int i;
459
460	for (i = 0; i < adapter->num_tx_queues; i++)
461		vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
462}
463
464
465static void
466vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
467		struct vmxnet3_adapter *adapter)
468{
469	int i;
470
471	/* reset the tx ring contents to 0 and reset the tx ring states */
472	memset(tq->tx_ring.base, 0, tq->tx_ring.size *
473	       sizeof(struct Vmxnet3_TxDesc));
474	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
475	tq->tx_ring.gen = VMXNET3_INIT_GEN;
476
477	memset(tq->data_ring.base, 0, tq->data_ring.size *
478	       sizeof(struct Vmxnet3_TxDataDesc));
479
480	/* reset the tx comp ring contents to 0 and reset comp ring states */
481	memset(tq->comp_ring.base, 0, tq->comp_ring.size *
482	       sizeof(struct Vmxnet3_TxCompDesc));
483	tq->comp_ring.next2proc = 0;
484	tq->comp_ring.gen = VMXNET3_INIT_GEN;
485
486	/* reset the bookkeeping data */
487	memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
488	for (i = 0; i < tq->tx_ring.size; i++)
489		tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
490
491	/* stats are not reset */
492}
493
494
495static int
496vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
497		  struct vmxnet3_adapter *adapter)
498{
499	BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
500	       tq->comp_ring.base || tq->buf_info);
501
502	tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size
503			   * sizeof(struct Vmxnet3_TxDesc),
504			   &tq->tx_ring.basePA);
505	if (!tq->tx_ring.base) {
506		netdev_err(adapter->netdev, "failed to allocate tx ring\n");
507		goto err;
508	}
509
510	tq->data_ring.base = pci_alloc_consistent(adapter->pdev,
511			     tq->data_ring.size *
512			     sizeof(struct Vmxnet3_TxDataDesc),
513			     &tq->data_ring.basePA);
514	if (!tq->data_ring.base) {
515		netdev_err(adapter->netdev, "failed to allocate data ring\n");
516		goto err;
517	}
518
519	tq->comp_ring.base = pci_alloc_consistent(adapter->pdev,
520			     tq->comp_ring.size *
521			     sizeof(struct Vmxnet3_TxCompDesc),
522			     &tq->comp_ring.basePA);
523	if (!tq->comp_ring.base) {
524		netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
525		goto err;
526	}
527
528	tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]),
529			       GFP_KERNEL);
530	if (!tq->buf_info)
531		goto err;
532
533	return 0;
534
535err:
536	vmxnet3_tq_destroy(tq, adapter);
537	return -ENOMEM;
538}
539
540static void
541vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
542{
543	int i;
544
545	for (i = 0; i < adapter->num_tx_queues; i++)
546		vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
547}
548
549/*
550 *    Starting from ring->next2fill, allocate rx buffers for the given ring
551 *    of the rx queue and update the rx descriptors. Stop after @num_to_alloc
552 *    buffers are allocated or an allocation fails.
553 */
554
555static int
556vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
557			int num_to_alloc, struct vmxnet3_adapter *adapter)
558{
559	int num_allocated = 0;
560	struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
561	struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
562	u32 val;
563
564	while (num_allocated <= num_to_alloc) {
565		struct vmxnet3_rx_buf_info *rbi;
566		union Vmxnet3_GenericDesc *gd;
567
568		rbi = rbi_base + ring->next2fill;
569		gd = ring->base + ring->next2fill;
570
571		if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
572			if (rbi->skb == NULL) {
573				rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
574								       rbi->len,
575								       GFP_KERNEL);
576				if (unlikely(rbi->skb == NULL)) {
577					rq->stats.rx_buf_alloc_failure++;
578					break;
579				}
580
581				rbi->dma_addr = pci_map_single(adapter->pdev,
582						rbi->skb->data, rbi->len,
583						PCI_DMA_FROMDEVICE);
584			} else {
585				/* rx buffer skipped by the device */
586			}
587			val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
588		} else {
589			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
590			       rbi->len  != PAGE_SIZE);
591
592			if (rbi->page == NULL) {
593				rbi->page = alloc_page(GFP_ATOMIC);
594				if (unlikely(rbi->page == NULL)) {
595					rq->stats.rx_buf_alloc_failure++;
596					break;
597				}
598				rbi->dma_addr = pci_map_page(adapter->pdev,
599						rbi->page, 0, PAGE_SIZE,
600						PCI_DMA_FROMDEVICE);
601			} else {
602				/* rx buffers skipped by the device */
603			}
604			val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
605		}
606
607		BUG_ON(rbi->dma_addr == 0);
608		gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
609		gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
610					   | val | rbi->len);
611
612		/* Fill the last buffer but don't mark it ready, or else the
613		 * device will think that the queue is full */
614		if (num_allocated == num_to_alloc)
615			break;
616
617		gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
618		num_allocated++;
619		vmxnet3_cmd_ring_adv_next2fill(ring);
620	}
621
622	netdev_dbg(adapter->netdev,
623		"alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
624		num_allocated, ring->next2fill, ring->next2comp);
625
626	/* so that the device can distinguish a full ring from an empty ring */
627	BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
628
629	return num_allocated;
630}
631
632
633static void
634vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
635		    struct vmxnet3_rx_buf_info *rbi)
636{
637	struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
638		skb_shinfo(skb)->nr_frags;
639
640	BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
641
642	__skb_frag_set_page(frag, rbi->page);
643	frag->page_offset = 0;
644	skb_frag_size_set(frag, rcd->len);
645	skb->data_len += rcd->len;
646	skb->truesize += PAGE_SIZE;
647	skb_shinfo(skb)->nr_frags++;
648}
649
650
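/*
 * Fill tx descriptors for the packet: an optional SOP descriptor pointing at
 * the copied headers in the data ring, one or more descriptors for the rest
 * of the linear part, and one or more per page fragment. The SOP descriptor
 * is written with the previous generation bit so the device ignores the
 * packet until vmxnet3_tq_xmit() flips it.
 */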
651static void
652vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
653		struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
654		struct vmxnet3_adapter *adapter)
655{
656	u32 dw2, len;
657	unsigned long buf_offset;
658	int i;
659	union Vmxnet3_GenericDesc *gdesc;
660	struct vmxnet3_tx_buf_info *tbi = NULL;
661
662	BUG_ON(ctx->copy_size > skb_headlen(skb));
663
664	/* use the previous gen bit for the SOP desc */
665	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
666
667	ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
668	gdesc = ctx->sop_txd; /* both loops below can be skipped */
669
670	/* no need to map the buffer if headers are copied */
671	if (ctx->copy_size) {
672		ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
673					tq->tx_ring.next2fill *
674					sizeof(struct Vmxnet3_TxDataDesc));
675		ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
676		ctx->sop_txd->dword[3] = 0;
677
678		tbi = tq->buf_info + tq->tx_ring.next2fill;
679		tbi->map_type = VMXNET3_MAP_NONE;
680
681		netdev_dbg(adapter->netdev,
682			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
683			tq->tx_ring.next2fill,
684			le64_to_cpu(ctx->sop_txd->txd.addr),
685			ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
686		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
687
688		/* use the right gen for non-SOP desc */
689		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
690	}
691
692	/* linear part can use multiple tx desc if it's big */
693	len = skb_headlen(skb) - ctx->copy_size;
694	buf_offset = ctx->copy_size;
695	while (len) {
696		u32 buf_size;
697
698		if (len < VMXNET3_MAX_TX_BUF_SIZE) {
699			buf_size = len;
700			dw2 |= len;
701		} else {
702			buf_size = VMXNET3_MAX_TX_BUF_SIZE;
703			/* spec says that for TxDesc.len, 0 == 2^14 */
704		}
705
706		tbi = tq->buf_info + tq->tx_ring.next2fill;
707		tbi->map_type = VMXNET3_MAP_SINGLE;
708		tbi->dma_addr = pci_map_single(adapter->pdev,
709				skb->data + buf_offset, buf_size,
710				PCI_DMA_TODEVICE);
711
712		tbi->len = buf_size;
713
714		gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
715		BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
716
717		gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
718		gdesc->dword[2] = cpu_to_le32(dw2);
719		gdesc->dword[3] = 0;
720
721		netdev_dbg(adapter->netdev,
722			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
723			tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
724			le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
725		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
726		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
727
728		len -= buf_size;
729		buf_offset += buf_size;
730	}
731
732	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
733		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
734		u32 buf_size;
735
736		buf_offset = 0;
737		len = skb_frag_size(frag);
738		while (len) {
739			tbi = tq->buf_info + tq->tx_ring.next2fill;
740			if (len < VMXNET3_MAX_TX_BUF_SIZE) {
741				buf_size = len;
742				dw2 |= len;
743			} else {
744				buf_size = VMXNET3_MAX_TX_BUF_SIZE;
745				/* spec says that for TxDesc.len, 0 == 2^14 */
746			}
747			tbi->map_type = VMXNET3_MAP_PAGE;
748			tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
749							 buf_offset, buf_size,
750							 DMA_TO_DEVICE);
751
752			tbi->len = buf_size;
753
754			gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
755			BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
756
757			gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
758			gdesc->dword[2] = cpu_to_le32(dw2);
759			gdesc->dword[3] = 0;
760
761			netdev_dbg(adapter->netdev,
762				"txd[%u]: 0x%llu %u %u\n",
763				tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
764				le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
765			vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
766			dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
767
768			len -= buf_size;
769			buf_offset += buf_size;
770		}
771	}
772
773	ctx->eop_txd = gdesc;
774
775	/* set the last buf_info for the pkt */
776	tbi->skb = skb;
777	tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
778}
779
780
781/* Init all tx queues */
782static void
783vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
784{
785	int i;
786
787	for (i = 0; i < adapter->num_tx_queues; i++)
788		vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
789}
790
791
792/*
793 *    Parse and copy the relevant protocol headers:
794 *      For a TSO pkt, the relevant headers are L2/3/4 including options
795 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
796 *      if it's a TCP/UDP pkt
797 *
798 * Returns:
799 *    -1:  an error occurred during parsing
800 *     0:  protocol headers parsed, but too big to be copied
801 *     1:  protocol headers parsed and copied
802 *
803 * Other effects:
804 *    1. related *ctx fields are updated.
805 *    2. ctx->copy_size is # of bytes copied
806 *    3. the portion copied is guaranteed to be in the linear part
807 *
808 */
809static int
810vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
811			   struct vmxnet3_tx_ctx *ctx,
812			   struct vmxnet3_adapter *adapter)
813{
814	struct Vmxnet3_TxDataDesc *tdd;
815
816	if (ctx->mss) {	/* TSO */
817		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
818		ctx->l4_hdr_size = tcp_hdrlen(skb);
819		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
820	} else {
821		if (skb->ip_summed == CHECKSUM_PARTIAL) {
822			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
823
824			if (ctx->ipv4) {
825				const struct iphdr *iph = ip_hdr(skb);
826
827				if (iph->protocol == IPPROTO_TCP)
828					ctx->l4_hdr_size = tcp_hdrlen(skb);
829				else if (iph->protocol == IPPROTO_UDP)
830					ctx->l4_hdr_size = sizeof(struct udphdr);
831				else
832					ctx->l4_hdr_size = 0;
833			} else {
834				/* for simplicity, don't copy L4 headers */
835				ctx->l4_hdr_size = 0;
836			}
837			ctx->copy_size = min(ctx->eth_ip_hdr_size +
838					 ctx->l4_hdr_size, skb->len);
839		} else {
840			ctx->eth_ip_hdr_size = 0;
841			ctx->l4_hdr_size = 0;
842			/* copy as much as allowed */
843			ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
844					     skb_headlen(skb));
845		}
846
847		/* make sure headers are accessible directly */
848		if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
849			goto err;
850	}
851
852	if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
853		tq->stats.oversized_hdr++;
854		ctx->copy_size = 0;
855		return 0;
856	}
857
858	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
859
860	memcpy(tdd->data, skb->data, ctx->copy_size);
861	netdev_dbg(adapter->netdev,
862		"copy %u bytes to dataRing[%u]\n",
863		ctx->copy_size, tq->tx_ring.next2fill);
864	return 1;
865
866err:
867	return -1;
868}
869
870
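/*
 * Prepare a TSO skb for the device: clear the IPv4 header checksum and seed
 * the TCP checksum with the pseudo-header checksum (computed with a zero
 * length) so the device can finish it for each segment.
 */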
871static void
872vmxnet3_prepare_tso(struct sk_buff *skb,
873		    struct vmxnet3_tx_ctx *ctx)
874{
875	struct tcphdr *tcph = tcp_hdr(skb);
876
877	if (ctx->ipv4) {
878		struct iphdr *iph = ip_hdr(skb);
879
880		iph->check = 0;
881		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
882						 IPPROTO_TCP, 0);
883	} else {
884		struct ipv6hdr *iph = ipv6_hdr(skb);
885
886		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
887					       IPPROTO_TCP, 0);
888	}
889}
890
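/* Estimate the number of tx descriptors the skb needs: one per
 * VMXNET3_MAX_TX_BUF_SIZE chunk of the linear part and of each page frag,
 * plus one extra, presumably for the data-ring (copied header) descriptor
 * added by vmxnet3_map_pkt().
 */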
891static int txd_estimate(const struct sk_buff *skb)
892{
893	int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
894	int i;
895
896	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
897		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
898
899		count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
900	}
901	return count;
902}
903
904/*
905 * Transmits a pkt through a given tq
906 * Returns:
907 *    NETDEV_TX_OK:      descriptors are set up successfully
908 *    NETDEV_TX_OK:      an error occurred, the pkt is dropped
909 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
910 *
911 * Side-effects:
912 *    1. tx ring may be changed
913 *    2. tq stats may be updated accordingly
914 *    3. shared->txNumDeferred may be updated
915 */
916
917static int
918vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
919		struct vmxnet3_adapter *adapter, struct net_device *netdev)
920{
921	int ret;
922	u32 count;
923	unsigned long flags;
924	struct vmxnet3_tx_ctx ctx;
925	union Vmxnet3_GenericDesc *gdesc;
926#ifdef __BIG_ENDIAN_BITFIELD
927	/* Use temporary descriptor to avoid touching bits multiple times */
928	union Vmxnet3_GenericDesc tempTxDesc;
929#endif
930
931	count = txd_estimate(skb);
932
933	ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
934
935	ctx.mss = skb_shinfo(skb)->gso_size;
936	if (ctx.mss) {
937		if (skb_header_cloned(skb)) {
938			if (unlikely(pskb_expand_head(skb, 0, 0,
939						      GFP_ATOMIC) != 0)) {
940				tq->stats.drop_tso++;
941				goto drop_pkt;
942			}
943			tq->stats.copy_skb_header++;
944		}
945		vmxnet3_prepare_tso(skb, &ctx);
946	} else {
947		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
948
949			/* non-tso pkts must not use more than
950			 * VMXNET3_MAX_TXD_PER_PKT entries
951			 */
952			if (skb_linearize(skb) != 0) {
953				tq->stats.drop_too_many_frags++;
954				goto drop_pkt;
955			}
956			tq->stats.linearized++;
957
958			/* recalculate the # of descriptors to use */
959			count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
960		}
961	}
962
963	spin_lock_irqsave(&tq->tx_lock, flags);
964
965	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
966		tq->stats.tx_ring_full++;
967		netdev_dbg(adapter->netdev,
968			"tx queue stopped on %s, next2comp %u"
969			" next2fill %u\n", adapter->netdev->name,
970			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
971
972		vmxnet3_tq_stop(tq, adapter);
973		spin_unlock_irqrestore(&tq->tx_lock, flags);
974		return NETDEV_TX_BUSY;
975	}
976
977
978	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
979	if (ret >= 0) {
980		BUG_ON(ret <= 0 && ctx.copy_size != 0);
981		/* hdrs parsed, check against other limits */
982		if (ctx.mss) {
983			if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
984				     VMXNET3_MAX_TX_BUF_SIZE)) {
985				goto hdr_too_big;
986			}
987		} else {
988			if (skb->ip_summed == CHECKSUM_PARTIAL) {
989				if (unlikely(ctx.eth_ip_hdr_size +
990					     skb->csum_offset >
991					     VMXNET3_MAX_CSUM_OFFSET)) {
992					goto hdr_too_big;
993				}
994			}
995		}
996	} else {
997		tq->stats.drop_hdr_inspect_err++;
998		goto unlock_drop_pkt;
999	}
1000
1001	/* fill tx descs related to addr & len */
1002	vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1003
1004	/* setup the EOP desc */
1005	ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1006
1007	/* setup the SOP desc */
1008#ifdef __BIG_ENDIAN_BITFIELD
1009	gdesc = &tempTxDesc;
1010	gdesc->dword[2] = ctx.sop_txd->dword[2];
1011	gdesc->dword[3] = ctx.sop_txd->dword[3];
1012#else
1013	gdesc = ctx.sop_txd;
1014#endif
1015	if (ctx.mss) {
1016		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1017		gdesc->txd.om = VMXNET3_OM_TSO;
1018		gdesc->txd.msscof = ctx.mss;
1019		le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1020			     gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1021	} else {
1022		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1023			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1024			gdesc->txd.om = VMXNET3_OM_CSUM;
1025			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1026					    skb->csum_offset;
1027		} else {
1028			gdesc->txd.om = 0;
1029			gdesc->txd.msscof = 0;
1030		}
1031		le32_add_cpu(&tq->shared->txNumDeferred, 1);
1032	}
1033
1034	if (vlan_tx_tag_present(skb)) {
1035		gdesc->txd.ti = 1;
1036		gdesc->txd.tci = vlan_tx_tag_get(skb);
1037	}
1038
1039	/* finally flips the GEN bit of the SOP desc. */
1040	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1041						  VMXNET3_TXD_GEN);
1042#ifdef __BIG_ENDIAN_BITFIELD
1043	/* Finished updating in bitfields of Tx Desc, so write them in original
1044	 * place.
1045	 */
1046	vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1047			   (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1048	gdesc = ctx.sop_txd;
1049#endif
1050	netdev_dbg(adapter->netdev,
1051		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1052		(u32)(ctx.sop_txd -
1053		tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1054		le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1055
1056	spin_unlock_irqrestore(&tq->tx_lock, flags);
1057
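	/* Coalesce doorbell writes: only write TXPROD to kick the device once
	 * the number of descriptors deferred since the last kick reaches
	 * tq->shared->txThreshold.
	 */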
1058	if (le32_to_cpu(tq->shared->txNumDeferred) >=
1059					le32_to_cpu(tq->shared->txThreshold)) {
1060		tq->shared->txNumDeferred = 0;
1061		VMXNET3_WRITE_BAR0_REG(adapter,
1062				       VMXNET3_REG_TXPROD + tq->qid * 8,
1063				       tq->tx_ring.next2fill);
1064	}
1065
1066	return NETDEV_TX_OK;
1067
1068hdr_too_big:
1069	tq->stats.drop_oversized_hdr++;
1070unlock_drop_pkt:
1071	spin_unlock_irqrestore(&tq->tx_lock, flags);
1072drop_pkt:
1073	tq->stats.drop_total++;
1074	dev_kfree_skb(skb);
1075	return NETDEV_TX_OK;
1076}
1077
1078
1079static netdev_tx_t
1080vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1081{
1082	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1083
1084	BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1085	return vmxnet3_tq_xmit(skb,
1086			       &adapter->tx_queue[skb->queue_mapping],
1087			       adapter, netdev);
1088}
1089
1090
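/*
 * Translate the device's checksum verdict into skb->ip_summed: if both the IP
 * and TCP/UDP checksums are reported correct, mark the skb
 * CHECKSUM_UNNECESSARY; otherwise pass along the checksum value from the
 * descriptor, if any, or leave the skb unchecked.
 */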
1091static void
1092vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1093		struct sk_buff *skb,
1094		union Vmxnet3_GenericDesc *gdesc)
1095{
1096	if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1097		/* typical case: TCP/UDP over IP and both csums are correct */
1098		if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1099							VMXNET3_RCD_CSUM_OK) {
1100			skb->ip_summed = CHECKSUM_UNNECESSARY;
1101			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1102			BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1103			BUG_ON(gdesc->rcd.frg);
1104		} else {
1105			if (gdesc->rcd.csum) {
1106				skb->csum = htons(gdesc->rcd.csum);
1107				skb->ip_summed = CHECKSUM_PARTIAL;
1108			} else {
1109				skb_checksum_none_assert(skb);
1110			}
1111		}
1112	} else {
1113		skb_checksum_none_assert(skb);
1114	}
1115}
1116
1117
1118static void
1119vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1120		 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1121{
1122	rq->stats.drop_err++;
1123	if (!rcd->fcs)
1124		rq->stats.drop_fcs++;
1125
1126	rq->stats.drop_total++;
1127
1128	/*
1129	 * We do not unmap and chain the rx buffer to the skb.
1130	 * We basically pretend this buffer is not used; it will be recycled
1131	 * by vmxnet3_rq_alloc_rx_buf().
1132	 */
1133
1134	/*
1135	 * ctx->skb may be NULL if this is the first and the only one
1136	 * desc for the pkt
1137	 */
1138	if (ctx->skb)
1139		dev_kfree_skb_irq(ctx->skb);
1140
1141	ctx->skb = NULL;
1142}
1143
1144
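/*
 * Process up to @quota receive completions on this queue. The head (skb)
 * buffer of a packet is handed to the stack once its EOP descriptor is seen,
 * with page buffers appended as frags along the way; each consumed buffer is
 * replaced by a freshly allocated one and its rx descriptor re-armed for the
 * device.
 */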
1145static int
1146vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1147		       struct vmxnet3_adapter *adapter, int quota)
1148{
1149	static const u32 rxprod_reg[2] = {
1150		VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1151	};
1152	u32 num_rxd = 0;
1153	bool skip_page_frags = false;
1154	struct Vmxnet3_RxCompDesc *rcd;
1155	struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1156#ifdef __BIG_ENDIAN_BITFIELD
1157	struct Vmxnet3_RxDesc rxCmdDesc;
1158	struct Vmxnet3_RxCompDesc rxComp;
1159#endif
1160	vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1161			  &rxComp);
1162	while (rcd->gen == rq->comp_ring.gen) {
1163		struct vmxnet3_rx_buf_info *rbi;
1164		struct sk_buff *skb, *new_skb = NULL;
1165		struct page *new_page = NULL;
1166		int num_to_alloc;
1167		struct Vmxnet3_RxDesc *rxd;
1168		u32 idx, ring_idx;
1169		struct vmxnet3_cmd_ring	*ring = NULL;
1170		if (num_rxd >= quota) {
1171			/* we may stop even before we see the EOP desc of
1172			 * the current pkt
1173			 */
1174			break;
1175		}
1176		num_rxd++;
1177		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1178		idx = rcd->rxdIdx;
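		/* rqID values below num_rx_queues identify a queue's first
		 * ring (rq->qid); the rest identify its second ring (rq->qid2).
		 */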
1179		ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1180		ring = rq->rx_ring + ring_idx;
1181		vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1182				  &rxCmdDesc);
1183		rbi = rq->buf_info[ring_idx] + idx;
1184
1185		BUG_ON(rxd->addr != rbi->dma_addr ||
1186		       rxd->len != rbi->len);
1187
1188		if (unlikely(rcd->eop && rcd->err)) {
1189			vmxnet3_rx_error(rq, rcd, ctx, adapter);
1190			goto rcd_done;
1191		}
1192
1193		if (rcd->sop) { /* first buf of the pkt */
1194			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1195			       rcd->rqID != rq->qid);
1196
1197			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1198			BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1199
1200			if (unlikely(rcd->len == 0)) {
1201				/* Pretend the rx buffer is skipped. */
1202				BUG_ON(!(rcd->sop && rcd->eop));
1203				netdev_dbg(adapter->netdev,
1204					"rxRing[%u][%u] 0 length\n",
1205					ring_idx, idx);
1206				goto rcd_done;
1207			}
1208
1209			skip_page_frags = false;
1210			ctx->skb = rbi->skb;
1211			new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1212							    rbi->len);
1213			if (new_skb == NULL) {
1214				/* Skb allocation failed, do not hand this skb
1215				 * over to the stack. Reuse it. Drop the existing pkt.
1216				 */
1217				rq->stats.rx_buf_alloc_failure++;
1218				ctx->skb = NULL;
1219				rq->stats.drop_total++;
1220				skip_page_frags = true;
1221				goto rcd_done;
1222			}
1223
1224			pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
1225					 PCI_DMA_FROMDEVICE);
1226
1227#ifdef VMXNET3_RSS
1228			if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1229			    (adapter->netdev->features & NETIF_F_RXHASH))
1230				ctx->skb->rxhash = le32_to_cpu(rcd->rssHash);
1231#endif
1232			skb_put(ctx->skb, rcd->len);
1233
1234			/* Immediate refill */
1235			rbi->skb = new_skb;
1236			rbi->dma_addr = pci_map_single(adapter->pdev,
1237						       rbi->skb->data, rbi->len,
1238						       PCI_DMA_FROMDEVICE);
1239			rxd->addr = cpu_to_le64(rbi->dma_addr);
1240			rxd->len = rbi->len;
1241
1242		} else {
1243			BUG_ON(ctx->skb == NULL && !skip_page_frags);
1244
1245			/* non SOP buffer must be type 1 in most cases */
1246			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1247			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1248
1249			/* If an sop buffer was dropped, skip all
1250			 * following non-sop fragments. They will be reused.
1251			 */
1252			if (skip_page_frags)
1253				goto rcd_done;
1254
1255			new_page = alloc_page(GFP_ATOMIC);
1256			if (unlikely(new_page == NULL)) {
1257				/* Replacement page frag could not be allocated.
1258				 * Reuse this page. Drop the pkt and free the
1259				 * skb which contained this page as a frag. Skip
1260				 * processing all the following non-sop frags.
1261				 */
1262				rq->stats.rx_buf_alloc_failure++;
1263				dev_kfree_skb(ctx->skb);
1264				ctx->skb = NULL;
1265				skip_page_frags = true;
1266				goto rcd_done;
1267			}
1268
1269			if (rcd->len) {
1270				pci_unmap_page(adapter->pdev,
1271					       rbi->dma_addr, rbi->len,
1272					       PCI_DMA_FROMDEVICE);
1273
1274				vmxnet3_append_frag(ctx->skb, rcd, rbi);
1275			}
1276
1277			/* Immediate refill */
1278			rbi->page = new_page;
1279			rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
1280						     0, PAGE_SIZE,
1281						     PCI_DMA_FROMDEVICE);
1282			rxd->addr = cpu_to_le64(rbi->dma_addr);
1283			rxd->len = rbi->len;
1284		}
1285
1286
1287		skb = ctx->skb;
1288		if (rcd->eop) {
1289			skb->len += skb->data_len;
1290
1291			vmxnet3_rx_csum(adapter, skb,
1292					(union Vmxnet3_GenericDesc *)rcd);
1293			skb->protocol = eth_type_trans(skb, adapter->netdev);
1294
1295			if (unlikely(rcd->ts))
1296				__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1297
1298			if (adapter->netdev->features & NETIF_F_LRO)
1299				netif_receive_skb(skb);
1300			else
1301				napi_gro_receive(&rq->napi, skb);
1302
1303			ctx->skb = NULL;
1304		}
1305
1306rcd_done:
1307		/* device may have skipped some rx descs */
1308		ring->next2comp = idx;
1309		num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1310		ring = rq->rx_ring + ring_idx;
1311		while (num_to_alloc) {
1312			vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1313					  &rxCmdDesc);
1314			BUG_ON(!rxd->addr);
1315
1316			/* Recv desc is ready to be used by the device */
1317			rxd->gen = ring->gen;
1318			vmxnet3_cmd_ring_adv_next2fill(ring);
1319			num_to_alloc--;
1320		}
1321
1322		/* if needed, update the register */
1323		if (unlikely(rq->shared->updateRxProd)) {
1324			VMXNET3_WRITE_BAR0_REG(adapter,
1325					       rxprod_reg[ring_idx] + rq->qid * 8,
1326					       ring->next2fill);
1327		}
1328
1329		vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1330		vmxnet3_getRxComp(rcd,
1331				  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1332	}
1333
1334	return num_rxd;
1335}
1336
1337
1338static void
1339vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1340		   struct vmxnet3_adapter *adapter)
1341{
1342	u32 i, ring_idx;
1343	struct Vmxnet3_RxDesc *rxd;
1344
1345	for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1346		for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1347#ifdef __BIG_ENDIAN_BITFIELD
1348			struct Vmxnet3_RxDesc rxDesc;
1349#endif
1350			vmxnet3_getRxDesc(rxd,
1351				&rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1352
1353			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1354					rq->buf_info[ring_idx][i].skb) {
1355				pci_unmap_single(adapter->pdev, rxd->addr,
1356						 rxd->len, PCI_DMA_FROMDEVICE);
1357				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1358				rq->buf_info[ring_idx][i].skb = NULL;
1359			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1360					rq->buf_info[ring_idx][i].page) {
1361				pci_unmap_page(adapter->pdev, rxd->addr,
1362					       rxd->len, PCI_DMA_FROMDEVICE);
1363				put_page(rq->buf_info[ring_idx][i].page);
1364				rq->buf_info[ring_idx][i].page = NULL;
1365			}
1366		}
1367
1368		rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1369		rq->rx_ring[ring_idx].next2fill =
1370					rq->rx_ring[ring_idx].next2comp = 0;
1371	}
1372
1373	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1374	rq->comp_ring.next2proc = 0;
1375}
1376
1377
1378static void
1379vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1380{
1381	int i;
1382
1383	for (i = 0; i < adapter->num_rx_queues; i++)
1384		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1385}
1386
1387
1388static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1389			       struct vmxnet3_adapter *adapter)
1390{
1391	int i;
1392	int j;
1393
1394	/* all rx buffers must have already been freed */
1395	for (i = 0; i < 2; i++) {
1396		if (rq->buf_info[i]) {
1397			for (j = 0; j < rq->rx_ring[i].size; j++)
1398				BUG_ON(rq->buf_info[i][j].page != NULL);
1399		}
1400	}
1401
1402
1403	kfree(rq->buf_info[0]);
1404
1405	for (i = 0; i < 2; i++) {
1406		if (rq->rx_ring[i].base) {
1407			pci_free_consistent(adapter->pdev, rq->rx_ring[i].size
1408					    * sizeof(struct Vmxnet3_RxDesc),
1409					    rq->rx_ring[i].base,
1410					    rq->rx_ring[i].basePA);
1411			rq->rx_ring[i].base = NULL;
1412		}
1413		rq->buf_info[i] = NULL;
1414	}
1415
1416	if (rq->comp_ring.base) {
1417		pci_free_consistent(adapter->pdev, rq->comp_ring.size *
1418				    sizeof(struct Vmxnet3_RxCompDesc),
1419				    rq->comp_ring.base, rq->comp_ring.basePA);
1420		rq->comp_ring.base = NULL;
1421	}
1422}
1423
1424
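/*
 * Initialize the rx queue: ring 0 interleaves skb (head) buffers with page
 * buffers, one head buffer every rx_buf_per_pkt slots, while ring 1 holds
 * only page buffers. Both rings are then pre-filled with all but one buffer
 * so that a full ring can be told apart from an empty one.
 */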
1425static int
1426vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1427		struct vmxnet3_adapter  *adapter)
1428{
1429	int i;
1430
1431	/* initialize buf_info */
1432	for (i = 0; i < rq->rx_ring[0].size; i++) {
1433
1434		/* 1st buf for a pkt is skbuff */
1435		if (i % adapter->rx_buf_per_pkt == 0) {
1436			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1437			rq->buf_info[0][i].len = adapter->skb_buf_size;
1438		} else { /* subsequent bufs for a pkt are frags */
1439			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1440			rq->buf_info[0][i].len = PAGE_SIZE;
1441		}
1442	}
1443	for (i = 0; i < rq->rx_ring[1].size; i++) {
1444		rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1445		rq->buf_info[1][i].len = PAGE_SIZE;
1446	}
1447
1448	/* reset internal state and allocate buffers for both rings */
1449	for (i = 0; i < 2; i++) {
1450		rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1451
1452		memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1453		       sizeof(struct Vmxnet3_RxDesc));
1454		rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1455	}
1456	if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1457				    adapter) == 0) {
1458		/* the 1st ring must have at least 1 rx buffer */
1459		return -ENOMEM;
1460	}
1461	vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1462
1463	/* reset the comp ring */
1464	rq->comp_ring.next2proc = 0;
1465	memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1466	       sizeof(struct Vmxnet3_RxCompDesc));
1467	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1468
1469	/* reset rxctx */
1470	rq->rx_ctx.skb = NULL;
1471
1472	/* stats are not reset */
1473	return 0;
1474}
1475
1476
1477static int
1478vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1479{
1480	int i, err = 0;
1481
1482	for (i = 0; i < adapter->num_rx_queues; i++) {
1483		err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1484		if (unlikely(err)) {
1485			dev_err(&adapter->netdev->dev, "%s: failed to "
1486				"initialize rx queue%i\n",
1487				adapter->netdev->name, i);
1488			break;
1489		}
1490	}
1491	return err;
1492
1493}
1494
1495
1496static int
1497vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1498{
1499	int i;
1500	size_t sz;
1501	struct vmxnet3_rx_buf_info *bi;
1502
1503	for (i = 0; i < 2; i++) {
1504
1505		sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1506		rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz,
1507							&rq->rx_ring[i].basePA);
1508		if (!rq->rx_ring[i].base) {
1509			netdev_err(adapter->netdev,
1510				   "failed to allocate rx ring %d\n", i);
1511			goto err;
1512		}
1513	}
1514
1515	sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1516	rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz,
1517						  &rq->comp_ring.basePA);
1518	if (!rq->comp_ring.base) {
1519		netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1520		goto err;
1521	}
1522
1523	sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1524						   rq->rx_ring[1].size);
1525	bi = kzalloc(sz, GFP_KERNEL);
1526	if (!bi)
1527		goto err;
1528
1529	rq->buf_info[0] = bi;
1530	rq->buf_info[1] = bi + rq->rx_ring[0].size;
1531
1532	return 0;
1533
1534err:
1535	vmxnet3_rq_destroy(rq, adapter);
1536	return -ENOMEM;
1537}
1538
1539
1540static int
1541vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1542{
1543	int i, err = 0;
1544
1545	for (i = 0; i < adapter->num_rx_queues; i++) {
1546		err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1547		if (unlikely(err)) {
1548			dev_err(&adapter->netdev->dev,
1549				"%s: failed to create rx queue%i\n",
1550				adapter->netdev->name, i);
1551			goto err_out;
1552		}
1553	}
1554	return err;
1555err_out:
1556	vmxnet3_rq_destroy_all(adapter);
1557	return err;
1558
1559}
1560
1561/* Multiple queue aware polling function for tx and rx */
1562
1563static int
1564vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1565{
1566	int rcd_done = 0, i;
1567	if (unlikely(adapter->shared->ecr))
1568		vmxnet3_process_events(adapter);
1569	for (i = 0; i < adapter->num_tx_queues; i++)
1570		vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1571
1572	for (i = 0; i < adapter->num_rx_queues; i++)
1573		rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1574						   adapter, budget);
1575	return rcd_done;
1576}
1577
1578
1579static int
1580vmxnet3_poll(struct napi_struct *napi, int budget)
1581{
1582	struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1583					  struct vmxnet3_rx_queue, napi);
1584	int rxd_done;
1585
1586	rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1587
1588	if (rxd_done < budget) {
1589		napi_complete(napi);
1590		vmxnet3_enable_all_intrs(rx_queue->adapter);
1591	}
1592	return rxd_done;
1593}
1594
1595/*
1596 * NAPI polling function for MSI-X mode with multiple Rx queues
1597 * Returns the # of NAPI credits consumed (# of rx descriptors processed)
1598 */
1599
1600static int
1601vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1602{
1603	struct vmxnet3_rx_queue *rq = container_of(napi,
1604						struct vmxnet3_rx_queue, napi);
1605	struct vmxnet3_adapter *adapter = rq->adapter;
1606	int rxd_done;
1607
1608	/* When sharing an interrupt with the corresponding tx queue, process
1609	 * tx completions in that queue as well
1610	 */
1611	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1612		struct vmxnet3_tx_queue *tq =
1613				&adapter->tx_queue[rq - adapter->rx_queue];
1614		vmxnet3_tq_tx_complete(tq, adapter);
1615	}
1616
1617	rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1618
1619	if (rxd_done < budget) {
1620		napi_complete(napi);
1621		vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1622	}
1623	return rxd_done;
1624}
1625
1626
1627#ifdef CONFIG_PCI_MSI
1628
1629/*
1630 * Handle completion interrupts on tx queues
1631 * Returns whether or not the intr is handled
1632 */
1633
1634static irqreturn_t
1635vmxnet3_msix_tx(int irq, void *data)
1636{
1637	struct vmxnet3_tx_queue *tq = data;
1638	struct vmxnet3_adapter *adapter = tq->adapter;
1639
1640	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1641		vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1642
1643	/* Handle the case where only one irq is allocated for all tx queues */
1644	if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1645		int i;
1646		for (i = 0; i < adapter->num_tx_queues; i++) {
1647			struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1648			vmxnet3_tq_tx_complete(txq, adapter);
1649		}
1650	} else {
1651		vmxnet3_tq_tx_complete(tq, adapter);
1652	}
1653	vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1654
1655	return IRQ_HANDLED;
1656}
1657
1658
1659/*
1660 * Handle completion interrupts on rx queues. Returns whether or not the
1661 * intr is handled
1662 */
1663
1664static irqreturn_t
1665vmxnet3_msix_rx(int irq, void *data)
1666{
1667	struct vmxnet3_rx_queue *rq = data;
1668	struct vmxnet3_adapter *adapter = rq->adapter;
1669
1670	/* disable intr if needed */
1671	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1672		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1673	napi_schedule(&rq->napi);
1674
1675	return IRQ_HANDLED;
1676}
1677
1678/*
1679 *----------------------------------------------------------------------------
1680 *
1681 * vmxnet3_msix_event --
1682 *
1683 *    vmxnet3 msix event intr handler
1684 *
1685 * Result:
1686 *    whether or not the intr is handled
1687 *
1688 *----------------------------------------------------------------------------
1689 */
1690
1691static irqreturn_t
1692vmxnet3_msix_event(int irq, void *data)
1693{
1694	struct net_device *dev = data;
1695	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1696
1697	/* disable intr if needed */
1698	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1699		vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1700
1701	if (adapter->shared->ecr)
1702		vmxnet3_process_events(adapter);
1703
1704	vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1705
1706	return IRQ_HANDLED;
1707}
1708
1709#endif /* CONFIG_PCI_MSI  */
1710
1711
1712/* Interrupt handler for vmxnet3  */
1713static irqreturn_t
1714vmxnet3_intr(int irq, void *dev_id)
1715{
1716	struct net_device *dev = dev_id;
1717	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1718
1719	if (adapter->intr.type == VMXNET3_IT_INTX) {
1720		u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1721		if (unlikely(icr == 0))
1722			/* not ours */
1723			return IRQ_NONE;
1724	}
1725
1726
1727	/* disable intr if needed */
1728	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1729		vmxnet3_disable_all_intrs(adapter);
1730
1731	napi_schedule(&adapter->rx_queue[0].napi);
1732
1733	return IRQ_HANDLED;
1734}
1735
1736#ifdef CONFIG_NET_POLL_CONTROLLER
1737
1738/* netpoll callback. */
1739static void
1740vmxnet3_netpoll(struct net_device *netdev)
1741{
1742	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1743
1744	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1745		vmxnet3_disable_all_intrs(adapter);
1746
1747	vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1748	vmxnet3_enable_all_intrs(adapter);
1749
1750}
1751#endif	/* CONFIG_NET_POLL_CONTROLLER */
1752
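/*
 * Request the irqs for the configured interrupt type. With MSI-X, each tx and
 * rx queue normally gets its own vector plus one vector for events;
 * VMXNET3_INTR_TXSHARE makes all tx queues share a single vector, while
 * VMXNET3_INTR_BUDDYSHARE pairs each tx queue with its rx queue's vector (the
 * rx handler then completes tx as well, see vmxnet3_poll_rx_only()).
 */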
1753static int
1754vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1755{
1756	struct vmxnet3_intr *intr = &adapter->intr;
1757	int err = 0, i;
1758	int vector = 0;
1759
1760#ifdef CONFIG_PCI_MSI
1761	if (adapter->intr.type == VMXNET3_IT_MSIX) {
1762		for (i = 0; i < adapter->num_tx_queues; i++) {
1763			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1764				sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1765					adapter->netdev->name, vector);
1766				err = request_irq(
1767					      intr->msix_entries[vector].vector,
1768					      vmxnet3_msix_tx, 0,
1769					      adapter->tx_queue[i].name,
1770					      &adapter->tx_queue[i]);
1771			} else {
1772				sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1773					adapter->netdev->name, vector);
1774			}
1775			if (err) {
1776				dev_err(&adapter->netdev->dev,
1777					"Failed to request irq for MSIX, %s, "
1778					"error %d\n",
1779					adapter->tx_queue[i].name, err);
1780				return err;
1781			}
1782
1783			/* Handle the case where only 1 MSIx was allocated for
1784			 * all tx queues */
1785			if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1786				for (; i < adapter->num_tx_queues; i++)
1787					adapter->tx_queue[i].comp_ring.intr_idx
1788								= vector;
1789				vector++;
1790				break;
1791			} else {
1792				adapter->tx_queue[i].comp_ring.intr_idx
1793								= vector++;
1794			}
1795		}
1796		if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1797			vector = 0;
1798
1799		for (i = 0; i < adapter->num_rx_queues; i++) {
1800			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1801				sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1802					adapter->netdev->name, vector);
1803			else
1804				sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1805					adapter->netdev->name, vector);
1806			err = request_irq(intr->msix_entries[vector].vector,
1807					  vmxnet3_msix_rx, 0,
1808					  adapter->rx_queue[i].name,
1809					  &(adapter->rx_queue[i]));
1810			if (err) {
1811				netdev_err(adapter->netdev,
1812					   "Failed to request irq for MSIX, "
1813					   "%s, error %d\n",
1814					   adapter->rx_queue[i].name, err);
1815				return err;
1816			}
1817
1818			adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1819		}
1820
1821		sprintf(intr->event_msi_vector_name, "%s-event-%d",
1822			adapter->netdev->name, vector);
1823		err = request_irq(intr->msix_entries[vector].vector,
1824				  vmxnet3_msix_event, 0,
1825				  intr->event_msi_vector_name, adapter->netdev);
1826		intr->event_intr_idx = vector;
1827
1828	} else if (intr->type == VMXNET3_IT_MSI) {
1829		adapter->num_rx_queues = 1;
1830		err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1831				  adapter->netdev->name, adapter->netdev);
1832	} else {
1833#endif
1834		adapter->num_rx_queues = 1;
1835		err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1836				  IRQF_SHARED, adapter->netdev->name,
1837				  adapter->netdev);
1838#ifdef CONFIG_PCI_MSI
1839	}
1840#endif
1841	intr->num_intrs = vector + 1;
1842	if (err) {
1843		netdev_err(adapter->netdev,
1844			   "Failed to request irq (intr type:%d), error %d\n",
1845			   intr->type, err);
1846	} else {
1847		/* Number of rx queues will not change after this */
1848		for (i = 0; i < adapter->num_rx_queues; i++) {
1849			struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1850			rq->qid = i;
1851			rq->qid2 = i + adapter->num_rx_queues;
1852		}
1853
1854
1855
1856		/* init our intr settings */
1857		for (i = 0; i < intr->num_intrs; i++)
1858			intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1859		if (adapter->intr.type != VMXNET3_IT_MSIX) {
1860			adapter->intr.event_intr_idx = 0;
1861			for (i = 0; i < adapter->num_tx_queues; i++)
1862				adapter->tx_queue[i].comp_ring.intr_idx = 0;
1863			adapter->rx_queue[0].comp_ring.intr_idx = 0;
1864		}
1865
1866		netdev_info(adapter->netdev,
1867			    "intr type %u, mode %u, %u vectors allocated\n",
1868			    intr->type, intr->mask_mode, intr->num_intrs);
1869	}
1870
1871	return err;
1872}
1873
1874
1875static void
1876vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1877{
1878	struct vmxnet3_intr *intr = &adapter->intr;
1879	BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1880
1881	switch (intr->type) {
1882#ifdef CONFIG_PCI_MSI
1883	case VMXNET3_IT_MSIX:
1884	{
1885		int i, vector = 0;
1886
1887		if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1888			for (i = 0; i < adapter->num_tx_queues; i++) {
1889				free_irq(intr->msix_entries[vector++].vector,
1890					 &(adapter->tx_queue[i]));
1891				if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1892					break;
1893			}
1894		}
1895
1896		for (i = 0; i < adapter->num_rx_queues; i++) {
1897			free_irq(intr->msix_entries[vector++].vector,
1898				 &(adapter->rx_queue[i]));
1899		}
1900
1901		free_irq(intr->msix_entries[vector].vector,
1902			 adapter->netdev);
1903		BUG_ON(vector >= intr->num_intrs);
1904		break;
1905	}
1906#endif
1907	case VMXNET3_IT_MSI:
1908		free_irq(adapter->pdev->irq, adapter->netdev);
1909		break;
1910	case VMXNET3_IT_INTX:
1911		free_irq(adapter->pdev->irq, adapter->netdev);
1912		break;
1913	default:
1914		BUG();
1915	}
1916}
1917
1918
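/*
 * Repopulate the device's VLAN filter table from the driver's
 * active_vlans bitmap.  Entry 0 is always set so that untagged frames
 * are accepted.
 */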
1919static void
1920vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1921{
1922	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1923	u16 vid;
1924
1925	/* allow untagged pkts */
1926	VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1927
1928	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1929		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1930}
1931
1932
1933static int
1934vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1935{
1936	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1937
1938	if (!(netdev->flags & IFF_PROMISC)) {
1939		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1940		unsigned long flags;
1941
1942		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1943		spin_lock_irqsave(&adapter->cmd_lock, flags);
1944		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1945				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1946		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1947	}
1948
1949	set_bit(vid, adapter->active_vlans);
1950
1951	return 0;
1952}
1953
1954
1955static int
1956vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1957{
1958	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1959
1960	if (!(netdev->flags & IFF_PROMISC)) {
1961		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1962		unsigned long flags;
1963
1964		VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1965		spin_lock_irqsave(&adapter->cmd_lock, flags);
1966		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1967				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1968		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1969	}
1970
1971	clear_bit(vid, adapter->active_vlans);
1972
1973	return 0;
1974}
1975
1976
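/*
 * Build a flat array of multicast MAC addresses (ETH_ALEN bytes each)
 * from the netdev's multicast list.  Returns NULL if the table would not
 * fit in the 16-bit mfTableLen field or if the allocation fails.
 */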
1977static u8 *
1978vmxnet3_copy_mc(struct net_device *netdev)
1979{
1980	u8 *buf = NULL;
1981	u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
1982
1983	/* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
1984	if (sz <= 0xffff) {
1985		/* We may be called with BH disabled */
1986		buf = kmalloc(sz, GFP_ATOMIC);
1987		if (buf) {
1988			struct netdev_hw_addr *ha;
1989			int i = 0;
1990
1991			netdev_for_each_mc_addr(ha, netdev)
1992				memcpy(buf + i++ * ETH_ALEN, ha->addr,
1993				       ETH_ALEN);
1994		}
1995	}
1996	return buf;
1997}
1998
1999
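/*
 * Program the device rx filter: derive the new rx mode from the netdev
 * flags (promiscuous, broadcast, all-multi), hand the device a copy of
 * the multicast list when one is needed, then issue UPDATE_RX_MODE and
 * UPDATE_VLAN_FILTERS if the mode changed and UPDATE_MAC_FILTERS
 * unconditionally.
 */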
2000static void
2001vmxnet3_set_mc(struct net_device *netdev)
2002{
2003	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2004	unsigned long flags;
2005	struct Vmxnet3_RxFilterConf *rxConf =
2006					&adapter->shared->devRead.rxFilterConf;
2007	u8 *new_table = NULL;
2008	u32 new_mode = VMXNET3_RXM_UCAST;
2009
2010	if (netdev->flags & IFF_PROMISC) {
2011		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2012		memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2013
2014		new_mode |= VMXNET3_RXM_PROMISC;
2015	} else {
2016		vmxnet3_restore_vlan(adapter);
2017	}
2018
2019	if (netdev->flags & IFF_BROADCAST)
2020		new_mode |= VMXNET3_RXM_BCAST;
2021
2022	if (netdev->flags & IFF_ALLMULTI)
2023		new_mode |= VMXNET3_RXM_ALL_MULTI;
2024	else
2025		if (!netdev_mc_empty(netdev)) {
2026			new_table = vmxnet3_copy_mc(netdev);
2027			if (new_table) {
2028				new_mode |= VMXNET3_RXM_MCAST;
2029				rxConf->mfTableLen = cpu_to_le16(
2030					netdev_mc_count(netdev) * ETH_ALEN);
2031				rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
2032						    new_table));
2033			} else {
2034				netdev_info(netdev, "failed to copy mcast list"
2035					    ", setting ALL_MULTI\n");
2036				new_mode |= VMXNET3_RXM_ALL_MULTI;
2037			}
2038		}
2039
2040
2041	if (!(new_mode & VMXNET3_RXM_MCAST)) {
2042		rxConf->mfTableLen = 0;
2043		rxConf->mfTablePA = 0;
2044	}
2045
2046	spin_lock_irqsave(&adapter->cmd_lock, flags);
2047	if (new_mode != rxConf->rxMode) {
2048		rxConf->rxMode = cpu_to_le32(new_mode);
2049		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2050				       VMXNET3_CMD_UPDATE_RX_MODE);
2051		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2052				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2053	}
2054
2055	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2056			       VMXNET3_CMD_UPDATE_MAC_FILTERS);
2057	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2058
2059	kfree(new_table);
2060}
2061
2062void
2063vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2064{
2065	int i;
2066
2067	for (i = 0; i < adapter->num_rx_queues; i++)
2068		vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2069}
2070
2071
2072/*
2073 *   Set up driver_shared based on settings in adapter.
2074 */
2075
2076static void
2077vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2078{
2079	struct Vmxnet3_DriverShared *shared = adapter->shared;
2080	struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2081	struct Vmxnet3_TxQueueConf *tqc;
2082	struct Vmxnet3_RxQueueConf *rqc;
2083	int i;
2084
2085	memset(shared, 0, sizeof(*shared));
2086
2087	/* driver settings */
2088	shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2089	devRead->misc.driverInfo.version = cpu_to_le32(
2090						VMXNET3_DRIVER_VERSION_NUM);
2091	devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2092				VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2093	devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
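	/*
	 * The gos fields above share a single 32-bit word; byte-swap that
	 * word in place so the device sees it in little-endian order.
	 */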
2094	*((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2095				*((u32 *)&devRead->misc.driverInfo.gos));
2096	devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2097	devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2098
2099	devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter));
2100	devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2101
2102	/* set up feature flags */
2103	if (adapter->netdev->features & NETIF_F_RXCSUM)
2104		devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2105
2106	if (adapter->netdev->features & NETIF_F_LRO) {
2107		devRead->misc.uptFeatures |= UPT1_F_LRO;
2108		devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2109	}
2110	if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2111		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2112
2113	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2114	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2115	devRead->misc.queueDescLen = cpu_to_le32(
2116		adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2117		adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2118
2119	/* tx queue settings */
2120	devRead->misc.numTxQueues =  adapter->num_tx_queues;
2121	for (i = 0; i < adapter->num_tx_queues; i++) {
2122		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2123		BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2124		tqc = &adapter->tqd_start[i].conf;
2125		tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2126		tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2127		tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2128		tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
2129		tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2130		tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2131		tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2132		tqc->ddLen          = cpu_to_le32(
2133					sizeof(struct vmxnet3_tx_buf_info) *
2134					tqc->txRingSize);
2135		tqc->intrIdx        = tq->comp_ring.intr_idx;
2136	}
2137
2138	/* rx queue settings */
2139	devRead->misc.numRxQueues = adapter->num_rx_queues;
2140	for (i = 0; i < adapter->num_rx_queues; i++) {
2141		struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[i];
2142		rqc = &adapter->rqd_start[i].conf;
2143		rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2144		rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2145		rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2146		rqc->ddPA            = cpu_to_le64(virt_to_phys(
2147							rq->buf_info));
2148		rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2149		rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2150		rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2151		rqc->ddLen           = cpu_to_le32(
2152					sizeof(struct vmxnet3_rx_buf_info) *
2153					(rqc->rxRingSize[0] +
2154					 rqc->rxRingSize[1]));
2155		rqc->intrIdx         = rq->comp_ring.intr_idx;
2156	}
2157
2158#ifdef VMXNET3_RSS
2159	memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2160
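	/* The RSS configuration used here: Toeplitz hash over IPv4/IPv6 and
	 * TCP with a fixed key, and an indirection table that spreads flows
	 * round-robin across the rx queues.
	 */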
2161	if (adapter->rss) {
2162		struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2163		static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2164			0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2165			0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2166			0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2167			0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2168			0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2169		};
2170
2171		devRead->misc.uptFeatures |= UPT1_F_RSS;
2172		devRead->misc.numRxQueues = adapter->num_rx_queues;
2173		rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2174				    UPT1_RSS_HASH_TYPE_IPV4 |
2175				    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2176				    UPT1_RSS_HASH_TYPE_IPV6;
2177		rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2178		rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2179		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2180		memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2181
2182		for (i = 0; i < rssConf->indTableSize; i++)
2183			rssConf->indTable[i] = ethtool_rxfh_indir_default(
2184				i, adapter->num_rx_queues);
2185
2186		devRead->rssConfDesc.confVer = 1;
2187		devRead->rssConfDesc.confLen = sizeof(*rssConf);
2188		devRead->rssConfDesc.confPA  = virt_to_phys(rssConf);
2189	}
2190
2191#endif /* VMXNET3_RSS */
2192
2193	/* intr settings */
2194	devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2195				     VMXNET3_IMM_AUTO;
2196	devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2197	for (i = 0; i < adapter->intr.num_intrs; i++)
2198		devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2199
2200	devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2201	devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2202
2203	/* rx filter settings */
2204	devRead->rxFilterConf.rxMode = 0;
2205	vmxnet3_restore_vlan(adapter);
2206	vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2207
2208	/* the rest are already zeroed */
2209}
2210
2211
2212int
2213vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2214{
2215	int err, i;
2216	u32 ret;
2217	unsigned long flags;
2218
2219	netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2220		" ring sizes %u %u %u\n", adapter->netdev->name,
2221		adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2222		adapter->tx_queue[0].tx_ring.size,
2223		adapter->rx_queue[0].rx_ring[0].size,
2224		adapter->rx_queue[0].rx_ring[1].size);
2225
2226	vmxnet3_tq_init_all(adapter);
2227	err = vmxnet3_rq_init_all(adapter);
2228	if (err) {
2229		netdev_err(adapter->netdev,
2230			   "Failed to init rx queue, error %d\n", err);
2231		goto rq_err;
2232	}
2233
2234	err = vmxnet3_request_irqs(adapter);
2235	if (err) {
2236		netdev_err(adapter->netdev,
2237			   "Failed to setup irq, error %d\n", err);
2238		goto irq_err;
2239	}
2240
2241	vmxnet3_setup_driver_shared(adapter);
2242
2243	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2244			       adapter->shared_pa));
2245	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2246			       adapter->shared_pa));
2247	spin_lock_irqsave(&adapter->cmd_lock, flags);
2248	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2249			       VMXNET3_CMD_ACTIVATE_DEV);
2250	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2251	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2252
2253	if (ret != 0) {
2254		netdev_err(adapter->netdev,
2255			   "Failed to activate dev: error %u\n", ret);
2256		err = -EINVAL;
2257		goto activate_err;
2258	}
2259
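	/*
	 * Publish the initial fill level of both rx rings for every queue
	 * by writing each ring's next2fill index to its RXPROD/RXPROD2
	 * register.
	 */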
2260	for (i = 0; i < adapter->num_rx_queues; i++) {
2261		VMXNET3_WRITE_BAR0_REG(adapter,
2262				VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2263				adapter->rx_queue[i].rx_ring[0].next2fill);
2264		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2265				(i * VMXNET3_REG_ALIGN)),
2266				adapter->rx_queue[i].rx_ring[1].next2fill);
2267	}
2268
2269	/* Apply the rx filter settings last. */
2270	vmxnet3_set_mc(adapter->netdev);
2271
2272	/*
2273	 * Check link state when first activating device. It will start the
2274	 * tx queue if the link is up.
2275	 */
2276	vmxnet3_check_link(adapter, true);
2277	for (i = 0; i < adapter->num_rx_queues; i++)
2278		napi_enable(&adapter->rx_queue[i].napi);
2279	vmxnet3_enable_all_intrs(adapter);
2280	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2281	return 0;
2282
2283activate_err:
2284	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2285	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2286	vmxnet3_free_irqs(adapter);
2287irq_err:
2288rq_err:
2289	/* free up buffers we allocated */
2290	vmxnet3_rq_cleanup_all(adapter);
2291	return err;
2292}
2293
2294
2295void
2296vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2297{
2298	unsigned long flags;
2299	spin_lock_irqsave(&adapter->cmd_lock, flags);
2300	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2301	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2302}
2303
2304
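/*
 * Quiesce the device: tell the hardware to stop, disable interrupts and
 * NAPI, stop the tx queues, and release the rings and IRQs.  The QUIESCED
 * state bit makes repeated calls a no-op.
 */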
2305int
2306vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2307{
2308	int i;
2309	unsigned long flags;
2310	if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2311		return 0;
2312
2313
2314	spin_lock_irqsave(&adapter->cmd_lock, flags);
2315	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2316			       VMXNET3_CMD_QUIESCE_DEV);
2317	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2318	vmxnet3_disable_all_intrs(adapter);
2319
2320	for (i = 0; i < adapter->num_rx_queues; i++)
2321		napi_disable(&adapter->rx_queue[i].napi);
2322	netif_tx_disable(adapter->netdev);
2323	adapter->link_speed = 0;
2324	netif_carrier_off(adapter->netdev);
2325
2326	vmxnet3_tq_cleanup_all(adapter);
2327	vmxnet3_rq_cleanup_all(adapter);
2328	vmxnet3_free_irqs(adapter);
2329	return 0;
2330}
2331
2332
2333static void
2334vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2335{
2336	u32 tmp;
2337
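	/* MACL holds the first four bytes of the MAC address, MACH the
	 * remaining two.
	 */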
2338	tmp = *(u32 *)mac;
2339	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2340
2341	tmp = (mac[5] << 8) | mac[4];
2342	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2343}
2344
2345
2346static int
2347vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2348{
2349	struct sockaddr *addr = p;
2350	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2351
2352	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2353	vmxnet3_write_mac_addr(adapter, addr->sa_data);
2354
2355	return 0;
2356}
2357
2358
2359/* ==================== initialization and cleanup routines ============ */
2360
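/*
 * Map the two MMIO BARs: bar0 (hw_addr0) carries the interrupt mask and
 * rx/tx producer registers, bar1 (hw_addr1) the command and configuration
 * registers.
 */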
2361static int
2362vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2363{
2364	int err;
2365	unsigned long mmio_start, mmio_len;
2366	struct pci_dev *pdev = adapter->pdev;
2367
2368	err = pci_enable_device(pdev);
2369	if (err) {
2370		dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2371		return err;
2372	}
2373
2374	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2375		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2376			dev_err(&pdev->dev,
2377				"pci_set_consistent_dma_mask failed\n");
2378			err = -EIO;
2379			goto err_set_mask;
2380		}
2381		*dma64 = true;
2382	} else {
2383		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2384			dev_err(&pdev->dev,
2385				"pci_set_dma_mask failed\n");
2386			err = -EIO;
2387			goto err_set_mask;
2388		}
2389		*dma64 = false;
2390	}
2391
2392	err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2393					   vmxnet3_driver_name);
2394	if (err) {
2395		dev_err(&pdev->dev,
2396			"Failed to request region for adapter: error %d\n", err);
2397		goto err_set_mask;
2398	}
2399
2400	pci_set_master(pdev);
2401
2402	mmio_start = pci_resource_start(pdev, 0);
2403	mmio_len = pci_resource_len(pdev, 0);
2404	adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2405	if (!adapter->hw_addr0) {
2406		dev_err(&pdev->dev, "Failed to map bar0\n");
2407		err = -EIO;
2408		goto err_ioremap;
2409	}
2410
2411	mmio_start = pci_resource_start(pdev, 1);
2412	mmio_len = pci_resource_len(pdev, 1);
2413	adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2414	if (!adapter->hw_addr1) {
2415		dev_err(&pdev->dev, "Failed to map bar1\n");
2416		err = -EIO;
2417		goto err_bar1;
2418	}
2419	return 0;
2420
2421err_bar1:
2422	iounmap(adapter->hw_addr0);
2423err_ioremap:
2424	pci_release_selected_regions(pdev, (1 << 2) - 1);
2425err_set_mask:
2426	pci_disable_device(pdev);
2427	return err;
2428}
2429
2430
2431static void
2432vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2433{
2434	BUG_ON(!adapter->pdev);
2435
2436	iounmap(adapter->hw_addr0);
2437	iounmap(adapter->hw_addr1);
2438	pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2439	pci_disable_device(adapter->pdev);
2440}
2441
2442
2443static void
2444vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2445{
2446	size_t sz, i, ring0_size, ring1_size, comp_size;
2447	struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[0];
2448
2449
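	/*
	 * Pick the skb (head) buffer size: an MTU that fits in one skb
	 * buffer consumes a single rx buffer per packet, while a larger MTU
	 * uses a VMXNET3_MAX_SKB_BUF_SIZE head buffer plus enough page-sized
	 * body buffers to cover the rest of the frame.
	 */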
2450	if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2451				    VMXNET3_MAX_ETH_HDR_SIZE) {
2452		adapter->skb_buf_size = adapter->netdev->mtu +
2453					VMXNET3_MAX_ETH_HDR_SIZE;
2454		if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2455			adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2456
2457		adapter->rx_buf_per_pkt = 1;
2458	} else {
2459		adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2460		sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2461					    VMXNET3_MAX_ETH_HDR_SIZE;
2462		adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2463	}
2464
2465	/*
2466	 * for simplicity, force the ring0 size to be a multiple of
2467	 * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2468	 */
2469	sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2470	ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2471	ring0_size = (ring0_size + sz - 1) / sz * sz;
2472	ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2473			   sz * sz);
2474	ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2475	comp_size = ring0_size + ring1_size;
2476
2477	for (i = 0; i < adapter->num_rx_queues; i++) {
2478		rq = &adapter->rx_queue[i];
2479		rq->rx_ring[0].size = ring0_size;
2480		rq->rx_ring[1].size = ring1_size;
2481		rq->comp_ring.size = comp_size;
2482	}
2483}
2484
2485
2486int
2487vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2488		      u32 rx_ring_size, u32 rx_ring2_size)
2489{
2490	int err = 0, i;
2491
2492	for (i = 0; i < adapter->num_tx_queues; i++) {
2493		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2494		tq->tx_ring.size   = tx_ring_size;
2495		tq->data_ring.size = tx_ring_size;
2496		tq->comp_ring.size = tx_ring_size;
2497		tq->shared = &adapter->tqd_start[i].ctrl;
2498		tq->stopped = true;
2499		tq->adapter = adapter;
2500		tq->qid = i;
2501		err = vmxnet3_tq_create(tq, adapter);
2502		/*
2503		 * Too late to change num_tx_queues. We cannot make do with
2504		 * fewer queues than what we asked for.
2505		 */
2506		if (err)
2507			goto queue_err;
2508	}
2509
2510	adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2511	adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2512	vmxnet3_adjust_rx_ring_size(adapter);
2513	for (i = 0; i < adapter->num_rx_queues; i++) {
2514		struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2515		/* qid and qid2 for rx queues will be assigned later when num
2516		 * of rx queues is finalized after allocating intrs */
2517		rq->shared = &adapter->rqd_start[i].ctrl;
2518		rq->adapter = adapter;
2519		err = vmxnet3_rq_create(rq, adapter);
2520		if (err) {
2521			if (i == 0) {
2522				netdev_err(adapter->netdev,
2523					   "Could not allocate any rx queues. "
2524					   "Aborting.\n");
2525				goto queue_err;
2526			} else {
2527				netdev_info(adapter->netdev,
2528					    "Number of rx queues changed "
2529					    "to %d\n", i);
2530				adapter->num_rx_queues = i;
2531				err = 0;
2532				break;
2533			}
2534		}
2535	}
2536	return err;
2537queue_err:
2538	vmxnet3_tq_destroy_all(adapter);
2539	return err;
2540}
2541
2542static int
2543vmxnet3_open(struct net_device *netdev)
2544{
2545	struct vmxnet3_adapter *adapter;
2546	int err, i;
2547
2548	adapter = netdev_priv(netdev);
2549
2550	for (i = 0; i < adapter->num_tx_queues; i++)
2551		spin_lock_init(&adapter->tx_queue[i].tx_lock);
2552
2553	err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2554				    VMXNET3_DEF_RX_RING_SIZE,
2555				    VMXNET3_DEF_RX_RING_SIZE);
2556	if (err)
2557		goto queue_err;
2558
2559	err = vmxnet3_activate_dev(adapter);
2560	if (err)
2561		goto activate_err;
2562
2563	return 0;
2564
2565activate_err:
2566	vmxnet3_rq_destroy_all(adapter);
2567	vmxnet3_tq_destroy_all(adapter);
2568queue_err:
2569	return err;
2570}
2571
2572
2573static int
2574vmxnet3_close(struct net_device *netdev)
2575{
2576	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2577
2578	/*
2579	 * Reset_work may be in the middle of resetting the device, wait for its
2580	 * completion.
2581	 */
2582	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2583		msleep(1);
2584
2585	vmxnet3_quiesce_dev(adapter);
2586
2587	vmxnet3_rq_destroy_all(adapter);
2588	vmxnet3_tq_destroy_all(adapter);
2589
2590	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2591
2592
2593	return 0;
2594}
2595
2596
2597void
2598vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2599{
2600	int i;
2601
2602	/*
2603	 * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2604	 * vmxnet3_close() will deadlock.
2605	 */
2606	BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2607
2608	/* we need to enable NAPI, otherwise dev_close will deadlock */
2609	for (i = 0; i < adapter->num_rx_queues; i++)
2610		napi_enable(&adapter->rx_queue[i].napi);
2611	dev_close(adapter->netdev);
2612}
2613
2614
2615static int
2616vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2617{
2618	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2619	int err = 0;
2620
2621	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2622		return -EINVAL;
2623
2624	netdev->mtu = new_mtu;
2625
2626	/*
2627	 * Reset_work may be in the middle of resetting the device, wait for its
2628	 * completion.
2629	 */
2630	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2631		msleep(1);
2632
2633	if (netif_running(netdev)) {
2634		vmxnet3_quiesce_dev(adapter);
2635		vmxnet3_reset_dev(adapter);
2636
2637		/* we need to re-create the rx queue based on the new mtu */
2638		vmxnet3_rq_destroy_all(adapter);
2639		vmxnet3_adjust_rx_ring_size(adapter);
2640		err = vmxnet3_rq_create_all(adapter);
2641		if (err) {
2642			netdev_err(netdev,
2643				   "failed to re-create rx queues, "
2644				   "error %d. Closing it\n", err);
2645			goto out;
2646		}
2647
2648		err = vmxnet3_activate_dev(adapter);
2649		if (err) {
2650			netdev_err(netdev,
2651				   "failed to re-activate, error %d. "
2652				   "Closing it\n", err);
2653			goto out;
2654		}
2655	}
2656
2657out:
2658	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2659	if (err)
2660		vmxnet3_force_close(adapter);
2661
2662	return err;
2663}
2664
2665
2666static void
2667vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2668{
2669	struct net_device *netdev = adapter->netdev;
2670
2671	netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2672		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2673		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2674		NETIF_F_LRO;
2675	if (dma64)
2676		netdev->hw_features |= NETIF_F_HIGHDMA;
2677	netdev->vlan_features = netdev->hw_features &
2678				~(NETIF_F_HW_VLAN_CTAG_TX |
2679				  NETIF_F_HW_VLAN_CTAG_RX);
2680	netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2681}
2682
2683
2684static void
2685vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2686{
2687	u32 tmp;
2688
2689	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2690	*(u32 *)mac = tmp;
2691
2692	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2693	mac[4] = tmp & 0xff;
2694	mac[5] = (tmp >> 8) & 0xff;
2695}
2696
2697#ifdef CONFIG_PCI_MSI
2698
2699/*
2700 * Enable MSI-X vectors.
2701 * Returns:
2702 *	0 when the requested number of vectors could be enabled,
2703 *	VMXNET3_LINUX_MIN_MSIX_VECT when only the minimum number of required
2704 *	 vectors could be enabled,
2705 *	otherwise the number of vectors that could be enabled (this number is
2706 *	 smaller than VMXNET3_LINUX_MIN_MSIX_VECT).
2707 */
2708
2709static int
2710vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2711			     int vectors)
2712{
2713	int err = 0, vector_threshold;
2714	vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2715
2716	while (vectors >= vector_threshold) {
2717		err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2718				      vectors);
2719		if (!err) {
2720			adapter->intr.num_intrs = vectors;
2721			return 0;
2722		} else if (err < 0) {
2723			dev_err(&adapter->netdev->dev,
2724				   "Failed to enable MSI-X, error: %d\n", err);
2725			vectors = 0;
2726		} else if (err < vector_threshold) {
2727			break;
2728		} else {
2729			/* If we fail to enable the required number of MSI-X
2730			 * vectors, retry with the minimum number required.
2731			 */
2732			dev_err(&adapter->netdev->dev,
2733				"Failed to enable %d MSI-X, trying %d instead\n",
2734				    vectors, vector_threshold);
2735			vectors = vector_threshold;
2736		}
2737	}
2738
2739	dev_info(&adapter->pdev->dev,
2740		 "Number of MSI-X interrupts which can be allocated "
2741		 "is lower than min threshold required.\n");
2742	return err;
2743}
2744
2745
2746#endif /* CONFIG_PCI_MSI */
2747
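/*
 * Ask the device for its preferred interrupt type and mask mode, then
 * fall back from MSI-X to MSI to INTx as vector allocation fails.
 */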
2748static void
2749vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2750{
2751	u32 cfg;
2752	unsigned long flags;
2753
2754	/* intr settings */
2755	spin_lock_irqsave(&adapter->cmd_lock, flags);
2756	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2757			       VMXNET3_CMD_GET_CONF_INTR);
2758	cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2759	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2760	adapter->intr.type = cfg & 0x3;
2761	adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2762
2763	if (adapter->intr.type == VMXNET3_IT_AUTO) {
2764		adapter->intr.type = VMXNET3_IT_MSIX;
2765	}
2766
2767#ifdef CONFIG_PCI_MSI
2768	if (adapter->intr.type == VMXNET3_IT_MSIX) {
2769		int vector, err = 0;
2770
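		/*
		 * Vector budget: one vector per tx queue (or a single shared
		 * tx vector with TXSHARE), plus one per rx queue unless rx
		 * buddy-shares the tx vectors, plus one for link events, but
		 * never fewer than VMXNET3_LINUX_MIN_MSIX_VECT.
		 */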
2771		adapter->intr.num_intrs = (adapter->share_intr ==
2772					   VMXNET3_INTR_TXSHARE) ? 1 :
2773					   adapter->num_tx_queues;
2774		adapter->intr.num_intrs += (adapter->share_intr ==
2775					   VMXNET3_INTR_BUDDYSHARE) ? 0 :
2776					   adapter->num_rx_queues;
2777		adapter->intr.num_intrs += 1;		/* for link event */
2778
2779		adapter->intr.num_intrs = (adapter->intr.num_intrs >
2780					   VMXNET3_LINUX_MIN_MSIX_VECT
2781					   ? adapter->intr.num_intrs :
2782					   VMXNET3_LINUX_MIN_MSIX_VECT);
2783
2784		for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2785			adapter->intr.msix_entries[vector].entry = vector;
2786
2787		err = vmxnet3_acquire_msix_vectors(adapter,
2788						   adapter->intr.num_intrs);
2789		/* If we cannot allocate one MSIx vector per queue
2790		 * then limit the number of rx queues to 1
2791		 */
2792		if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2793			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2794			    || adapter->num_rx_queues != 1) {
2795				adapter->share_intr = VMXNET3_INTR_TXSHARE;
2796				netdev_err(adapter->netdev,
2797					   "Number of rx queues : 1\n");
2798				adapter->num_rx_queues = 1;
2799				adapter->intr.num_intrs =
2800						VMXNET3_LINUX_MIN_MSIX_VECT;
2801			}
2802			return;
2803		}
2804		if (!err)
2805			return;
2806
2807		/* If we cannot allocate MSIx vectors use only one rx queue */
2808		dev_info(&adapter->pdev->dev,
2809			 "Failed to enable MSI-X, error %d. "
2810			 "Limiting #rx queues to 1, try MSI.\n", err);
2811
2812		adapter->intr.type = VMXNET3_IT_MSI;
2813	}
2814
2815	if (adapter->intr.type == VMXNET3_IT_MSI) {
2816		int err;
2817		err = pci_enable_msi(adapter->pdev);
2818		if (!err) {
2819			adapter->num_rx_queues = 1;
2820			adapter->intr.num_intrs = 1;
2821			return;
2822		}
2823	}
2824#endif /* CONFIG_PCI_MSI */
2825
2826	adapter->num_rx_queues = 1;
2827	dev_info(&adapter->netdev->dev,
2828		 "Using INTx interrupt, #Rx queues: 1.\n");
2829	adapter->intr.type = VMXNET3_IT_INTX;
2830
2831	/* INT-X related setting */
2832	adapter->intr.num_intrs = 1;
2833}
2834
2835
2836static void
2837vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2838{
2839	if (adapter->intr.type == VMXNET3_IT_MSIX)
2840		pci_disable_msix(adapter->pdev);
2841	else if (adapter->intr.type == VMXNET3_IT_MSI)
2842		pci_disable_msi(adapter->pdev);
2843	else
2844		BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2845}
2846
2847
2848static void
2849vmxnet3_tx_timeout(struct net_device *netdev)
2850{
2851	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2852	adapter->tx_timeout_count++;
2853
2854	netdev_err(adapter->netdev, "tx hang\n");
2855	schedule_work(&adapter->work);
2856	netif_wake_queue(adapter->netdev);
2857}
2858
2859
2860static void
2861vmxnet3_reset_work(struct work_struct *data)
2862{
2863	struct vmxnet3_adapter *adapter;
2864
2865	adapter = container_of(data, struct vmxnet3_adapter, work);
2866
2867	/* if another thread is resetting the device, no need to proceed */
2868	if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2869		return;
2870
2871	/* if the device is closed, we must leave it alone */
2872	rtnl_lock();
2873	if (netif_running(adapter->netdev)) {
2874		netdev_notice(adapter->netdev, "resetting\n");
2875		vmxnet3_quiesce_dev(adapter);
2876		vmxnet3_reset_dev(adapter);
2877		vmxnet3_activate_dev(adapter);
2878	} else {
2879		netdev_info(adapter->netdev, "already closed\n");
2880	}
2881	rtnl_unlock();
2882
2883	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2884}
2885
2886
2887static int
2888vmxnet3_probe_device(struct pci_dev *pdev,
2889		     const struct pci_device_id *id)
2890{
2891	static const struct net_device_ops vmxnet3_netdev_ops = {
2892		.ndo_open = vmxnet3_open,
2893		.ndo_stop = vmxnet3_close,
2894		.ndo_start_xmit = vmxnet3_xmit_frame,
2895		.ndo_set_mac_address = vmxnet3_set_mac_addr,
2896		.ndo_change_mtu = vmxnet3_change_mtu,
2897		.ndo_set_features = vmxnet3_set_features,
2898		.ndo_get_stats64 = vmxnet3_get_stats64,
2899		.ndo_tx_timeout = vmxnet3_tx_timeout,
2900		.ndo_set_rx_mode = vmxnet3_set_mc,
2901		.ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2902		.ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2903#ifdef CONFIG_NET_POLL_CONTROLLER
2904		.ndo_poll_controller = vmxnet3_netpoll,
2905#endif
2906	};
2907	int err;
2908	bool dma64 = false; /* stupid gcc */
2909	u32 ver;
2910	struct net_device *netdev;
2911	struct vmxnet3_adapter *adapter;
2912	u8 mac[ETH_ALEN];
2913	int size;
2914	int num_tx_queues;
2915	int num_rx_queues;
2916
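	/*
	 * Size the tx/rx queue counts from the number of online CPUs,
	 * capped at the device maximums and rounded down to a power of two;
	 * without MSI support everything collapses to a single queue.
	 */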
2917	if (!pci_msi_enabled())
2918		enable_mq = 0;
2919
2920#ifdef VMXNET3_RSS
2921	if (enable_mq)
2922		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2923				    (int)num_online_cpus());
2924	else
2925#endif
2926		num_rx_queues = 1;
2927	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2928
2929	if (enable_mq)
2930		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2931				    (int)num_online_cpus());
2932	else
2933		num_tx_queues = 1;
2934
2935	num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2936	netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2937				   max(num_tx_queues, num_rx_queues));
2938	dev_info(&pdev->dev,
2939		 "# of Tx queues : %d, # of Rx queues : %d\n",
2940		 num_tx_queues, num_rx_queues);
2941
2942	if (!netdev)
2943		return -ENOMEM;
2944
2945	pci_set_drvdata(pdev, netdev);
2946	adapter = netdev_priv(netdev);
2947	adapter->netdev = netdev;
2948	adapter->pdev = pdev;
2949
2950	spin_lock_init(&adapter->cmd_lock);
2951	adapter->shared = pci_alloc_consistent(adapter->pdev,
2952					       sizeof(struct Vmxnet3_DriverShared),
2953					       &adapter->shared_pa);
2954	if (!adapter->shared) {
2955		dev_err(&pdev->dev, "Failed to allocate memory\n");
2956		err = -ENOMEM;
2957		goto err_alloc_shared;
2958	}
2959
2960	adapter->num_rx_queues = num_rx_queues;
2961	adapter->num_tx_queues = num_tx_queues;
2962	adapter->rx_buf_per_pkt = 1;
2963
2964	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2965	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2966	adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
2967						  &adapter->queue_desc_pa);
2968
2969	if (!adapter->tqd_start) {
2970		dev_err(&pdev->dev, "Failed to allocate memory\n");
2971		err = -ENOMEM;
2972		goto err_alloc_queue_desc;
2973	}
2974	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
2975							    adapter->num_tx_queues);
2976
2977	adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
2978	if (adapter->pm_conf == NULL) {
2979		err = -ENOMEM;
2980		goto err_alloc_pm;
2981	}
2982
2983#ifdef VMXNET3_RSS
2984
2985	adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
2986	if (adapter->rss_conf == NULL) {
2987		err = -ENOMEM;
2988		goto err_alloc_rss;
2989	}
2990#endif /* VMXNET3_RSS */
2991
2992	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
2993	if (err < 0)
2994		goto err_alloc_pci;
2995
2996	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
2997	if (ver & 1) {
2998		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
2999	} else {
3000		dev_err(&pdev->dev,
3001			"Incompatible h/w version (0x%x) for adapter\n", ver);
3002		err = -EBUSY;
3003		goto err_ver;
3004	}
3005
3006	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3007	if (ver & 1) {
3008		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3009	} else {
3010		dev_err(&pdev->dev,
3011			"Incompatible upt version (0x%x) for adapter\n", ver);
3012		err = -EBUSY;
3013		goto err_ver;
3014	}
3015
3016	SET_NETDEV_DEV(netdev, &pdev->dev);
3017	vmxnet3_declare_features(adapter, dma64);
3018
3019	if (adapter->num_tx_queues == adapter->num_rx_queues)
3020		adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3021	else
3022		adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3023
3024	vmxnet3_alloc_intr_resources(adapter);
3025
3026#ifdef VMXNET3_RSS
3027	if (adapter->num_rx_queues > 1 &&
3028	    adapter->intr.type == VMXNET3_IT_MSIX) {
3029		adapter->rss = true;
3030		netdev->hw_features |= NETIF_F_RXHASH;
3031		netdev->features |= NETIF_F_RXHASH;
3032		dev_dbg(&pdev->dev, "RSS is enabled.\n");
3033	} else {
3034		adapter->rss = false;
3035	}
3036#endif
3037
3038	vmxnet3_read_mac_addr(adapter, mac);
3039	memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3040
3041	netdev->netdev_ops = &vmxnet3_netdev_ops;
3042	vmxnet3_set_ethtool_ops(netdev);
3043	netdev->watchdog_timeo = 5 * HZ;
3044
3045	INIT_WORK(&adapter->work, vmxnet3_reset_work);
3046	set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3047
3048	if (adapter->intr.type == VMXNET3_IT_MSIX) {
3049		int i;
3050		for (i = 0; i < adapter->num_rx_queues; i++) {
3051			netif_napi_add(adapter->netdev,
3052				       &adapter->rx_queue[i].napi,
3053				       vmxnet3_poll_rx_only, 64);
3054		}
3055	} else {
3056		netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3057			       vmxnet3_poll, 64);
3058	}
3059
3060	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3061	netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3062
3063	netif_carrier_off(netdev);
3064	err = register_netdev(netdev);
3065
3066	if (err) {
3067		dev_err(&pdev->dev, "Failed to register adapter\n");
3068		goto err_register;
3069	}
3070
3071	vmxnet3_check_link(adapter, false);
3072	return 0;
3073
3074err_register:
3075	vmxnet3_free_intr_resources(adapter);
3076err_ver:
3077	vmxnet3_free_pci_resources(adapter);
3078err_alloc_pci:
3079#ifdef VMXNET3_RSS
3080	kfree(adapter->rss_conf);
3081err_alloc_rss:
3082#endif
3083	kfree(adapter->pm_conf);
3084err_alloc_pm:
3085	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3086			    adapter->queue_desc_pa);
3087err_alloc_queue_desc:
3088	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3089			    adapter->shared, adapter->shared_pa);
3090err_alloc_shared:
3091	pci_set_drvdata(pdev, NULL);
3092	free_netdev(netdev);
3093	return err;
3094}
3095
3096
3097static void
3098vmxnet3_remove_device(struct pci_dev *pdev)
3099{
3100	struct net_device *netdev = pci_get_drvdata(pdev);
3101	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3102	int size = 0;
3103	int num_rx_queues;
3104
3105#ifdef VMXNET3_RSS
3106	if (enable_mq)
3107		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3108				    (int)num_online_cpus());
3109	else
3110#endif
3111		num_rx_queues = 1;
3112	num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3113
3114	cancel_work_sync(&adapter->work);
3115
3116	unregister_netdev(netdev);
3117
3118	vmxnet3_free_intr_resources(adapter);
3119	vmxnet3_free_pci_resources(adapter);
3120#ifdef VMXNET3_RSS
3121	kfree(adapter->rss_conf);
3122#endif
3123	kfree(adapter->pm_conf);
3124
3125	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3126	size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3127	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3128			    adapter->queue_desc_pa);
3129	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3130			    adapter->shared, adapter->shared_pa);
3131	free_netdev(netdev);
3132}
3133
3134
3135#ifdef CONFIG_PM
3136
3137static int
3138vmxnet3_suspend(struct device *device)
3139{
3140	struct pci_dev *pdev = to_pci_dev(device);
3141	struct net_device *netdev = pci_get_drvdata(pdev);
3142	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3143	struct Vmxnet3_PMConf *pmConf;
3144	struct ethhdr *ehdr;
3145	struct arphdr *ahdr;
3146	u8 *arpreq;
3147	struct in_device *in_dev;
3148	struct in_ifaddr *ifa;
3149	unsigned long flags;
3150	int i = 0;
3151
3152	if (!netif_running(netdev))
3153		return 0;
3154
3155	for (i = 0; i < adapter->num_rx_queues; i++)
3156		napi_disable(&adapter->rx_queue[i].napi);
3157
3158	vmxnet3_disable_all_intrs(adapter);
3159	vmxnet3_free_irqs(adapter);
3160	vmxnet3_free_intr_resources(adapter);
3161
3162	netif_device_detach(netdev);
3163	netif_tx_stop_all_queues(netdev);
3164
3165	/* Create wake-up filters. */
3166	pmConf = adapter->pm_conf;
3167	memset(pmConf, 0, sizeof(*pmConf));
3168
3169	if (adapter->wol & WAKE_UCAST) {
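	/*
	 * Each bit of a filter's mask[] selects one byte of its pattern[]:
	 * bit 0 of mask[0] covers pattern byte 0, bit 0 of mask[1] covers
	 * pattern byte 8, and so on.
	 */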
3170		pmConf->filters[i].patternSize = ETH_ALEN;
3171		pmConf->filters[i].maskSize = 1;
3172		memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3173		pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3174
3175		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3176		i++;
3177	}
3178
3179	if (adapter->wol & WAKE_ARP) {
3180		in_dev = in_dev_get(netdev);
3181		if (!in_dev)
3182			goto skip_arp;
3183
3184		ifa = (struct in_ifaddr *)in_dev->ifa_list;
3185		if (!ifa)
3186			goto skip_arp;
3187
3188		pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3189			sizeof(struct arphdr) +		/* ARP header */
3190			2 * ETH_ALEN +		/* 2 Ethernet addresses*/
3191			2 * sizeof(u32);	/*2 IPv4 addresses */
3192		pmConf->filters[i].maskSize =
3193			(pmConf->filters[i].patternSize - 1) / 8 + 1;
3194
3195		/* ETH_P_ARP in Ethernet header. */
3196		ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3197		ehdr->h_proto = htons(ETH_P_ARP);
3198
3199		/* ARPOP_REQUEST in ARP header. */
3200		ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3201		ahdr->ar_op = htons(ARPOP_REQUEST);
3202		arpreq = (u8 *)(ahdr + 1);
3203
3204		/* The Unicast IPv4 address in 'tip' field. */
3205		arpreq += 2 * ETH_ALEN + sizeof(u32);
3206		*(u32 *)arpreq = ifa->ifa_address;
3207
3208		/* The mask for the relevant bits. */
3209		pmConf->filters[i].mask[0] = 0x00;
3210		pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3211		pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3212		pmConf->filters[i].mask[3] = 0x00;
3213		pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3214		pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3215		in_dev_put(in_dev);
3216
3217		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3218		i++;
3219	}
3220
3221skip_arp:
3222	if (adapter->wol & WAKE_MAGIC)
3223		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3224
3225	pmConf->numFilters = i;
3226
3227	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3228	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3229								  *pmConf));
3230	adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3231								 pmConf));
3232
3233	spin_lock_irqsave(&adapter->cmd_lock, flags);
3234	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3235			       VMXNET3_CMD_UPDATE_PMCFG);
3236	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3237
3238	pci_save_state(pdev);
3239	pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3240			adapter->wol);
3241	pci_disable_device(pdev);
3242	pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3243
3244	return 0;
3245}
3246
3247
3248static int
3249vmxnet3_resume(struct device *device)
3250{
3251	int err, i = 0;
3252	unsigned long flags;
3253	struct pci_dev *pdev = to_pci_dev(device);
3254	struct net_device *netdev = pci_get_drvdata(pdev);
3255	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3256	struct Vmxnet3_PMConf *pmConf;
3257
3258	if (!netif_running(netdev))
3259		return 0;
3260
3261	/* Destroy wake-up filters. */
3262	pmConf = adapter->pm_conf;
3263	memset(pmConf, 0, sizeof(*pmConf));
3264
3265	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3266	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3267								  *pmConf));
3268	adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3269								 pmConf));
3270
3271	netif_device_attach(netdev);
3272	pci_set_power_state(pdev, PCI_D0);
3273	pci_restore_state(pdev);
3274	err = pci_enable_device_mem(pdev);
3275	if (err != 0)
3276		return err;
3277
3278	pci_enable_wake(pdev, PCI_D0, 0);
3279
3280	spin_lock_irqsave(&adapter->cmd_lock, flags);
3281	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3282			       VMXNET3_CMD_UPDATE_PMCFG);
3283	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3284	vmxnet3_alloc_intr_resources(adapter);
3285	vmxnet3_request_irqs(adapter);
3286	for (i = 0; i < adapter->num_rx_queues; i++)
3287		napi_enable(&adapter->rx_queue[i].napi);
3288	vmxnet3_enable_all_intrs(adapter);
3289
3290	return 0;
3291}
3292
3293static const struct dev_pm_ops vmxnet3_pm_ops = {
3294	.suspend = vmxnet3_suspend,
3295	.resume = vmxnet3_resume,
3296};
3297#endif
3298
3299static struct pci_driver vmxnet3_driver = {
3300	.name		= vmxnet3_driver_name,
3301	.id_table	= vmxnet3_pciid_table,
3302	.probe		= vmxnet3_probe_device,
3303	.remove		= vmxnet3_remove_device,
3304#ifdef CONFIG_PM
3305	.driver.pm	= &vmxnet3_pm_ops,
3306#endif
3307};
3308
3309
3310static int __init
3311vmxnet3_init_module(void)
3312{
3313	pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3314		VMXNET3_DRIVER_VERSION_REPORT);
3315	return pci_register_driver(&vmxnet3_driver);
3316}
3317
3318module_init(vmxnet3_init_module);
3319
3320
3321static void
3322vmxnet3_exit_module(void)
3323{
3324	pci_unregister_driver(&vmxnet3_driver);
3325}
3326
3327module_exit(vmxnet3_exit_module);
3328
3329MODULE_AUTHOR("VMware, Inc.");
3330MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3331MODULE_LICENSE("GPL v2");
3332MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3333