vmxnet3_drv.c revision 72e85c45b9541e1f3d233e775da1dc6f68b85867
1/*
2 * Linux driver for VMware's vmxnet3 ethernet NIC.
3 *
4 * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; version 2 of the License and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 * NON INFRINGEMENT. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * The full GNU General Public License is included in this distribution in
21 * the file called "COPYING".
22 *
23 * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24 *
25 */
26
27#include <net/ip6_checksum.h>
28
29#include "vmxnet3_int.h"
30
31char vmxnet3_driver_name[] = "vmxnet3";
32#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
33
34/*
35 * PCI Device ID Table
36 * Last entry must be all 0s
37 */
38static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
39	{PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
40	{0}
41};
42
43MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
44
45static atomic_t devices_found;
46
47#define VMXNET3_MAX_DEVICES 10
48static int enable_mq = 1;
49static int irq_share_mode;
50
51static void
52vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
53
54/*
55 *    Enable/Disable the given intr
56 */
57static void
58vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
59{
60	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
61}
62
63
64static void
65vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
66{
67	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
68}
69
70
71/*
72 *    Enable/Disable all intrs used by the device
73 */
74static void
75vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
76{
77	int i;
78
79	for (i = 0; i < adapter->intr.num_intrs; i++)
80		vmxnet3_enable_intr(adapter, i);
81	adapter->shared->devRead.intrConf.intrCtrl &=
82					cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
83}
84
85
86static void
87vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
88{
89	int i;
90
91	adapter->shared->devRead.intrConf.intrCtrl |=
92					cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
93	for (i = 0; i < adapter->intr.num_intrs; i++)
94		vmxnet3_disable_intr(adapter, i);
95}
96
97
98static void
99vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
100{
101	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
102}
103
104
105static bool
106vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
107{
108	return tq->stopped;
109}
110
111
112static void
113vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
114{
115	tq->stopped = false;
116	netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
117}
118
119
120static void
121vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
122{
123	tq->stopped = false;
124	netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
125}
126
127
128static void
129vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
130{
131	tq->stopped = true;
132	tq->num_stop++;
133	netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
134}
135
136
137/*
138 * Check the link state. This may start or stop the tx queue.
139 */
140static void
141vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
142{
143	u32 ret;
144	int i;
145	unsigned long flags;
146
147	spin_lock_irqsave(&adapter->cmd_lock, flags);
148	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
149	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
150	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
151
152	adapter->link_speed = ret >> 16;
153	if (ret & 1) { /* Link is up. */
154		printk(KERN_INFO "%s: NIC Link is Up %d Mbps\n",
155		       adapter->netdev->name, adapter->link_speed);
156		if (!netif_carrier_ok(adapter->netdev))
157			netif_carrier_on(adapter->netdev);
158
159		if (affectTxQueue) {
160			for (i = 0; i < adapter->num_tx_queues; i++)
161				vmxnet3_tq_start(&adapter->tx_queue[i],
162						 adapter);
163		}
164	} else {
165		printk(KERN_INFO "%s: NIC Link is Down\n",
166		       adapter->netdev->name);
167		if (netif_carrier_ok(adapter->netdev))
168			netif_carrier_off(adapter->netdev);
169
170		if (affectTxQueue) {
171			for (i = 0; i < adapter->num_tx_queues; i++)
172				vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
173		}
174	}
175}
176
177static void
178vmxnet3_process_events(struct vmxnet3_adapter *adapter)
179{
180	int i;
181	unsigned long flags;
182	u32 events = le32_to_cpu(adapter->shared->ecr);
183	if (!events)
184		return;
185
186	vmxnet3_ack_events(adapter, events);
187
188	/* Check if link state has changed */
189	if (events & VMXNET3_ECR_LINK)
190		vmxnet3_check_link(adapter, true);
191
192	/* Check if there is an error on xmit/recv queues */
193	if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
194		spin_lock_irqsave(&adapter->cmd_lock, flags);
195		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
196				       VMXNET3_CMD_GET_QUEUE_STATUS);
197		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
198
199		for (i = 0; i < adapter->num_tx_queues; i++)
200			if (adapter->tqd_start[i].status.stopped)
201				dev_err(&adapter->netdev->dev,
202					"%s: tq[%d] error 0x%x\n",
203					adapter->netdev->name, i, le32_to_cpu(
204					adapter->tqd_start[i].status.error));
205		for (i = 0; i < adapter->num_rx_queues; i++)
206			if (adapter->rqd_start[i].status.stopped)
207				dev_err(&adapter->netdev->dev,
208					"%s: rq[%d] error 0x%x\n",
209					adapter->netdev->name, i,
210					adapter->rqd_start[i].status.error);
211
212		schedule_work(&adapter->work);
213	}
214}
215
216#ifdef __BIG_ENDIAN_BITFIELD
217/*
218 * The device expects the bitfields in shared structures to be written in
219 * little endian. When CPU is big endian, the following routines are used to
220 * correctly read and write into ABI.
221 * The general technique used here is: double-word bitfields are defined in
222 * the opposite order for big endian architectures. Then, before reading them
223 * in the driver, the complete double word is translated using le32_to_cpu.
224 * Similarly, after the driver writes into the bitfields, cpu_to_le32 is used
225 * to translate the double words into the required format.
226 * To avoid touching bits in the shared structure more than once, temporary
227 * descriptors are used. These are passed as srcDesc to the following functions.
228 */
229static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
230				struct Vmxnet3_RxDesc *dstDesc)
231{
232	u32 *src = (u32 *)srcDesc + 2;
233	u32 *dst = (u32 *)dstDesc + 2;
234	dstDesc->addr = le64_to_cpu(srcDesc->addr);
235	*dst = le32_to_cpu(*src);
236	dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
237}
238
239static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
240			       struct Vmxnet3_TxDesc *dstDesc)
241{
242	int i;
243	u32 *src = (u32 *)(srcDesc + 1);
244	u32 *dst = (u32 *)(dstDesc + 1);
245
246	/* Working backwards so that the gen bit is set at the end. */
247	for (i = 2; i > 0; i--) {
248		src--;
249		dst--;
250		*dst = cpu_to_le32(*src);
251	}
252}
253
254
255static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
256				struct Vmxnet3_RxCompDesc *dstDesc)
257{
258	int i = 0;
259	u32 *src = (u32 *)srcDesc;
260	u32 *dst = (u32 *)dstDesc;
261	for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
262		*dst = le32_to_cpu(*src);
263		src++;
264		dst++;
265	}
266}
267
268
269/* Used to read bitfield values from double words. */
270static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
271{
272	u32 temp = le32_to_cpu(*bitfield);
273	u32 mask = ((1 << size) - 1) << pos;
274	temp &= mask;
275	temp >>= pos;
276	return temp;
277}
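/*
 * For example, the gen bit of a Tx descriptor is read as
 *    get_bitfield32((const __le32 *)txdesc + VMXNET3_TXD_GEN_DWORD_SHIFT,
 *                   VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
 * which is what the VMXNET3_TXDESC_GET_GEN() macro below does.
 */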
278
279
280
281#endif  /* __BIG_ENDIAN_BITFIELD */
282
283#ifdef __BIG_ENDIAN_BITFIELD
284
285#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
286			txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
287			VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
288#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
289			txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
290			VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
291#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
292			VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
293			VMXNET3_TCD_GEN_SIZE)
294#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
295			VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
296#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
297			(dstrcd) = (tmp); \
298			vmxnet3_RxCompToCPU((rcd), (tmp)); \
299		} while (0)
300#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
301			(dstrxd) = (tmp); \
302			vmxnet3_RxDescToCPU((rxd), (tmp)); \
303		} while (0)
304
305#else
306
307#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
308#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
309#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
310#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
311#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
312#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
313
314#endif /* __BIG_ENDIAN_BITFIELD  */
315
316
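/*
 * Undo the DMA mapping (if any) recorded in a tx buf_info entry and mark the
 * entry as unmapped.
 */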
317static void
318vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
319		     struct pci_dev *pdev)
320{
321	if (tbi->map_type == VMXNET3_MAP_SINGLE)
322		pci_unmap_single(pdev, tbi->dma_addr, tbi->len,
323				 PCI_DMA_TODEVICE);
324	else if (tbi->map_type == VMXNET3_MAP_PAGE)
325		pci_unmap_page(pdev, tbi->dma_addr, tbi->len,
326			       PCI_DMA_TODEVICE);
327	else
328		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
329
330	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
331}
332
333
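/*
 * Unmap all tx buffers of the completed pkt whose EOP descriptor is at
 * eop_idx, advance tx_ring.next2comp past them, free the skb and return the
 * number of tx ring entries reclaimed.
 */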
334static int
335vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
336		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter)
337{
338	struct sk_buff *skb;
339	int entries = 0;
340
341	/* no out of order completion */
342	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
343	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
344
345	skb = tq->buf_info[eop_idx].skb;
346	BUG_ON(skb == NULL);
347	tq->buf_info[eop_idx].skb = NULL;
348
349	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
350
351	while (tq->tx_ring.next2comp != eop_idx) {
352		vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
353				     pdev);
354
355		/* update next2comp w/o tx_lock. Since we only ever mark more,
356		 * never fewer, tx ring entries as available, the worst case is
357		 * that the tx routine incorrectly re-queues a pkt due to a
358		 * stale, apparently insufficient count of tx ring entries.
359		 */
360		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
361		entries++;
362	}
363
364	dev_kfree_skb_any(skb);
365	return entries;
366}
367
368
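/*
 * Process pending tx completion descriptors for the queue, and wake the queue
 * if it was stopped and enough ring entries have been freed. Returns the
 * number of pkts completed.
 */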
369static int
370vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
371			struct vmxnet3_adapter *adapter)
372{
373	int completed = 0;
374	union Vmxnet3_GenericDesc *gdesc;
375
376	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
377	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
378		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
379					       &gdesc->tcd), tq, adapter->pdev,
380					       adapter);
381
382		vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
383		gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
384	}
385
386	if (completed) {
387		spin_lock(&tq->tx_lock);
388		if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
389			     vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
390			     VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
391			     netif_carrier_ok(adapter->netdev))) {
392			vmxnet3_tq_wake(tq, adapter);
393		}
394		spin_unlock(&tq->tx_lock);
395	}
396	return completed;
397}
398
399
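/*
 * Unmap and free all pending tx buffers and reset the tx and completion rings
 * to their initial empty state. Queue stats are preserved.
 */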
400static void
401vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
402		   struct vmxnet3_adapter *adapter)
403{
404	int i;
405
406	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
407		struct vmxnet3_tx_buf_info *tbi;
408
409		tbi = tq->buf_info + tq->tx_ring.next2comp;
410
411		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
412		if (tbi->skb) {
413			dev_kfree_skb_any(tbi->skb);
414			tbi->skb = NULL;
415		}
416		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
417	}
418
419	/* sanity check, verify all buffers are indeed unmapped and freed */
420	for (i = 0; i < tq->tx_ring.size; i++) {
421		BUG_ON(tq->buf_info[i].skb != NULL ||
422		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
423	}
424
425	tq->tx_ring.gen = VMXNET3_INIT_GEN;
426	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
427
428	tq->comp_ring.gen = VMXNET3_INIT_GEN;
429	tq->comp_ring.next2proc = 0;
430}
431
432
433static void
434vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
435		   struct vmxnet3_adapter *adapter)
436{
437	if (tq->tx_ring.base) {
438		pci_free_consistent(adapter->pdev, tq->tx_ring.size *
439				    sizeof(struct Vmxnet3_TxDesc),
440				    tq->tx_ring.base, tq->tx_ring.basePA);
441		tq->tx_ring.base = NULL;
442	}
443	if (tq->data_ring.base) {
444		pci_free_consistent(adapter->pdev, tq->data_ring.size *
445				    sizeof(struct Vmxnet3_TxDataDesc),
446				    tq->data_ring.base, tq->data_ring.basePA);
447		tq->data_ring.base = NULL;
448	}
449	if (tq->comp_ring.base) {
450		pci_free_consistent(adapter->pdev, tq->comp_ring.size *
451				    sizeof(struct Vmxnet3_TxCompDesc),
452				    tq->comp_ring.base, tq->comp_ring.basePA);
453		tq->comp_ring.base = NULL;
454	}
455	kfree(tq->buf_info);
456	tq->buf_info = NULL;
457}
458
459
460/* Destroy all tx queues */
461void
462vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
463{
464	int i;
465
466	for (i = 0; i < adapter->num_tx_queues; i++)
467		vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
468}
469
470
471static void
472vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
473		struct vmxnet3_adapter *adapter)
474{
475	int i;
476
477	/* reset the tx ring contents to 0 and reset the tx ring states */
478	memset(tq->tx_ring.base, 0, tq->tx_ring.size *
479	       sizeof(struct Vmxnet3_TxDesc));
480	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
481	tq->tx_ring.gen = VMXNET3_INIT_GEN;
482
483	memset(tq->data_ring.base, 0, tq->data_ring.size *
484	       sizeof(struct Vmxnet3_TxDataDesc));
485
486	/* reset the tx comp ring contents to 0 and reset comp ring states */
487	memset(tq->comp_ring.base, 0, tq->comp_ring.size *
488	       sizeof(struct Vmxnet3_TxCompDesc));
489	tq->comp_ring.next2proc = 0;
490	tq->comp_ring.gen = VMXNET3_INIT_GEN;
491
492	/* reset the bookkeeping data */
493	memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
494	for (i = 0; i < tq->tx_ring.size; i++)
495		tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
496
497	/* stats are not reset */
498}
499
500
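/*
 * Allocate the tx ring, data ring, completion ring and buf_info array for a
 * tx queue. On failure everything allocated so far is freed and -ENOMEM is
 * returned.
 */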
501static int
502vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
503		  struct vmxnet3_adapter *adapter)
504{
505	BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
506	       tq->comp_ring.base || tq->buf_info);
507
508	tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size
509			   * sizeof(struct Vmxnet3_TxDesc),
510			   &tq->tx_ring.basePA);
511	if (!tq->tx_ring.base) {
512		printk(KERN_ERR "%s: failed to allocate tx ring\n",
513		       adapter->netdev->name);
514		goto err;
515	}
516
517	tq->data_ring.base = pci_alloc_consistent(adapter->pdev,
518			     tq->data_ring.size *
519			     sizeof(struct Vmxnet3_TxDataDesc),
520			     &tq->data_ring.basePA);
521	if (!tq->data_ring.base) {
522		printk(KERN_ERR "%s: failed to allocate data ring\n",
523		       adapter->netdev->name);
524		goto err;
525	}
526
527	tq->comp_ring.base = pci_alloc_consistent(adapter->pdev,
528			     tq->comp_ring.size *
529			     sizeof(struct Vmxnet3_TxCompDesc),
530			     &tq->comp_ring.basePA);
531	if (!tq->comp_ring.base) {
532		printk(KERN_ERR "%s: failed to allocate tx comp ring\n",
533		       adapter->netdev->name);
534		goto err;
535	}
536
537	tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]),
538			       GFP_KERNEL);
539	if (!tq->buf_info) {
540		printk(KERN_ERR "%s: failed to allocate tx bufinfo\n",
541		       adapter->netdev->name);
542		goto err;
543	}
544
545	return 0;
546
547err:
548	vmxnet3_tq_destroy(tq, adapter);
549	return -ENOMEM;
550}
551
552static void
553vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
554{
555	int i;
556
557	for (i = 0; i < adapter->num_tx_queues; i++)
558		vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
559}
560
561/*
562 *    starting from ring->next2fill, allocate rx buffers for the given ring
563 *    of the rx queue and update the rx desc. stop after @num_to_alloc buffers
564 *    are allocated or allocation fails
565 */
566
567static int
568vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
569			int num_to_alloc, struct vmxnet3_adapter *adapter)
570{
571	int num_allocated = 0;
572	struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
573	struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
574	u32 val;
575
576	while (num_allocated < num_to_alloc) {
577		struct vmxnet3_rx_buf_info *rbi;
578		union Vmxnet3_GenericDesc *gd;
579
580		rbi = rbi_base + ring->next2fill;
581		gd = ring->base + ring->next2fill;
582
583		if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
584			if (rbi->skb == NULL) {
585				rbi->skb = dev_alloc_skb(rbi->len +
586							 NET_IP_ALIGN);
587				if (unlikely(rbi->skb == NULL)) {
588					rq->stats.rx_buf_alloc_failure++;
589					break;
590				}
591				rbi->skb->dev = adapter->netdev;
592
593				skb_reserve(rbi->skb, NET_IP_ALIGN);
594				rbi->dma_addr = pci_map_single(adapter->pdev,
595						rbi->skb->data, rbi->len,
596						PCI_DMA_FROMDEVICE);
597			} else {
598				/* rx buffer skipped by the device */
599			}
600			val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
601		} else {
602			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
603			       rbi->len  != PAGE_SIZE);
604
605			if (rbi->page == NULL) {
606				rbi->page = alloc_page(GFP_ATOMIC);
607				if (unlikely(rbi->page == NULL)) {
608					rq->stats.rx_buf_alloc_failure++;
609					break;
610				}
611				rbi->dma_addr = pci_map_page(adapter->pdev,
612						rbi->page, 0, PAGE_SIZE,
613						PCI_DMA_FROMDEVICE);
614			} else {
615				/* rx buffers skipped by the device */
616			}
617			val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
618		}
619
620		BUG_ON(rbi->dma_addr == 0);
621		gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
622		gd->dword[2] = cpu_to_le32((ring->gen << VMXNET3_RXD_GEN_SHIFT)
623					   | val | rbi->len);
624
625		num_allocated++;
626		vmxnet3_cmd_ring_adv_next2fill(ring);
627	}
628	rq->uncommitted[ring_idx] += num_allocated;
629
630	dev_dbg(&adapter->netdev->dev,
631		"alloc_rx_buf: %d allocated, next2fill %u, next2comp "
632		"%u, uncommitted %u\n", num_allocated, ring->next2fill,
633		ring->next2comp, rq->uncommitted[ring_idx]);
634
635	/* so that the device can distinguish a full ring from an empty ring */
636	BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
637
638	return num_allocated;
639}
640
641
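/* Attach the rx buffer's page as the next fragment of the skb being built. */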
642static void
643vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
644		    struct vmxnet3_rx_buf_info *rbi)
645{
646	struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
647		skb_shinfo(skb)->nr_frags;
648
649	BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
650
651	frag->page = rbi->page;
652	frag->page_offset = 0;
653	frag->size = rcd->len;
654	skb->data_len += frag->size;
655	skb_shinfo(skb)->nr_frags++;
656}
657
658
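/*
 * Fill tx descriptors for the pkt: a SOP descriptor pointing into the data
 * ring for any copied headers, then descriptors for the remaining linear data
 * (split into chunks of at most VMXNET3_MAX_TX_BUF_SIZE) and for each page
 * frag. The SOP desc keeps the previous gen bit; the caller flips it last to
 * hand the pkt to the device.
 */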
659static void
660vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
661		struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
662		struct vmxnet3_adapter *adapter)
663{
664	u32 dw2, len;
665	unsigned long buf_offset;
666	int i;
667	union Vmxnet3_GenericDesc *gdesc;
668	struct vmxnet3_tx_buf_info *tbi = NULL;
669
670	BUG_ON(ctx->copy_size > skb_headlen(skb));
671
672	/* use the previous gen bit for the SOP desc */
673	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
674
675	ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
676	gdesc = ctx->sop_txd; /* both loops below can be skipped */
677
678	/* no need to map the buffer if headers are copied */
679	if (ctx->copy_size) {
680		ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
681					tq->tx_ring.next2fill *
682					sizeof(struct Vmxnet3_TxDataDesc));
683		ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
684		ctx->sop_txd->dword[3] = 0;
685
686		tbi = tq->buf_info + tq->tx_ring.next2fill;
687		tbi->map_type = VMXNET3_MAP_NONE;
688
689		dev_dbg(&adapter->netdev->dev,
690			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
691			tq->tx_ring.next2fill,
692			le64_to_cpu(ctx->sop_txd->txd.addr),
693			ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
694		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
695
696		/* use the right gen for non-SOP desc */
697		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
698	}
699
700	/* linear part can use multiple tx desc if it's big */
701	len = skb_headlen(skb) - ctx->copy_size;
702	buf_offset = ctx->copy_size;
703	while (len) {
704		u32 buf_size;
705
706		if (len < VMXNET3_MAX_TX_BUF_SIZE) {
707			buf_size = len;
708			dw2 |= len;
709		} else {
710			buf_size = VMXNET3_MAX_TX_BUF_SIZE;
711			/* spec says that for TxDesc.len, 0 == 2^14 */
712		}
713
714		tbi = tq->buf_info + tq->tx_ring.next2fill;
715		tbi->map_type = VMXNET3_MAP_SINGLE;
716		tbi->dma_addr = pci_map_single(adapter->pdev,
717				skb->data + buf_offset, buf_size,
718				PCI_DMA_TODEVICE);
719
720		tbi->len = buf_size;
721
722		gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
723		BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
724
725		gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
726		gdesc->dword[2] = cpu_to_le32(dw2);
727		gdesc->dword[3] = 0;
728
729		dev_dbg(&adapter->netdev->dev,
730			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
731			tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
732			le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
733		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
734		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
735
736		len -= buf_size;
737		buf_offset += buf_size;
738	}
739
740	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
741		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
742
743		tbi = tq->buf_info + tq->tx_ring.next2fill;
744		tbi->map_type = VMXNET3_MAP_PAGE;
745		tbi->dma_addr = pci_map_page(adapter->pdev, frag->page,
746					     frag->page_offset, frag->size,
747					     PCI_DMA_TODEVICE);
748
749		tbi->len = frag->size;
750
751		gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
752		BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
753
754		gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
755		gdesc->dword[2] = cpu_to_le32(dw2 | frag->size);
756		gdesc->dword[3] = 0;
757
758		dev_dbg(&adapter->netdev->dev,
759			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
760			tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
761			le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
762		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
763		dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
764	}
765
766	ctx->eop_txd = gdesc;
767
768	/* set the last buf_info for the pkt */
769	tbi->skb = skb;
770	tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
771}
772
773
774/* Init all tx queues */
775static void
776vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
777{
778	int i;
779
780	for (i = 0; i < adapter->num_tx_queues; i++)
781		vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
782}
783
784
785/*
786 *    parse and copy relevant protocol headers:
787 *      For a tso pkt, relevant headers are L2/3/4 including options
788 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
789 *      if it's a TCP/UDP pkt
790 *
791 * Returns:
792 *    -1:  an error occurred during parsing
793 *     0:  protocol headers parsed, but too big to be copied
794 *     1:  protocol headers parsed and copied
795 *
796 * Other effects:
797 *    1. related *ctx fields are updated.
798 *    2. ctx->copy_size is # of bytes copied
799 *    3. the portion copied is guaranteed to be in the linear part
800 *
801 */
802static int
803vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
804			   struct vmxnet3_tx_ctx *ctx,
805			   struct vmxnet3_adapter *adapter)
806{
807	struct Vmxnet3_TxDataDesc *tdd;
808
809	if (ctx->mss) {	/* TSO */
810		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
811		ctx->l4_hdr_size = ((struct tcphdr *)
812				   skb_transport_header(skb))->doff * 4;
813		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
814	} else {
815		if (skb->ip_summed == CHECKSUM_PARTIAL) {
816			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
817
818			if (ctx->ipv4) {
819				struct iphdr *iph = (struct iphdr *)
820						    skb_network_header(skb);
821				if (iph->protocol == IPPROTO_TCP)
822					ctx->l4_hdr_size = ((struct tcphdr *)
823					   skb_transport_header(skb))->doff * 4;
824				else if (iph->protocol == IPPROTO_UDP)
825					/*
826					 * Use tcp header size so that bytes to
827					 * be copied are more than required by
828					 * the device.
829					 */
830					ctx->l4_hdr_size =
831							sizeof(struct tcphdr);
832				else
833					ctx->l4_hdr_size = 0;
834			} else {
835				/* for simplicity, don't copy L4 headers */
836				ctx->l4_hdr_size = 0;
837			}
838			ctx->copy_size = ctx->eth_ip_hdr_size +
839					 ctx->l4_hdr_size;
840		} else {
841			ctx->eth_ip_hdr_size = 0;
842			ctx->l4_hdr_size = 0;
843			/* copy as much as allowed */
844			ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
845					     skb_headlen(skb));
846		}
847
848		/* make sure headers are accessible directly */
849		if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
850			goto err;
851	}
852
853	if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
854		tq->stats.oversized_hdr++;
855		ctx->copy_size = 0;
856		return 0;
857	}
858
859	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
860
861	memcpy(tdd->data, skb->data, ctx->copy_size);
862	dev_dbg(&adapter->netdev->dev,
863		"copy %u bytes to dataRing[%u]\n",
864		ctx->copy_size, tq->tx_ring.next2fill);
865	return 1;
866
867err:
868	return -1;
869}
870
871
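/*
 * Prepare a TSO pkt: clear the IPv4 header checksum and seed tcph->check with
 * the pseudo-header checksum so that the device can compute the rest.
 */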
872static void
873vmxnet3_prepare_tso(struct sk_buff *skb,
874		    struct vmxnet3_tx_ctx *ctx)
875{
876	struct tcphdr *tcph = (struct tcphdr *)skb_transport_header(skb);
877	if (ctx->ipv4) {
878		struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
879		iph->check = 0;
880		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
881						 IPPROTO_TCP, 0);
882	} else {
883		struct ipv6hdr *iph = (struct ipv6hdr *)skb_network_header(skb);
884		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
885					       IPPROTO_TCP, 0);
886	}
887}
888
889
890/*
891 * Transmits a pkt through a given tq
892 * Returns:
893 *    NETDEV_TX_OK:      descriptors are set up successfully
894 *    NETDEV_TX_OK:      error occurred, the pkt is dropped
895 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
896 *
897 * Side-effects:
898 *    1. tx ring may be changed
899 *    2. tq stats may be updated accordingly
900 *    3. shared->txNumDeferred may be updated
901 */
902
903static int
904vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
905		struct vmxnet3_adapter *adapter, struct net_device *netdev)
906{
907	int ret;
908	u32 count;
909	unsigned long flags;
910	struct vmxnet3_tx_ctx ctx;
911	union Vmxnet3_GenericDesc *gdesc;
912#ifdef __BIG_ENDIAN_BITFIELD
913	/* Use temporary descriptor to avoid touching bits multiple times */
914	union Vmxnet3_GenericDesc tempTxDesc;
915#endif
916
917	/* conservatively estimate # of descriptors to use */
918	count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) +
919		skb_shinfo(skb)->nr_frags + 1;
920
921	ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
922
923	ctx.mss = skb_shinfo(skb)->gso_size;
924	if (ctx.mss) {
925		if (skb_header_cloned(skb)) {
926			if (unlikely(pskb_expand_head(skb, 0, 0,
927						      GFP_ATOMIC) != 0)) {
928				tq->stats.drop_tso++;
929				goto drop_pkt;
930			}
931			tq->stats.copy_skb_header++;
932		}
933		vmxnet3_prepare_tso(skb, &ctx);
934	} else {
935		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
936
937			/* non-tso pkts must not use more than
938			 * VMXNET3_MAX_TXD_PER_PKT entries
939			 */
940			if (skb_linearize(skb) != 0) {
941				tq->stats.drop_too_many_frags++;
942				goto drop_pkt;
943			}
944			tq->stats.linearized++;
945
946			/* recalculate the # of descriptors to use */
947			count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
948		}
949	}
950
951	spin_lock_irqsave(&tq->tx_lock, flags);
952
953	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
954		tq->stats.tx_ring_full++;
955		dev_dbg(&adapter->netdev->dev,
956			"tx queue stopped on %s, next2comp %u"
957			" next2fill %u\n", adapter->netdev->name,
958			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
959
960		vmxnet3_tq_stop(tq, adapter);
961		spin_unlock_irqrestore(&tq->tx_lock, flags);
962		return NETDEV_TX_BUSY;
963	}
964
965
966	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
967	if (ret >= 0) {
968		BUG_ON(ret <= 0 && ctx.copy_size != 0);
969		/* hdrs parsed, check against other limits */
970		if (ctx.mss) {
971			if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
972				     VMXNET3_MAX_TX_BUF_SIZE)) {
973				goto hdr_too_big;
974			}
975		} else {
976			if (skb->ip_summed == CHECKSUM_PARTIAL) {
977				if (unlikely(ctx.eth_ip_hdr_size +
978					     skb->csum_offset >
979					     VMXNET3_MAX_CSUM_OFFSET)) {
980					goto hdr_too_big;
981				}
982			}
983		}
984	} else {
985		tq->stats.drop_hdr_inspect_err++;
986		goto unlock_drop_pkt;
987	}
988
989	/* fill tx descs related to addr & len */
990	vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
991
992	/* setup the EOP desc */
993	ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
994
995	/* setup the SOP desc */
996#ifdef __BIG_ENDIAN_BITFIELD
997	gdesc = &tempTxDesc;
998	gdesc->dword[2] = ctx.sop_txd->dword[2];
999	gdesc->dword[3] = ctx.sop_txd->dword[3];
1000#else
1001	gdesc = ctx.sop_txd;
1002#endif
1003	if (ctx.mss) {
1004		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1005		gdesc->txd.om = VMXNET3_OM_TSO;
1006		gdesc->txd.msscof = ctx.mss;
1007		le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1008			     gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1009	} else {
1010		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1011			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1012			gdesc->txd.om = VMXNET3_OM_CSUM;
1013			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1014					    skb->csum_offset;
1015		} else {
1016			gdesc->txd.om = 0;
1017			gdesc->txd.msscof = 0;
1018		}
1019		le32_add_cpu(&tq->shared->txNumDeferred, 1);
1020	}
1021
1022	if (vlan_tx_tag_present(skb)) {
1023		gdesc->txd.ti = 1;
1024		gdesc->txd.tci = vlan_tx_tag_get(skb);
1025	}
1026
1027	/* finally flips the GEN bit of the SOP desc. */
1028	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1029						  VMXNET3_TXD_GEN);
1030#ifdef __BIG_ENDIAN_BITFIELD
1031	/* Finished updating in bitfields of Tx Desc, so write them in original
1032	 * place.
1033	 */
1034	vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1035			   (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1036	gdesc = ctx.sop_txd;
1037#endif
1038	dev_dbg(&adapter->netdev->dev,
1039		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1040		(u32)((union Vmxnet3_GenericDesc *)ctx.sop_txd -
1041		tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1042		le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1043
1044	spin_unlock_irqrestore(&tq->tx_lock, flags);
1045
1046	if (le32_to_cpu(tq->shared->txNumDeferred) >=
1047					le32_to_cpu(tq->shared->txThreshold)) {
1048		tq->shared->txNumDeferred = 0;
1049		VMXNET3_WRITE_BAR0_REG(adapter,
1050				       VMXNET3_REG_TXPROD + tq->qid * 8,
1051				       tq->tx_ring.next2fill);
1052	}
1053
1054	return NETDEV_TX_OK;
1055
1056hdr_too_big:
1057	tq->stats.drop_oversized_hdr++;
1058unlock_drop_pkt:
1059	spin_unlock_irqrestore(&tq->tx_lock, flags);
1060drop_pkt:
1061	tq->stats.drop_total++;
1062	dev_kfree_skb(skb);
1063	return NETDEV_TX_OK;
1064}
1065
1066
1067static netdev_tx_t
1068vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1069{
1070	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1071
1072	BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1073	return vmxnet3_tq_xmit(skb,
1074			       &adapter->tx_queue[skb->queue_mapping],
1075			       adapter, netdev);
1076}
1077
1078
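/*
 * Translate the checksum information in the rx completion descriptor into the
 * skb checksum state (CHECKSUM_UNNECESSARY when the device validated it,
 * otherwise the raw value or none).
 */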
1079static void
1080vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1081		struct sk_buff *skb,
1082		union Vmxnet3_GenericDesc *gdesc)
1083{
1084	if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1085		/* typical case: TCP/UDP over IP and both csums are correct */
1086		if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1087							VMXNET3_RCD_CSUM_OK) {
1088			skb->ip_summed = CHECKSUM_UNNECESSARY;
1089			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1090			BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1091			BUG_ON(gdesc->rcd.frg);
1092		} else {
1093			if (gdesc->rcd.csum) {
1094				skb->csum = htons(gdesc->rcd.csum);
1095				skb->ip_summed = CHECKSUM_PARTIAL;
1096			} else {
1097				skb_checksum_none_assert(skb);
1098			}
1099		}
1100	} else {
1101		skb_checksum_none_assert(skb);
1102	}
1103}
1104
1105
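/*
 * Account an erroneous rx completion and drop any partially assembled skb.
 * The rx buffer itself stays in the ring and will be reused.
 */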
1106static void
1107vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1108		 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1109{
1110	rq->stats.drop_err++;
1111	if (!rcd->fcs)
1112		rq->stats.drop_fcs++;
1113
1114	rq->stats.drop_total++;
1115
1116	/*
1117	 * We do not unmap and chain the rx buffer to the skb.
1118	 * We basically pretend this buffer is not used and will be recycled
1119	 * by vmxnet3_rq_alloc_rx_buf()
1120	 */
1121
1122	/*
1123	 * ctx->skb may be NULL if this is the first and the only one
1124	 * desc for the pkt
1125	 */
1126	if (ctx->skb)
1127		dev_kfree_skb_irq(ctx->skb);
1128
1129	ctx->skb = NULL;
1130}
1131
1132
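/*
 * Process up to @quota rx completion descriptors: assemble each pkt from its
 * head (skb) buffer and body (page) buffers, hand it to the stack, and refill
 * rx buffers as the rings drain. Returns the number of descriptors processed.
 */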
1133static int
1134vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1135		       struct vmxnet3_adapter *adapter, int quota)
1136{
1137	static const u32 rxprod_reg[2] = {
1138		VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1139	};
1140	u32 num_rxd = 0;
1141	struct Vmxnet3_RxCompDesc *rcd;
1142	struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1143#ifdef __BIG_ENDIAN_BITFIELD
1144	struct Vmxnet3_RxDesc rxCmdDesc;
1145	struct Vmxnet3_RxCompDesc rxComp;
1146#endif
1147	vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1148			  &rxComp);
1149	while (rcd->gen == rq->comp_ring.gen) {
1150		struct vmxnet3_rx_buf_info *rbi;
1151		struct sk_buff *skb;
1152		int num_to_alloc;
1153		struct Vmxnet3_RxDesc *rxd;
1154		u32 idx, ring_idx;
1155
1156		if (num_rxd >= quota) {
1157			/* we may stop even before we see the EOP desc of
1158			 * the current pkt
1159			 */
1160			break;
1161		}
1162		num_rxd++;
1163		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1164		idx = rcd->rxdIdx;
1165		ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1166		vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1167				  &rxCmdDesc);
1168		rbi = rq->buf_info[ring_idx] + idx;
1169
1170		BUG_ON(rxd->addr != rbi->dma_addr ||
1171		       rxd->len != rbi->len);
1172
1173		if (unlikely(rcd->eop && rcd->err)) {
1174			vmxnet3_rx_error(rq, rcd, ctx, adapter);
1175			goto rcd_done;
1176		}
1177
1178		if (rcd->sop) { /* first buf of the pkt */
1179			BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1180			       rcd->rqID != rq->qid);
1181
1182			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1183			BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1184
1185			if (unlikely(rcd->len == 0)) {
1186				/* Pretend the rx buffer is skipped. */
1187				BUG_ON(!(rcd->sop && rcd->eop));
1188				dev_dbg(&adapter->netdev->dev,
1189					"rxRing[%u][%u] 0 length\n",
1190					ring_idx, idx);
1191				goto rcd_done;
1192			}
1193
1194			ctx->skb = rbi->skb;
1195			rbi->skb = NULL;
1196
1197			pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
1198					 PCI_DMA_FROMDEVICE);
1199
1200			skb_put(ctx->skb, rcd->len);
1201		} else {
1202			BUG_ON(ctx->skb == NULL);
1203			/* non SOP buffer must be type 1 in most cases */
1204			if (rbi->buf_type == VMXNET3_RX_BUF_PAGE) {
1205				BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1206
1207				if (rcd->len) {
1208					pci_unmap_page(adapter->pdev,
1209						       rbi->dma_addr, rbi->len,
1210						       PCI_DMA_FROMDEVICE);
1211
1212					vmxnet3_append_frag(ctx->skb, rcd, rbi);
1213					rbi->page = NULL;
1214				}
1215			} else {
1216				/*
1217				 * The only time a non-SOP buffer is type 0 is
1218				 * when it's EOP and error flag is raised, which
1219				 * has already been handled.
1220				 */
1221				BUG_ON(true);
1222			}
1223		}
1224
1225		skb = ctx->skb;
1226		if (rcd->eop) {
1227			skb->len += skb->data_len;
1228			skb->truesize += skb->data_len;
1229
1230			vmxnet3_rx_csum(adapter, skb,
1231					(union Vmxnet3_GenericDesc *)rcd);
1232			skb->protocol = eth_type_trans(skb, adapter->netdev);
1233
1234			if (unlikely(rcd->ts))
1235				__vlan_hwaccel_put_tag(skb, rcd->tci);
1236
1237			netif_receive_skb(skb);
1238
1239			ctx->skb = NULL;
1240		}
1241
1242rcd_done:
1243		/* device may skip some rx descs */
1244		rq->rx_ring[ring_idx].next2comp = idx;
1245		VMXNET3_INC_RING_IDX_ONLY(rq->rx_ring[ring_idx].next2comp,
1246					  rq->rx_ring[ring_idx].size);
1247
1248		/* refill rx buffers frequently to avoid starving the h/w */
1249		num_to_alloc = vmxnet3_cmd_ring_desc_avail(rq->rx_ring +
1250							   ring_idx);
1251		if (unlikely(num_to_alloc > VMXNET3_RX_ALLOC_THRESHOLD(rq,
1252							ring_idx, adapter))) {
1253			vmxnet3_rq_alloc_rx_buf(rq, ring_idx, num_to_alloc,
1254						adapter);
1255
1256			/* if needed, update the register */
1257			if (unlikely(rq->shared->updateRxProd)) {
1258				VMXNET3_WRITE_BAR0_REG(adapter,
1259					rxprod_reg[ring_idx] + rq->qid * 8,
1260					rq->rx_ring[ring_idx].next2fill);
1261				rq->uncommitted[ring_idx] = 0;
1262			}
1263		}
1264
1265		vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1266		vmxnet3_getRxComp(rcd,
1267		     &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1268	}
1269
1270	return num_rxd;
1271}
1272
1273
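/*
 * Unmap and free all rx buffers of both rx rings and reset the rings and the
 * completion ring to their initial state.
 */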
1274static void
1275vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1276		   struct vmxnet3_adapter *adapter)
1277{
1278	u32 i, ring_idx;
1279	struct Vmxnet3_RxDesc *rxd;
1280
1281	for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1282		for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1283#ifdef __BIG_ENDIAN_BITFIELD
1284			struct Vmxnet3_RxDesc rxDesc;
1285#endif
1286			vmxnet3_getRxDesc(rxd,
1287				&rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1288
1289			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1290					rq->buf_info[ring_idx][i].skb) {
1291				pci_unmap_single(adapter->pdev, rxd->addr,
1292						 rxd->len, PCI_DMA_FROMDEVICE);
1293				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1294				rq->buf_info[ring_idx][i].skb = NULL;
1295			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1296					rq->buf_info[ring_idx][i].page) {
1297				pci_unmap_page(adapter->pdev, rxd->addr,
1298					       rxd->len, PCI_DMA_FROMDEVICE);
1299				put_page(rq->buf_info[ring_idx][i].page);
1300				rq->buf_info[ring_idx][i].page = NULL;
1301			}
1302		}
1303
1304		rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1305		rq->rx_ring[ring_idx].next2fill =
1306					rq->rx_ring[ring_idx].next2comp = 0;
1307		rq->uncommitted[ring_idx] = 0;
1308	}
1309
1310	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1311	rq->comp_ring.next2proc = 0;
1312}
1313
1314
1315static void
1316vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1317{
1318	int i;
1319
1320	for (i = 0; i < adapter->num_rx_queues; i++)
1321		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1322}
1323
1324
1325void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1326			struct vmxnet3_adapter *adapter)
1327{
1328	int i;
1329	int j;
1330
1331	/* all rx buffers must have already been freed */
1332	for (i = 0; i < 2; i++) {
1333		if (rq->buf_info[i]) {
1334			for (j = 0; j < rq->rx_ring[i].size; j++)
1335				BUG_ON(rq->buf_info[i][j].page != NULL);
1336		}
1337	}
1338
1339
1340	kfree(rq->buf_info[0]);
1341
1342	for (i = 0; i < 2; i++) {
1343		if (rq->rx_ring[i].base) {
1344			pci_free_consistent(adapter->pdev, rq->rx_ring[i].size
1345					    * sizeof(struct Vmxnet3_RxDesc),
1346					    rq->rx_ring[i].base,
1347					    rq->rx_ring[i].basePA);
1348			rq->rx_ring[i].base = NULL;
1349		}
1350		rq->buf_info[i] = NULL;
1351	}
1352
1353	if (rq->comp_ring.base) {
1354		pci_free_consistent(adapter->pdev, rq->comp_ring.size *
1355				    sizeof(struct Vmxnet3_RxCompDesc),
1356				    rq->comp_ring.base, rq->comp_ring.basePA);
1357		rq->comp_ring.base = NULL;
1358	}
1359}
1360
1361
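/*
 * Set up buf_info for both rx rings (in the 1st ring the first buffer of each
 * pkt is an skb and the rest are pages; the 2nd ring uses pages only),
 * populate the rings with buffers and reset the completion ring. Returns
 * -ENOMEM if no buffer could be allocated for the 1st ring. Queue stats are
 * preserved.
 */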
1362static int
1363vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1364		struct vmxnet3_adapter  *adapter)
1365{
1366	int i;
1367
1368	/* initialize buf_info */
1369	for (i = 0; i < rq->rx_ring[0].size; i++) {
1370
1371		/* 1st buf for a pkt is skbuff */
1372		if (i % adapter->rx_buf_per_pkt == 0) {
1373			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1374			rq->buf_info[0][i].len = adapter->skb_buf_size;
1375		} else { /* subsequent bufs for a pkt are frags */
1376			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1377			rq->buf_info[0][i].len = PAGE_SIZE;
1378		}
1379	}
1380	for (i = 0; i < rq->rx_ring[1].size; i++) {
1381		rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1382		rq->buf_info[1][i].len = PAGE_SIZE;
1383	}
1384
1385	/* reset internal state and allocate buffers for both rings */
1386	for (i = 0; i < 2; i++) {
1387		rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1388		rq->uncommitted[i] = 0;
1389
1390		memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1391		       sizeof(struct Vmxnet3_RxDesc));
1392		rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1393	}
1394	if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1395				    adapter) == 0) {
1396		/* the 1st ring must have at least 1 rx buffer */
1397		return -ENOMEM;
1398	}
1399	vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1400
1401	/* reset the comp ring */
1402	rq->comp_ring.next2proc = 0;
1403	memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1404	       sizeof(struct Vmxnet3_RxCompDesc));
1405	rq->comp_ring.gen = VMXNET3_INIT_GEN;
1406
1407	/* reset rxctx */
1408	rq->rx_ctx.skb = NULL;
1409
1410	/* stats are not reset */
1411	return 0;
1412}
1413
1414
1415static int
1416vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1417{
1418	int i, err = 0;
1419
1420	for (i = 0; i < adapter->num_rx_queues; i++) {
1421		err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1422		if (unlikely(err)) {
1423			dev_err(&adapter->netdev->dev, "%s: failed to "
1424				"initialize rx queue %i\n",
1425				adapter->netdev->name, i);
1426			break;
1427		}
1428	}
1429	return err;
1430
1431}
1432
1433
1434static int
1435vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1436{
1437	int i;
1438	size_t sz;
1439	struct vmxnet3_rx_buf_info *bi;
1440
1441	for (i = 0; i < 2; i++) {
1442
1443		sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1444		rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz,
1445							&rq->rx_ring[i].basePA);
1446		if (!rq->rx_ring[i].base) {
1447			printk(KERN_ERR "%s: failed to allocate rx ring %d\n",
1448			       adapter->netdev->name, i);
1449			goto err;
1450		}
1451	}
1452
1453	sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1454	rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz,
1455						  &rq->comp_ring.basePA);
1456	if (!rq->comp_ring.base) {
1457		printk(KERN_ERR "%s: failed to allocate rx comp ring\n",
1458		       adapter->netdev->name);
1459		goto err;
1460	}
1461
1462	sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1463						   rq->rx_ring[1].size);
1464	bi = kzalloc(sz, GFP_KERNEL);
1465	if (!bi) {
1466		printk(KERN_ERR "%s: failed to allocate rx bufinfo\n",
1467		       adapter->netdev->name);
1468		goto err;
1469	}
1470	rq->buf_info[0] = bi;
1471	rq->buf_info[1] = bi + rq->rx_ring[0].size;
1472
1473	return 0;
1474
1475err:
1476	vmxnet3_rq_destroy(rq, adapter);
1477	return -ENOMEM;
1478}
1479
1480
1481static int
1482vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1483{
1484	int i, err = 0;
1485
1486	for (i = 0; i < adapter->num_rx_queues; i++) {
1487		err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1488		if (unlikely(err)) {
1489			dev_err(&adapter->netdev->dev,
1490				"%s: failed to create rx queue %i\n",
1491				adapter->netdev->name, i);
1492			goto err_out;
1493		}
1494	}
1495	return err;
1496err_out:
1497	vmxnet3_rq_destroy_all(adapter);
1498	return err;
1499
1500}
1501
1502/* Multiple queue aware polling function for tx and rx */
1503
1504static int
1505vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1506{
1507	int rcd_done = 0, i;
1508	if (unlikely(adapter->shared->ecr))
1509		vmxnet3_process_events(adapter);
1510	for (i = 0; i < adapter->num_tx_queues; i++)
1511		vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1512
1513	for (i = 0; i < adapter->num_rx_queues; i++)
1514		rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1515						   adapter, budget);
1516	return rcd_done;
1517}
1518
1519
1520static int
1521vmxnet3_poll(struct napi_struct *napi, int budget)
1522{
1523	struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1524					  struct vmxnet3_rx_queue, napi);
1525	int rxd_done;
1526
1527	rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1528
1529	if (rxd_done < budget) {
1530		napi_complete(napi);
1531		vmxnet3_enable_all_intrs(rx_queue->adapter);
1532	}
1533	return rxd_done;
1534}
1535
1536/*
1537 * NAPI polling function for MSI-X mode with multiple Rx queues
1538 * Returns the # of NAPI credits consumed (# of rx descriptors processed)
1539 */
1540
1541static int
1542vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1543{
1544	struct vmxnet3_rx_queue *rq = container_of(napi,
1545						struct vmxnet3_rx_queue, napi);
1546	struct vmxnet3_adapter *adapter = rq->adapter;
1547	int rxd_done;
1548
1549	/* When sharing interrupt with corresponding tx queue, process
1550	 * tx completions in that queue as well
1551	 */
1552	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1553		struct vmxnet3_tx_queue *tq =
1554				&adapter->tx_queue[rq - adapter->rx_queue];
1555		vmxnet3_tq_tx_complete(tq, adapter);
1556	}
1557
1558	rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1559
1560	if (rxd_done < budget) {
1561		napi_complete(napi);
1562		vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1563	}
1564	return rxd_done;
1565}
1566
1567
1568#ifdef CONFIG_PCI_MSI
1569
1570/*
1571 * Handle completion interrupts on tx queues
1572 * Returns whether or not the intr is handled
1573 */
1574
1575static irqreturn_t
1576vmxnet3_msix_tx(int irq, void *data)
1577{
1578	struct vmxnet3_tx_queue *tq = data;
1579	struct vmxnet3_adapter *adapter = tq->adapter;
1580
1581	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1582		vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1583
1584	/* Handle the case where only one irq is allocated for all tx queues */
1585	if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1586		int i;
1587		for (i = 0; i < adapter->num_tx_queues; i++) {
1588			struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1589			vmxnet3_tq_tx_complete(txq, adapter);
1590		}
1591	} else {
1592		vmxnet3_tq_tx_complete(tq, adapter);
1593	}
1594	vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1595
1596	return IRQ_HANDLED;
1597}
1598
1599
1600/*
1601 * Handle completion interrupts on rx queues. Returns whether or not the
1602 * intr is handled
1603 */
1604
1605static irqreturn_t
1606vmxnet3_msix_rx(int irq, void *data)
1607{
1608	struct vmxnet3_rx_queue *rq = data;
1609	struct vmxnet3_adapter *adapter = rq->adapter;
1610
1611	/* disable intr if needed */
1612	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1613		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1614	napi_schedule(&rq->napi);
1615
1616	return IRQ_HANDLED;
1617}
1618
1619/*
1620 *----------------------------------------------------------------------------
1621 *
1622 * vmxnet3_msix_event --
1623 *
1624 *    vmxnet3 msix event intr handler
1625 *
1626 * Result:
1627 *    whether or not the intr is handled
1628 *
1629 *----------------------------------------------------------------------------
1630 */
1631
1632static irqreturn_t
1633vmxnet3_msix_event(int irq, void *data)
1634{
1635	struct net_device *dev = data;
1636	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1637
1638	/* disable intr if needed */
1639	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1640		vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1641
1642	if (adapter->shared->ecr)
1643		vmxnet3_process_events(adapter);
1644
1645	vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1646
1647	return IRQ_HANDLED;
1648}
1649
1650#endif /* CONFIG_PCI_MSI  */
1651
1652
1653/* Interrupt handler for vmxnet3  */
1654static irqreturn_t
1655vmxnet3_intr(int irq, void *dev_id)
1656{
1657	struct net_device *dev = dev_id;
1658	struct vmxnet3_adapter *adapter = netdev_priv(dev);
1659
1660	if (adapter->intr.type == VMXNET3_IT_INTX) {
1661		u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1662		if (unlikely(icr == 0))
1663			/* not ours */
1664			return IRQ_NONE;
1665	}
1666
1667
1668	/* disable intr if needed */
1669	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1670		vmxnet3_disable_all_intrs(adapter);
1671
1672	napi_schedule(&adapter->rx_queue[0].napi);
1673
1674	return IRQ_HANDLED;
1675}
1676
1677#ifdef CONFIG_NET_POLL_CONTROLLER
1678
1679/* netpoll callback. */
1680static void
1681vmxnet3_netpoll(struct net_device *netdev)
1682{
1683	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1684
1685	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1686		vmxnet3_disable_all_intrs(adapter);
1687
1688	vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1689	vmxnet3_enable_all_intrs(adapter);
1690
1691}
1692#endif	/* CONFIG_NET_POLL_CONTROLLER */
1693
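/*
 * Request the device's irqs: one MSI-X vector per tx/rx queue (subject to the
 * configured sharing mode) plus an event vector, or a single MSI/INTx irq.
 * Also assigns the per-queue interrupt indices and default moderation levels.
 */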
1694static int
1695vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1696{
1697	struct vmxnet3_intr *intr = &adapter->intr;
1698	int err = 0, i;
1699	int vector = 0;
1700
1701#ifdef CONFIG_PCI_MSI
1702	if (adapter->intr.type == VMXNET3_IT_MSIX) {
1703		for (i = 0; i < adapter->num_tx_queues; i++) {
1704			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1705				sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1706					adapter->netdev->name, vector);
1707				err = request_irq(
1708					      intr->msix_entries[vector].vector,
1709					      vmxnet3_msix_tx, 0,
1710					      adapter->tx_queue[i].name,
1711					      &adapter->tx_queue[i]);
1712			} else {
1713				sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1714					adapter->netdev->name, vector);
1715			}
1716			if (err) {
1717				dev_err(&adapter->netdev->dev,
1718					"Failed to request irq for MSIX, %s, "
1719					"error %d\n",
1720					adapter->tx_queue[i].name, err);
1721				return err;
1722			}
1723
1724			/* Handle the case where only 1 MSIx was allocated for
1725			 * all tx queues */
1726			if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1727				for (; i < adapter->num_tx_queues; i++)
1728					adapter->tx_queue[i].comp_ring.intr_idx
1729								= vector;
1730				vector++;
1731				break;
1732			} else {
1733				adapter->tx_queue[i].comp_ring.intr_idx
1734								= vector++;
1735			}
1736		}
1737		if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1738			vector = 0;
1739
1740		for (i = 0; i < adapter->num_rx_queues; i++) {
1741			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1742				sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1743					adapter->netdev->name, vector);
1744			else
1745				sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1746					adapter->netdev->name, vector);
1747			err = request_irq(intr->msix_entries[vector].vector,
1748					  vmxnet3_msix_rx, 0,
1749					  adapter->rx_queue[i].name,
1750					  &(adapter->rx_queue[i]));
1751			if (err) {
1752				printk(KERN_ERR "Failed to request irq for MSIX"
1753				       ", %s, error %d\n",
1754				       adapter->rx_queue[i].name, err);
1755				return err;
1756			}
1757
1758			adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1759		}
1760
1761		sprintf(intr->event_msi_vector_name, "%s-event-%d",
1762			adapter->netdev->name, vector);
1763		err = request_irq(intr->msix_entries[vector].vector,
1764				  vmxnet3_msix_event, 0,
1765				  intr->event_msi_vector_name, adapter->netdev);
1766		intr->event_intr_idx = vector;
1767
1768	} else if (intr->type == VMXNET3_IT_MSI) {
1769		adapter->num_rx_queues = 1;
1770		err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1771				  adapter->netdev->name, adapter->netdev);
1772	} else {
1773#endif
1774		adapter->num_rx_queues = 1;
1775		err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1776				  IRQF_SHARED, adapter->netdev->name,
1777				  adapter->netdev);
1778#ifdef CONFIG_PCI_MSI
1779	}
1780#endif
1781	intr->num_intrs = vector + 1;
1782	if (err) {
1783		printk(KERN_ERR "Failed to request irq %s (intr type:%d), error"
1784		       ":%d\n", adapter->netdev->name, intr->type, err);
1785	} else {
1786		/* Number of rx queues will not change after this */
1787		for (i = 0; i < adapter->num_rx_queues; i++) {
1788			struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1789			rq->qid = i;
1790			rq->qid2 = i + adapter->num_rx_queues;
1791		}
1792
1793
1794
1795		/* init our intr settings */
1796		for (i = 0; i < intr->num_intrs; i++)
1797			intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1798		if (adapter->intr.type != VMXNET3_IT_MSIX) {
1799			adapter->intr.event_intr_idx = 0;
1800			for (i = 0; i < adapter->num_tx_queues; i++)
1801				adapter->tx_queue[i].comp_ring.intr_idx = 0;
1802			adapter->rx_queue[0].comp_ring.intr_idx = 0;
1803		}
1804
1805		printk(KERN_INFO "%s: intr type %u, mode %u, %u vectors "
1806		       "allocated\n", adapter->netdev->name, intr->type,
1807		       intr->mask_mode, intr->num_intrs);
1808	}
1809
1810	return err;
1811}
1812
1813
1814static void
1815vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1816{
1817	struct vmxnet3_intr *intr = &adapter->intr;
1818	BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1819
1820	switch (intr->type) {
1821#ifdef CONFIG_PCI_MSI
1822	case VMXNET3_IT_MSIX:
1823	{
1824		int i, vector = 0;
1825
1826		if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1827			for (i = 0; i < adapter->num_tx_queues; i++) {
1828				free_irq(intr->msix_entries[vector++].vector,
1829					 &(adapter->tx_queue[i]));
1830				if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1831					break;
1832			}
1833		}
1834
1835		for (i = 0; i < adapter->num_rx_queues; i++) {
1836			free_irq(intr->msix_entries[vector++].vector,
1837				 &(adapter->rx_queue[i]));
1838		}
1839
1840		free_irq(intr->msix_entries[vector].vector,
1841			 adapter->netdev);
1842		BUG_ON(vector >= intr->num_intrs);
1843		break;
1844	}
1845#endif
1846	case VMXNET3_IT_MSI:
1847		free_irq(adapter->pdev->irq, adapter->netdev);
1848		break;
1849	case VMXNET3_IT_INTX:
1850		free_irq(adapter->pdev->irq, adapter->netdev);
1851		break;
1852	default:
1853		BUG_ON(true);
1854	}
1855}
1856
1857
1858static void
1859vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1860{
1861	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1862	u16 vid;
1863
1864	/* allow untagged pkts */
1865	VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1866
1867	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1868		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1869}
1870
1871
1872static void
1873vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
1874{
1875	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1876	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1877	unsigned long flags;
1878
1879	VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1880	spin_lock_irqsave(&adapter->cmd_lock, flags);
1881	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1882			       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1883	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1884
1885	set_bit(vid, adapter->active_vlans);
1886}
1887
1888
1889static void
1890vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
1891{
1892	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1893	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1894	unsigned long flags;
1895
1896	VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1897	spin_lock_irqsave(&adapter->cmd_lock, flags);
1898	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1899			       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1900	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1901
1902	clear_bit(vid, adapter->active_vlans);
1903}
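
/*
 * Illustrative sketch only (not driver code): the vfTable updated by the two
 * handlers above is manipulated through VMXNET3_SET_VFTABLE_ENTRY and
 * VMXNET3_CLEAR_VFTABLE_ENTRY from vmxnet3_int.h.  The helpers below assume
 * the conventional "one bit per VLAN id, packed into 32-bit words" layout;
 * that layout and the example_ names are assumptions made for illustration,
 * the authoritative definitions live in the header.
 */
#include <stdint.h>

#define EXAMPLE_N_VID		4096			/* stand-in for VLAN_N_VID */
#define EXAMPLE_VFT_SIZE	(EXAMPLE_N_VID / 32)	/* u32 words in the table */

static void example_set_vftable_entry(uint32_t *vf_table, unsigned int vid)
{
	vf_table[vid >> 5] |= 1u << (vid & 31);		/* allow this VLAN id */
}

static void example_clear_vftable_entry(uint32_t *vf_table, unsigned int vid)
{
	vf_table[vid >> 5] &= ~(1u << (vid & 31));	/* filter this VLAN id */
}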
1904
1905
1906static u8 *
1907vmxnet3_copy_mc(struct net_device *netdev)
1908{
1909	u8 *buf = NULL;
1910	u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
1911
1912	/* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
1913	if (sz <= 0xffff) {
1914		/* We may be called with BH disabled */
1915		buf = kmalloc(sz, GFP_ATOMIC);
1916		if (buf) {
1917			struct netdev_hw_addr *ha;
1918			int i = 0;
1919
1920			netdev_for_each_mc_addr(ha, netdev)
1921				memcpy(buf + i++ * ETH_ALEN, ha->addr,
1922				       ETH_ALEN);
1923		}
1924	}
1925	return buf;
1926}
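
/*
 * Illustrative sketch only (not driver code) of the table layout produced by
 * vmxnet3_copy_mc() above: the device consumes back-to-back 6-byte addresses,
 * netdev_mc_count() * ETH_ALEN bytes in total, and the length must fit the
 * u16 mfTableLen field.  The example_ helper and EX_ETH_ALEN are hypothetical
 * names; plain user-space C is used instead of the kernel list iterators.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define EX_ETH_ALEN 6

static uint8_t *example_copy_mc(const uint8_t (*addrs)[EX_ETH_ALEN],
				unsigned int count)
{
	size_t sz = (size_t)count * EX_ETH_ALEN;
	uint8_t *buf;
	unsigned int i;

	if (sz > 0xffff)	/* must fit the u16 length field */
		return NULL;
	buf = malloc(sz);
	if (buf)
		for (i = 0; i < count; i++)
			memcpy(buf + i * EX_ETH_ALEN, addrs[i], EX_ETH_ALEN);
	return buf;
}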
1927
1928
1929static void
1930vmxnet3_set_mc(struct net_device *netdev)
1931{
1932	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1933	unsigned long flags;
1934	struct Vmxnet3_RxFilterConf *rxConf =
1935					&adapter->shared->devRead.rxFilterConf;
1936	u8 *new_table = NULL;
1937	u32 new_mode = VMXNET3_RXM_UCAST;
1938
1939	if (netdev->flags & IFF_PROMISC) {
1940		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1941		memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
1942
1943		new_mode |= VMXNET3_RXM_PROMISC;
1944	} else {
1945		vmxnet3_restore_vlan(adapter);
1946	}
1947
1948	if (netdev->flags & IFF_BROADCAST)
1949		new_mode |= VMXNET3_RXM_BCAST;
1950
1951	if (netdev->flags & IFF_ALLMULTI)
1952		new_mode |= VMXNET3_RXM_ALL_MULTI;
1953	else
1954		if (!netdev_mc_empty(netdev)) {
1955			new_table = vmxnet3_copy_mc(netdev);
1956			if (new_table) {
1957				new_mode |= VMXNET3_RXM_MCAST;
1958				rxConf->mfTableLen = cpu_to_le16(
1959					netdev_mc_count(netdev) * ETH_ALEN);
1960				rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
1961						    new_table));
1962			} else {
1963				printk(KERN_INFO "%s: failed to copy mcast list"
1964				       ", setting ALL_MULTI\n", netdev->name);
1965				new_mode |= VMXNET3_RXM_ALL_MULTI;
1966			}
1967		}
1968
1969
1970	if (!(new_mode & VMXNET3_RXM_MCAST)) {
1971		rxConf->mfTableLen = 0;
1972		rxConf->mfTablePA = 0;
1973	}
1974
1975	spin_lock_irqsave(&adapter->cmd_lock, flags);
1976	if (new_mode != rxConf->rxMode) {
1977		rxConf->rxMode = cpu_to_le32(new_mode);
1978		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1979				       VMXNET3_CMD_UPDATE_RX_MODE);
1980		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1981				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1982	}
1983
1984	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1985			       VMXNET3_CMD_UPDATE_MAC_FILTERS);
1986	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1987
1988	kfree(new_table);
1989}
1990
1991void
1992vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
1993{
1994	int i;
1995
1996	for (i = 0; i < adapter->num_rx_queues; i++)
1997		vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
1998}
1999
2000
2001/*
2002 *   Set up driver_shared based on settings in adapter.
2003 */
2004
2005static void
2006vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2007{
2008	struct Vmxnet3_DriverShared *shared = adapter->shared;
2009	struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2010	struct Vmxnet3_TxQueueConf *tqc;
2011	struct Vmxnet3_RxQueueConf *rqc;
2012	int i;
2013
2014	memset(shared, 0, sizeof(*shared));
2015
2016	/* driver settings */
2017	shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2018	devRead->misc.driverInfo.version = cpu_to_le32(
2019						VMXNET3_DRIVER_VERSION_NUM);
2020	devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2021				VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2022	devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
2023	*((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2024				*((u32 *)&devRead->misc.driverInfo.gos));
2025	devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2026	devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2027
2028	devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter));
2029	devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2030
2031	/* set up feature flags */
2032	if (adapter->netdev->features & NETIF_F_RXCSUM)
2033		devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2034
2035	if (adapter->netdev->features & NETIF_F_LRO) {
2036		devRead->misc.uptFeatures |= UPT1_F_LRO;
2037		devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2038	}
2039	if (adapter->netdev->features & NETIF_F_HW_VLAN_RX)
2040		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2041
2042	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2043	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2044	devRead->misc.queueDescLen = cpu_to_le32(
2045		adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2046		adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2047
2048	/* tx queue settings */
2049	devRead->misc.numTxQueues =  adapter->num_tx_queues;
2050	for (i = 0; i < adapter->num_tx_queues; i++) {
2051		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2052		BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2053		tqc = &adapter->tqd_start[i].conf;
2054		tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2055		tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2056		tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2057		tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
2058		tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2059		tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2060		tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2061		tqc->ddLen          = cpu_to_le32(
2062					sizeof(struct vmxnet3_tx_buf_info) *
2063					tqc->txRingSize);
2064		tqc->intrIdx        = tq->comp_ring.intr_idx;
2065	}
2066
2067	/* rx queue settings */
2068	devRead->misc.numRxQueues = adapter->num_rx_queues;
2069	for (i = 0; i < adapter->num_rx_queues; i++) {
2070		struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[i];
2071		rqc = &adapter->rqd_start[i].conf;
2072		rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2073		rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2074		rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2075		rqc->ddPA            = cpu_to_le64(virt_to_phys(
2076							rq->buf_info));
2077		rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2078		rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2079		rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2080		rqc->ddLen           = cpu_to_le32(
2081					sizeof(struct vmxnet3_rx_buf_info) *
2082					(rqc->rxRingSize[0] +
2083					 rqc->rxRingSize[1]));
2084		rqc->intrIdx         = rq->comp_ring.intr_idx;
2085	}
2086
2087#ifdef VMXNET3_RSS
2088	memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2089
2090	if (adapter->rss) {
2091		struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2092		devRead->misc.uptFeatures |= UPT1_F_RSS;
2093		devRead->misc.numRxQueues = adapter->num_rx_queues;
2094		rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2095				    UPT1_RSS_HASH_TYPE_IPV4 |
2096				    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2097				    UPT1_RSS_HASH_TYPE_IPV6;
2098		rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2099		rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2100		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2101		get_random_bytes(&rssConf->hashKey[0], rssConf->hashKeySize);
2102		for (i = 0; i < rssConf->indTableSize; i++)
2103			rssConf->indTable[i] = i % adapter->num_rx_queues;
2104
2105		devRead->rssConfDesc.confVer = 1;
2106		devRead->rssConfDesc.confLen = sizeof(*rssConf);
2107		devRead->rssConfDesc.confPA  = virt_to_phys(rssConf);
2108	}
2109
2110#endif /* VMXNET3_RSS */
2111
2112	/* intr settings */
2113	devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2114				     VMXNET3_IMM_AUTO;
2115	devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2116	for (i = 0; i < adapter->intr.num_intrs; i++)
2117		devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2118
2119	devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2120	devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2121
2122	/* rx filter settings */
2123	devRead->rxFilterConf.rxMode = 0;
2124	vmxnet3_restore_vlan(adapter);
2125	vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2126
2127	/* the rest are already zeroed */
2128}
2129
2130
2131int
2132vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2133{
2134	int err, i;
2135	u32 ret;
2136	unsigned long flags;
2137
2138	dev_dbg(&adapter->netdev->dev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2139		" ring sizes %u %u %u\n", adapter->netdev->name,
2140		adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2141		adapter->tx_queue[0].tx_ring.size,
2142		adapter->rx_queue[0].rx_ring[0].size,
2143		adapter->rx_queue[0].rx_ring[1].size);
2144
2145	vmxnet3_tq_init_all(adapter);
2146	err = vmxnet3_rq_init_all(adapter);
2147	if (err) {
2148		printk(KERN_ERR "Failed to init rx queue for %s: error %d\n",
2149		       adapter->netdev->name, err);
2150		goto rq_err;
2151	}
2152
2153	err = vmxnet3_request_irqs(adapter);
2154	if (err) {
2155		printk(KERN_ERR "Failed to setup irq for %s: error %d\n",
2156		       adapter->netdev->name, err);
2157		goto irq_err;
2158	}
2159
2160	vmxnet3_setup_driver_shared(adapter);
2161
2162	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2163			       adapter->shared_pa));
2164	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2165			       adapter->shared_pa));
2166	spin_lock_irqsave(&adapter->cmd_lock, flags);
2167	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2168			       VMXNET3_CMD_ACTIVATE_DEV);
2169	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2170	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2171
2172	if (ret != 0) {
2173		printk(KERN_ERR "Failed to activate dev %s: error %u\n",
2174		       adapter->netdev->name, ret);
2175		err = -EINVAL;
2176		goto activate_err;
2177	}
2178
2179	for (i = 0; i < adapter->num_rx_queues; i++) {
2180		VMXNET3_WRITE_BAR0_REG(adapter,
2181				VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2182				adapter->rx_queue[i].rx_ring[0].next2fill);
2183		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2184				(i * VMXNET3_REG_ALIGN)),
2185				adapter->rx_queue[i].rx_ring[1].next2fill);
2186	}
2187
2188	/* Apply the rx filter settings last. */
2189	vmxnet3_set_mc(adapter->netdev);
2190
2191	/*
2192	 * Check link state when first activating device. It will start the
2193	 * tx queue if the link is up.
2194	 */
2195	vmxnet3_check_link(adapter, true);
2196	for (i = 0; i < adapter->num_rx_queues; i++)
2197		napi_enable(&adapter->rx_queue[i].napi);
2198	vmxnet3_enable_all_intrs(adapter);
2199	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2200	return 0;
2201
2202activate_err:
2203	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2204	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2205	vmxnet3_free_irqs(adapter);
2206irq_err:
2207rq_err:
2208	/* free up buffers we allocated */
2209	vmxnet3_rq_cleanup_all(adapter);
2210	return err;
2211}
2212
2213
2214void
2215vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2216{
2217	unsigned long flags;
2218	spin_lock_irqsave(&adapter->cmd_lock, flags);
2219	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2220	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2221}
2222
2223
2224int
2225vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2226{
2227	int i;
2228	unsigned long flags;
2229	if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2230		return 0;
2231
2232
2233	spin_lock_irqsave(&adapter->cmd_lock, flags);
2234	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2235			       VMXNET3_CMD_QUIESCE_DEV);
2236	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2237	vmxnet3_disable_all_intrs(adapter);
2238
2239	for (i = 0; i < adapter->num_rx_queues; i++)
2240		napi_disable(&adapter->rx_queue[i].napi);
2241	netif_tx_disable(adapter->netdev);
2242	adapter->link_speed = 0;
2243	netif_carrier_off(adapter->netdev);
2244
2245	vmxnet3_tq_cleanup_all(adapter);
2246	vmxnet3_rq_cleanup_all(adapter);
2247	vmxnet3_free_irqs(adapter);
2248	return 0;
2249}
2250
2251
2252static void
2253vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2254{
2255	u32 tmp;
2256
2257	tmp = *(u32 *)mac;
2258	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2259
2260	tmp = (mac[5] << 8) | mac[4];
2261	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2262}
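
/*
 * Illustrative sketch only (not driver code): vmxnet3_write_mac_addr() above
 * loads the first four MAC bytes into the MACL register and the last two into
 * the low 16 bits of MACH (vmxnet3_read_mac_addr() further down reverses the
 * same packing).  The driver casts the byte array directly, which on a
 * little-endian host is equivalent to the byte-wise packing below; pack_mac()
 * and the sample address are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

static void pack_mac(const uint8_t mac[6], uint32_t *macl, uint32_t *mach)
{
	*macl = (uint32_t)mac[0] | (uint32_t)mac[1] << 8 |
		(uint32_t)mac[2] << 16 | (uint32_t)mac[3] << 24;
	*mach = (uint32_t)mac[4] | (uint32_t)mac[5] << 8;
}

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x0c, 0x29, 0xab, 0xcd, 0xef };
	uint32_t macl, mach;

	pack_mac(mac, &macl, &mach);
	printf("MACL=0x%08x MACH=0x%04x\n", (unsigned)macl, (unsigned)mach);
	/* prints MACL=0xab290c00 MACH=0xefcd */
	return 0;
}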
2263
2264
2265static int
2266vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2267{
2268	struct sockaddr *addr = p;
2269	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2270
2271	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2272	vmxnet3_write_mac_addr(adapter, addr->sa_data);
2273
2274	return 0;
2275}
2276
2277
2278/* ==================== initialization and cleanup routines ============ */
2279
2280static int
2281vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2282{
2283	int err;
2284	unsigned long mmio_start, mmio_len;
2285	struct pci_dev *pdev = adapter->pdev;
2286
2287	err = pci_enable_device(pdev);
2288	if (err) {
2289		printk(KERN_ERR "Failed to enable adapter %s: error %d\n",
2290		       pci_name(pdev), err);
2291		return err;
2292	}
2293
2294	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2295		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2296			printk(KERN_ERR "pci_set_consistent_dma_mask failed "
2297			       "for adapter %s\n", pci_name(pdev));
2298			err = -EIO;
2299			goto err_set_mask;
2300		}
2301		*dma64 = true;
2302	} else {
2303		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2304			printk(KERN_ERR "pci_set_dma_mask failed for adapter "
2305			       "%s\n",	pci_name(pdev));
2306			err = -EIO;
2307			goto err_set_mask;
2308		}
2309		*dma64 = false;
2310	}
2311
2312	err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2313					   vmxnet3_driver_name);
2314	if (err) {
2315		printk(KERN_ERR "Failed to request region for adapter %s: "
2316		       "error %d\n", pci_name(pdev), err);
2317		goto err_set_mask;
2318	}
2319
2320	pci_set_master(pdev);
2321
2322	mmio_start = pci_resource_start(pdev, 0);
2323	mmio_len = pci_resource_len(pdev, 0);
2324	adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2325	if (!adapter->hw_addr0) {
2326		printk(KERN_ERR "Failed to map bar0 for adapter %s\n",
2327		       pci_name(pdev));
2328		err = -EIO;
2329		goto err_ioremap;
2330	}
2331
2332	mmio_start = pci_resource_start(pdev, 1);
2333	mmio_len = pci_resource_len(pdev, 1);
2334	adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2335	if (!adapter->hw_addr1) {
2336		printk(KERN_ERR "Failed to map bar1 for adapter %s\n",
2337		       pci_name(pdev));
2338		err = -EIO;
2339		goto err_bar1;
2340	}
2341	return 0;
2342
2343err_bar1:
2344	iounmap(adapter->hw_addr0);
2345err_ioremap:
2346	pci_release_selected_regions(pdev, (1 << 2) - 1);
2347err_set_mask:
2348	pci_disable_device(pdev);
2349	return err;
2350}
2351
2352
2353static void
2354vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2355{
2356	BUG_ON(!adapter->pdev);
2357
2358	iounmap(adapter->hw_addr0);
2359	iounmap(adapter->hw_addr1);
2360	pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2361	pci_disable_device(adapter->pdev);
2362}
2363
2364
2365static void
2366vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2367{
2368	size_t sz, i, ring0_size, ring1_size, comp_size;
2369	struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[0];
2370
2371
2372	if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2373				    VMXNET3_MAX_ETH_HDR_SIZE) {
2374		adapter->skb_buf_size = adapter->netdev->mtu +
2375					VMXNET3_MAX_ETH_HDR_SIZE;
2376		if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2377			adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2378
2379		adapter->rx_buf_per_pkt = 1;
2380	} else {
2381		adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2382		sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2383					    VMXNET3_MAX_ETH_HDR_SIZE;
2384		adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2385	}
2386
2387	/*
2388	 * for simplicity, force the ring0 size to be a multiple of
2389	 * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2390	 */
2391	sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2392	ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2393	ring0_size = (ring0_size + sz - 1) / sz * sz;
2394	ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2395			   sz * sz);
2396	ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2397	comp_size = ring0_size + ring1_size;
2398
2399	for (i = 0; i < adapter->num_rx_queues; i++) {
2400		rq = &adapter->rx_queue[i];
2401		rq->rx_ring[0].size = ring0_size;
2402		rq->rx_ring[1].size = ring1_size;
2403		rq->comp_ring.size = comp_size;
2404	}
2405}
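
/*
 * Illustrative sketch only (not driver code) of the ring0 sizing arithmetic
 * in vmxnet3_adjust_rx_ring_size() above: round the requested size up to a
 * multiple of rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN, then cap it at the
 * largest such multiple not exceeding VMXNET3_RX_RING_MAX_SIZE.  The helper
 * name and the numbers in main() are hypothetical stand-ins.
 */
#include <stdio.h>

static unsigned int example_round_ring0(unsigned int requested,
					unsigned int rx_buf_per_pkt,
					unsigned int ring_align,
					unsigned int ring_max)
{
	unsigned int sz = rx_buf_per_pkt * ring_align;
	unsigned int cap = ring_max / sz * sz;	/* largest multiple of sz <= max */

	requested = (requested + sz - 1) / sz * sz;	/* round up */
	return requested < cap ? requested : cap;	/* min_t() equivalent */
}

int main(void)
{
	/* e.g. 3 buffers per packet, align 32, ring max 4096, 1000 requested */
	printf("%u\n", example_round_ring0(1000, 3, 32, 4096));	/* prints 1056 */
	return 0;
}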
2406
2407
2408int
2409vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2410		      u32 rx_ring_size, u32 rx_ring2_size)
2411{
2412	int err = 0, i;
2413
2414	for (i = 0; i < adapter->num_tx_queues; i++) {
2415		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
2416		tq->tx_ring.size   = tx_ring_size;
2417		tq->data_ring.size = tx_ring_size;
2418		tq->comp_ring.size = tx_ring_size;
2419		tq->shared = &adapter->tqd_start[i].ctrl;
2420		tq->stopped = true;
2421		tq->adapter = adapter;
2422		tq->qid = i;
2423		err = vmxnet3_tq_create(tq, adapter);
2424		/*
2425		 * Too late to change num_tx_queues. We cannot do away with
2426		 * Too late to change num_tx_queues. We cannot make do with
2427		 * fewer queues than we asked for.
2428		if (err)
2429			goto queue_err;
2430	}
2431
2432	adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2433	adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2434	vmxnet3_adjust_rx_ring_size(adapter);
2435	for (i = 0; i < adapter->num_rx_queues; i++) {
2436		struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2437		/* qid and qid2 for rx queues will be assigned later when the
2438		 * number of rx queues is finalized after allocating intrs */
2439		rq->shared = &adapter->rqd_start[i].ctrl;
2440		rq->adapter = adapter;
2441		err = vmxnet3_rq_create(rq, adapter);
2442		if (err) {
2443			if (i == 0) {
2444				printk(KERN_ERR "Could not allocate any rx "
2445				       "queues. Aborting.\n");
2446				goto queue_err;
2447			} else {
2448				printk(KERN_INFO "Number of rx queues changed "
2449				       "to : %d.\n", i);
2450				adapter->num_rx_queues = i;
2451				err = 0;
2452				break;
2453			}
2454		}
2455	}
2456	return err;
2457queue_err:
2458	vmxnet3_tq_destroy_all(adapter);
2459	return err;
2460}
2461
2462static int
2463vmxnet3_open(struct net_device *netdev)
2464{
2465	struct vmxnet3_adapter *adapter;
2466	int err, i;
2467
2468	adapter = netdev_priv(netdev);
2469
2470	for (i = 0; i < adapter->num_tx_queues; i++)
2471		spin_lock_init(&adapter->tx_queue[i].tx_lock);
2472
2473	err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2474				    VMXNET3_DEF_RX_RING_SIZE,
2475				    VMXNET3_DEF_RX_RING_SIZE);
2476	if (err)
2477		goto queue_err;
2478
2479	err = vmxnet3_activate_dev(adapter);
2480	if (err)
2481		goto activate_err;
2482
2483	return 0;
2484
2485activate_err:
2486	vmxnet3_rq_destroy_all(adapter);
2487	vmxnet3_tq_destroy_all(adapter);
2488queue_err:
2489	return err;
2490}
2491
2492
2493static int
2494vmxnet3_close(struct net_device *netdev)
2495{
2496	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2497
2498	/*
2499	 * Reset_work may be in the middle of resetting the device; wait for its
2500	 * completion.
2501	 */
2502	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2503		msleep(1);
2504
2505	vmxnet3_quiesce_dev(adapter);
2506
2507	vmxnet3_rq_destroy_all(adapter);
2508	vmxnet3_tq_destroy_all(adapter);
2509
2510	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2511
2512
2513	return 0;
2514}
2515
2516
2517void
2518vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2519{
2520	int i;
2521
2522	/*
2523	 * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2524	 * vmxnet3_close() will deadlock.
2525	 */
2526	BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2527
2528	/* we need to enable NAPI, otherwise dev_close will deadlock */
2529	for (i = 0; i < adapter->num_rx_queues; i++)
2530		napi_enable(&adapter->rx_queue[i].napi);
2531	dev_close(adapter->netdev);
2532}
2533
2534
2535static int
2536vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2537{
2538	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2539	int err = 0;
2540
2541	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2542		return -EINVAL;
2543
2544	netdev->mtu = new_mtu;
2545
2546	/*
2547	 * Reset_work may be in the middle of resetting the device; wait for its
2548	 * completion.
2549	 */
2550	while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2551		msleep(1);
2552
2553	if (netif_running(netdev)) {
2554		vmxnet3_quiesce_dev(adapter);
2555		vmxnet3_reset_dev(adapter);
2556
2557		/* we need to re-create the rx queue based on the new mtu */
2558		vmxnet3_rq_destroy_all(adapter);
2559		vmxnet3_adjust_rx_ring_size(adapter);
2560		err = vmxnet3_rq_create_all(adapter);
2561		if (err) {
2562			printk(KERN_ERR "%s: failed to re-create rx queues,"
2563				" error %d. Closing it.\n", netdev->name, err);
2564			goto out;
2565		}
2566
2567		err = vmxnet3_activate_dev(adapter);
2568		if (err) {
2569			printk(KERN_ERR "%s: failed to re-activate, error %d. "
2570				"Closing it\n", netdev->name, err);
2571			goto out;
2572		}
2573	}
2574
2575out:
2576	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2577	if (err)
2578		vmxnet3_force_close(adapter);
2579
2580	return err;
2581}
2582
2583
2584static void
2585vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2586{
2587	struct net_device *netdev = adapter->netdev;
2588
2589	netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2590		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX |
2591		NETIF_F_HW_VLAN_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2592		NETIF_F_LRO;
2593	if (dma64)
2594		netdev->features |= NETIF_F_HIGHDMA;
2595	netdev->vlan_features = netdev->hw_features &
2596				~(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX);
2597	netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_FILTER;
2598
2599	netdev_info(adapter->netdev,
2600		"features: sg csum vlan jf tso tsoIPv6 lro%s\n",
2601		dma64 ? " highDMA" : "");
2602}
2603
2604
2605static void
2606vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2607{
2608	u32 tmp;
2609
2610	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2611	*(u32 *)mac = tmp;
2612
2613	tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2614	mac[4] = tmp & 0xff;
2615	mac[5] = (tmp >> 8) & 0xff;
2616}
2617
2618#ifdef CONFIG_PCI_MSI
2619
2620/*
2621 * Enable MSIx vectors.
2622 * Returns :
2623 *	0 on successful enabling of required vectors,
2624 *	VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required
2625 *	 could be enabled.
2626 *	number of vectors which can be enabled otherwise (this number is smaller
2627 *	 than VMXNET3_LINUX_MIN_MSIX_VECT)
2628 */
2629
2630static int
2631vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2632			     int vectors)
2633{
2634	int err = 0, vector_threshold;
2635	vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2636
2637	while (vectors >= vector_threshold) {
2638		err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2639				      vectors);
2640		if (!err) {
2641			adapter->intr.num_intrs = vectors;
2642			return 0;
2643		} else if (err < 0) {
2644			printk(KERN_ERR "Failed to enable MSI-X for %s, error"
2645			       " %d\n",	adapter->netdev->name, err);
2646			vectors = 0;
2647		} else if (err < vector_threshold) {
2648			break;
2649		} else {
2650			/* If we fail to enable the required number of MSI-X
2651			 * vectors, retry with the minimum number required.
2652			 */
2653			printk(KERN_ERR "Failed to enable %d MSI-X for %s, try"
2654			       " %d instead\n", vectors, adapter->netdev->name,
2655			       vector_threshold);
2656			vectors = vector_threshold;
2657		}
2658	}
2659
2660	printk(KERN_INFO "Number of MSI-X interrupts which can be allocated"
2661	       " is lower than the minimum threshold required.\n");
2662	return err;
2663}
2664
2665
2666#endif /* CONFIG_PCI_MSI */
2667
2668static void
2669vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2670{
2671	u32 cfg;
2672	unsigned long flags;
2673
2674	/* intr settings */
2675	spin_lock_irqsave(&adapter->cmd_lock, flags);
2676	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2677			       VMXNET3_CMD_GET_CONF_INTR);
2678	cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2679	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2680	adapter->intr.type = cfg & 0x3;
2681	adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2682
2683	if (adapter->intr.type == VMXNET3_IT_AUTO) {
2684		adapter->intr.type = VMXNET3_IT_MSIX;
2685	}
2686
2687#ifdef CONFIG_PCI_MSI
2688	if (adapter->intr.type == VMXNET3_IT_MSIX) {
2689		int vector, err = 0;
2690
2691		adapter->intr.num_intrs = (adapter->share_intr ==
2692					   VMXNET3_INTR_TXSHARE) ? 1 :
2693					   adapter->num_tx_queues;
2694		adapter->intr.num_intrs += (adapter->share_intr ==
2695					   VMXNET3_INTR_BUDDYSHARE) ? 0 :
2696					   adapter->num_rx_queues;
2697		adapter->intr.num_intrs += 1;		/* for link event */
2698
2699		adapter->intr.num_intrs = (adapter->intr.num_intrs >
2700					   VMXNET3_LINUX_MIN_MSIX_VECT
2701					   ? adapter->intr.num_intrs :
2702					   VMXNET3_LINUX_MIN_MSIX_VECT);
2703
2704		for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2705			adapter->intr.msix_entries[vector].entry = vector;
2706
2707		err = vmxnet3_acquire_msix_vectors(adapter,
2708						   adapter->intr.num_intrs);
2709		/* If we cannot allocate one MSIx vector per queue
2710		 * then limit the number of rx queues to 1
2711		 */
2712		if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2713			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2714			    || adapter->num_rx_queues != 1) {
2715				adapter->share_intr = VMXNET3_INTR_TXSHARE;
2716				printk(KERN_ERR "Number of rx queues : 1\n");
2717				adapter->num_rx_queues = 1;
2718				adapter->intr.num_intrs =
2719						VMXNET3_LINUX_MIN_MSIX_VECT;
2720			}
2721			return;
2722		}
2723		if (!err)
2724			return;
2725
2726		/* If we cannot allocate MSIx vectors use only one rx queue */
2727		printk(KERN_INFO "Failed to enable MSI-X for %s, error %d. "
2728		       "#rx queues : 1, try MSI\n", adapter->netdev->name, err);
2729
2730		adapter->intr.type = VMXNET3_IT_MSI;
2731	}
2732
2733	if (adapter->intr.type == VMXNET3_IT_MSI) {
2734		int err;
2735		err = pci_enable_msi(adapter->pdev);
2736		if (!err) {
2737			adapter->num_rx_queues = 1;
2738			adapter->intr.num_intrs = 1;
2739			return;
2740		}
2741	}
2742#endif /* CONFIG_PCI_MSI */
2743
2744	adapter->num_rx_queues = 1;
2745	printk(KERN_INFO "Using INTx interrupt, #Rx queues: 1.\n");
2746	adapter->intr.type = VMXNET3_IT_INTX;
2747
2748	/* INT-X related setting */
2749	adapter->intr.num_intrs = 1;
2750}
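
/*
 * Illustrative sketch only (not driver code) of the MSI-X vector budget
 * computed in vmxnet3_alloc_intr_resources() above: one vector per tx queue
 * (or a single shared tx vector), one per rx queue unless rx "buddies" with
 * tx, plus one for link events, never below the VMXNET3_LINUX_MIN_MSIX_VECT
 * floor.  The enum, helper name and the min_vect value in main() are
 * hypothetical.
 */
#include <stdio.h>

enum example_share_mode { EX_DONTSHARE, EX_TXSHARE, EX_BUDDYSHARE };

static int example_msix_vectors(enum example_share_mode share,
				int num_tx, int num_rx, int min_vect)
{
	int n = (share == EX_TXSHARE) ? 1 : num_tx;	/* tx completion */

	n += (share == EX_BUDDYSHARE) ? 0 : num_rx;	/* rx completion */
	n += 1;						/* link events */
	return n > min_vect ? n : min_vect;
}

int main(void)
{
	/* e.g. 4 tx and 4 rx queues, no sharing, a floor of 2 vectors */
	printf("%d\n", example_msix_vectors(EX_DONTSHARE, 4, 4, 2));	/* prints 9 */
	return 0;
}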
2751
2752
2753static void
2754vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2755{
2756	if (adapter->intr.type == VMXNET3_IT_MSIX)
2757		pci_disable_msix(adapter->pdev);
2758	else if (adapter->intr.type == VMXNET3_IT_MSI)
2759		pci_disable_msi(adapter->pdev);
2760	else
2761		BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2762}
2763
2764
2765static void
2766vmxnet3_tx_timeout(struct net_device *netdev)
2767{
2768	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2769	adapter->tx_timeout_count++;
2770
2771	printk(KERN_ERR "%s: tx hang\n", adapter->netdev->name);
2772	schedule_work(&adapter->work);
2773	netif_wake_queue(adapter->netdev);
2774}
2775
2776
2777static void
2778vmxnet3_reset_work(struct work_struct *data)
2779{
2780	struct vmxnet3_adapter *adapter;
2781
2782	adapter = container_of(data, struct vmxnet3_adapter, work);
2783
2784	/* if another thread is resetting the device, no need to proceed */
2785	if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2786		return;
2787
2788	/* if the device is closed, we must leave it alone */
2789	rtnl_lock();
2790	if (netif_running(adapter->netdev)) {
2791		printk(KERN_INFO "%s: resetting\n", adapter->netdev->name);
2792		vmxnet3_quiesce_dev(adapter);
2793		vmxnet3_reset_dev(adapter);
2794		vmxnet3_activate_dev(adapter);
2795	} else {
2796		printk(KERN_INFO "%s: already closed\n", adapter->netdev->name);
2797	}
2798	rtnl_unlock();
2799
2800	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2801}
2802
2803
2804static int __devinit
2805vmxnet3_probe_device(struct pci_dev *pdev,
2806		     const struct pci_device_id *id)
2807{
2808	static const struct net_device_ops vmxnet3_netdev_ops = {
2809		.ndo_open = vmxnet3_open,
2810		.ndo_stop = vmxnet3_close,
2811		.ndo_start_xmit = vmxnet3_xmit_frame,
2812		.ndo_set_mac_address = vmxnet3_set_mac_addr,
2813		.ndo_change_mtu = vmxnet3_change_mtu,
2814		.ndo_set_features = vmxnet3_set_features,
2815		.ndo_get_stats64 = vmxnet3_get_stats64,
2816		.ndo_tx_timeout = vmxnet3_tx_timeout,
2817		.ndo_set_multicast_list = vmxnet3_set_mc,
2818		.ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2819		.ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2820#ifdef CONFIG_NET_POLL_CONTROLLER
2821		.ndo_poll_controller = vmxnet3_netpoll,
2822#endif
2823	};
2824	int err;
2825	bool dma64 = false; /* stupid gcc */
2826	u32 ver;
2827	struct net_device *netdev;
2828	struct vmxnet3_adapter *adapter;
2829	u8 mac[ETH_ALEN];
2830	int size;
2831	int num_tx_queues;
2832	int num_rx_queues;
2833
2834	if (!pci_msi_enabled())
2835		enable_mq = 0;
2836
2837#ifdef VMXNET3_RSS
2838	if (enable_mq)
2839		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2840				    (int)num_online_cpus());
2841	else
2842#endif
2843		num_rx_queues = 1;
2844
2845	if (enable_mq)
2846		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2847				    (int)num_online_cpus());
2848	else
2849		num_tx_queues = 1;
2850
2851	netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2852				   max(num_tx_queues, num_rx_queues));
2853	printk(KERN_INFO "# of Tx queues : %d, # of Rx queues : %d\n",
2854	       num_tx_queues, num_rx_queues);
2855
2856	if (!netdev) {
2857		printk(KERN_ERR "Failed to alloc ethernet device for adapter "
2858			"%s\n",	pci_name(pdev));
2859		return -ENOMEM;
2860	}
2861
2862	pci_set_drvdata(pdev, netdev);
2863	adapter = netdev_priv(netdev);
2864	adapter->netdev = netdev;
2865	adapter->pdev = pdev;
2866
2867	spin_lock_init(&adapter->cmd_lock);
2868	adapter->shared = pci_alloc_consistent(adapter->pdev,
2869			  sizeof(struct Vmxnet3_DriverShared),
2870			  &adapter->shared_pa);
2871	if (!adapter->shared) {
2872		printk(KERN_ERR "Failed to allocate memory for %s\n",
2873			pci_name(pdev));
2874		err = -ENOMEM;
2875		goto err_alloc_shared;
2876	}
2877
2878	adapter->num_rx_queues = num_rx_queues;
2879	adapter->num_tx_queues = num_tx_queues;
2880
2881	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2882	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2883	adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
2884			     &adapter->queue_desc_pa);
2885
2886	if (!adapter->tqd_start) {
2887		printk(KERN_ERR "Failed to allocate memory for %s\n",
2888			pci_name(pdev));
2889		err = -ENOMEM;
2890		goto err_alloc_queue_desc;
2891	}
2892	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
2893							adapter->num_tx_queues);
2894
2895	adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
2896	if (adapter->pm_conf == NULL) {
2897		printk(KERN_ERR "Failed to allocate memory for %s\n",
2898			pci_name(pdev));
2899		err = -ENOMEM;
2900		goto err_alloc_pm;
2901	}
2902
2903#ifdef VMXNET3_RSS
2904
2905	adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
2906	if (adapter->rss_conf == NULL) {
2907		printk(KERN_ERR "Failed to allocate memory for %s\n",
2908		       pci_name(pdev));
2909		err = -ENOMEM;
2910		goto err_alloc_rss;
2911	}
2912#endif /* VMXNET3_RSS */
2913
2914	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
2915	if (err < 0)
2916		goto err_alloc_pci;
2917
2918	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
2919	if (ver & 1) {
2920		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
2921	} else {
2922		printk(KERN_ERR "Incompatible h/w version (0x%x) for adapter"
2923		       " %s\n",	ver, pci_name(pdev));
2924		err = -EBUSY;
2925		goto err_ver;
2926	}
2927
2928	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
2929	if (ver & 1) {
2930		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
2931	} else {
2932		printk(KERN_ERR "Incompatible upt version (0x%x) for "
2933		       "adapter %s\n", ver, pci_name(pdev));
2934		err = -EBUSY;
2935		goto err_ver;
2936	}
2937
2938	vmxnet3_declare_features(adapter, dma64);
2939
2940	adapter->dev_number = atomic_read(&devices_found);
2941
2942	adapter->share_intr = irq_share_mode;
2943	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE &&
2944	    adapter->num_tx_queues != adapter->num_rx_queues)
2945		adapter->share_intr = VMXNET3_INTR_DONTSHARE;
2946
2947	vmxnet3_alloc_intr_resources(adapter);
2948
2949#ifdef VMXNET3_RSS
2950	if (adapter->num_rx_queues > 1 &&
2951	    adapter->intr.type == VMXNET3_IT_MSIX) {
2952		adapter->rss = true;
2953		printk(KERN_INFO "RSS is enabled.\n");
2954	} else {
2955		adapter->rss = false;
2956	}
2957#endif
2958
2959	vmxnet3_read_mac_addr(adapter, mac);
2960	memcpy(netdev->dev_addr,  mac, netdev->addr_len);
2961
2962	netdev->netdev_ops = &vmxnet3_netdev_ops;
2963	vmxnet3_set_ethtool_ops(netdev);
2964	netdev->watchdog_timeo = 5 * HZ;
2965
2966	INIT_WORK(&adapter->work, vmxnet3_reset_work);
2967
2968	if (adapter->intr.type == VMXNET3_IT_MSIX) {
2969		int i;
2970		for (i = 0; i < adapter->num_rx_queues; i++) {
2971			netif_napi_add(adapter->netdev,
2972				       &adapter->rx_queue[i].napi,
2973				       vmxnet3_poll_rx_only, 64);
2974		}
2975	} else {
2976		netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
2977			       vmxnet3_poll, 64);
2978	}
2979
2980	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
2981	netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
2982
2983	SET_NETDEV_DEV(netdev, &pdev->dev);
2984	err = register_netdev(netdev);
2985
2986	if (err) {
2987		printk(KERN_ERR "Failed to register adapter %s\n",
2988			pci_name(pdev));
2989		goto err_register;
2990	}
2991
2992	set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2993	vmxnet3_check_link(adapter, false);
2994	atomic_inc(&devices_found);
2995	return 0;
2996
2997err_register:
2998	vmxnet3_free_intr_resources(adapter);
2999err_ver:
3000	vmxnet3_free_pci_resources(adapter);
3001err_alloc_pci:
3002#ifdef VMXNET3_RSS
3003	kfree(adapter->rss_conf);
3004err_alloc_rss:
3005#endif
3006	kfree(adapter->pm_conf);
3007err_alloc_pm:
3008	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3009			    adapter->queue_desc_pa);
3010err_alloc_queue_desc:
3011	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3012			    adapter->shared, adapter->shared_pa);
3013err_alloc_shared:
3014	pci_set_drvdata(pdev, NULL);
3015	free_netdev(netdev);
3016	return err;
3017}
3018
3019
3020static void __devexit
3021vmxnet3_remove_device(struct pci_dev *pdev)
3022{
3023	struct net_device *netdev = pci_get_drvdata(pdev);
3024	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3025	int size = 0;
3026	int num_rx_queues;
3027
3028#ifdef VMXNET3_RSS
3029	if (enable_mq)
3030		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3031				    (int)num_online_cpus());
3032	else
3033#endif
3034		num_rx_queues = 1;
3035
3036	cancel_work_sync(&adapter->work);
3037
3038	unregister_netdev(netdev);
3039
3040	vmxnet3_free_intr_resources(adapter);
3041	vmxnet3_free_pci_resources(adapter);
3042#ifdef VMXNET3_RSS
3043	kfree(adapter->rss_conf);
3044#endif
3045	kfree(adapter->pm_conf);
3046
3047	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3048	size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3049	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3050			    adapter->queue_desc_pa);
3051	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3052			    adapter->shared, adapter->shared_pa);
3053	free_netdev(netdev);
3054}
3055
3056
3057#ifdef CONFIG_PM
3058
3059static int
3060vmxnet3_suspend(struct device *device)
3061{
3062	struct pci_dev *pdev = to_pci_dev(device);
3063	struct net_device *netdev = pci_get_drvdata(pdev);
3064	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3065	struct Vmxnet3_PMConf *pmConf;
3066	struct ethhdr *ehdr;
3067	struct arphdr *ahdr;
3068	u8 *arpreq;
3069	struct in_device *in_dev;
3070	struct in_ifaddr *ifa;
3071	unsigned long flags;
3072	int i = 0;
3073
3074	if (!netif_running(netdev))
3075		return 0;
3076
3077	for (i = 0; i < adapter->num_rx_queues; i++)
3078		napi_disable(&adapter->rx_queue[i].napi);
3079
3080	vmxnet3_disable_all_intrs(adapter);
3081	vmxnet3_free_irqs(adapter);
3082	vmxnet3_free_intr_resources(adapter);
3083
3084	netif_device_detach(netdev);
3085	netif_tx_stop_all_queues(netdev);
3086
3087	/* Create wake-up filters. */
3088	pmConf = adapter->pm_conf;
3089	memset(pmConf, 0, sizeof(*pmConf));
3090
3091	if (adapter->wol & WAKE_UCAST) {
3092		pmConf->filters[i].patternSize = ETH_ALEN;
3093		pmConf->filters[i].maskSize = 1;
3094		memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3095		pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3096
3097		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3098		i++;
3099	}
3100
3101	if (adapter->wol & WAKE_ARP) {
3102		in_dev = in_dev_get(netdev);
3103		if (!in_dev)
3104			goto skip_arp;
3105
3106		ifa = (struct in_ifaddr *)in_dev->ifa_list;
3107		if (!ifa)
3108			goto skip_arp;
3109
3110		pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3111			sizeof(struct arphdr) +		/* ARP header */
3112			2 * ETH_ALEN +		/* 2 Ethernet addresses*/
3113			2 * sizeof(u32);	/*2 IPv4 addresses */
3114		pmConf->filters[i].maskSize =
3115			(pmConf->filters[i].patternSize - 1) / 8 + 1;
3116
3117		/* ETH_P_ARP in Ethernet header. */
3118		ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3119		ehdr->h_proto = htons(ETH_P_ARP);
3120
3121		/* ARPOP_REQUEST in ARP header. */
3122		ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3123		ahdr->ar_op = htons(ARPOP_REQUEST);
3124		arpreq = (u8 *)(ahdr + 1);
3125
3126		/* The Unicast IPv4 address in 'tip' field. */
3127		arpreq += 2 * ETH_ALEN + sizeof(u32);
3128		*(u32 *)arpreq = ifa->ifa_address;
3129
3130		/* The mask for the relevant bits. */
3131		pmConf->filters[i].mask[0] = 0x00;
3132		pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3133		pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3134		pmConf->filters[i].mask[3] = 0x00;
3135		pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3136		pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3137		in_dev_put(in_dev);
3138
3139		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3140		i++;
3141	}
3142
3143skip_arp:
3144	if (adapter->wol & WAKE_MAGIC)
3145		pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3146
3147	pmConf->numFilters = i;
3148
3149	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3150	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3151								  *pmConf));
3152	adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3153								 pmConf));
3154
3155	spin_lock_irqsave(&adapter->cmd_lock, flags);
3156	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3157			       VMXNET3_CMD_UPDATE_PMCFG);
3158	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3159
3160	pci_save_state(pdev);
3161	pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3162			adapter->wol);
3163	pci_disable_device(pdev);
3164	pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3165
3166	return 0;
3167}
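
/*
 * Illustrative sketch only (not driver code) of the wake-up filter mask
 * encoding used in vmxnet3_suspend() above: each bit of mask[] appears to
 * select one byte of pattern[], which is consistent with maskSize being
 * (patternSize - 1) / 8 + 1, with 0x3F covering the six destination-MAC
 * bytes of the unicast filter, and with the ARP filter masking the
 * EtherType, the ARP opcode and the target IP.  The helper names are
 * hypothetical.
 */
#include <stdint.h>
#include <string.h>

static void example_mask_pattern_byte(uint8_t *mask, unsigned int byte_off)
{
	mask[byte_off / 8] |= 1u << (byte_off & 7);
}

static void example_build_ucast_mask(uint8_t *mask, size_t mask_len)
{
	unsigned int i;

	memset(mask, 0, mask_len);
	for (i = 0; i < 6; i++)			/* ETH_ALEN destination bytes */
		example_mask_pattern_byte(mask, i);
	/* mask[0] is now 0x3F, matching the driver's unicast filter above */
}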
3168
3169
3170static int
3171vmxnet3_resume(struct device *device)
3172{
3173	int err, i = 0;
3174	unsigned long flags;
3175	struct pci_dev *pdev = to_pci_dev(device);
3176	struct net_device *netdev = pci_get_drvdata(pdev);
3177	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3178	struct Vmxnet3_PMConf *pmConf;
3179
3180	if (!netif_running(netdev))
3181		return 0;
3182
3183	/* Destroy wake-up filters. */
3184	pmConf = adapter->pm_conf;
3185	memset(pmConf, 0, sizeof(*pmConf));
3186
3187	adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3188	adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3189								  *pmConf));
3190	adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3191								 pmConf));
3192
3193	netif_device_attach(netdev);
3194	pci_set_power_state(pdev, PCI_D0);
3195	pci_restore_state(pdev);
3196	err = pci_enable_device_mem(pdev);
3197	if (err != 0)
3198		return err;
3199
3200	pci_enable_wake(pdev, PCI_D0, 0);
3201
3202	spin_lock_irqsave(&adapter->cmd_lock, flags);
3203	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3204			       VMXNET3_CMD_UPDATE_PMCFG);
3205	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3206	vmxnet3_alloc_intr_resources(adapter);
3207	vmxnet3_request_irqs(adapter);
3208	for (i = 0; i < adapter->num_rx_queues; i++)
3209		napi_enable(&adapter->rx_queue[i].napi);
3210	vmxnet3_enable_all_intrs(adapter);
3211
3212	return 0;
3213}
3214
3215static const struct dev_pm_ops vmxnet3_pm_ops = {
3216	.suspend = vmxnet3_suspend,
3217	.resume = vmxnet3_resume,
3218};
3219#endif
3220
3221static struct pci_driver vmxnet3_driver = {
3222	.name		= vmxnet3_driver_name,
3223	.id_table	= vmxnet3_pciid_table,
3224	.probe		= vmxnet3_probe_device,
3225	.remove		= __devexit_p(vmxnet3_remove_device),
3226#ifdef CONFIG_PM
3227	.driver.pm	= &vmxnet3_pm_ops,
3228#endif
3229};
3230
3231
3232static int __init
3233vmxnet3_init_module(void)
3234{
3235	printk(KERN_INFO "%s - version %s\n", VMXNET3_DRIVER_DESC,
3236		VMXNET3_DRIVER_VERSION_REPORT);
3237	return pci_register_driver(&vmxnet3_driver);
3238}
3239
3240module_init(vmxnet3_init_module);
3241
3242
3243static void
3244vmxnet3_exit_module(void)
3245{
3246	pci_unregister_driver(&vmxnet3_driver);
3247}
3248
3249module_exit(vmxnet3_exit_module);
3250
3251MODULE_AUTHOR("VMware, Inc.");
3252MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3253MODULE_LICENSE("GPL v2");
3254MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3255