igb_main.c revision 1128c756bef8285db3bbde5b26d4a6b4c7e2e613
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2011 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/bitops.h>
32#include <linux/vmalloc.h>
33#include <linux/pagemap.h>
34#include <linux/netdevice.h>
35#include <linux/ipv6.h>
36#include <linux/slab.h>
37#include <net/checksum.h>
38#include <net/ip6_checksum.h>
39#include <linux/net_tstamp.h>
40#include <linux/mii.h>
41#include <linux/ethtool.h>
42#include <linux/if.h>
43#include <linux/if_vlan.h>
44#include <linux/pci.h>
45#include <linux/pci-aspm.h>
46#include <linux/delay.h>
47#include <linux/interrupt.h>
48#include <linux/ip.h>
49#include <linux/tcp.h>
50#include <linux/sctp.h>
51#include <linux/if_ether.h>
52#include <linux/aer.h>
53#include <linux/prefetch.h>
54#ifdef CONFIG_IGB_DCA
55#include <linux/dca.h>
56#endif
57#include "igb.h"
58
59#define MAJ 3
60#define MIN 2
61#define BUILD 10
62#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63__stringify(BUILD) "-k"
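/*
 * Illustrative expansion: with MAJ 3, MIN 2 and BUILD 10 above, DRV_VERSION
 * stringifies to "3.2.10-k".
 */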
64char igb_driver_name[] = "igb";
65char igb_driver_version[] = DRV_VERSION;
66static const char igb_driver_string[] =
67				"Intel(R) Gigabit Ethernet Network Driver";
68static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70static const struct e1000_info *igb_info_tbl[] = {
71	[board_82575] = &e1000_82575_info,
72};
73
74static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100	/* required last entry */
101	{0, }
102};
103
104MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106void igb_reset(struct igb_adapter *);
107static int igb_setup_all_tx_resources(struct igb_adapter *);
108static int igb_setup_all_rx_resources(struct igb_adapter *);
109static void igb_free_all_tx_resources(struct igb_adapter *);
110static void igb_free_all_rx_resources(struct igb_adapter *);
111static void igb_setup_mrqc(struct igb_adapter *);
112static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113static void __devexit igb_remove(struct pci_dev *pdev);
114static void igb_init_hw_timer(struct igb_adapter *adapter);
115static int igb_sw_init(struct igb_adapter *);
116static int igb_open(struct net_device *);
117static int igb_close(struct net_device *);
118static void igb_configure_tx(struct igb_adapter *);
119static void igb_configure_rx(struct igb_adapter *);
120static void igb_clean_all_tx_rings(struct igb_adapter *);
121static void igb_clean_all_rx_rings(struct igb_adapter *);
122static void igb_clean_tx_ring(struct igb_ring *);
123static void igb_clean_rx_ring(struct igb_ring *);
124static void igb_set_rx_mode(struct net_device *);
125static void igb_update_phy_info(unsigned long);
126static void igb_watchdog(unsigned long);
127static void igb_watchdog_task(struct work_struct *);
128static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130						 struct rtnl_link_stats64 *stats);
131static int igb_change_mtu(struct net_device *, int);
132static int igb_set_mac(struct net_device *, void *);
133static void igb_set_uta(struct igb_adapter *adapter);
134static irqreturn_t igb_intr(int irq, void *);
135static irqreturn_t igb_intr_msi(int irq, void *);
136static irqreturn_t igb_msix_other(int irq, void *);
137static irqreturn_t igb_msix_ring(int irq, void *);
138#ifdef CONFIG_IGB_DCA
139static void igb_update_dca(struct igb_q_vector *);
140static void igb_setup_dca(struct igb_adapter *);
141#endif /* CONFIG_IGB_DCA */
142static int igb_poll(struct napi_struct *, int);
143static bool igb_clean_tx_irq(struct igb_q_vector *);
144static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146static void igb_tx_timeout(struct net_device *);
147static void igb_reset_task(struct work_struct *);
148static void igb_vlan_mode(struct net_device *netdev, u32 features);
149static void igb_vlan_rx_add_vid(struct net_device *, u16);
150static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151static void igb_restore_vlan(struct igb_adapter *);
152static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
153static void igb_ping_all_vfs(struct igb_adapter *);
154static void igb_msg_task(struct igb_adapter *);
155static void igb_vmm_control(struct igb_adapter *);
156static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160			       int vf, u16 vlan, u8 qos);
161static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163				 struct ifla_vf_info *ivi);
164static void igb_check_vf_rate_limit(struct igb_adapter *);
165
166#ifdef CONFIG_PCI_IOV
167static int igb_vf_configure(struct igb_adapter *adapter, int vf);
168static int igb_find_enabled_vfs(struct igb_adapter *adapter);
169static int igb_check_vf_assignment(struct igb_adapter *adapter);
170#endif
171
172#ifdef CONFIG_PM
173static int igb_suspend(struct pci_dev *, pm_message_t);
174static int igb_resume(struct pci_dev *);
175#endif
176static void igb_shutdown(struct pci_dev *);
177#ifdef CONFIG_IGB_DCA
178static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
179static struct notifier_block dca_notifier = {
180	.notifier_call	= igb_notify_dca,
181	.next		= NULL,
182	.priority	= 0
183};
184#endif
185#ifdef CONFIG_NET_POLL_CONTROLLER
186/* for netdump / net console */
187static void igb_netpoll(struct net_device *);
188#endif
189#ifdef CONFIG_PCI_IOV
190static unsigned int max_vfs = 0;
191module_param(max_vfs, uint, 0);
192MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
193                 "per physical function");
194#endif /* CONFIG_PCI_IOV */
195
196static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
197		     pci_channel_state_t);
198static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
199static void igb_io_resume(struct pci_dev *);
200
201static struct pci_error_handlers igb_err_handler = {
202	.error_detected = igb_io_error_detected,
203	.slot_reset = igb_io_slot_reset,
204	.resume = igb_io_resume,
205};
206
207static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
208
209static struct pci_driver igb_driver = {
210	.name     = igb_driver_name,
211	.id_table = igb_pci_tbl,
212	.probe    = igb_probe,
213	.remove   = __devexit_p(igb_remove),
214#ifdef CONFIG_PM
215	/* Power Management Hooks */
216	.suspend  = igb_suspend,
217	.resume   = igb_resume,
218#endif
219	.shutdown = igb_shutdown,
220	.err_handler = &igb_err_handler
221};
222
223MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
224MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
225MODULE_LICENSE("GPL");
226MODULE_VERSION(DRV_VERSION);
227
228struct igb_reg_info {
229	u32 ofs;
230	char *name;
231};
232
233static const struct igb_reg_info igb_reg_info_tbl[] = {
234
235	/* General Registers */
236	{E1000_CTRL, "CTRL"},
237	{E1000_STATUS, "STATUS"},
238	{E1000_CTRL_EXT, "CTRL_EXT"},
239
240	/* Interrupt Registers */
241	{E1000_ICR, "ICR"},
242
243	/* RX Registers */
244	{E1000_RCTL, "RCTL"},
245	{E1000_RDLEN(0), "RDLEN"},
246	{E1000_RDH(0), "RDH"},
247	{E1000_RDT(0), "RDT"},
248	{E1000_RXDCTL(0), "RXDCTL"},
249	{E1000_RDBAL(0), "RDBAL"},
250	{E1000_RDBAH(0), "RDBAH"},
251
252	/* TX Registers */
253	{E1000_TCTL, "TCTL"},
254	{E1000_TDBAL(0), "TDBAL"},
255	{E1000_TDBAH(0), "TDBAH"},
256	{E1000_TDLEN(0), "TDLEN"},
257	{E1000_TDH(0), "TDH"},
258	{E1000_TDT(0), "TDT"},
259	{E1000_TXDCTL(0), "TXDCTL"},
260	{E1000_TDFH, "TDFH"},
261	{E1000_TDFT, "TDFT"},
262	{E1000_TDFHS, "TDFHS"},
263	{E1000_TDFPC, "TDFPC"},
264
265	/* List Terminator */
266	{}
267};
268
269/*
270 * igb_regdump - register printout routine
271 */
272static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
273{
274	int n = 0;
275	char rname[16];
276	u32 regs[8];
277
278	switch (reginfo->ofs) {
279	case E1000_RDLEN(0):
280		for (n = 0; n < 4; n++)
281			regs[n] = rd32(E1000_RDLEN(n));
282		break;
283	case E1000_RDH(0):
284		for (n = 0; n < 4; n++)
285			regs[n] = rd32(E1000_RDH(n));
286		break;
287	case E1000_RDT(0):
288		for (n = 0; n < 4; n++)
289			regs[n] = rd32(E1000_RDT(n));
290		break;
291	case E1000_RXDCTL(0):
292		for (n = 0; n < 4; n++)
293			regs[n] = rd32(E1000_RXDCTL(n));
294		break;
295	case E1000_RDBAL(0):
296		for (n = 0; n < 4; n++)
297			regs[n] = rd32(E1000_RDBAL(n));
298		break;
299	case E1000_RDBAH(0):
300		for (n = 0; n < 4; n++)
301			regs[n] = rd32(E1000_RDBAH(n));
302		break;
303	case E1000_TDBAL(0):
304		for (n = 0; n < 4; n++)
305			regs[n] = rd32(E1000_TDBAL(n));
306		break;
307	case E1000_TDBAH(0):
308		for (n = 0; n < 4; n++)
309			regs[n] = rd32(E1000_TDBAH(n));
310		break;
311	case E1000_TDLEN(0):
312		for (n = 0; n < 4; n++)
313			regs[n] = rd32(E1000_TDLEN(n));
314		break;
315	case E1000_TDH(0):
316		for (n = 0; n < 4; n++)
317			regs[n] = rd32(E1000_TDH(n));
318		break;
319	case E1000_TDT(0):
320		for (n = 0; n < 4; n++)
321			regs[n] = rd32(E1000_TDT(n));
322		break;
323	case E1000_TXDCTL(0):
324		for (n = 0; n < 4; n++)
325			regs[n] = rd32(E1000_TXDCTL(n));
326		break;
327	default:
328		printk(KERN_INFO "%-15s %08x\n",
329			reginfo->name, rd32(reginfo->ofs));
330		return;
331	}
332
333	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
334	printk(KERN_INFO "%-15s ", rname);
335	for (n = 0; n < 4; n++)
336		printk(KERN_CONT "%08x ", regs[n]);
337	printk(KERN_CONT "\n");
338}
339
340/*
341 * igb_dump - Print registers, tx-rings and rx-rings
342 */
343static void igb_dump(struct igb_adapter *adapter)
344{
345	struct net_device *netdev = adapter->netdev;
346	struct e1000_hw *hw = &adapter->hw;
347	struct igb_reg_info *reginfo;
348	struct igb_ring *tx_ring;
349	union e1000_adv_tx_desc *tx_desc;
350	struct my_u0 { u64 a; u64 b; } *u0;
351	struct igb_ring *rx_ring;
352	union e1000_adv_rx_desc *rx_desc;
353	u32 staterr;
354	u16 i, n;
355
356	if (!netif_msg_hw(adapter))
357		return;
358
359	/* Print netdevice Info */
360	if (netdev) {
361		dev_info(&adapter->pdev->dev, "Net device Info\n");
362		printk(KERN_INFO "Device Name     state            "
363			"trans_start      last_rx\n");
364		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
365		netdev->name,
366		netdev->state,
367		netdev->trans_start,
368		netdev->last_rx);
369	}
370
371	/* Print Registers */
372	dev_info(&adapter->pdev->dev, "Register Dump\n");
373	printk(KERN_INFO " Register Name   Value\n");
374	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
375	     reginfo->name; reginfo++) {
376		igb_regdump(hw, reginfo);
377	}
378
379	/* Print TX Ring Summary */
380	if (!netdev || !netif_running(netdev))
381		goto exit;
382
383	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
384	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
385		" leng ntw timestamp\n");
386	for (n = 0; n < adapter->num_tx_queues; n++) {
387		struct igb_tx_buffer *buffer_info;
388		tx_ring = adapter->tx_ring[n];
389		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
390		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
391			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
392			   (u64)buffer_info->dma,
393			   buffer_info->length,
394			   buffer_info->next_to_watch,
395			   (u64)buffer_info->time_stamp);
396	}
397
398	/* Print TX Rings */
399	if (!netif_msg_tx_done(adapter))
400		goto rx_ring_summary;
401
402	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
403
404	/* Transmit Descriptor Formats
405	 *
406	 * Advanced Transmit Descriptor
407	 *   +--------------------------------------------------------------+
408	 * 0 |         Buffer Address [63:0]                                |
409	 *   +--------------------------------------------------------------+
410	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
411	 *   +--------------------------------------------------------------+
412	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
413	 */
414
415	for (n = 0; n < adapter->num_tx_queues; n++) {
416		tx_ring = adapter->tx_ring[n];
417		printk(KERN_INFO "------------------------------------\n");
418		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
419		printk(KERN_INFO "------------------------------------\n");
420		printk(KERN_INFO "T [desc]     [address 63:0  ] "
421			"[PlPOCIStDDM Ln] [bi->dma       ] "
422			"leng  ntw timestamp        bi->skb\n");
423
424		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
425			struct igb_tx_buffer *buffer_info;
426			tx_desc = IGB_TX_DESC(tx_ring, i);
427			buffer_info = &tx_ring->tx_buffer_info[i];
428			u0 = (struct my_u0 *)tx_desc;
429			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
430				" %04X  %p %016llX %p", i,
431				le64_to_cpu(u0->a),
432				le64_to_cpu(u0->b),
433				(u64)buffer_info->dma,
434				buffer_info->length,
435				buffer_info->next_to_watch,
436				(u64)buffer_info->time_stamp,
437				buffer_info->skb);
438			if (i == tx_ring->next_to_use &&
439				i == tx_ring->next_to_clean)
440				printk(KERN_CONT " NTC/U\n");
441			else if (i == tx_ring->next_to_use)
442				printk(KERN_CONT " NTU\n");
443			else if (i == tx_ring->next_to_clean)
444				printk(KERN_CONT " NTC\n");
445			else
446				printk(KERN_CONT "\n");
447
448			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
449				print_hex_dump(KERN_INFO, "",
450					DUMP_PREFIX_ADDRESS,
451					16, 1, phys_to_virt(buffer_info->dma),
452					buffer_info->length, true);
453		}
454	}
455
456	/* Print RX Rings Summary */
457rx_ring_summary:
458	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
459	printk(KERN_INFO "Queue [NTU] [NTC]\n");
460	for (n = 0; n < adapter->num_rx_queues; n++) {
461		rx_ring = adapter->rx_ring[n];
462		printk(KERN_INFO " %5d %5X %5X\n", n,
463			   rx_ring->next_to_use, rx_ring->next_to_clean);
464	}
465
466	/* Print RX Rings */
467	if (!netif_msg_rx_status(adapter))
468		goto exit;
469
470	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
471
472	/* Advanced Receive Descriptor (Read) Format
473	 *    63                                           1        0
474	 *    +-----------------------------------------------------+
475	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
476	 *    +----------------------------------------------+------+
477	 *  8 |       Header Buffer Address [63:1]           |  DD  |
478	 *    +-----------------------------------------------------+
479	 *
480	 *
481	 * Advanced Receive Descriptor (Write-Back) Format
482	 *
483	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
484	 *   +------------------------------------------------------+
485	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
486	 *   | Checksum   Ident  |   |           |    | Type | Type |
487	 *   +------------------------------------------------------+
488	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
489	 *   +------------------------------------------------------+
490	 *   63       48 47    32 31            20 19               0
491	 */
492
493	for (n = 0; n < adapter->num_rx_queues; n++) {
494		rx_ring = adapter->rx_ring[n];
495		printk(KERN_INFO "------------------------------------\n");
496		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
497		printk(KERN_INFO "------------------------------------\n");
498		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
499			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
500			"<-- Adv Rx Read format\n");
501		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
502			"[vl er S cks ln] ---------------- [bi->skb] "
503			"<-- Adv Rx Write-Back format\n");
504
505		for (i = 0; i < rx_ring->count; i++) {
506			struct igb_rx_buffer *buffer_info;
507			buffer_info = &rx_ring->rx_buffer_info[i];
508			rx_desc = IGB_RX_DESC(rx_ring, i);
509			u0 = (struct my_u0 *)rx_desc;
510			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
511			if (staterr & E1000_RXD_STAT_DD) {
512				/* Descriptor Done */
513				printk(KERN_INFO "RWB[0x%03X]     %016llX "
514					"%016llX ---------------- %p", i,
515					le64_to_cpu(u0->a),
516					le64_to_cpu(u0->b),
517					buffer_info->skb);
518			} else {
519				printk(KERN_INFO "R  [0x%03X]     %016llX "
520					"%016llX %016llX %p", i,
521					le64_to_cpu(u0->a),
522					le64_to_cpu(u0->b),
523					(u64)buffer_info->dma,
524					buffer_info->skb);
525
526				if (netif_msg_pktdata(adapter)) {
527					print_hex_dump(KERN_INFO, "",
528						DUMP_PREFIX_ADDRESS,
529						16, 1,
530						phys_to_virt(buffer_info->dma),
531						IGB_RX_HDR_LEN, true);
532					print_hex_dump(KERN_INFO, "",
533					  DUMP_PREFIX_ADDRESS,
534					  16, 1,
535					  phys_to_virt(
536					    buffer_info->page_dma +
537					    buffer_info->page_offset),
538					  PAGE_SIZE/2, true);
539				}
540			}
541
542			if (i == rx_ring->next_to_use)
543				printk(KERN_CONT " NTU\n");
544			else if (i == rx_ring->next_to_clean)
545				printk(KERN_CONT " NTC\n");
546			else
547				printk(KERN_CONT "\n");
548
549		}
550	}
551
552exit:
553	return;
554}
555
556
557/**
558 * igb_read_clock - read raw cycle counter (to be used by time counter)
559 */
560static cycle_t igb_read_clock(const struct cyclecounter *tc)
561{
562	struct igb_adapter *adapter =
563		container_of(tc, struct igb_adapter, cycles);
564	struct e1000_hw *hw = &adapter->hw;
565	u64 stamp = 0;
566	int shift = 0;
567
568	/*
569	 * The timestamp latches on lowest register read. For the 82580
570	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
571	 * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
572	 */
573	if (hw->mac.type >= e1000_82580) {
574		stamp = rd32(E1000_SYSTIMR) >> 8;
575		shift = IGB_82580_TSYNC_SHIFT;
576	}
577
578	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
579	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
580	return stamp;
581}
582
583/**
584 * igb_get_hw_dev - return device
585 * used by hardware layer to print debugging information
586 **/
587struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
588{
589	struct igb_adapter *adapter = hw->back;
590	return adapter->netdev;
591}
592
593/**
594 * igb_init_module - Driver Registration Routine
595 *
596 * igb_init_module is the first routine called when the driver is
597 * loaded. All it does is register with the PCI subsystem.
598 **/
599static int __init igb_init_module(void)
600{
601	int ret;
602	printk(KERN_INFO "%s - version %s\n",
603	       igb_driver_string, igb_driver_version);
604
605	printk(KERN_INFO "%s\n", igb_copyright);
606
607#ifdef CONFIG_IGB_DCA
608	dca_register_notify(&dca_notifier);
609#endif
610	ret = pci_register_driver(&igb_driver);
611	return ret;
612}
613
614module_init(igb_init_module);
615
616/**
617 * igb_exit_module - Driver Exit Cleanup Routine
618 *
619 * igb_exit_module is called just before the driver is removed
620 * from memory.
621 **/
622static void __exit igb_exit_module(void)
623{
624#ifdef CONFIG_IGB_DCA
625	dca_unregister_notify(&dca_notifier);
626#endif
627	pci_unregister_driver(&igb_driver);
628}
629
630module_exit(igb_exit_module);
631
632#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
633/**
634 * igb_cache_ring_register - Descriptor ring to register mapping
635 * @adapter: board private structure to initialize
636 *
637 * Once we know the feature-set enabled for the device, we'll cache
638 * the register offset the descriptor ring is assigned to.
639 **/
640static void igb_cache_ring_register(struct igb_adapter *adapter)
641{
642	int i = 0, j = 0;
643	u32 rbase_offset = adapter->vfs_allocated_count;
644
645	switch (adapter->hw.mac.type) {
646	case e1000_82576:
647		/* The queues are allocated for virtualization such that VF 0
648		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
649		 * In order to avoid collision we start at the first free queue
650		 * and continue consuming queues in the same sequence
651		 */
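		/*
		 * Worked example of Q_IDX_82576(i) = ((i & 0x1) << 3) + (i >> 1):
		 * i = 0..5 maps to queue 0, 8, 1, 9, 2, 10, so the PF's RSS
		 * rings interleave into the slots left free by the VFs.
		 */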
652		if (adapter->vfs_allocated_count) {
653			for (; i < adapter->rss_queues; i++)
654				adapter->rx_ring[i]->reg_idx = rbase_offset +
655				                               Q_IDX_82576(i);
656		}
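		/* Fall through: any remaining PF queues are mapped 1:1 below */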
657	case e1000_82575:
658	case e1000_82580:
659	case e1000_i350:
660	default:
661		for (; i < adapter->num_rx_queues; i++)
662			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
663		for (; j < adapter->num_tx_queues; j++)
664			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
665		break;
666	}
667}
668
669static void igb_free_queues(struct igb_adapter *adapter)
670{
671	int i;
672
673	for (i = 0; i < adapter->num_tx_queues; i++) {
674		kfree(adapter->tx_ring[i]);
675		adapter->tx_ring[i] = NULL;
676	}
677	for (i = 0; i < adapter->num_rx_queues; i++) {
678		kfree(adapter->rx_ring[i]);
679		adapter->rx_ring[i] = NULL;
680	}
681	adapter->num_rx_queues = 0;
682	adapter->num_tx_queues = 0;
683}
684
685/**
686 * igb_alloc_queues - Allocate memory for all rings
687 * @adapter: board private structure to initialize
688 *
689 * We allocate one ring per queue at run-time since we don't know the
690 * number of queues at compile-time.
691 **/
692static int igb_alloc_queues(struct igb_adapter *adapter)
693{
694	struct igb_ring *ring;
695	int i;
696	int orig_node = adapter->node;
697
698	for (i = 0; i < adapter->num_tx_queues; i++) {
699		if (orig_node == -1) {
700			int cur_node = next_online_node(adapter->node);
701			if (cur_node == MAX_NUMNODES)
702				cur_node = first_online_node;
703			adapter->node = cur_node;
704		}
705		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
706				    adapter->node);
707		if (!ring)
708			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
709		if (!ring)
710			goto err;
711		ring->count = adapter->tx_ring_count;
712		ring->queue_index = i;
713		ring->dev = &adapter->pdev->dev;
714		ring->netdev = adapter->netdev;
715		ring->numa_node = adapter->node;
716		/* For 82575, context index must be unique per ring. */
717		if (adapter->hw.mac.type == e1000_82575)
718			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
719		adapter->tx_ring[i] = ring;
720	}
721	/* Restore the adapter's original node */
722	adapter->node = orig_node;
723
724	for (i = 0; i < adapter->num_rx_queues; i++) {
725		if (orig_node == -1) {
726			int cur_node = next_online_node(adapter->node);
727			if (cur_node == MAX_NUMNODES)
728				cur_node = first_online_node;
729			adapter->node = cur_node;
730		}
731		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
732				    adapter->node);
733		if (!ring)
734			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
735		if (!ring)
736			goto err;
737		ring->count = adapter->rx_ring_count;
738		ring->queue_index = i;
739		ring->dev = &adapter->pdev->dev;
740		ring->netdev = adapter->netdev;
741		ring->numa_node = adapter->node;
742		/* set flag indicating ring supports SCTP checksum offload */
743		if (adapter->hw.mac.type >= e1000_82576)
744			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
745
746		/* On i350, loopback VLAN packets have the tag byte-swapped. */
747		if (adapter->hw.mac.type == e1000_i350)
748			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
749
750		adapter->rx_ring[i] = ring;
751	}
752	/* Restore the adapter's original node */
753	adapter->node = orig_node;
754
755	igb_cache_ring_register(adapter);
756
757	return 0;
758
759err:
760	/* Restore the adapter's original node */
761	adapter->node = orig_node;
762	igb_free_queues(adapter);
763
764	return -ENOMEM;
765}
766
767/**
768 *  igb_write_ivar - configure ivar for given MSI-X vector
769 *  @hw: pointer to the HW structure
770 *  @msix_vector: vector number we are allocating to a given ring
771 *  @index: row index of IVAR register to write within IVAR table
772 *  @offset: column offset within IVAR, should be a multiple of 8
773 *
774 *  This function is intended to handle the writing of the IVAR register
775 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
776 *  each containing a cause allocation for an Rx and Tx ring, and a
777 *  variable number of rows depending on the number of queues supported.
778 **/
779static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
780			   int index, int offset)
781{
782	u32 ivar = array_rd32(E1000_IVAR0, index);
783
784	/* clear any bits that are currently set */
785	ivar &= ~((u32)0xFF << offset);
786
787	/* write vector and valid bit */
788	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
789
790	array_wr32(E1000_IVAR0, index, ivar);
791}
792
793#define IGB_N0_QUEUE -1
794static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
795{
796	struct igb_adapter *adapter = q_vector->adapter;
797	struct e1000_hw *hw = &adapter->hw;
798	int rx_queue = IGB_N0_QUEUE;
799	int tx_queue = IGB_N0_QUEUE;
800	u32 msixbm = 0;
801
802	if (q_vector->rx.ring)
803		rx_queue = q_vector->rx.ring->reg_idx;
804	if (q_vector->tx.ring)
805		tx_queue = q_vector->tx.ring->reg_idx;
806
807	switch (hw->mac.type) {
808	case e1000_82575:
809		/* The 82575 assigns vectors using a bitmask, which matches the
810		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
811		   or more queues to a vector, we write the appropriate bits
812		   into the MSIXBM register for that vector. */
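		/*
		 * Example: rx_queue 2 and tx_queue 2 on msix_vector 1 yield
		 * msixbm = (E1000_EICR_RX_QUEUE0 << 2) |
		 * (E1000_EICR_TX_QUEUE0 << 2), written to MSIXBM entry 1.
		 */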
813		if (rx_queue > IGB_N0_QUEUE)
814			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
815		if (tx_queue > IGB_N0_QUEUE)
816			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
817		if (!adapter->msix_entries && msix_vector == 0)
818			msixbm |= E1000_EIMS_OTHER;
819		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
820		q_vector->eims_value = msixbm;
821		break;
822	case e1000_82576:
823		/*
824		 * 82576 uses a table that essentially consists of 2 columns
825		 * with 8 rows.  The ordering is column-major so we use the
826		 * lower 3 bits as the row index, and the 4th bit as the
827		 * column offset.
828		 */
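		/*
		 * Example: rx_queue 9 lands in row 9 & 0x7 = 1 at column
		 * offset (9 & 0x8) << 1 = 16; its Tx counterpart shares the
		 * row at offset 16 + 8 = 24.
		 */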
829		if (rx_queue > IGB_N0_QUEUE)
830			igb_write_ivar(hw, msix_vector,
831				       rx_queue & 0x7,
832				       (rx_queue & 0x8) << 1);
833		if (tx_queue > IGB_N0_QUEUE)
834			igb_write_ivar(hw, msix_vector,
835				       tx_queue & 0x7,
836				       ((tx_queue & 0x8) << 1) + 8);
837		q_vector->eims_value = 1 << msix_vector;
838		break;
839	case e1000_82580:
840	case e1000_i350:
841		/*
842		 * On 82580 and newer adapters the scheme is similar to 82576
843		 * however instead of ordering column-major we have things
844		 * ordered row-major.  So we traverse the table by using
845		 * bit 0 as the column offset, and the remaining bits as the
846		 * row index.
847		 */
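		/*
		 * Example: rx_queue 5 uses row 5 >> 1 = 2 at column offset
		 * (5 & 0x1) << 4 = 16; tx_queue 5 shares the row at offset
		 * 16 + 8 = 24.
		 */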
848		if (rx_queue > IGB_N0_QUEUE)
849			igb_write_ivar(hw, msix_vector,
850				       rx_queue >> 1,
851				       (rx_queue & 0x1) << 4);
852		if (tx_queue > IGB_N0_QUEUE)
853			igb_write_ivar(hw, msix_vector,
854				       tx_queue >> 1,
855				       ((tx_queue & 0x1) << 4) + 8);
856		q_vector->eims_value = 1 << msix_vector;
857		break;
858	default:
859		BUG();
860		break;
861	}
862
863	/* add q_vector eims value to global eims_enable_mask */
864	adapter->eims_enable_mask |= q_vector->eims_value;
865
866	/* configure q_vector to set itr on first interrupt */
867	q_vector->set_itr = 1;
868}
869
870/**
871 * igb_configure_msix - Configure MSI-X hardware
872 *
873 * igb_configure_msix sets up the hardware to properly
874 * generate MSI-X interrupts.
875 **/
876static void igb_configure_msix(struct igb_adapter *adapter)
877{
878	u32 tmp;
879	int i, vector = 0;
880	struct e1000_hw *hw = &adapter->hw;
881
882	adapter->eims_enable_mask = 0;
883
884	/* set vector for other causes, i.e. link changes */
885	switch (hw->mac.type) {
886	case e1000_82575:
887		tmp = rd32(E1000_CTRL_EXT);
888		/* enable MSI-X PBA support*/
889		tmp |= E1000_CTRL_EXT_PBA_CLR;
890
891		/* Auto-Mask interrupts upon ICR read. */
892		tmp |= E1000_CTRL_EXT_EIAME;
893		tmp |= E1000_CTRL_EXT_IRCA;
894
895		wr32(E1000_CTRL_EXT, tmp);
896
897		/* enable msix_other interrupt */
898		array_wr32(E1000_MSIXBM(0), vector++,
899		                      E1000_EIMS_OTHER);
900		adapter->eims_other = E1000_EIMS_OTHER;
901
902		break;
903
904	case e1000_82576:
905	case e1000_82580:
906	case e1000_i350:
907		/* Turn on MSI-X capability first, or our settings
908		 * won't stick.  And it will take days to debug. */
909		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
910		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
911		                E1000_GPIE_NSICR);
912
913		/* enable msix_other interrupt */
914		adapter->eims_other = 1 << vector;
915		tmp = (vector++ | E1000_IVAR_VALID) << 8;
916
917		wr32(E1000_IVAR_MISC, tmp);
918		break;
919	default:
920		/* do nothing, since nothing else supports MSI-X */
921		break;
922	} /* switch (hw->mac.type) */
923
924	adapter->eims_enable_mask |= adapter->eims_other;
925
926	for (i = 0; i < adapter->num_q_vectors; i++)
927		igb_assign_vector(adapter->q_vector[i], vector++);
928
929	wrfl();
930}
931
932/**
933 * igb_request_msix - Initialize MSI-X interrupts
934 *
935 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
936 * kernel.
937 **/
938static int igb_request_msix(struct igb_adapter *adapter)
939{
940	struct net_device *netdev = adapter->netdev;
941	struct e1000_hw *hw = &adapter->hw;
942	int i, err = 0, vector = 0;
943
944	err = request_irq(adapter->msix_entries[vector].vector,
945	                  igb_msix_other, 0, netdev->name, adapter);
946	if (err)
947		goto out;
948	vector++;
949
950	for (i = 0; i < adapter->num_q_vectors; i++) {
951		struct igb_q_vector *q_vector = adapter->q_vector[i];
952
953		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
954
955		if (q_vector->rx.ring && q_vector->tx.ring)
956			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
957				q_vector->rx.ring->queue_index);
958		else if (q_vector->tx.ring)
959			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
960				q_vector->tx.ring->queue_index);
961		else if (q_vector->rx.ring)
962			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
963				q_vector->rx.ring->queue_index);
964		else
965			sprintf(q_vector->name, "%s-unused", netdev->name);
966
967		err = request_irq(adapter->msix_entries[vector].vector,
968		                  igb_msix_ring, 0, q_vector->name,
969		                  q_vector);
970		if (err)
971			goto out;
972		vector++;
973	}
974
975	igb_configure_msix(adapter);
976	return 0;
977out:
978	return err;
979}
980
981static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
982{
983	if (adapter->msix_entries) {
984		pci_disable_msix(adapter->pdev);
985		kfree(adapter->msix_entries);
986		adapter->msix_entries = NULL;
987	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
988		pci_disable_msi(adapter->pdev);
989	}
990}
991
992/**
993 * igb_free_q_vectors - Free memory allocated for interrupt vectors
994 * @adapter: board private structure to initialize
995 *
996 * This function frees the memory allocated to the q_vectors.  In addition if
997 * NAPI is enabled it will delete any references to the NAPI struct prior
998 * to freeing the q_vector.
999 **/
1000static void igb_free_q_vectors(struct igb_adapter *adapter)
1001{
1002	int v_idx;
1003
1004	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1005		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1006		adapter->q_vector[v_idx] = NULL;
1007		if (!q_vector)
1008			continue;
1009		netif_napi_del(&q_vector->napi);
1010		kfree(q_vector);
1011	}
1012	adapter->num_q_vectors = 0;
1013}
1014
1015/**
1016 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1017 *
1018 * This function resets the device so that it has no Rx queues, Tx queues,
1019 * or MSI-X interrupts allocated.
1020 */
1021static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1022{
1023	igb_free_queues(adapter);
1024	igb_free_q_vectors(adapter);
1025	igb_reset_interrupt_capability(adapter);
1026}
1027
1028/**
1029 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1030 *
1031 * Attempt to configure interrupts using the best available
1032 * capabilities of the hardware and kernel.
1033 **/
1034static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1035{
1036	int err;
1037	int numvecs, i;
1038
1039	/* Number of supported queues. */
1040	adapter->num_rx_queues = adapter->rss_queues;
1041	if (adapter->vfs_allocated_count)
1042		adapter->num_tx_queues = 1;
1043	else
1044		adapter->num_tx_queues = adapter->rss_queues;
1045
1046	/* start with one vector for every rx queue */
1047	numvecs = adapter->num_rx_queues;
1048
1049	/* if tx handler is separate add 1 for every tx queue */
1050	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1051		numvecs += adapter->num_tx_queues;
1052
1053	/* store the number of vectors reserved for queues */
1054	adapter->num_q_vectors = numvecs;
1055
1056	/* add 1 vector for link status interrupts */
1057	numvecs++;
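	/*
	 * Illustrative totals: with 4 RSS queues this requests 4 + 1 = 5
	 * MSI-X vectors when Tx/Rx queue pairing is enabled, or
	 * 4 + 4 + 1 = 9 vectors when each Tx queue has its own handler.
	 */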
1058	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1059					GFP_KERNEL);
1060	if (!adapter->msix_entries)
1061		goto msi_only;
1062
1063	for (i = 0; i < numvecs; i++)
1064		adapter->msix_entries[i].entry = i;
1065
1066	err = pci_enable_msix(adapter->pdev,
1067			      adapter->msix_entries,
1068			      numvecs);
1069	if (err == 0)
1070		goto out;
1071
1072	igb_reset_interrupt_capability(adapter);
1073
1074	/* If we can't do MSI-X, try MSI */
1075msi_only:
1076#ifdef CONFIG_PCI_IOV
1077	/* disable SR-IOV for non MSI-X configurations */
1078	if (adapter->vf_data) {
1079		struct e1000_hw *hw = &adapter->hw;
1080		/* disable iov and allow time for transactions to clear */
1081		pci_disable_sriov(adapter->pdev);
1082		msleep(500);
1083
1084		kfree(adapter->vf_data);
1085		adapter->vf_data = NULL;
1086		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1087		wrfl();
1088		msleep(100);
1089		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1090	}
1091#endif
1092	adapter->vfs_allocated_count = 0;
1093	adapter->rss_queues = 1;
1094	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1095	adapter->num_rx_queues = 1;
1096	adapter->num_tx_queues = 1;
1097	adapter->num_q_vectors = 1;
1098	if (!pci_enable_msi(adapter->pdev))
1099		adapter->flags |= IGB_FLAG_HAS_MSI;
1100out:
1101	/* Notify the stack of the (possibly) reduced queue counts. */
1102	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1103	return netif_set_real_num_rx_queues(adapter->netdev,
1104					    adapter->num_rx_queues);
1105}
1106
1107/**
1108 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1109 * @adapter: board private structure to initialize
1110 *
1111 * We allocate one q_vector per queue interrupt.  If allocation fails we
1112 * return -ENOMEM.
1113 **/
1114static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1115{
1116	struct igb_q_vector *q_vector;
1117	struct e1000_hw *hw = &adapter->hw;
1118	int v_idx;
1119	int orig_node = adapter->node;
1120
1121	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1122		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1123						adapter->num_tx_queues)) &&
1124		    (adapter->num_rx_queues == v_idx))
1125			adapter->node = orig_node;
1126		if (orig_node == -1) {
1127			int cur_node = next_online_node(adapter->node);
1128			if (cur_node == MAX_NUMNODES)
1129				cur_node = first_online_node;
1130			adapter->node = cur_node;
1131		}
1132		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1133					adapter->node);
1134		if (!q_vector)
1135			q_vector = kzalloc(sizeof(struct igb_q_vector),
1136					   GFP_KERNEL);
1137		if (!q_vector)
1138			goto err_out;
1139		q_vector->adapter = adapter;
1140		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1141		q_vector->itr_val = IGB_START_ITR;
1142		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1143		adapter->q_vector[v_idx] = q_vector;
1144	}
1145	/* Restore the adapter's original node */
1146	adapter->node = orig_node;
1147
1148	return 0;
1149
1150err_out:
1151	/* Restore the adapter's original node */
1152	adapter->node = orig_node;
1153	igb_free_q_vectors(adapter);
1154	return -ENOMEM;
1155}
1156
1157static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1158                                      int ring_idx, int v_idx)
1159{
1160	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1161
1162	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1163	q_vector->rx.ring->q_vector = q_vector;
1164	q_vector->rx.count++;
1165	q_vector->itr_val = adapter->rx_itr_setting;
1166	if (q_vector->itr_val && q_vector->itr_val <= 3)
1167		q_vector->itr_val = IGB_START_ITR;
1168}
1169
1170static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1171                                      int ring_idx, int v_idx)
1172{
1173	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1174
1175	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1176	q_vector->tx.ring->q_vector = q_vector;
1177	q_vector->tx.count++;
1178	q_vector->itr_val = adapter->tx_itr_setting;
1179	q_vector->tx.work_limit = adapter->tx_work_limit;
1180	if (q_vector->itr_val && q_vector->itr_val <= 3)
1181		q_vector->itr_val = IGB_START_ITR;
1182}
1183
1184/**
1185 * igb_map_ring_to_vector - maps allocated queues to vectors
1186 *
1187 * This function maps the recently allocated queues to vectors.
1188 **/
1189static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1190{
1191	int i;
1192	int v_idx = 0;
1193
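
	/*
	 * Mapping sketch: with enough q_vectors every ring gets its own
	 * vector (all Rx first, then all Tx); otherwise Tx ring i is paired
	 * onto the same vector as Rx ring i and any leftover Tx rings take
	 * the remaining vectors.
	 */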
1194	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1195	    (adapter->num_q_vectors < adapter->num_tx_queues))
1196		return -ENOMEM;
1197
1198	if (adapter->num_q_vectors >=
1199	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1200		for (i = 0; i < adapter->num_rx_queues; i++)
1201			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1202		for (i = 0; i < adapter->num_tx_queues; i++)
1203			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1204	} else {
1205		for (i = 0; i < adapter->num_rx_queues; i++) {
1206			if (i < adapter->num_tx_queues)
1207				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1208			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209		}
1210		for (; i < adapter->num_tx_queues; i++)
1211			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1212	}
1213	return 0;
1214}
1215
1216/**
1217 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1218 *
1219 * This function initializes the interrupts and allocates all of the queues.
1220 **/
1221static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1222{
1223	struct pci_dev *pdev = adapter->pdev;
1224	int err;
1225
1226	err = igb_set_interrupt_capability(adapter);
1227	if (err)
1228		return err;
1229
1230	err = igb_alloc_q_vectors(adapter);
1231	if (err) {
1232		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1233		goto err_alloc_q_vectors;
1234	}
1235
1236	err = igb_alloc_queues(adapter);
1237	if (err) {
1238		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1239		goto err_alloc_queues;
1240	}
1241
1242	err = igb_map_ring_to_vector(adapter);
1243	if (err) {
1244		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1245		goto err_map_queues;
1246	}
1247
1248
1249	return 0;
1250err_map_queues:
1251	igb_free_queues(adapter);
1252err_alloc_queues:
1253	igb_free_q_vectors(adapter);
1254err_alloc_q_vectors:
1255	igb_reset_interrupt_capability(adapter);
1256	return err;
1257}
1258
1259/**
1260 * igb_request_irq - initialize interrupts
1261 *
1262 * Attempts to configure interrupts using the best available
1263 * capabilities of the hardware and kernel.
1264 **/
1265static int igb_request_irq(struct igb_adapter *adapter)
1266{
1267	struct net_device *netdev = adapter->netdev;
1268	struct pci_dev *pdev = adapter->pdev;
1269	int err = 0;
1270
1271	if (adapter->msix_entries) {
1272		err = igb_request_msix(adapter);
1273		if (!err)
1274			goto request_done;
1275		/* fall back to MSI */
1276		igb_clear_interrupt_scheme(adapter);
1277		if (!pci_enable_msi(pdev))
1278			adapter->flags |= IGB_FLAG_HAS_MSI;
1279		igb_free_all_tx_resources(adapter);
1280		igb_free_all_rx_resources(adapter);
1281		adapter->num_tx_queues = 1;
1282		adapter->num_rx_queues = 1;
1283		adapter->num_q_vectors = 1;
1284		err = igb_alloc_q_vectors(adapter);
1285		if (err) {
1286			dev_err(&pdev->dev,
1287			        "Unable to allocate memory for vectors\n");
1288			goto request_done;
1289		}
1290		err = igb_alloc_queues(adapter);
1291		if (err) {
1292			dev_err(&pdev->dev,
1293			        "Unable to allocate memory for queues\n");
1294			igb_free_q_vectors(adapter);
1295			goto request_done;
1296		}
1297		igb_setup_all_tx_resources(adapter);
1298		igb_setup_all_rx_resources(adapter);
1299	}
1300
1301	igb_assign_vector(adapter->q_vector[0], 0);
1302
1303	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1304		err = request_irq(pdev->irq, igb_intr_msi, 0,
1305				  netdev->name, adapter);
1306		if (!err)
1307			goto request_done;
1308
1309		/* fall back to legacy interrupts */
1310		igb_reset_interrupt_capability(adapter);
1311		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1312	}
1313
1314	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1315			  netdev->name, adapter);
1316
1317	if (err)
1318		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1319			err);
1320
1321request_done:
1322	return err;
1323}
1324
1325static void igb_free_irq(struct igb_adapter *adapter)
1326{
1327	if (adapter->msix_entries) {
1328		int vector = 0, i;
1329
1330		free_irq(adapter->msix_entries[vector++].vector, adapter);
1331
1332		for (i = 0; i < adapter->num_q_vectors; i++)
1333			free_irq(adapter->msix_entries[vector++].vector,
1334				 adapter->q_vector[i]);
1335	} else {
1336		free_irq(adapter->pdev->irq, adapter);
1337	}
1338}
1339
1340/**
1341 * igb_irq_disable - Mask off interrupt generation on the NIC
1342 * @adapter: board private structure
1343 **/
1344static void igb_irq_disable(struct igb_adapter *adapter)
1345{
1346	struct e1000_hw *hw = &adapter->hw;
1347
1348	/*
1349	 * We need to be careful when disabling interrupts.  The VFs are also
1350	 * mapped into these registers, so clearing the bits can cause issues
1351	 * for the VF drivers; we therefore only clear what we set.
1352	 */
1353	if (adapter->msix_entries) {
1354		u32 regval = rd32(E1000_EIAM);
1355		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1356		wr32(E1000_EIMC, adapter->eims_enable_mask);
1357		regval = rd32(E1000_EIAC);
1358		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1359	}
1360
1361	wr32(E1000_IAM, 0);
1362	wr32(E1000_IMC, ~0);
1363	wrfl();
1364	if (adapter->msix_entries) {
1365		int i;
1366		for (i = 0; i < adapter->num_q_vectors; i++)
1367			synchronize_irq(adapter->msix_entries[i].vector);
1368	} else {
1369		synchronize_irq(adapter->pdev->irq);
1370	}
1371}
1372
1373/**
1374 * igb_irq_enable - Enable default interrupt generation settings
1375 * @adapter: board private structure
1376 **/
1377static void igb_irq_enable(struct igb_adapter *adapter)
1378{
1379	struct e1000_hw *hw = &adapter->hw;
1380
1381	if (adapter->msix_entries) {
1382		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1383		u32 regval = rd32(E1000_EIAC);
1384		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1385		regval = rd32(E1000_EIAM);
1386		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1387		wr32(E1000_EIMS, adapter->eims_enable_mask);
1388		if (adapter->vfs_allocated_count) {
1389			wr32(E1000_MBVFIMR, 0xFF);
1390			ims |= E1000_IMS_VMMB;
1391		}
1392		wr32(E1000_IMS, ims);
1393	} else {
1394		wr32(E1000_IMS, IMS_ENABLE_MASK |
1395				E1000_IMS_DRSTA);
1396		wr32(E1000_IAM, IMS_ENABLE_MASK |
1397				E1000_IMS_DRSTA);
1398	}
1399}
1400
1401static void igb_update_mng_vlan(struct igb_adapter *adapter)
1402{
1403	struct e1000_hw *hw = &adapter->hw;
1404	u16 vid = adapter->hw.mng_cookie.vlan_id;
1405	u16 old_vid = adapter->mng_vlan_id;
1406
1407	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1408		/* add VID to filter table */
1409		igb_vfta_set(hw, vid, true);
1410		adapter->mng_vlan_id = vid;
1411	} else {
1412		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1413	}
1414
1415	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1416	    (vid != old_vid) &&
1417	    !test_bit(old_vid, adapter->active_vlans)) {
1418		/* remove VID from filter table */
1419		igb_vfta_set(hw, old_vid, false);
1420	}
1421}
1422
1423/**
1424 * igb_release_hw_control - release control of the h/w to f/w
1425 * @adapter: address of board private structure
1426 *
1427 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1428 * For ASF and Pass Through versions of f/w this means that the
1429 * driver is no longer loaded.
1430 *
1431 **/
1432static void igb_release_hw_control(struct igb_adapter *adapter)
1433{
1434	struct e1000_hw *hw = &adapter->hw;
1435	u32 ctrl_ext;
1436
1437	/* Let firmware take over control of h/w */
1438	ctrl_ext = rd32(E1000_CTRL_EXT);
1439	wr32(E1000_CTRL_EXT,
1440			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1441}
1442
1443/**
1444 * igb_get_hw_control - get control of the h/w from f/w
1445 * @adapter: address of board private structure
1446 *
1447 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1448 * For ASF and Pass Through versions of f/w this means that
1449 * the driver is loaded.
1450 *
1451 **/
1452static void igb_get_hw_control(struct igb_adapter *adapter)
1453{
1454	struct e1000_hw *hw = &adapter->hw;
1455	u32 ctrl_ext;
1456
1457	/* Let firmware know the driver has taken over */
1458	ctrl_ext = rd32(E1000_CTRL_EXT);
1459	wr32(E1000_CTRL_EXT,
1460			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1461}
1462
1463/**
1464 * igb_configure - configure the hardware for RX and TX
1465 * @adapter: private board structure
1466 **/
1467static void igb_configure(struct igb_adapter *adapter)
1468{
1469	struct net_device *netdev = adapter->netdev;
1470	int i;
1471
1472	igb_get_hw_control(adapter);
1473	igb_set_rx_mode(netdev);
1474
1475	igb_restore_vlan(adapter);
1476
1477	igb_setup_tctl(adapter);
1478	igb_setup_mrqc(adapter);
1479	igb_setup_rctl(adapter);
1480
1481	igb_configure_tx(adapter);
1482	igb_configure_rx(adapter);
1483
1484	igb_rx_fifo_flush_82575(&adapter->hw);
1485
1486	/* call igb_desc_unused which always leaves
1487	 * at least 1 descriptor unused to make sure
1488	 * next_to_use != next_to_clean */
1489	for (i = 0; i < adapter->num_rx_queues; i++) {
1490		struct igb_ring *ring = adapter->rx_ring[i];
1491		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1492	}
1493}
1494
1495/**
1496 * igb_power_up_link - Power up the phy/serdes link
1497 * @adapter: address of board private structure
1498 **/
1499void igb_power_up_link(struct igb_adapter *adapter)
1500{
1501	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1502		igb_power_up_phy_copper(&adapter->hw);
1503	else
1504		igb_power_up_serdes_link_82575(&adapter->hw);
1505}
1506
1507/**
1508 * igb_power_down_link - Power down the phy/serdes link
1509 * @adapter: address of board private structure
1510 */
1511static void igb_power_down_link(struct igb_adapter *adapter)
1512{
1513	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1514		igb_power_down_phy_copper_82575(&adapter->hw);
1515	else
1516		igb_shutdown_serdes_link_82575(&adapter->hw);
1517}
1518
1519/**
1520 * igb_up - Open the interface and prepare it to handle traffic
1521 * @adapter: board private structure
1522 **/
1523int igb_up(struct igb_adapter *adapter)
1524{
1525	struct e1000_hw *hw = &adapter->hw;
1526	int i;
1527
1528	/* hardware has been reset, we need to reload some things */
1529	igb_configure(adapter);
1530
1531	clear_bit(__IGB_DOWN, &adapter->state);
1532
1533	for (i = 0; i < adapter->num_q_vectors; i++)
1534		napi_enable(&(adapter->q_vector[i]->napi));
1535
1536	if (adapter->msix_entries)
1537		igb_configure_msix(adapter);
1538	else
1539		igb_assign_vector(adapter->q_vector[0], 0);
1540
1541	/* Clear any pending interrupts. */
1542	rd32(E1000_ICR);
1543	igb_irq_enable(adapter);
1544
1545	/* notify VFs that reset has been completed */
1546	if (adapter->vfs_allocated_count) {
1547		u32 reg_data = rd32(E1000_CTRL_EXT);
1548		reg_data |= E1000_CTRL_EXT_PFRSTD;
1549		wr32(E1000_CTRL_EXT, reg_data);
1550	}
1551
1552	netif_tx_start_all_queues(adapter->netdev);
1553
1554	/* start the watchdog. */
1555	hw->mac.get_link_status = 1;
1556	schedule_work(&adapter->watchdog_task);
1557
1558	return 0;
1559}
1560
1561void igb_down(struct igb_adapter *adapter)
1562{
1563	struct net_device *netdev = adapter->netdev;
1564	struct e1000_hw *hw = &adapter->hw;
1565	u32 tctl, rctl;
1566	int i;
1567
1568	/* signal that we're down so the interrupt handler does not
1569	 * reschedule our watchdog timer */
1570	set_bit(__IGB_DOWN, &adapter->state);
1571
1572	/* disable receives in the hardware */
1573	rctl = rd32(E1000_RCTL);
1574	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1575	/* flush and sleep below */
1576
1577	netif_tx_stop_all_queues(netdev);
1578
1579	/* disable transmits in the hardware */
1580	tctl = rd32(E1000_TCTL);
1581	tctl &= ~E1000_TCTL_EN;
1582	wr32(E1000_TCTL, tctl);
1583	/* flush both disables and wait for them to finish */
1584	wrfl();
1585	msleep(10);
1586
1587	for (i = 0; i < adapter->num_q_vectors; i++)
1588		napi_disable(&(adapter->q_vector[i]->napi));
1589
1590	igb_irq_disable(adapter);
1591
1592	del_timer_sync(&adapter->watchdog_timer);
1593	del_timer_sync(&adapter->phy_info_timer);
1594
1595	netif_carrier_off(netdev);
1596
1597	/* record the stats before reset */
1598	spin_lock(&adapter->stats64_lock);
1599	igb_update_stats(adapter, &adapter->stats64);
1600	spin_unlock(&adapter->stats64_lock);
1601
1602	adapter->link_speed = 0;
1603	adapter->link_duplex = 0;
1604
1605	if (!pci_channel_offline(adapter->pdev))
1606		igb_reset(adapter);
1607	igb_clean_all_tx_rings(adapter);
1608	igb_clean_all_rx_rings(adapter);
1609#ifdef CONFIG_IGB_DCA
1610
1611	/* since we reset the hardware DCA settings were cleared */
1612	igb_setup_dca(adapter);
1613#endif
1614}
1615
1616void igb_reinit_locked(struct igb_adapter *adapter)
1617{
1618	WARN_ON(in_interrupt());
1619	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1620		msleep(1);
1621	igb_down(adapter);
1622	igb_up(adapter);
1623	clear_bit(__IGB_RESETTING, &adapter->state);
1624}
1625
1626void igb_reset(struct igb_adapter *adapter)
1627{
1628	struct pci_dev *pdev = adapter->pdev;
1629	struct e1000_hw *hw = &adapter->hw;
1630	struct e1000_mac_info *mac = &hw->mac;
1631	struct e1000_fc_info *fc = &hw->fc;
1632	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1633	u16 hwm;
1634
1635	/* Repartition the PBA for MTUs greater than 9K.
1636	 * CTRL.RST is required for the change to take effect.
1637	 */
1638	switch (mac->type) {
1639	case e1000_i350:
1640	case e1000_82580:
1641		pba = rd32(E1000_RXPBS);
1642		pba = igb_rxpbs_adjust_82580(pba);
1643		break;
1644	case e1000_82576:
1645		pba = rd32(E1000_RXPBS);
1646		pba &= E1000_RXPBS_SIZE_MASK_82576;
1647		break;
1648	case e1000_82575:
1649	default:
1650		pba = E1000_PBA_34K;
1651		break;
1652	}
1653
1654	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1655	    (mac->type < e1000_82576)) {
1656		/* adjust PBA for jumbo frames */
1657		wr32(E1000_PBA, pba);
1658
1659		/* To maintain wire speed transmits, the Tx FIFO should be
1660		 * large enough to accommodate two full transmit packets,
1661		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1662		 * the Rx FIFO should be large enough to accommodate at least
1663		 * one full receive packet and is similarly rounded up and
1664		 * expressed in KB. */
1665		pba = rd32(E1000_PBA);
1666		/* upper 16 bits has Tx packet buffer allocation size in KB */
1667		tx_space = pba >> 16;
1668		/* lower 16 bits has Rx packet buffer allocation size in KB */
1669		pba &= 0xffff;
1670		/* the Tx FIFO also stores 16 bytes of descriptor info per packet,
1671		 * but don't include the Ethernet FCS because hardware appends it */
1672		min_tx_space = (adapter->max_frame_size +
1673				sizeof(union e1000_adv_tx_desc) -
1674				ETH_FCS_LEN) * 2;
1675		min_tx_space = ALIGN(min_tx_space, 1024);
1676		min_tx_space >>= 10;
1677		/* software strips receive CRC, so leave room for it */
1678		min_rx_space = adapter->max_frame_size;
1679		min_rx_space = ALIGN(min_rx_space, 1024);
1680		min_rx_space >>= 10;
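		/*
		 * Worked example, assuming a 9018-byte max_frame_size (9000-byte
		 * MTU plus Ethernet header and FCS): min_tx_space =
		 * ALIGN((9018 + 16 - 4) * 2, 1024) >> 10 = 18 KB and
		 * min_rx_space = ALIGN(9018, 1024) >> 10 = 9 KB.
		 */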
1681
1682		/* If current Tx allocation is less than the min Tx FIFO size,
1683		 * and the min Tx FIFO size is less than the current Rx FIFO
1684		 * allocation, take space away from current Rx allocation */
1685		if (tx_space < min_tx_space &&
1686		    ((min_tx_space - tx_space) < pba)) {
1687			pba = pba - (min_tx_space - tx_space);
1688
1689			/* if short on rx space, rx wins and must trump tx
1690			 * adjustment */
1691			if (pba < min_rx_space)
1692				pba = min_rx_space;
1693		}
1694		wr32(E1000_PBA, pba);
1695	}
1696
1697	/* flow control settings */
1698	/* The high water mark must be low enough to fit one full frame
1699	 * (or the size used for early receive) above it in the Rx FIFO.
1700	 * Set it to the lower of:
1701	 * - 90% of the Rx FIFO size, or
1702	 * - the full Rx FIFO size minus one full frame */
1703	hwm = min(((pba << 10) * 9 / 10),
1704			((pba << 10) - 2 * adapter->max_frame_size));
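	/*
	 * Illustrative numbers, assuming pba = 34 (KB, the 82575 default) and
	 * a 1522-byte max frame: 90% of 34816 bytes is 31334, while
	 * 34816 - 2 * 1522 = 31772, so hwm = 31334 and fc->high_water rounds
	 * down to 31328.
	 */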
1705
1706	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1707	fc->low_water = fc->high_water - 16;
1708	fc->pause_time = 0xFFFF;
1709	fc->send_xon = 1;
1710	fc->current_mode = fc->requested_mode;
1711
1712	/* disable receive for all VFs and wait one second */
1713	if (adapter->vfs_allocated_count) {
1714		int i;
1715		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1716			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1717
1718		/* ping all the active vfs to let them know we are going down */
1719		igb_ping_all_vfs(adapter);
1720
1721		/* disable transmits and receives */
1722		wr32(E1000_VFRE, 0);
1723		wr32(E1000_VFTE, 0);
1724	}
1725
1726	/* Allow time for pending master requests to run */
1727	hw->mac.ops.reset_hw(hw);
1728	wr32(E1000_WUC, 0);
1729
1730	if (hw->mac.ops.init_hw(hw))
1731		dev_err(&pdev->dev, "Hardware Error\n");
1732
1733	igb_init_dmac(adapter, pba);
1734	if (!netif_running(adapter->netdev))
1735		igb_power_down_link(adapter);
1736
1737	igb_update_mng_vlan(adapter);
1738
1739	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1740	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1741
1742	igb_get_phy_info(hw);
1743}
1744
1745static u32 igb_fix_features(struct net_device *netdev, u32 features)
1746{
1747	/*
1748	 * Since there is no support for separate rx/tx vlan accel
1749	 * enable/disable make sure tx flag is always in same state as rx.
1750	 */
1751	if (features & NETIF_F_HW_VLAN_RX)
1752		features |= NETIF_F_HW_VLAN_TX;
1753	else
1754		features &= ~NETIF_F_HW_VLAN_TX;
1755
1756	return features;
1757}
1758
1759static int igb_set_features(struct net_device *netdev, u32 features)
1760{
1761	u32 changed = netdev->features ^ features;
1762
1763	if (changed & NETIF_F_HW_VLAN_RX)
1764		igb_vlan_mode(netdev, features);
1765
1766	return 0;
1767}
1768
1769static const struct net_device_ops igb_netdev_ops = {
1770	.ndo_open		= igb_open,
1771	.ndo_stop		= igb_close,
1772	.ndo_start_xmit		= igb_xmit_frame,
1773	.ndo_get_stats64	= igb_get_stats64,
1774	.ndo_set_rx_mode	= igb_set_rx_mode,
1775	.ndo_set_mac_address	= igb_set_mac,
1776	.ndo_change_mtu		= igb_change_mtu,
1777	.ndo_do_ioctl		= igb_ioctl,
1778	.ndo_tx_timeout		= igb_tx_timeout,
1779	.ndo_validate_addr	= eth_validate_addr,
1780	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1781	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1782	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1783	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1784	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1785	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1786#ifdef CONFIG_NET_POLL_CONTROLLER
1787	.ndo_poll_controller	= igb_netpoll,
1788#endif
1789	.ndo_fix_features	= igb_fix_features,
1790	.ndo_set_features	= igb_set_features,
1791};
1792
1793/**
1794 * igb_probe - Device Initialization Routine
1795 * @pdev: PCI device information struct
1796 * @ent: entry in igb_pci_tbl
1797 *
1798 * Returns 0 on success, negative on failure
1799 *
1800 * igb_probe initializes an adapter identified by a pci_dev structure.
1801 * The OS initialization, configuring of the adapter private structure,
1802 * and a hardware reset occur.
1803 **/
1804static int __devinit igb_probe(struct pci_dev *pdev,
1805			       const struct pci_device_id *ent)
1806{
1807	struct net_device *netdev;
1808	struct igb_adapter *adapter;
1809	struct e1000_hw *hw;
1810	u16 eeprom_data = 0;
1811	s32 ret_val;
1812	static int global_quad_port_a; /* global quad port a indication */
1813	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1814	unsigned long mmio_start, mmio_len;
1815	int err, pci_using_dac;
1816	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1817	u8 part_str[E1000_PBANUM_LENGTH];
1818
1819	/* Catch broken hardware that put the wrong VF device ID in
1820	 * the PCIe SR-IOV capability.
1821	 */
1822	if (pdev->is_virtfn) {
1823		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1824		     pci_name(pdev), pdev->vendor, pdev->device);
1825		return -EINVAL;
1826	}
1827
1828	err = pci_enable_device_mem(pdev);
1829	if (err)
1830		return err;
1831
1832	pci_using_dac = 0;
1833	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1834	if (!err) {
1835		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1836		if (!err)
1837			pci_using_dac = 1;
1838	} else {
1839		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1840		if (err) {
1841			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1842			if (err) {
1843				dev_err(&pdev->dev, "No usable DMA "
1844					"configuration, aborting\n");
1845				goto err_dma;
1846			}
1847		}
1848	}
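	/* pci_using_dac now records whether a 64-bit DMA mask was accepted;
	 * it is checked further down to decide whether NETIF_F_HIGHDMA can
	 * be advertised on the netdev. */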
1849
1850	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1851	                                   IORESOURCE_MEM),
1852	                                   igb_driver_name);
1853	if (err)
1854		goto err_pci_reg;
1855
1856	pci_enable_pcie_error_reporting(pdev);
1857
1858	pci_set_master(pdev);
1859	pci_save_state(pdev);
1860
1861	err = -ENOMEM;
1862	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1863				   IGB_MAX_TX_QUEUES);
1864	if (!netdev)
1865		goto err_alloc_etherdev;
1866
1867	SET_NETDEV_DEV(netdev, &pdev->dev);
1868
1869	pci_set_drvdata(pdev, netdev);
1870	adapter = netdev_priv(netdev);
1871	adapter->netdev = netdev;
1872	adapter->pdev = pdev;
1873	hw = &adapter->hw;
1874	hw->back = adapter;
1875	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1876
1877	mmio_start = pci_resource_start(pdev, 0);
1878	mmio_len = pci_resource_len(pdev, 0);
1879
1880	err = -EIO;
1881	hw->hw_addr = ioremap(mmio_start, mmio_len);
1882	if (!hw->hw_addr)
1883		goto err_ioremap;
1884
1885	netdev->netdev_ops = &igb_netdev_ops;
1886	igb_set_ethtool_ops(netdev);
1887	netdev->watchdog_timeo = 5 * HZ;
1888
1889	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1890
1891	netdev->mem_start = mmio_start;
1892	netdev->mem_end = mmio_start + mmio_len;
1893
1894	/* PCI config space info */
1895	hw->vendor_id = pdev->vendor;
1896	hw->device_id = pdev->device;
1897	hw->revision_id = pdev->revision;
1898	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1899	hw->subsystem_device_id = pdev->subsystem_device;
1900
1901	/* Copy the default MAC, PHY and NVM function pointers */
1902	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1903	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1904	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1905	/* Initialize skew-specific constants */
1906	err = ei->get_invariants(hw);
1907	if (err)
1908		goto err_sw_init;
1909
1910	/* setup the private structure */
1911	err = igb_sw_init(adapter);
1912	if (err)
1913		goto err_sw_init;
1914
1915	igb_get_bus_info_pcie(hw);
1916
1917	hw->phy.autoneg_wait_to_complete = false;
1918
1919	/* Copper options */
1920	if (hw->phy.media_type == e1000_media_type_copper) {
1921		hw->phy.mdix = AUTO_ALL_MODES;
1922		hw->phy.disable_polarity_correction = false;
1923		hw->phy.ms_type = e1000_ms_hw_default;
1924	}
1925
1926	if (igb_check_reset_block(hw))
1927		dev_info(&pdev->dev,
1928			"PHY reset is blocked due to SOL/IDER session.\n");
1929
1930	/*
1931	 * features is initialized to 0 in allocation; it might have bits
1932	 * set by igb_sw_init, so we should use an OR instead of an
1933	 * assignment.
1934	 */
1935	netdev->features |= NETIF_F_SG |
1936			    NETIF_F_IP_CSUM |
1937			    NETIF_F_IPV6_CSUM |
1938			    NETIF_F_TSO |
1939			    NETIF_F_TSO6 |
1940			    NETIF_F_RXHASH |
1941			    NETIF_F_RXCSUM |
1942			    NETIF_F_HW_VLAN_RX |
1943			    NETIF_F_HW_VLAN_TX;
1944
1945	/* copy netdev features into list of user selectable features */
1946	netdev->hw_features |= netdev->features;
1947
1948	/* set this bit last since it cannot be part of hw_features */
1949	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1950
1951	netdev->vlan_features |= NETIF_F_TSO |
1952				 NETIF_F_TSO6 |
1953				 NETIF_F_IP_CSUM |
1954				 NETIF_F_IPV6_CSUM |
1955				 NETIF_F_SG;
1956
1957	if (pci_using_dac) {
1958		netdev->features |= NETIF_F_HIGHDMA;
1959		netdev->vlan_features |= NETIF_F_HIGHDMA;
1960	}
1961
1962	if (hw->mac.type >= e1000_82576) {
1963		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1964		netdev->features |= NETIF_F_SCTP_CSUM;
1965	}
1966
1967	netdev->priv_flags |= IFF_UNICAST_FLT;
1968
1969	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1970
1971	/* before reading the NVM, reset the controller to put the device in a
1972	 * known good starting state */
1973	hw->mac.ops.reset_hw(hw);
1974
1975	/* make sure the NVM is good */
1976	if (hw->nvm.ops.validate(hw) < 0) {
1977		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1978		err = -EIO;
1979		goto err_eeprom;
1980	}
1981
1982	/* copy the MAC address out of the NVM */
1983	if (hw->mac.ops.read_mac_addr(hw))
1984		dev_err(&pdev->dev, "NVM Read Error\n");
1985
1986	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1987	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1988
1989	if (!is_valid_ether_addr(netdev->perm_addr)) {
1990		dev_err(&pdev->dev, "Invalid MAC Address\n");
1991		err = -EIO;
1992		goto err_eeprom;
1993	}
1994
1995	setup_timer(&adapter->watchdog_timer, igb_watchdog,
1996	            (unsigned long) adapter);
1997	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1998	            (unsigned long) adapter);
1999
2000	INIT_WORK(&adapter->reset_task, igb_reset_task);
2001	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2002
2003	/* Initialize link properties that are user-changeable */
2004	adapter->fc_autoneg = true;
2005	hw->mac.autoneg = true;
2006	hw->phy.autoneg_advertised = 0x2f;
2007
2008	hw->fc.requested_mode = e1000_fc_default;
2009	hw->fc.current_mode = e1000_fc_default;
2010
2011	igb_validate_mdi_setting(hw);
2012
2013	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2014	 * enable the ACPI Magic Packet filter.
2015	 */
2016
2017	if (hw->bus.func == 0)
2018		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2019	else if (hw->mac.type >= e1000_82580)
2020		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2021		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2022		                 &eeprom_data);
2023	else if (hw->bus.func == 1)
2024		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2025
2026	if (eeprom_data & eeprom_apme_mask)
2027		adapter->eeprom_wol |= E1000_WUFC_MAG;
2028
2029	/* now that we have the eeprom settings, apply the special cases where
2030	 * the eeprom may be wrong or the board simply won't support wake on
2031	 * lan on a particular port */
2032	switch (pdev->device) {
2033	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2034		adapter->eeprom_wol = 0;
2035		break;
2036	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2037	case E1000_DEV_ID_82576_FIBER:
2038	case E1000_DEV_ID_82576_SERDES:
2039		/* Wake events only supported on port A for dual fiber
2040		 * regardless of eeprom setting */
2041		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2042			adapter->eeprom_wol = 0;
2043		break;
2044	case E1000_DEV_ID_82576_QUAD_COPPER:
2045	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2046		/* if quad port adapter, disable WoL on all but port A */
2047		if (global_quad_port_a != 0)
2048			adapter->eeprom_wol = 0;
2049		else
2050			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2051		/* Reset for multiple quad port adapters */
2052		if (++global_quad_port_a == 4)
2053			global_quad_port_a = 0;
2054		break;
2055	}
2056
2057	/* initialize the wol settings based on the eeprom settings */
2058	adapter->wol = adapter->eeprom_wol;
2059	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2060
2061	/* reset the hardware with the new settings */
2062	igb_reset(adapter);
2063
2064	/* let the f/w know that the h/w is now under the control of the
2065	 * driver. */
2066	igb_get_hw_control(adapter);
2067
2068	strcpy(netdev->name, "eth%d");
2069	err = register_netdev(netdev);
2070	if (err)
2071		goto err_register;
2072
2073	/* carrier off reporting is important to ethtool even BEFORE open */
2074	netif_carrier_off(netdev);
2075
2076#ifdef CONFIG_IGB_DCA
2077	if (dca_add_requester(&pdev->dev) == 0) {
2078		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2079		dev_info(&pdev->dev, "DCA enabled\n");
2080		igb_setup_dca(adapter);
2081	}
2082
2083#endif
2084	/* do hw tstamp init after resetting */
2085	igb_init_hw_timer(adapter);
2086
2087	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2088	/* print bus type/speed/width info */
2089	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2090		 netdev->name,
2091		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2092		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2093		                                            "unknown"),
2094		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2095		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2096		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2097		   "unknown"),
2098		 netdev->dev_addr);
2099
2100	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2101	if (ret_val)
2102		strcpy(part_str, "Unknown");
2103	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2104	dev_info(&pdev->dev,
2105		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2106		adapter->msix_entries ? "MSI-X" :
2107		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2108		adapter->num_rx_queues, adapter->num_tx_queues);
2109	switch (hw->mac.type) {
2110	case e1000_i350:
2111		igb_set_eee_i350(hw);
2112		break;
2113	default:
2114		break;
2115	}
2116	return 0;
2117
2118err_register:
2119	igb_release_hw_control(adapter);
2120err_eeprom:
2121	if (!igb_check_reset_block(hw))
2122		igb_reset_phy(hw);
2123
2124	if (hw->flash_address)
2125		iounmap(hw->flash_address);
2126err_sw_init:
2127	igb_clear_interrupt_scheme(adapter);
2128	iounmap(hw->hw_addr);
2129err_ioremap:
2130	free_netdev(netdev);
2131err_alloc_etherdev:
2132	pci_release_selected_regions(pdev,
2133	                             pci_select_bars(pdev, IORESOURCE_MEM));
2134err_pci_reg:
2135err_dma:
2136	pci_disable_device(pdev);
2137	return err;
2138}
2139
2140/**
2141 * igb_remove - Device Removal Routine
2142 * @pdev: PCI device information struct
2143 *
2144 * igb_remove is called by the PCI subsystem to alert the driver
2145 * that it should release a PCI device.  This could be caused by a
2146 * Hot-Plug event, or because the driver is going to be removed from
2147 * memory.
2148 **/
2149static void __devexit igb_remove(struct pci_dev *pdev)
2150{
2151	struct net_device *netdev = pci_get_drvdata(pdev);
2152	struct igb_adapter *adapter = netdev_priv(netdev);
2153	struct e1000_hw *hw = &adapter->hw;
2154
2155	/*
2156	 * The watchdog timer may be rescheduled, so explicitly
2157	 * disable watchdog from being rescheduled.
2158	 */
2159	set_bit(__IGB_DOWN, &adapter->state);
2160	del_timer_sync(&adapter->watchdog_timer);
2161	del_timer_sync(&adapter->phy_info_timer);
2162
2163	cancel_work_sync(&adapter->reset_task);
2164	cancel_work_sync(&adapter->watchdog_task);
2165
2166#ifdef CONFIG_IGB_DCA
2167	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2168		dev_info(&pdev->dev, "DCA disabled\n");
2169		dca_remove_requester(&pdev->dev);
2170		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2171		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2172	}
2173#endif
2174
2175	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2176	 * would have already happened in close and is redundant. */
2177	igb_release_hw_control(adapter);
2178
2179	unregister_netdev(netdev);
2180
2181	igb_clear_interrupt_scheme(adapter);
2182
2183#ifdef CONFIG_PCI_IOV
2184	/* reclaim resources allocated to VFs */
2185	if (adapter->vf_data) {
2186		/* disable iov and allow time for transactions to clear */
2187		if (!igb_check_vf_assignment(adapter)) {
2188			pci_disable_sriov(pdev);
2189			msleep(500);
2190		} else {
2191			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2192		}
2193
2194		kfree(adapter->vf_data);
2195		adapter->vf_data = NULL;
2196		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2197		wrfl();
2198		msleep(100);
2199		dev_info(&pdev->dev, "IOV Disabled\n");
2200	}
2201#endif
2202
2203	iounmap(hw->hw_addr);
2204	if (hw->flash_address)
2205		iounmap(hw->flash_address);
2206	pci_release_selected_regions(pdev,
2207	                             pci_select_bars(pdev, IORESOURCE_MEM));
2208
2209	kfree(adapter->shadow_vfta);
2210	free_netdev(netdev);
2211
2212	pci_disable_pcie_error_reporting(pdev);
2213
2214	pci_disable_device(pdev);
2215}
2216
2217/**
2218 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2219 * @adapter: board private structure to initialize
2220 *
2221 * This function initializes the vf specific data storage and then attempts to
2222 * allocate the VFs.  The reason for ordering it this way is that it is much
2223 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2224 * the memory for the VFs.
2225 **/
2226static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2227{
2228#ifdef CONFIG_PCI_IOV
2229	struct pci_dev *pdev = adapter->pdev;
2230	int old_vfs = igb_find_enabled_vfs(adapter);
2231	int i;
2232
2233	if (old_vfs) {
2234		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2235			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2236		adapter->vfs_allocated_count = old_vfs;
2237	}
2238
2239	if (!adapter->vfs_allocated_count)
2240		return;
2241
2242	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2243				sizeof(struct vf_data_storage), GFP_KERNEL);
2244	/* if allocation failed then we do not support SR-IOV */
2245	if (!adapter->vf_data) {
2246		adapter->vfs_allocated_count = 0;
2247		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2248			"Data Storage\n");
2249		goto out;
2250	}
2251
2252	if (!old_vfs) {
2253		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2254			goto err_out;
2255	}
2256	dev_info(&pdev->dev, "%d VFs allocated\n",
2257		 adapter->vfs_allocated_count);
2258	for (i = 0; i < adapter->vfs_allocated_count; i++)
2259		igb_vf_configure(adapter, i);
2260
2261	/* DMA Coalescing is not supported in IOV mode. */
2262	adapter->flags &= ~IGB_FLAG_DMAC;
2263	goto out;
2264err_out:
2265	kfree(adapter->vf_data);
2266	adapter->vf_data = NULL;
2267	adapter->vfs_allocated_count = 0;
2268out:
2269	return;
2270#endif /* CONFIG_PCI_IOV */
2271}
2272
2273/**
2274 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2275 * @adapter: board private structure to initialize
2276 *
2277 * igb_init_hw_timer initializes the function pointer and values for the hw
2278 * timer found in hardware.
2279 **/
2280static void igb_init_hw_timer(struct igb_adapter *adapter)
2281{
2282	struct e1000_hw *hw = &adapter->hw;
2283
2284	switch (hw->mac.type) {
2285	case e1000_i350:
2286	case e1000_82580:
2287		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2288		adapter->cycles.read = igb_read_clock;
2289		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2290		adapter->cycles.mult = 1;
2291		/*
2292		 * The 82580 timesync updates the system timer in 8ns increments
2293		 * and the value cannot be shifted.  Instead we need to shift
2294		 * the registers to generate a 64bit timer value.  As a result
2295		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2296		 * 24 in order to generate a larger value for synchronization.
2297		 */
2298		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2299		/* disable system timer temporarily by setting bit 31 */
2300		wr32(E1000_TSAUXC, 0x80000000);
2301		wrfl();
2302
2303		/* Set registers so that rollover occurs soon to test this. */
2304		wr32(E1000_SYSTIMR, 0x00000000);
2305		wr32(E1000_SYSTIML, 0x80000000);
2306		wr32(E1000_SYSTIMH, 0x000000FF);
2307		wrfl();
2308
2309		/* enable system timer by clearing bit 31 */
2310		wr32(E1000_TSAUXC, 0x0);
2311		wrfl();
2312
2313		timecounter_init(&adapter->clock,
2314				 &adapter->cycles,
2315				 ktime_to_ns(ktime_get_real()));
2316		/*
2317		 * Synchronize our NIC clock against system wall clock. NIC
2318		 * time stamp reading requires ~3us per sample, each sample
2319		 * was pretty stable even under load => only require 10
2320		 * samples for each offset comparison.
2321		 */
2322		memset(&adapter->compare, 0, sizeof(adapter->compare));
2323		adapter->compare.source = &adapter->clock;
2324		adapter->compare.target = ktime_get_real;
2325		adapter->compare.num_samples = 10;
2326		timecompare_update(&adapter->compare, 0);
2327		break;
2328	case e1000_82576:
2329		/*
2330		 * Initialize hardware timer: we keep it running just in case
2331		 * that some program needs it later on.
2332		 */
2333		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2334		adapter->cycles.read = igb_read_clock;
2335		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2336		adapter->cycles.mult = 1;
2337		/*
2338		 * Scale the NIC clock cycle by a large factor so that
2339		 * relatively small clock corrections can be added or
2340		 * subtracted at each clock tick. The drawbacks of a large
2341		 * factor are a) that the clock register overflows more quickly
2342		 * (not such a big deal) and b) that the increment per tick has
2343		 * to fit into 24 bits.  As a result we need to use a shift of
2344		 * 19 so we can fit a value of 16 into the TIMINCA register.
2345		 */
2346		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2347		wr32(E1000_TIMINCA,
2348		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2349		                (16 << IGB_82576_TSYNC_SHIFT));
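		/* With the shift of 19 described above, the programmed
		 * increment is 16 << 19 = 0x800000, which just fits in the
		 * 24-bit increment field. */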
2350
2351		/* Set registers so that rollover occurs soon to test this. */
2352		wr32(E1000_SYSTIML, 0x00000000);
2353		wr32(E1000_SYSTIMH, 0xFF800000);
2354		wrfl();
2355
2356		timecounter_init(&adapter->clock,
2357				 &adapter->cycles,
2358				 ktime_to_ns(ktime_get_real()));
2359		/*
2360		 * Synchronize our NIC clock against system wall clock. NIC
2361		 * time stamp reading requires ~3us per sample, each sample
2362		 * was pretty stable even under load => only require 10
2363		 * samples for each offset comparison.
2364		 */
2365		memset(&adapter->compare, 0, sizeof(adapter->compare));
2366		adapter->compare.source = &adapter->clock;
2367		adapter->compare.target = ktime_get_real;
2368		adapter->compare.num_samples = 10;
2369		timecompare_update(&adapter->compare, 0);
2370		break;
2371	case e1000_82575:
2372		/* 82575 does not support timesync */
2373	default:
2374		break;
2375	}
2376
2377}
2378
2379/**
2380 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2381 * @adapter: board private structure to initialize
2382 *
2383 * igb_sw_init initializes the Adapter private data structure.
2384 * Fields are initialized based on PCI device information and
2385 * OS network device settings (MTU size).
2386 **/
2387static int __devinit igb_sw_init(struct igb_adapter *adapter)
2388{
2389	struct e1000_hw *hw = &adapter->hw;
2390	struct net_device *netdev = adapter->netdev;
2391	struct pci_dev *pdev = adapter->pdev;
2392
2393	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2394
2395	/* set default ring sizes */
2396	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2397	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2398
2399	/* set default ITR values */
2400	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2401	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2402
2403	/* set default work limits */
2404	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2405
2406	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2407				  VLAN_HLEN;
2408	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
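	/* With the default 1500-byte MTU this gives a 1522-byte
	 * max_frame_size (1500 + 14 + 4 + 4) and a 64-byte min_frame_size
	 * (60 + 4). */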
2409
2410	adapter->node = -1;
2411
2412	spin_lock_init(&adapter->stats64_lock);
2413#ifdef CONFIG_PCI_IOV
2414	switch (hw->mac.type) {
2415	case e1000_82576:
2416	case e1000_i350:
2417		if (max_vfs > 7) {
2418			dev_warn(&pdev->dev,
2419				 "Maximum of 7 VFs per PF, using max\n");
2420			adapter->vfs_allocated_count = 7;
2421		} else
2422			adapter->vfs_allocated_count = max_vfs;
2423		break;
2424	default:
2425		break;
2426	}
2427#endif /* CONFIG_PCI_IOV */
2428	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2429	/* i350 cannot do RSS and SR-IOV at the same time */
2430	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2431		adapter->rss_queues = 1;
2432
2433	/*
2434	 * if rss_queues > 4, or VFs are going to be allocated alongside
2435	 * multiple rss queues, then we should combine the queues into queue
2436	 * pairs in order to conserve interrupts due to the limited supply
2437	 */
2438	if ((adapter->rss_queues > 4) ||
2439	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2440		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2441
2442	/* Setup and initialize a copy of the hw vlan table array */
2443	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2444				E1000_VLAN_FILTER_TBL_SIZE,
2445				GFP_ATOMIC);
2446
2447	/* This call may decrease the number of queues */
2448	if (igb_init_interrupt_scheme(adapter)) {
2449		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2450		return -ENOMEM;
2451	}
2452
2453	igb_probe_vfs(adapter);
2454
2455	/* Explicitly disable IRQ since the NIC can be in any state. */
2456	igb_irq_disable(adapter);
2457
2458	if (hw->mac.type == e1000_i350)
2459		adapter->flags &= ~IGB_FLAG_DMAC;
2460
2461	set_bit(__IGB_DOWN, &adapter->state);
2462	return 0;
2463}
2464
2465/**
2466 * igb_open - Called when a network interface is made active
2467 * @netdev: network interface device structure
2468 *
2469 * Returns 0 on success, negative value on failure
2470 *
2471 * The open entry point is called when a network interface is made
2472 * active by the system (IFF_UP).  At this point all resources needed
2473 * for transmit and receive operations are allocated, the interrupt
2474 * handler is registered with the OS, the watchdog timer is started,
2475 * and the stack is notified that the interface is ready.
2476 **/
2477static int igb_open(struct net_device *netdev)
2478{
2479	struct igb_adapter *adapter = netdev_priv(netdev);
2480	struct e1000_hw *hw = &adapter->hw;
2481	int err;
2482	int i;
2483
2484	/* disallow open during test */
2485	if (test_bit(__IGB_TESTING, &adapter->state))
2486		return -EBUSY;
2487
2488	netif_carrier_off(netdev);
2489
2490	/* allocate transmit descriptors */
2491	err = igb_setup_all_tx_resources(adapter);
2492	if (err)
2493		goto err_setup_tx;
2494
2495	/* allocate receive descriptors */
2496	err = igb_setup_all_rx_resources(adapter);
2497	if (err)
2498		goto err_setup_rx;
2499
2500	igb_power_up_link(adapter);
2501
2502	/* before we allocate an interrupt, we must be ready to handle it.
2503	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2504	 * as soon as we call pci_request_irq, so we have to setup our
2505	 * clean_rx handler before we do so.  */
2506	igb_configure(adapter);
2507
2508	err = igb_request_irq(adapter);
2509	if (err)
2510		goto err_req_irq;
2511
2512	/* From here on the code is the same as igb_up() */
2513	clear_bit(__IGB_DOWN, &adapter->state);
2514
2515	for (i = 0; i < adapter->num_q_vectors; i++)
2516		napi_enable(&(adapter->q_vector[i]->napi));
2517
2518	/* Clear any pending interrupts. */
2519	rd32(E1000_ICR);
2520
2521	igb_irq_enable(adapter);
2522
2523	/* notify VFs that reset has been completed */
2524	if (adapter->vfs_allocated_count) {
2525		u32 reg_data = rd32(E1000_CTRL_EXT);
2526		reg_data |= E1000_CTRL_EXT_PFRSTD;
2527		wr32(E1000_CTRL_EXT, reg_data);
2528	}
2529
2530	netif_tx_start_all_queues(netdev);
2531
2532	/* start the watchdog. */
2533	hw->mac.get_link_status = 1;
2534	schedule_work(&adapter->watchdog_task);
2535
2536	return 0;
2537
2538err_req_irq:
2539	igb_release_hw_control(adapter);
2540	igb_power_down_link(adapter);
2541	igb_free_all_rx_resources(adapter);
2542err_setup_rx:
2543	igb_free_all_tx_resources(adapter);
2544err_setup_tx:
2545	igb_reset(adapter);
2546
2547	return err;
2548}
2549
2550/**
2551 * igb_close - Disables a network interface
2552 * @netdev: network interface device structure
2553 *
2554 * Returns 0, this is not allowed to fail
2555 *
2556 * The close entry point is called when an interface is de-activated
2557 * by the OS.  The hardware is still under the driver's control, but
2558 * needs to be disabled.  A global MAC reset is issued to stop the
2559 * hardware, and all transmit and receive resources are freed.
2560 **/
2561static int igb_close(struct net_device *netdev)
2562{
2563	struct igb_adapter *adapter = netdev_priv(netdev);
2564
2565	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2566	igb_down(adapter);
2567
2568	igb_free_irq(adapter);
2569
2570	igb_free_all_tx_resources(adapter);
2571	igb_free_all_rx_resources(adapter);
2572
2573	return 0;
2574}
2575
2576/**
2577 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2578 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2579 *
2580 * Return 0 on success, negative on failure
2581 **/
2582int igb_setup_tx_resources(struct igb_ring *tx_ring)
2583{
2584	struct device *dev = tx_ring->dev;
2585	int orig_node = dev_to_node(dev);
2586	int size;
2587
2588	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2589	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2590	if (!tx_ring->tx_buffer_info)
2591		tx_ring->tx_buffer_info = vzalloc(size);
2592	if (!tx_ring->tx_buffer_info)
2593		goto err;
2594
2595	/* round up to nearest 4K */
2596	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2597	tx_ring->size = ALIGN(tx_ring->size, 4096);
2598
2599	set_dev_node(dev, tx_ring->numa_node);
2600	tx_ring->desc = dma_alloc_coherent(dev,
2601					   tx_ring->size,
2602					   &tx_ring->dma,
2603					   GFP_KERNEL);
2604	set_dev_node(dev, orig_node);
2605	if (!tx_ring->desc)
2606		tx_ring->desc = dma_alloc_coherent(dev,
2607						   tx_ring->size,
2608						   &tx_ring->dma,
2609						   GFP_KERNEL);
2610
2611	if (!tx_ring->desc)
2612		goto err;
2613
2614	tx_ring->next_to_use = 0;
2615	tx_ring->next_to_clean = 0;
2616
2617	return 0;
2618
2619err:
2620	vfree(tx_ring->tx_buffer_info);
2621	dev_err(dev,
2622		"Unable to allocate memory for the transmit descriptor ring\n");
2623	return -ENOMEM;
2624}
2625
2626/**
2627 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2628 *				  (Descriptors) for all queues
2629 * @adapter: board private structure
2630 *
2631 * Return 0 on success, negative on failure
2632 **/
2633static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2634{
2635	struct pci_dev *pdev = adapter->pdev;
2636	int i, err = 0;
2637
2638	for (i = 0; i < adapter->num_tx_queues; i++) {
2639		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2640		if (err) {
2641			dev_err(&pdev->dev,
2642				"Allocation for Tx Queue %u failed\n", i);
2643			for (i--; i >= 0; i--)
2644				igb_free_tx_resources(adapter->tx_ring[i]);
2645			break;
2646		}
2647	}
2648
2649	return err;
2650}
2651
2652/**
2653 * igb_setup_tctl - configure the transmit control registers
2654 * @adapter: Board private structure
2655 **/
2656void igb_setup_tctl(struct igb_adapter *adapter)
2657{
2658	struct e1000_hw *hw = &adapter->hw;
2659	u32 tctl;
2660
2661	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2662	wr32(E1000_TXDCTL(0), 0);
2663
2664	/* Program the Transmit Control Register */
2665	tctl = rd32(E1000_TCTL);
2666	tctl &= ~E1000_TCTL_CT;
2667	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2668		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2669
2670	igb_config_collision_dist(hw);
2671
2672	/* Enable transmits */
2673	tctl |= E1000_TCTL_EN;
2674
2675	wr32(E1000_TCTL, tctl);
2676}
2677
2678/**
2679 * igb_configure_tx_ring - Configure transmit ring after Reset
2680 * @adapter: board private structure
2681 * @ring: tx ring to configure
2682 *
2683 * Configure a transmit ring after a reset.
2684 **/
2685void igb_configure_tx_ring(struct igb_adapter *adapter,
2686                           struct igb_ring *ring)
2687{
2688	struct e1000_hw *hw = &adapter->hw;
2689	u32 txdctl = 0;
2690	u64 tdba = ring->dma;
2691	int reg_idx = ring->reg_idx;
2692
2693	/* disable the queue */
2694	wr32(E1000_TXDCTL(reg_idx), 0);
2695	wrfl();
2696	mdelay(10);
2697
2698	wr32(E1000_TDLEN(reg_idx),
2699	                ring->count * sizeof(union e1000_adv_tx_desc));
2700	wr32(E1000_TDBAL(reg_idx),
2701	                tdba & 0x00000000ffffffffULL);
2702	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2703
2704	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2705	wr32(E1000_TDH(reg_idx), 0);
2706	writel(0, ring->tail);
2707
2708	txdctl |= IGB_TX_PTHRESH;
2709	txdctl |= IGB_TX_HTHRESH << 8;
2710	txdctl |= IGB_TX_WTHRESH << 16;
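	/* the prefetch, host and write-back thresholds occupy separate
	 * byte-aligned fields of TXDCTL, hence the 8- and 16-bit shifts */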
2711
2712	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2713	wr32(E1000_TXDCTL(reg_idx), txdctl);
2714}
2715
2716/**
2717 * igb_configure_tx - Configure transmit Unit after Reset
2718 * @adapter: board private structure
2719 *
2720 * Configure the Tx unit of the MAC after a reset.
2721 **/
2722static void igb_configure_tx(struct igb_adapter *adapter)
2723{
2724	int i;
2725
2726	for (i = 0; i < adapter->num_tx_queues; i++)
2727		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2728}
2729
2730/**
2731 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2732 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2733 *
2734 * Returns 0 on success, negative on failure
2735 **/
2736int igb_setup_rx_resources(struct igb_ring *rx_ring)
2737{
2738	struct device *dev = rx_ring->dev;
2739	int orig_node = dev_to_node(dev);
2740	int size, desc_len;
2741
2742	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2743	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2744	if (!rx_ring->rx_buffer_info)
2745		rx_ring->rx_buffer_info = vzalloc(size);
2746	if (!rx_ring->rx_buffer_info)
2747		goto err;
2748
2749	desc_len = sizeof(union e1000_adv_rx_desc);
2750
2751	/* Round up to nearest 4K */
2752	rx_ring->size = rx_ring->count * desc_len;
2753	rx_ring->size = ALIGN(rx_ring->size, 4096);
2754
2755	set_dev_node(dev, rx_ring->numa_node);
2756	rx_ring->desc = dma_alloc_coherent(dev,
2757					   rx_ring->size,
2758					   &rx_ring->dma,
2759					   GFP_KERNEL);
2760	set_dev_node(dev, orig_node);
2761	if (!rx_ring->desc)
2762		rx_ring->desc = dma_alloc_coherent(dev,
2763						   rx_ring->size,
2764						   &rx_ring->dma,
2765						   GFP_KERNEL);
2766
2767	if (!rx_ring->desc)
2768		goto err;
2769
2770	rx_ring->next_to_clean = 0;
2771	rx_ring->next_to_use = 0;
2772
2773	return 0;
2774
2775err:
2776	vfree(rx_ring->rx_buffer_info);
2777	rx_ring->rx_buffer_info = NULL;
2778	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2779		" ring\n");
2780	return -ENOMEM;
2781}
2782
2783/**
2784 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2785 *				  (Descriptors) for all queues
2786 * @adapter: board private structure
2787 *
2788 * Return 0 on success, negative on failure
2789 **/
2790static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2791{
2792	struct pci_dev *pdev = adapter->pdev;
2793	int i, err = 0;
2794
2795	for (i = 0; i < adapter->num_rx_queues; i++) {
2796		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2797		if (err) {
2798			dev_err(&pdev->dev,
2799				"Allocation for Rx Queue %u failed\n", i);
2800			for (i--; i >= 0; i--)
2801				igb_free_rx_resources(adapter->rx_ring[i]);
2802			break;
2803		}
2804	}
2805
2806	return err;
2807}
2808
2809/**
2810 * igb_setup_mrqc - configure the multiple receive queue control registers
2811 * @adapter: Board private structure
2812 **/
2813static void igb_setup_mrqc(struct igb_adapter *adapter)
2814{
2815	struct e1000_hw *hw = &adapter->hw;
2816	u32 mrqc, rxcsum;
2817	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2818	union e1000_reta {
2819		u32 dword;
2820		u8  bytes[4];
2821	} reta;
2822	static const u8 rsshash[40] = {
2823		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2824		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2825		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2826		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2827
2828	/* Fill out hash function seeds */
2829	for (j = 0; j < 10; j++) {
2830		u32 rsskey = rsshash[(j * 4)];
2831		rsskey |= rsshash[(j * 4) + 1] << 8;
2832		rsskey |= rsshash[(j * 4) + 2] << 16;
2833		rsskey |= rsshash[(j * 4) + 3] << 24;
2834		array_wr32(E1000_RSSRK(0), j, rsskey);
2835	}
2836
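	/* each RSSRK register packs four key bytes in little-endian order,
	 * e.g. the first register written above ends up as 0xda565a6d */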
2837	num_rx_queues = adapter->rss_queues;
2838
2839	if (adapter->vfs_allocated_count) {
2840		/* 82575 and 82576 support 2 RSS queues for VMDq */
2841		switch (hw->mac.type) {
2842		case e1000_i350:
2843		case e1000_82580:
2844			num_rx_queues = 1;
2845			shift = 0;
2846			break;
2847		case e1000_82576:
2848			shift = 3;
2849			num_rx_queues = 2;
2850			break;
2851		case e1000_82575:
2852			shift = 2;
2853			shift2 = 6;
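			/* fall through */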
2854		default:
2855			break;
2856		}
2857	} else {
2858		if (hw->mac.type == e1000_82575)
2859			shift = 6;
2860	}
2861
2862	for (j = 0; j < (32 * 4); j++) {
2863		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2864		if (shift2)
2865			reta.bytes[j & 3] |= num_rx_queues << shift2;
2866		if ((j & 3) == 3)
2867			wr32(E1000_RETA(j >> 2), reta.dword);
2868	}
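	/* e.g. with four RSS queues and no VFs (shift == 0), the redirection
	 * table written above simply cycles 0, 1, 2, 3 across its 128
	 * entries */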
2869
2870	/*
2871	 * Disable raw packet checksumming so that RSS hash is placed in
2872	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2873	 * offloads as they are enabled by default
2874	 */
2875	rxcsum = rd32(E1000_RXCSUM);
2876	rxcsum |= E1000_RXCSUM_PCSD;
2877
2878	if (adapter->hw.mac.type >= e1000_82576)
2879		/* Enable Receive Checksum Offload for SCTP */
2880		rxcsum |= E1000_RXCSUM_CRCOFL;
2881
2882	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2883	wr32(E1000_RXCSUM, rxcsum);
2884
2885	/* If VMDq is enabled then we set the appropriate mode for that, else
2886	 * we default to RSS so that an RSS hash is calculated per packet even
2887	 * if we are only using one queue */
2888	if (adapter->vfs_allocated_count) {
2889		if (hw->mac.type > e1000_82575) {
2890			/* Set the default pool for the PF's first queue */
2891			u32 vtctl = rd32(E1000_VT_CTL);
2892			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2893				   E1000_VT_CTL_DISABLE_DEF_POOL);
2894			vtctl |= adapter->vfs_allocated_count <<
2895				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2896			wr32(E1000_VT_CTL, vtctl);
2897		}
2898		if (adapter->rss_queues > 1)
2899			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2900		else
2901			mrqc = E1000_MRQC_ENABLE_VMDQ;
2902	} else {
2903		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2904	}
2905	igb_vmm_control(adapter);
2906
2907	/*
2908	 * Generate RSS hash based on TCP port numbers and/or
2909	 * IPv4/v6 src and dst addresses since UDP cannot be
2910	 * hashed reliably due to IP fragmentation
2911	 */
2912	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2913		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2914		E1000_MRQC_RSS_FIELD_IPV6 |
2915		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2916		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2917
2918	wr32(E1000_MRQC, mrqc);
2919}
2920
2921/**
2922 * igb_setup_rctl - configure the receive control registers
2923 * @adapter: Board private structure
2924 **/
2925void igb_setup_rctl(struct igb_adapter *adapter)
2926{
2927	struct e1000_hw *hw = &adapter->hw;
2928	u32 rctl;
2929
2930	rctl = rd32(E1000_RCTL);
2931
2932	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2933	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2934
2935	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2936		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2937
2938	/*
2939	 * enable stripping of CRC. It's unlikely this will break BMC
2940	 * redirection as it did with e1000. Newer features require
2941	 * that the HW strips the CRC.
2942	 */
2943	rctl |= E1000_RCTL_SECRC;
2944
2945	/* disable store bad packets and clear size bits. */
2946	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2947
2948	/* enable LPE to prevent packets larger than max_frame_size */
2949	rctl |= E1000_RCTL_LPE;
2950
2951	/* disable queue 0 to prevent tail write w/o re-config */
2952	wr32(E1000_RXDCTL(0), 0);
2953
2954	/* Attention!!!  For SR-IOV PF driver operations you must enable
2955	 * queue drop for all VF and PF queues to prevent head of line blocking
2956	 * if an un-trusted VF does not provide descriptors to hardware.
2957	 */
2958	if (adapter->vfs_allocated_count) {
2959		/* set all queue drop enable bits */
2960		wr32(E1000_QDE, ALL_QUEUES);
2961	}
2962
2963	wr32(E1000_RCTL, rctl);
2964}
2965
2966static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2967                                   int vfn)
2968{
2969	struct e1000_hw *hw = &adapter->hw;
2970	u32 vmolr;
2971
2972	/* if it isn't the PF, check to see if VFs are enabled and
2973	 * increase the size to support VLAN tags */
2974	if (vfn < adapter->vfs_allocated_count &&
2975	    adapter->vf_data[vfn].vlans_enabled)
2976		size += VLAN_TAG_SIZE;
2977
2978	vmolr = rd32(E1000_VMOLR(vfn));
2979	vmolr &= ~E1000_VMOLR_RLPML_MASK;
2980	vmolr |= size | E1000_VMOLR_LPE;
2981	wr32(E1000_VMOLR(vfn), vmolr);
2982
2983	return 0;
2984}
2985
2986/**
2987 * igb_rlpml_set - set maximum receive packet size
2988 * @adapter: board private structure
2989 *
2990 * Configure maximum receivable packet size.
2991 **/
2992static void igb_rlpml_set(struct igb_adapter *adapter)
2993{
2994	u32 max_frame_size = adapter->max_frame_size;
2995	struct e1000_hw *hw = &adapter->hw;
2996	u16 pf_id = adapter->vfs_allocated_count;
2997
2998	if (pf_id) {
2999		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3000		/*
3001		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3002		 * to our max jumbo frame size, in case we need to enable
3003		 * jumbo frames on one of the rings later.
3004		 * This will not pass over-length frames into the default
3005		 * queue because it's gated by the VMOLR.RLPML.
3006		 */
3007		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3008	}
3009
3010	wr32(E1000_RLPML, max_frame_size);
3011}
3012
3013static inline void igb_set_vmolr(struct igb_adapter *adapter,
3014				 int vfn, bool aupe)
3015{
3016	struct e1000_hw *hw = &adapter->hw;
3017	u32 vmolr;
3018
3019	/*
3020	 * This register exists only on 82576 and newer, so on older hardware
3021	 * we should exit and do nothing
3022	 */
3023	if (hw->mac.type < e1000_82576)
3024		return;
3025
3026	vmolr = rd32(E1000_VMOLR(vfn));
3027	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3028	if (aupe)
3029		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3030	else
3031		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3032
3033	/* clear all bits that might not be set */
3034	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3035
3036	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3037		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3038	/*
3039	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3040	 * multicast packets
3041	 */
3042	if (vfn <= adapter->vfs_allocated_count)
3043		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3044
3045	wr32(E1000_VMOLR(vfn), vmolr);
3046}
3047
3048/**
3049 * igb_configure_rx_ring - Configure a receive ring after Reset
3050 * @adapter: board private structure
3051 * @ring: receive ring to be configured
3052 *
3053 * Configure the Rx unit of the MAC after a reset.
3054 **/
3055void igb_configure_rx_ring(struct igb_adapter *adapter,
3056                           struct igb_ring *ring)
3057{
3058	struct e1000_hw *hw = &adapter->hw;
3059	u64 rdba = ring->dma;
3060	int reg_idx = ring->reg_idx;
3061	u32 srrctl = 0, rxdctl = 0;
3062
3063	/* disable the queue */
3064	wr32(E1000_RXDCTL(reg_idx), 0);
3065
3066	/* Set DMA base address registers */
3067	wr32(E1000_RDBAL(reg_idx),
3068	     rdba & 0x00000000ffffffffULL);
3069	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3070	wr32(E1000_RDLEN(reg_idx),
3071	               ring->count * sizeof(union e1000_adv_rx_desc));
3072
3073	/* initialize head and tail */
3074	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3075	wr32(E1000_RDH(reg_idx), 0);
3076	writel(0, ring->tail);
3077
3078	/* set descriptor configuration */
3079	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3080#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3081	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3082#else
3083	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3084#endif
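	/* with 4K pages this advertises half a page (2KB) as the packet
	 * buffer size; the shift converts bytes into the register's
	 * (presumably 1KB-granular) BSIZEPKT units */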
3085	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3086	if (hw->mac.type >= e1000_82580)
3087		srrctl |= E1000_SRRCTL_TIMESTAMP;
3088	/* Only set Drop Enable if we are supporting multiple queues */
3089	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3090		srrctl |= E1000_SRRCTL_DROP_EN;
3091
3092	wr32(E1000_SRRCTL(reg_idx), srrctl);
3093
3094	/* set filtering for VMDQ pools */
3095	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3096
3097	rxdctl |= IGB_RX_PTHRESH;
3098	rxdctl |= IGB_RX_HTHRESH << 8;
3099	rxdctl |= IGB_RX_WTHRESH << 16;
3100
3101	/* enable receive descriptor fetching */
3102	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3103	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3104}
3105
3106/**
3107 * igb_configure_rx - Configure receive Unit after Reset
3108 * @adapter: board private structure
3109 *
3110 * Configure the Rx unit of the MAC after a reset.
3111 **/
3112static void igb_configure_rx(struct igb_adapter *adapter)
3113{
3114	int i;
3115
3116	/* set UTA to appropriate mode */
3117	igb_set_uta(adapter);
3118
3119	/* set the correct pool for the PF default MAC address in entry 0 */
3120	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3121	                 adapter->vfs_allocated_count);
3122
3123	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3124	 * the Base and Length of the Rx Descriptor Ring */
3125	for (i = 0; i < adapter->num_rx_queues; i++)
3126		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3127}
3128
3129/**
3130 * igb_free_tx_resources - Free Tx Resources per Queue
3131 * @tx_ring: Tx descriptor ring for a specific queue
3132 *
3133 * Free all transmit software resources
3134 **/
3135void igb_free_tx_resources(struct igb_ring *tx_ring)
3136{
3137	igb_clean_tx_ring(tx_ring);
3138
3139	vfree(tx_ring->tx_buffer_info);
3140	tx_ring->tx_buffer_info = NULL;
3141
3142	/* if not set, then don't free */
3143	if (!tx_ring->desc)
3144		return;
3145
3146	dma_free_coherent(tx_ring->dev, tx_ring->size,
3147			  tx_ring->desc, tx_ring->dma);
3148
3149	tx_ring->desc = NULL;
3150}
3151
3152/**
3153 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3154 * @adapter: board private structure
3155 *
3156 * Free all transmit software resources
3157 **/
3158static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3159{
3160	int i;
3161
3162	for (i = 0; i < adapter->num_tx_queues; i++)
3163		igb_free_tx_resources(adapter->tx_ring[i]);
3164}
3165
3166void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3167				    struct igb_tx_buffer *tx_buffer)
3168{
3169	if (tx_buffer->skb) {
3170		dev_kfree_skb_any(tx_buffer->skb);
3171		if (tx_buffer->dma)
3172			dma_unmap_single(ring->dev,
3173					 tx_buffer->dma,
3174					 tx_buffer->length,
3175					 DMA_TO_DEVICE);
3176	} else if (tx_buffer->dma) {
3177		dma_unmap_page(ring->dev,
3178			       tx_buffer->dma,
3179			       tx_buffer->length,
3180			       DMA_TO_DEVICE);
3181	}
3182	tx_buffer->next_to_watch = NULL;
3183	tx_buffer->skb = NULL;
3184	tx_buffer->dma = 0;
3185	/* buffer_info must be completely set up in the transmit path */
3186}
3187
3188/**
3189 * igb_clean_tx_ring - Free Tx Buffers
3190 * @tx_ring: ring to be cleaned
3191 **/
3192static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3193{
3194	struct igb_tx_buffer *buffer_info;
3195	unsigned long size;
3196	u16 i;
3197
3198	if (!tx_ring->tx_buffer_info)
3199		return;
3200	/* Free all the Tx ring sk_buffs */
3201
3202	for (i = 0; i < tx_ring->count; i++) {
3203		buffer_info = &tx_ring->tx_buffer_info[i];
3204		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3205	}
3206
3207	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3208	memset(tx_ring->tx_buffer_info, 0, size);
3209
3210	/* Zero out the descriptor ring */
3211	memset(tx_ring->desc, 0, tx_ring->size);
3212
3213	tx_ring->next_to_use = 0;
3214	tx_ring->next_to_clean = 0;
3215}
3216
3217/**
3218 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3219 * @adapter: board private structure
3220 **/
3221static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3222{
3223	int i;
3224
3225	for (i = 0; i < adapter->num_tx_queues; i++)
3226		igb_clean_tx_ring(adapter->tx_ring[i]);
3227}
3228
3229/**
3230 * igb_free_rx_resources - Free Rx Resources
3231 * @rx_ring: ring to clean the resources from
3232 *
3233 * Free all receive software resources
3234 **/
3235void igb_free_rx_resources(struct igb_ring *rx_ring)
3236{
3237	igb_clean_rx_ring(rx_ring);
3238
3239	vfree(rx_ring->rx_buffer_info);
3240	rx_ring->rx_buffer_info = NULL;
3241
3242	/* if not set, then don't free */
3243	if (!rx_ring->desc)
3244		return;
3245
3246	dma_free_coherent(rx_ring->dev, rx_ring->size,
3247			  rx_ring->desc, rx_ring->dma);
3248
3249	rx_ring->desc = NULL;
3250}
3251
3252/**
3253 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3254 * @adapter: board private structure
3255 *
3256 * Free all receive software resources
3257 **/
3258static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3259{
3260	int i;
3261
3262	for (i = 0; i < adapter->num_rx_queues; i++)
3263		igb_free_rx_resources(adapter->rx_ring[i]);
3264}
3265
3266/**
3267 * igb_clean_rx_ring - Free Rx Buffers per Queue
3268 * @rx_ring: ring to free buffers from
3269 **/
3270static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3271{
3272	unsigned long size;
3273	u16 i;
3274
3275	if (!rx_ring->rx_buffer_info)
3276		return;
3277
3278	/* Free all the Rx ring sk_buffs */
3279	for (i = 0; i < rx_ring->count; i++) {
3280		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3281		if (buffer_info->dma) {
3282			dma_unmap_single(rx_ring->dev,
3283			                 buffer_info->dma,
3284					 IGB_RX_HDR_LEN,
3285					 DMA_FROM_DEVICE);
3286			buffer_info->dma = 0;
3287		}
3288
3289		if (buffer_info->skb) {
3290			dev_kfree_skb(buffer_info->skb);
3291			buffer_info->skb = NULL;
3292		}
3293		if (buffer_info->page_dma) {
3294			dma_unmap_page(rx_ring->dev,
3295			               buffer_info->page_dma,
3296				       PAGE_SIZE / 2,
3297				       DMA_FROM_DEVICE);
3298			buffer_info->page_dma = 0;
3299		}
3300		if (buffer_info->page) {
3301			put_page(buffer_info->page);
3302			buffer_info->page = NULL;
3303			buffer_info->page_offset = 0;
3304		}
3305	}
3306
3307	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3308	memset(rx_ring->rx_buffer_info, 0, size);
3309
3310	/* Zero out the descriptor ring */
3311	memset(rx_ring->desc, 0, rx_ring->size);
3312
3313	rx_ring->next_to_clean = 0;
3314	rx_ring->next_to_use = 0;
3315}
3316
3317/**
3318 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3319 * @adapter: board private structure
3320 **/
3321static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3322{
3323	int i;
3324
3325	for (i = 0; i < adapter->num_rx_queues; i++)
3326		igb_clean_rx_ring(adapter->rx_ring[i]);
3327}
3328
3329/**
3330 * igb_set_mac - Change the Ethernet Address of the NIC
3331 * @netdev: network interface device structure
3332 * @p: pointer to an address structure
3333 *
3334 * Returns 0 on success, negative on failure
3335 **/
3336static int igb_set_mac(struct net_device *netdev, void *p)
3337{
3338	struct igb_adapter *adapter = netdev_priv(netdev);
3339	struct e1000_hw *hw = &adapter->hw;
3340	struct sockaddr *addr = p;
3341
3342	if (!is_valid_ether_addr(addr->sa_data))
3343		return -EADDRNOTAVAIL;
3344
3345	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3346	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3347
3348	/* set the correct pool for the new PF MAC address in entry 0 */
3349	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3350	                 adapter->vfs_allocated_count);
3351
3352	return 0;
3353}
3354
3355/**
3356 * igb_write_mc_addr_list - write multicast addresses to MTA
3357 * @netdev: network interface device structure
3358 *
3359 * Writes multicast address list to the MTA hash table.
3360 * Returns: -ENOMEM on failure
3361 *                0 on no addresses written
3362 *                X on writing X addresses to MTA
3363 **/
3364static int igb_write_mc_addr_list(struct net_device *netdev)
3365{
3366	struct igb_adapter *adapter = netdev_priv(netdev);
3367	struct e1000_hw *hw = &adapter->hw;
3368	struct netdev_hw_addr *ha;
3369	u8  *mta_list;
3370	int i;
3371
3372	if (netdev_mc_empty(netdev)) {
3373		/* nothing to program, so clear mc list */
3374		igb_update_mc_addr_list(hw, NULL, 0);
3375		igb_restore_vf_multicasts(adapter);
3376		return 0;
3377	}
3378
3379	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3380	if (!mta_list)
3381		return -ENOMEM;
3382
3383	/* The shared function expects a packed array of only addresses. */
3384	i = 0;
3385	netdev_for_each_mc_addr(ha, netdev)
3386		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3387
3388	igb_update_mc_addr_list(hw, mta_list, i);
3389	kfree(mta_list);
3390
3391	return netdev_mc_count(netdev);
3392}
3393
3394/**
3395 * igb_write_uc_addr_list - write unicast addresses to RAR table
3396 * @netdev: network interface device structure
3397 *
3398 * Writes unicast address list to the RAR table.
3399 * Returns: -ENOMEM on failure/insufficient address space
3400 *                0 on no addresses written
3401 *                X on writing X addresses to the RAR table
3402 **/
3403static int igb_write_uc_addr_list(struct net_device *netdev)
3404{
3405	struct igb_adapter *adapter = netdev_priv(netdev);
3406	struct e1000_hw *hw = &adapter->hw;
3407	unsigned int vfn = adapter->vfs_allocated_count;
3408	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3409	int count = 0;
3410
3411	/* return ENOMEM indicating insufficient memory for addresses */
3412	if (netdev_uc_count(netdev) > rar_entries)
3413		return -ENOMEM;
3414
3415	if (!netdev_uc_empty(netdev) && rar_entries) {
3416		struct netdev_hw_addr *ha;
3417
3418		netdev_for_each_uc_addr(ha, netdev) {
3419			if (!rar_entries)
3420				break;
3421			igb_rar_set_qsel(adapter, ha->addr,
3422			                 rar_entries--,
3423			                 vfn);
3424			count++;
3425		}
3426	}
3427	/* write the addresses in reverse order to avoid write combining */
3428	for (; rar_entries > 0 ; rar_entries--) {
3429		wr32(E1000_RAH(rar_entries), 0);
3430		wr32(E1000_RAL(rar_entries), 0);
3431	}
3432	wrfl();
3433
3434	return count;
3435}
3436
3437/**
3438 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3439 * @netdev: network interface device structure
3440 *
3441 * The set_rx_mode entry point is called whenever the unicast or multicast
3442 * address lists or the network interface flags are updated.  This routine is
3443 * responsible for configuring the hardware for proper unicast, multicast,
3444 * promiscuous mode, and all-multi behavior.
3445 **/
3446static void igb_set_rx_mode(struct net_device *netdev)
3447{
3448	struct igb_adapter *adapter = netdev_priv(netdev);
3449	struct e1000_hw *hw = &adapter->hw;
3450	unsigned int vfn = adapter->vfs_allocated_count;
3451	u32 rctl, vmolr = 0;
3452	int count;
3453
3454	/* Check for Promiscuous and All Multicast modes */
3455	rctl = rd32(E1000_RCTL);
3456
3457	/* clear the affected bits */
3458	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3459
3460	if (netdev->flags & IFF_PROMISC) {
3461		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3462		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3463	} else {
3464		if (netdev->flags & IFF_ALLMULTI) {
3465			rctl |= E1000_RCTL_MPE;
3466			vmolr |= E1000_VMOLR_MPME;
3467		} else {
3468			/*
3469			 * Write addresses to the MTA; if the attempt fails,
3470			 * then we should just turn on promiscuous mode so
3471			 * that we can at least receive multicast traffic
3472			 */
3473			count = igb_write_mc_addr_list(netdev);
3474			if (count < 0) {
3475				rctl |= E1000_RCTL_MPE;
3476				vmolr |= E1000_VMOLR_MPME;
3477			} else if (count) {
3478				vmolr |= E1000_VMOLR_ROMPE;
3479			}
3480		}
3481		/*
3482		 * Write addresses to available RAR registers, if there is not
3483		 * sufficient space to store all the addresses then enable
3484		 * unicast promiscuous mode
3485		 */
3486		count = igb_write_uc_addr_list(netdev);
3487		if (count < 0) {
3488			rctl |= E1000_RCTL_UPE;
3489			vmolr |= E1000_VMOLR_ROPE;
3490		}
3491		rctl |= E1000_RCTL_VFE;
3492	}
3493	wr32(E1000_RCTL, rctl);
3494
3495	/*
3496	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3497	 * the VMOLR to enable the appropriate modes.  Without this workaround
3498	 * we will have issues with VLAN tag stripping not being done for frames
3499	 * that are only arriving because we are the default pool
3500	 */
3501	if (hw->mac.type < e1000_82576)
3502		return;
3503
3504	vmolr |= rd32(E1000_VMOLR(vfn)) &
3505	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3506	wr32(E1000_VMOLR(vfn), vmolr);
3507	igb_restore_vf_multicasts(adapter);
3508}
3509
3510static void igb_check_wvbr(struct igb_adapter *adapter)
3511{
3512	struct e1000_hw *hw = &adapter->hw;
3513	u32 wvbr = 0;
3514
3515	switch (hw->mac.type) {
3516	case e1000_82576:
3517	case e1000_i350:
3518		if (!(wvbr = rd32(E1000_WVBR)))
3519			return;
3520		break;
3521	default:
3522		break;
3523	}
3524
3525	adapter->wvbr |= wvbr;
3526}
3527
3528#define IGB_STAGGERED_QUEUE_OFFSET 8
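/* igb_spoof_check() below treats WVBR as two banks of per-VF bits, with the
 * second bank offset by IGB_STAGGERED_QUEUE_OFFSET (8) bit positions. */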
3529
3530static void igb_spoof_check(struct igb_adapter *adapter)
3531{
3532	int j;
3533
3534	if (!adapter->wvbr)
3535		return;
3536
3537	for(j = 0; j < adapter->vfs_allocated_count; j++) {
3538		if (adapter->wvbr & (1 << j) ||
3539		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3540			dev_warn(&adapter->pdev->dev,
3541				"Spoof event(s) detected on VF %d\n", j);
3542			adapter->wvbr &=
3543				~((1 << j) |
3544				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3545		}
3546	}
3547}
3548
3549/* Need to wait a few seconds after link up to get diagnostic information from
3550 * the phy */
3551static void igb_update_phy_info(unsigned long data)
3552{
3553	struct igb_adapter *adapter = (struct igb_adapter *) data;
3554	igb_get_phy_info(&adapter->hw);
3555}
3556
3557/**
3558 * igb_has_link - check shared code for link and determine up/down
3559 * @adapter: pointer to driver private info
3560 **/
3561bool igb_has_link(struct igb_adapter *adapter)
3562{
3563	struct e1000_hw *hw = &adapter->hw;
3564	bool link_active = false;
3565	s32 ret_val = 0;
3566
3567	/* get_link_status is set on LSC (link status) interrupt or
3568	 * rx sequence error interrupt.  It remains set until
3569	 * e1000_check_for_link establishes link; this applies to
3570	 * copper adapters ONLY
3571	 */
3572	switch (hw->phy.media_type) {
3573	case e1000_media_type_copper:
3574		if (hw->mac.get_link_status) {
3575			ret_val = hw->mac.ops.check_for_link(hw);
3576			link_active = !hw->mac.get_link_status;
3577		} else {
3578			link_active = true;
3579		}
3580		break;
3581	case e1000_media_type_internal_serdes:
3582		ret_val = hw->mac.ops.check_for_link(hw);
3583		link_active = hw->mac.serdes_has_link;
3584		break;
3585	default:
3586	case e1000_media_type_unknown:
3587		break;
3588	}
3589
3590	return link_active;
3591}
3592
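/**
 * igb_thermal_sensor_event - check for a thermal sensor event
 * @hw: pointer to hardware structure
 * @event: THSTAT event bit(s) to test
 *
 * Returns true if the requested event bit is set in THSTAT.  Only i350
 * copper ports that are not in SGMII link mode report these events.
 **/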
3593static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3594{
3595	bool ret = false;
3596	u32 ctrl_ext, thstat;
3597
3598	/* check for thermal sensor event on i350, copper only */
3599	if (hw->mac.type == e1000_i350) {
3600		thstat = rd32(E1000_THSTAT);
3601		ctrl_ext = rd32(E1000_CTRL_EXT);
3602
3603		if ((hw->phy.media_type == e1000_media_type_copper) &&
3604		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3605			ret = !!(thstat & event);
3606		}
3607	}
3608
3609	return ret;
3610}
3611
3612/**
3613 * igb_watchdog - Timer Call-back
3614 * @data: pointer to adapter cast into an unsigned long
3615 **/
3616static void igb_watchdog(unsigned long data)
3617{
3618	struct igb_adapter *adapter = (struct igb_adapter *)data;
3619	/* Do the rest outside of interrupt context */
3620	schedule_work(&adapter->watchdog_task);
3621}
3622
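/**
 * igb_watchdog_task - periodic link and ring maintenance
 * @work: pointer to the work_struct embedded in the adapter
 *
 * Runs from the watchdog timer via schedule_work().  Checks link state and
 * logs transitions, updates statistics, schedules a reset if a carrier-less
 * queue still holds Tx work, forces a software interrupt so the rings get
 * cleaned, runs the VF spoof check and re-arms the timer for ~2 seconds.
 **/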
3623static void igb_watchdog_task(struct work_struct *work)
3624{
3625	struct igb_adapter *adapter = container_of(work,
3626	                                           struct igb_adapter,
3627                                                   watchdog_task);
3628	struct e1000_hw *hw = &adapter->hw;
3629	struct net_device *netdev = adapter->netdev;
3630	u32 link;
3631	int i;
3632
3633	link = igb_has_link(adapter);
3634	if (link) {
3635		if (!netif_carrier_ok(netdev)) {
3636			u32 ctrl;
3637			hw->mac.ops.get_speed_and_duplex(hw,
3638			                                 &adapter->link_speed,
3639			                                 &adapter->link_duplex);
3640
3641			ctrl = rd32(E1000_CTRL);
3642			/* Link status message must follow this format */
3643			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3644				 "Flow Control: %s\n",
3645			       netdev->name,
3646			       adapter->link_speed,
3647			       adapter->link_duplex == FULL_DUPLEX ?
3648				 "Full Duplex" : "Half Duplex",
3649			       ((ctrl & E1000_CTRL_TFCE) &&
3650			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3651			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3652			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3653
3654			/* check for thermal sensor event */
3655			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3656				printk(KERN_INFO "igb: %s The network adapter "
3657						 "link speed was downshifted "
3658						 "because it overheated.\n",
3659						 netdev->name);
3660			}
3661
3662			/* adjust timeout factor according to speed/duplex */
3663			adapter->tx_timeout_factor = 1;
3664			switch (adapter->link_speed) {
3665			case SPEED_10:
3666				adapter->tx_timeout_factor = 14;
3667				break;
3668			case SPEED_100:
3669				/* maybe add some timeout factor ? */
3670				break;
3671			}
3672
3673			netif_carrier_on(netdev);
3674
3675			igb_ping_all_vfs(adapter);
3676			igb_check_vf_rate_limit(adapter);
3677
3678			/* link state has changed, schedule phy info update */
3679			if (!test_bit(__IGB_DOWN, &adapter->state))
3680				mod_timer(&adapter->phy_info_timer,
3681					  round_jiffies(jiffies + 2 * HZ));
3682		}
3683	} else {
3684		if (netif_carrier_ok(netdev)) {
3685			adapter->link_speed = 0;
3686			adapter->link_duplex = 0;
3687
3688			/* check for thermal sensor event */
3689			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3690				printk(KERN_ERR "igb: %s The network adapter "
3691						"was stopped because it "
3692						"overheated.\n",
3693						netdev->name);
3694			}
3695
3696			/* Links status message must follow this format */
3697			/* Link status message must follow this format */
3698			       netdev->name);
3699			netif_carrier_off(netdev);
3700
3701			igb_ping_all_vfs(adapter);
3702
3703			/* link state has changed, schedule phy info update */
3704			if (!test_bit(__IGB_DOWN, &adapter->state))
3705				mod_timer(&adapter->phy_info_timer,
3706					  round_jiffies(jiffies + 2 * HZ));
3707		}
3708	}
3709
3710	spin_lock(&adapter->stats64_lock);
3711	igb_update_stats(adapter, &adapter->stats64);
3712	spin_unlock(&adapter->stats64_lock);
3713
3714	for (i = 0; i < adapter->num_tx_queues; i++) {
3715		struct igb_ring *tx_ring = adapter->tx_ring[i];
3716		if (!netif_carrier_ok(netdev)) {
3717			/* We've lost link, so the controller stops DMA,
3718			 * but we've got queued Tx work that's never going
3719			 * to get done, so reset controller to flush Tx.
3720			 * (Do the reset outside of interrupt context). */
3721			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3722				adapter->tx_timeout_count++;
3723				schedule_work(&adapter->reset_task);
3724				/* return immediately since reset is imminent */
3725				return;
3726			}
3727		}
3728
3729		/* Force detection of hung controller every watchdog period */
3730		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3731	}
3732
3733	/* Cause software interrupt to ensure rx ring is cleaned */
3734	if (adapter->msix_entries) {
3735		u32 eics = 0;
3736		for (i = 0; i < adapter->num_q_vectors; i++)
3737			eics |= adapter->q_vector[i]->eims_value;
3738		wr32(E1000_EICS, eics);
3739	} else {
3740		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3741	}
3742
3743	igb_spoof_check(adapter);
3744
3745	/* Reset the timer */
3746	if (!test_bit(__IGB_DOWN, &adapter->state))
3747		mod_timer(&adapter->watchdog_timer,
3748			  round_jiffies(jiffies + 2 * HZ));
3749}
3750
3751enum latency_range {
3752	lowest_latency = 0,
3753	low_latency = 1,
3754	bulk_latency = 2,
3755	latency_invalid = 255
3756};
3757
3758/**
3759 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3760 *
3761 *      Stores a new ITR value based strictly on packet size.  This
3762 *      algorithm is less sophisticated than that used in igb_update_itr,
3763 *      due to the difficulty of synchronizing statistics across multiple
3764 *      receive rings.  The divisors and thresholds used by this function
3765 *      were determined based on theoretical maximum wire speed and testing
3766 *      data, in order to minimize response time while increasing bulk
3767 *      throughput.
3768 *      This functionality is controlled by the InterruptThrottleRate module
3769 *      parameter (see igb_param.c)
3770 *      NOTE:  This function is called only when operating in a multiqueue
3771 *             receive environment.
3772 * @q_vector: pointer to q_vector
3773 **/
3774static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3775{
3776	int new_val = q_vector->itr_val;
3777	int avg_wire_size = 0;
3778	struct igb_adapter *adapter = q_vector->adapter;
3779	unsigned int packets;
3780
3781	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3782	 * ints/sec - ITR timer value of 120 ticks.
3783	 */
3784	if (adapter->link_speed != SPEED_1000) {
3785		new_val = IGB_4K_ITR;
3786		goto set_itr_val;
3787	}
3788
3789	packets = q_vector->rx.total_packets;
3790	if (packets)
3791		avg_wire_size = q_vector->rx.total_bytes / packets;
3792
3793	packets = q_vector->tx.total_packets;
3794	if (packets)
3795		avg_wire_size = max_t(u32, avg_wire_size,
3796				      q_vector->tx.total_bytes / packets);
3797
3798	/* if avg_wire_size isn't set no work was done */
3799	if (!avg_wire_size)
3800		goto clear_counts;
3801
3802	/* Add 24 bytes for CRC (4), preamble/SFD (8) and inter-frame gap (12) */
3803	avg_wire_size += 24;
3804
3805	/* Don't starve jumbo frames */
3806	avg_wire_size = min(avg_wire_size, 3000);
3807
3808	/* Give a little boost to mid-size frames */
3809	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3810		new_val = avg_wire_size / 3;
3811	else
3812		new_val = avg_wire_size / 2;
3813
3814	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3815	if (new_val < IGB_20K_ITR &&
3816	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3817	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3818		new_val = IGB_20K_ITR;
3819
3820set_itr_val:
3821	if (new_val != q_vector->itr_val) {
3822		q_vector->itr_val = new_val;
3823		q_vector->set_itr = 1;
3824	}
3825clear_counts:
3826	q_vector->rx.total_bytes = 0;
3827	q_vector->rx.total_packets = 0;
3828	q_vector->tx.total_bytes = 0;
3829	q_vector->tx.total_packets = 0;
3830}
3831
3832/**
3833 * igb_update_itr - update the dynamic ITR value based on statistics
3834 *      Stores a new ITR value based on packets and byte
3835 *      counts during the last interrupt.  The advantage of per interrupt
3836 *      computation is faster updates and more accurate ITR for the current
3837 *      traffic pattern.  Constants in this function were computed
3838 *      based on theoretical maximum wire speed and thresholds were set based
3839 *      on testing data as well as attempting to minimize response time
3840 *      while increasing bulk throughput.
3841 *      This functionality is controlled by the InterruptThrottleRate module
3842 *      parameter (see igb_param.c)
3843 *      NOTE:  These calculations are only valid when operating in a single-
3844 *             queue environment.
3845 * @q_vector: pointer to q_vector
3846 * @ring_container: ring info to update the itr for
3847 **/
3848static void igb_update_itr(struct igb_q_vector *q_vector,
3849			   struct igb_ring_container *ring_container)
3850{
3851	unsigned int packets = ring_container->total_packets;
3852	unsigned int bytes = ring_container->total_bytes;
3853	u8 itrval = ring_container->itr;
3854
3855	/* no packets, exit with status unchanged */
3856	if (packets == 0)
3857		return;
3858
3859	switch (itrval) {
3860	case lowest_latency:
3861		/* handle TSO and jumbo frames */
3862		if (bytes/packets > 8000)
3863			itrval = bulk_latency;
3864		else if ((packets < 5) && (bytes > 512))
3865			itrval = low_latency;
3866		break;
3867	case low_latency:  /* 50 usec aka 20000 ints/s */
3868		if (bytes > 10000) {
3869			/* this if handles the TSO accounting */
3870			if (bytes/packets > 8000) {
3871				itrval = bulk_latency;
3872			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3873				itrval = bulk_latency;
3874			} else if (packets > 35) {
3875				itrval = lowest_latency;
3876			}
3877		} else if (bytes/packets > 2000) {
3878			itrval = bulk_latency;
3879		} else if (packets <= 2 && bytes < 512) {
3880			itrval = lowest_latency;
3881		}
3882		break;
3883	case bulk_latency: /* 250 usec aka 4000 ints/s */
3884		if (bytes > 25000) {
3885			if (packets > 35)
3886				itrval = low_latency;
3887		} else if (bytes < 1500) {
3888			itrval = low_latency;
3889		}
3890		break;
3891	}
3892
3893	/* clear work counters since we have the values we need */
3894	ring_container->total_bytes = 0;
3895	ring_container->total_packets = 0;
3896
3897	/* write updated itr to ring container */
3898	ring_container->itr = itrval;
3899}
3900
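/**
 * igb_set_itr - stage a new interrupt throttle rate for a vector
 * @q_vector: pointer to q_vector
 *
 * Combines the Rx and Tx latency estimates from igb_update_itr() and picks
 * a new ITR value (70K/20K/4K ints/sec).  When the rate is increasing the
 * change is applied gradually to bias toward bulk throughput; the value is
 * staged here and written at the start of the next interrupt.
 **/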
3901static void igb_set_itr(struct igb_q_vector *q_vector)
3902{
3903	struct igb_adapter *adapter = q_vector->adapter;
3904	u32 new_itr = q_vector->itr_val;
3905	u8 current_itr = 0;
3906
3907	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3908	if (adapter->link_speed != SPEED_1000) {
3909		current_itr = 0;
3910		new_itr = IGB_4K_ITR;
3911		goto set_itr_now;
3912	}
3913
3914	igb_update_itr(q_vector, &q_vector->tx);
3915	igb_update_itr(q_vector, &q_vector->rx);
3916
3917	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3918
3919	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3920	if (current_itr == lowest_latency &&
3921	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3922	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3923		current_itr = low_latency;
3924
3925	switch (current_itr) {
3926	/* counts and packets in update_itr are dependent on these numbers */
3927	case lowest_latency:
3928		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3929		break;
3930	case low_latency:
3931		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3932		break;
3933	case bulk_latency:
3934		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3935		break;
3936	default:
3937		break;
3938	}
3939
3940set_itr_now:
3941	if (new_itr != q_vector->itr_val) {
3942		/* this attempts to bias the interrupt rate towards Bulk
3943		 * by adding intermediate steps when interrupt rate is
3944		 * increasing */
3945		new_itr = new_itr > q_vector->itr_val ?
3946		             max((new_itr * q_vector->itr_val) /
3947		                 (new_itr + (q_vector->itr_val >> 2)),
3948				 new_itr) :
3949			     new_itr;
3950		/* Don't write the value here; it resets the adapter's
3951		 * internal timer, and causes us to delay far longer than
3952		 * we should between interrupts.  Instead, we write the ITR
3953		 * value at the beginning of the next interrupt so the timing
3954		 * ends up being correct.
3955		 */
3956		q_vector->itr_val = new_itr;
3957		q_vector->set_itr = 1;
3958	}
3959}
3960
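/**
 * igb_tx_ctxtdesc - write an advanced Tx context descriptor
 * @tx_ring: ring to place the descriptor on
 * @vlan_macip_lens: VLAN tag and MAC/IP header lengths
 * @type_tucmd: descriptor type and TUCMD offload bits
 * @mss_l4len_idx: MSS, L4 header length and context index
 *
 * Fills the context descriptor at next_to_use and advances the index.
 **/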
3961void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3962		     u32 type_tucmd, u32 mss_l4len_idx)
3963{
3964	struct e1000_adv_tx_context_desc *context_desc;
3965	u16 i = tx_ring->next_to_use;
3966
3967	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3968
3969	i++;
3970	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3971
3972	/* set bits to identify this as an advanced context descriptor */
3973	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3974
3975	/* For 82575, context index must be unique per ring. */
3976	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3977		mss_l4len_idx |= tx_ring->reg_idx << 4;
3978
3979	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
3980	context_desc->seqnum_seed	= 0;
3981	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
3982	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
3983}
3984
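/**
 * igb_tso - set up a TSO context descriptor if the skb needs segmentation
 * @tx_ring: ring the packet will be sent on
 * @first: first tx_buffer for this packet
 * @hdr_len: returns the combined network + transport header length
 *
 * Prepares the IP and TCP headers for hardware segmentation (pseudo-header
 * checksum, zeroed lengths), records the TSO flags and gso accounting on
 * @first, and writes the context descriptor.  Returns 0 when no TSO is
 * required, a negative errno if the cloned header cannot be expanded, or
 * 1 when a TSO context was written.
 **/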
3985static int igb_tso(struct igb_ring *tx_ring,
3986		   struct igb_tx_buffer *first,
3987		   u8 *hdr_len)
3988{
3989	struct sk_buff *skb = first->skb;
3990	u32 vlan_macip_lens, type_tucmd;
3991	u32 mss_l4len_idx, l4len;
3992
3993	if (!skb_is_gso(skb))
3994		return 0;
3995
3996	if (skb_header_cloned(skb)) {
3997		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3998		if (err)
3999			return err;
4000	}
4001
4002	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4003	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4004
4005	if (first->protocol == __constant_htons(ETH_P_IP)) {
4006		struct iphdr *iph = ip_hdr(skb);
4007		iph->tot_len = 0;
4008		iph->check = 0;
4009		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4010							 iph->daddr, 0,
4011							 IPPROTO_TCP,
4012							 0);
4013		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4014		first->tx_flags |= IGB_TX_FLAGS_TSO |
4015				   IGB_TX_FLAGS_CSUM |
4016				   IGB_TX_FLAGS_IPV4;
4017	} else if (skb_is_gso_v6(skb)) {
4018		ipv6_hdr(skb)->payload_len = 0;
4019		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4020						       &ipv6_hdr(skb)->daddr,
4021						       0, IPPROTO_TCP, 0);
4022		first->tx_flags |= IGB_TX_FLAGS_TSO |
4023				   IGB_TX_FLAGS_CSUM;
4024	}
4025
4026	/* compute header lengths */
4027	l4len = tcp_hdrlen(skb);
4028	*hdr_len = skb_transport_offset(skb) + l4len;
4029
4030	/* update gso size and bytecount with header size */
4031	first->gso_segs = skb_shinfo(skb)->gso_segs;
4032	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4033
4034	/* MSS L4LEN IDX */
4035	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4036	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4037
4038	/* VLAN MACLEN IPLEN */
4039	vlan_macip_lens = skb_network_header_len(skb);
4040	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4041	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4042
4043	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4044
4045	return 1;
4046}
4047
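/**
 * igb_tx_csum - set up a checksum-offload context descriptor
 * @tx_ring: ring the packet will be sent on
 * @first: first tx_buffer for this packet
 *
 * For CHECKSUM_PARTIAL skbs (or VLAN-tagged frames) describe the IP
 * version and L4 protocol (TCP/SCTP/UDP) in a context descriptor so the
 * hardware can insert the L4 checksum.
 **/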
4048static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4049{
4050	struct sk_buff *skb = first->skb;
4051	u32 vlan_macip_lens = 0;
4052	u32 mss_l4len_idx = 0;
4053	u32 type_tucmd = 0;
4054
4055	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4056		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4057			return;
4058	} else {
4059		u8 l4_hdr = 0;
4060		switch (first->protocol) {
4061		case __constant_htons(ETH_P_IP):
4062			vlan_macip_lens |= skb_network_header_len(skb);
4063			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4064			l4_hdr = ip_hdr(skb)->protocol;
4065			break;
4066		case __constant_htons(ETH_P_IPV6):
4067			vlan_macip_lens |= skb_network_header_len(skb);
4068			l4_hdr = ipv6_hdr(skb)->nexthdr;
4069			break;
4070		default:
4071			if (unlikely(net_ratelimit())) {
4072				dev_warn(tx_ring->dev,
4073				 "partial checksum but proto=%x!\n",
4074				 first->protocol);
4075			}
4076			break;
4077		}
4078
4079		switch (l4_hdr) {
4080		case IPPROTO_TCP:
4081			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4082			mss_l4len_idx = tcp_hdrlen(skb) <<
4083					E1000_ADVTXD_L4LEN_SHIFT;
4084			break;
4085		case IPPROTO_SCTP:
4086			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4087			mss_l4len_idx = sizeof(struct sctphdr) <<
4088					E1000_ADVTXD_L4LEN_SHIFT;
4089			break;
4090		case IPPROTO_UDP:
4091			mss_l4len_idx = sizeof(struct udphdr) <<
4092					E1000_ADVTXD_L4LEN_SHIFT;
4093			break;
4094		default:
4095			if (unlikely(net_ratelimit())) {
4096				dev_warn(tx_ring->dev,
4097				 "partial checksum but l4 proto=%x!\n",
4098				 l4_hdr);
4099			}
4100			break;
4101		}
4102
4103		/* update TX checksum flag */
4104		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4105	}
4106
4107	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4108	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4109
4110	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4111}
4112
4113static __le32 igb_tx_cmd_type(u32 tx_flags)
4114{
4115	/* set type for advanced descriptor with frame checksum insertion */
4116	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4117				      E1000_ADVTXD_DCMD_IFCS |
4118				      E1000_ADVTXD_DCMD_DEXT);
4119
4120	/* set HW vlan bit if vlan is present */
4121	if (tx_flags & IGB_TX_FLAGS_VLAN)
4122		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4123
4124	/* set timestamp bit if present */
4125	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4126		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4127
4128	/* set segmentation bits for TSO */
4129	if (tx_flags & IGB_TX_FLAGS_TSO)
4130		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4131
4132	return cmd_type;
4133}
4134
4135static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4136				 union e1000_adv_tx_desc *tx_desc,
4137				 u32 tx_flags, unsigned int paylen)
4138{
4139	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4140
4141	/* 82575 requires a unique index per ring if any offload is enabled */
4142	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4143	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4144		olinfo_status |= tx_ring->reg_idx << 4;
4145
4146	/* insert L4 checksum */
4147	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4148		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4149
4150		/* insert IPv4 checksum */
4151		if (tx_flags & IGB_TX_FLAGS_IPV4)
4152			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4153	}
4154
4155	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4156}
4157
4158/*
4159 * The largest size we can write to the descriptor is 65535.  In order to
4160 * maintain a power of two alignment we have to limit ourselves to 32K.
4161 */
4162#define IGB_MAX_TXD_PWR	15
4163#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
4164
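/**
 * igb_tx_map - map the skb for DMA and hand descriptors to hardware
 * @tx_ring: ring the packet will be sent on
 * @first: first tx_buffer for this packet
 * @hdr_len: header length used to compute the payload length
 *
 * Maps skb->data and every fragment, splitting buffers larger than
 * IGB_MAX_DATA_PER_TXD across descriptors.  The final descriptor gets the
 * RS/EOP bits, next_to_watch is recorded on @first and the tail register
 * is written.  On a DMA mapping error all mappings made so far are undone.
 **/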
4165static void igb_tx_map(struct igb_ring *tx_ring,
4166		       struct igb_tx_buffer *first,
4167		       const u8 hdr_len)
4168{
4169	struct sk_buff *skb = first->skb;
4170	struct igb_tx_buffer *tx_buffer_info;
4171	union e1000_adv_tx_desc *tx_desc;
4172	dma_addr_t dma;
4173	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4174	unsigned int data_len = skb->data_len;
4175	unsigned int size = skb_headlen(skb);
4176	unsigned int paylen = skb->len - hdr_len;
4177	__le32 cmd_type;
4178	u32 tx_flags = first->tx_flags;
4179	u16 i = tx_ring->next_to_use;
4180
4181	tx_desc = IGB_TX_DESC(tx_ring, i);
4182
4183	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4184	cmd_type = igb_tx_cmd_type(tx_flags);
4185
4186	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4187	if (dma_mapping_error(tx_ring->dev, dma))
4188		goto dma_error;
4189
4190	/* record length, and DMA address */
4191	first->length = size;
4192	first->dma = dma;
4193	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4194
4195	for (;;) {
4196		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4197			tx_desc->read.cmd_type_len =
4198				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4199
4200			i++;
4201			tx_desc++;
4202			if (i == tx_ring->count) {
4203				tx_desc = IGB_TX_DESC(tx_ring, 0);
4204				i = 0;
4205			}
4206
4207			dma += IGB_MAX_DATA_PER_TXD;
4208			size -= IGB_MAX_DATA_PER_TXD;
4209
4210			tx_desc->read.olinfo_status = 0;
4211			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4212		}
4213
4214		if (likely(!data_len))
4215			break;
4216
4217		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4218
4219		i++;
4220		tx_desc++;
4221		if (i == tx_ring->count) {
4222			tx_desc = IGB_TX_DESC(tx_ring, 0);
4223			i = 0;
4224		}
4225
4226		size = skb_frag_size(frag);
4227		data_len -= size;
4228
4229		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4230				   size, DMA_TO_DEVICE);
4231		if (dma_mapping_error(tx_ring->dev, dma))
4232			goto dma_error;
4233
4234		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4235		tx_buffer_info->length = size;
4236		tx_buffer_info->dma = dma;
4237
4238		tx_desc->read.olinfo_status = 0;
4239		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4240
4241		frag++;
4242	}
4243
4244	/* write last descriptor with RS and EOP bits */
4245	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4246	tx_desc->read.cmd_type_len = cmd_type;
4247
4248	/* set the timestamp */
4249	first->time_stamp = jiffies;
4250
4251	/*
4252	 * Force memory writes to complete before letting h/w know there
4253	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4254	 * memory model archs, such as IA-64).
4255	 *
4256	 * We also need this memory barrier to make certain all of the
4257	 * status bits have been updated before next_to_watch is written.
4258	 */
4259	wmb();
4260
4261	/* set next_to_watch value indicating a packet is present */
4262	first->next_to_watch = tx_desc;
4263
4264	i++;
4265	if (i == tx_ring->count)
4266		i = 0;
4267
4268	tx_ring->next_to_use = i;
4269
4270	writel(i, tx_ring->tail);
4271
4272	/* we need this if more than one processor can write to our tail
4273	 * at a time; it synchronizes IO on IA64/Altix systems */
4274	mmiowb();
4275
4276	return;
4277
4278dma_error:
4279	dev_err(tx_ring->dev, "TX DMA map failed\n");
4280
4281	/* clear dma mappings for failed tx_buffer_info map */
4282	for (;;) {
4283		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4284		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4285		if (tx_buffer_info == first)
4286			break;
4287		if (i == 0)
4288			i = tx_ring->count;
4289		i--;
4290	}
4291
4292	tx_ring->next_to_use = i;
4293}
4294
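/**
 * __igb_maybe_stop_tx - slow path of igb_maybe_stop_tx
 * @tx_ring: ring to check
 * @size: number of descriptors required
 *
 * Stops the subqueue, then re-checks the free descriptor count after a
 * memory barrier in case another CPU just made room; either restarts the
 * queue (counting restart_queue2) or returns -EBUSY.
 **/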
4295static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4296{
4297	struct net_device *netdev = tx_ring->netdev;
4298
4299	netif_stop_subqueue(netdev, tx_ring->queue_index);
4300
4301	/* Herbert's original patch had:
4302	 *  smp_mb__after_netif_stop_queue();
4303	 * but since that doesn't exist yet, just open code it. */
4304	smp_mb();
4305
4306	/* We need to check again in case another CPU has just
4307	 * made room available. */
4308	if (igb_desc_unused(tx_ring) < size)
4309		return -EBUSY;
4310
4311	/* A reprieve! */
4312	netif_wake_subqueue(netdev, tx_ring->queue_index);
4313
4314	u64_stats_update_begin(&tx_ring->tx_syncp2);
4315	tx_ring->tx_stats.restart_queue2++;
4316	u64_stats_update_end(&tx_ring->tx_syncp2);
4317
4318	return 0;
4319}
4320
4321static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4322{
4323	if (igb_desc_unused(tx_ring) >= size)
4324		return 0;
4325	return __igb_maybe_stop_tx(tx_ring, size);
4326}
4327
4328netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4329				struct igb_ring *tx_ring)
4330{
4331	struct igb_tx_buffer *first;
4332	int tso;
4333	u32 tx_flags = 0;
4334	__be16 protocol = vlan_get_protocol(skb);
4335	u8 hdr_len = 0;
4336
4337	/* need: 1 descriptor per page,
4338	 *       + 2 desc gap to keep tail from touching head,
4339	 *       + 1 desc for skb->data,
4340	 *       + 1 desc for context descriptor,
4341	 * otherwise try next time */
4342	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4343		/* this is a hard error */
4344		return NETDEV_TX_BUSY;
4345	}
4346
4347	/* record the location of the first descriptor for this packet */
4348	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4349	first->skb = skb;
4350	first->bytecount = skb->len;
4351	first->gso_segs = 1;
4352
4353	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4354		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4355		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4356	}
4357
4358	if (vlan_tx_tag_present(skb)) {
4359		tx_flags |= IGB_TX_FLAGS_VLAN;
4360		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4361	}
4362
4363	/* record initial flags and protocol */
4364	first->tx_flags = tx_flags;
4365	first->protocol = protocol;
4366
4367	tso = igb_tso(tx_ring, first, &hdr_len);
4368	if (tso < 0)
4369		goto out_drop;
4370	else if (!tso)
4371		igb_tx_csum(tx_ring, first);
4372
4373	igb_tx_map(tx_ring, first, hdr_len);
4374
4375	/* Make sure there is space in the ring for the next send. */
4376	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4377
4378	return NETDEV_TX_OK;
4379
4380out_drop:
4381	igb_unmap_and_free_tx_resource(tx_ring, first);
4382
4383	return NETDEV_TX_OK;
4384}
4385
4386static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4387						    struct sk_buff *skb)
4388{
4389	unsigned int r_idx = skb->queue_mapping;
4390
4391	if (r_idx >= adapter->num_tx_queues)
4392		r_idx = r_idx % adapter->num_tx_queues;
4393
4394	return adapter->tx_ring[r_idx];
4395}
4396
4397static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4398				  struct net_device *netdev)
4399{
4400	struct igb_adapter *adapter = netdev_priv(netdev);
4401
4402	if (test_bit(__IGB_DOWN, &adapter->state)) {
4403		dev_kfree_skb_any(skb);
4404		return NETDEV_TX_OK;
4405	}
4406
4407	if (skb->len <= 0) {
4408		dev_kfree_skb_any(skb);
4409		return NETDEV_TX_OK;
4410	}
4411
4412	/*
4413	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4414	 * in order to meet this minimum size requirement.
4415	 */
4416	if (skb->len < 17) {
4417		if (skb_padto(skb, 17))
4418			return NETDEV_TX_OK;
4419		skb->len = 17;
4420	}
4421
4422	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4423}
4424
4425/**
4426 * igb_tx_timeout - Respond to a Tx Hang
4427 * @netdev: network interface device structure
4428 **/
4429static void igb_tx_timeout(struct net_device *netdev)
4430{
4431	struct igb_adapter *adapter = netdev_priv(netdev);
4432	struct e1000_hw *hw = &adapter->hw;
4433
4434	/* Do the reset outside of interrupt context */
4435	adapter->tx_timeout_count++;
4436
4437	if (hw->mac.type >= e1000_82580)
4438		hw->dev_spec._82575.global_device_reset = true;
4439
4440	schedule_work(&adapter->reset_task);
4441	wr32(E1000_EICS,
4442	     (adapter->eims_enable_mask & ~adapter->eims_other));
4443}
4444
4445static void igb_reset_task(struct work_struct *work)
4446{
4447	struct igb_adapter *adapter;
4448	adapter = container_of(work, struct igb_adapter, reset_task);
4449
4450	igb_dump(adapter);
4451	netdev_err(adapter->netdev, "Reset adapter\n");
4452	igb_reinit_locked(adapter);
4453}
4454
4455/**
4456 * igb_get_stats64 - Get System Network Statistics
4457 * @netdev: network interface device structure
4458 * @stats: rtnl_link_stats64 pointer
4459 *
4460 **/
4461static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4462						 struct rtnl_link_stats64 *stats)
4463{
4464	struct igb_adapter *adapter = netdev_priv(netdev);
4465
4466	spin_lock(&adapter->stats64_lock);
4467	igb_update_stats(adapter, &adapter->stats64);
4468	memcpy(stats, &adapter->stats64, sizeof(*stats));
4469	spin_unlock(&adapter->stats64_lock);
4470
4471	return stats;
4472}
4473
4474/**
4475 * igb_change_mtu - Change the Maximum Transfer Unit
4476 * @netdev: network interface device structure
4477 * @new_mtu: new value for maximum frame size
4478 *
4479 * Returns 0 on success, negative on failure
4480 **/
4481static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4482{
4483	struct igb_adapter *adapter = netdev_priv(netdev);
4484	struct pci_dev *pdev = adapter->pdev;
4485	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4486
4487	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4488		dev_err(&pdev->dev, "Invalid MTU setting\n");
4489		return -EINVAL;
4490	}
4491
4492#define MAX_STD_JUMBO_FRAME_SIZE 9238
4493	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4494		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4495		return -EINVAL;
4496	}
4497
4498	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4499		msleep(1);
4500
4501	/* igb_down has a dependency on max_frame_size */
4502	adapter->max_frame_size = max_frame;
4503
4504	if (netif_running(netdev))
4505		igb_down(adapter);
4506
4507	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4508		 netdev->mtu, new_mtu);
4509	netdev->mtu = new_mtu;
4510
4511	if (netif_running(netdev))
4512		igb_up(adapter);
4513	else
4514		igb_reset(adapter);
4515
4516	clear_bit(__IGB_RESETTING, &adapter->state);
4517
4518	return 0;
4519}
4520
4521/**
4522 * igb_update_stats - Update the board statistics counters
4523 * @adapter: board private structure
4524 **/
4525
4526void igb_update_stats(struct igb_adapter *adapter,
4527		      struct rtnl_link_stats64 *net_stats)
4528{
4529	struct e1000_hw *hw = &adapter->hw;
4530	struct pci_dev *pdev = adapter->pdev;
4531	u32 reg, mpc;
4532	u16 phy_tmp;
4533	int i;
4534	u64 bytes, packets;
4535	unsigned int start;
4536	u64 _bytes, _packets;
4537
4538#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4539
4540	/*
4541	 * Prevent stats update while adapter is being reset, or if the pci
4542	 * connection is down.
4543	 */
4544	if (adapter->link_speed == 0)
4545		return;
4546	if (pci_channel_offline(pdev))
4547		return;
4548
4549	bytes = 0;
4550	packets = 0;
4551	for (i = 0; i < adapter->num_rx_queues; i++) {
4552		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4553		struct igb_ring *ring = adapter->rx_ring[i];
4554
4555		ring->rx_stats.drops += rqdpc_tmp;
4556		net_stats->rx_fifo_errors += rqdpc_tmp;
4557
4558		do {
4559			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4560			_bytes = ring->rx_stats.bytes;
4561			_packets = ring->rx_stats.packets;
4562		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4563		bytes += _bytes;
4564		packets += _packets;
4565	}
4566
4567	net_stats->rx_bytes = bytes;
4568	net_stats->rx_packets = packets;
4569
4570	bytes = 0;
4571	packets = 0;
4572	for (i = 0; i < adapter->num_tx_queues; i++) {
4573		struct igb_ring *ring = adapter->tx_ring[i];
4574		do {
4575			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4576			_bytes = ring->tx_stats.bytes;
4577			_packets = ring->tx_stats.packets;
4578		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4579		bytes += _bytes;
4580		packets += _packets;
4581	}
4582	net_stats->tx_bytes = bytes;
4583	net_stats->tx_packets = packets;
4584
4585	/* read stats registers */
4586	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4587	adapter->stats.gprc += rd32(E1000_GPRC);
4588	adapter->stats.gorc += rd32(E1000_GORCL);
4589	rd32(E1000_GORCH); /* clear GORCL */
4590	adapter->stats.bprc += rd32(E1000_BPRC);
4591	adapter->stats.mprc += rd32(E1000_MPRC);
4592	adapter->stats.roc += rd32(E1000_ROC);
4593
4594	adapter->stats.prc64 += rd32(E1000_PRC64);
4595	adapter->stats.prc127 += rd32(E1000_PRC127);
4596	adapter->stats.prc255 += rd32(E1000_PRC255);
4597	adapter->stats.prc511 += rd32(E1000_PRC511);
4598	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4599	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4600	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4601	adapter->stats.sec += rd32(E1000_SEC);
4602
4603	mpc = rd32(E1000_MPC);
4604	adapter->stats.mpc += mpc;
4605	net_stats->rx_fifo_errors += mpc;
4606	adapter->stats.scc += rd32(E1000_SCC);
4607	adapter->stats.ecol += rd32(E1000_ECOL);
4608	adapter->stats.mcc += rd32(E1000_MCC);
4609	adapter->stats.latecol += rd32(E1000_LATECOL);
4610	adapter->stats.dc += rd32(E1000_DC);
4611	adapter->stats.rlec += rd32(E1000_RLEC);
4612	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4613	adapter->stats.xontxc += rd32(E1000_XONTXC);
4614	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4615	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4616	adapter->stats.fcruc += rd32(E1000_FCRUC);
4617	adapter->stats.gptc += rd32(E1000_GPTC);
4618	adapter->stats.gotc += rd32(E1000_GOTCL);
4619	rd32(E1000_GOTCH); /* clear GOTCL */
4620	adapter->stats.rnbc += rd32(E1000_RNBC);
4621	adapter->stats.ruc += rd32(E1000_RUC);
4622	adapter->stats.rfc += rd32(E1000_RFC);
4623	adapter->stats.rjc += rd32(E1000_RJC);
4624	adapter->stats.tor += rd32(E1000_TORH);
4625	adapter->stats.tot += rd32(E1000_TOTH);
4626	adapter->stats.tpr += rd32(E1000_TPR);
4627
4628	adapter->stats.ptc64 += rd32(E1000_PTC64);
4629	adapter->stats.ptc127 += rd32(E1000_PTC127);
4630	adapter->stats.ptc255 += rd32(E1000_PTC255);
4631	adapter->stats.ptc511 += rd32(E1000_PTC511);
4632	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4633	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4634
4635	adapter->stats.mptc += rd32(E1000_MPTC);
4636	adapter->stats.bptc += rd32(E1000_BPTC);
4637
4638	adapter->stats.tpt += rd32(E1000_TPT);
4639	adapter->stats.colc += rd32(E1000_COLC);
4640
4641	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4642	/* read internal phy specific stats */
4643	reg = rd32(E1000_CTRL_EXT);
4644	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4645		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4646		adapter->stats.tncrs += rd32(E1000_TNCRS);
4647	}
4648
4649	adapter->stats.tsctc += rd32(E1000_TSCTC);
4650	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4651
4652	adapter->stats.iac += rd32(E1000_IAC);
4653	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4654	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4655	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4656	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4657	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4658	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4659	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4660	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4661
4662	/* Fill out the OS statistics structure */
4663	net_stats->multicast = adapter->stats.mprc;
4664	net_stats->collisions = adapter->stats.colc;
4665
4666	/* Rx Errors */
4667
4668	/* RLEC on some newer hardware can be incorrect so build
4669	 * our own version based on RUC and ROC */
4670	net_stats->rx_errors = adapter->stats.rxerrc +
4671		adapter->stats.crcerrs + adapter->stats.algnerrc +
4672		adapter->stats.ruc + adapter->stats.roc +
4673		adapter->stats.cexterr;
4674	net_stats->rx_length_errors = adapter->stats.ruc +
4675				      adapter->stats.roc;
4676	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4677	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4678	net_stats->rx_missed_errors = adapter->stats.mpc;
4679
4680	/* Tx Errors */
4681	net_stats->tx_errors = adapter->stats.ecol +
4682			       adapter->stats.latecol;
4683	net_stats->tx_aborted_errors = adapter->stats.ecol;
4684	net_stats->tx_window_errors = adapter->stats.latecol;
4685	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4686
4687	/* Tx Dropped needs to be maintained elsewhere */
4688
4689	/* Phy Stats */
4690	if (hw->phy.media_type == e1000_media_type_copper) {
4691		if ((adapter->link_speed == SPEED_1000) &&
4692		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4693			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4694			adapter->phy_stats.idle_errors += phy_tmp;
4695		}
4696	}
4697
4698	/* Management Stats */
4699	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4700	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4701	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4702
4703	/* OS2BMC Stats */
4704	reg = rd32(E1000_MANC);
4705	if (reg & E1000_MANC_EN_BMC2OS) {
4706		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4707		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4708		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4709		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4710	}
4711}
4712
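/**
 * igb_msix_other - MSI-X handler for non-queue (other) interrupt causes
 * @irq: interrupt number
 * @data: pointer to the adapter
 *
 * Handles device reset requests, DMA out-of-sync events (a possible VF
 * spoof indication), VF mailbox messages and link status changes, then
 * re-enables its cause in EIMS.
 **/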
4713static irqreturn_t igb_msix_other(int irq, void *data)
4714{
4715	struct igb_adapter *adapter = data;
4716	struct e1000_hw *hw = &adapter->hw;
4717	u32 icr = rd32(E1000_ICR);
4718	/* reading ICR causes bit 31 of EICR to be cleared */
4719
4720	if (icr & E1000_ICR_DRSTA)
4721		schedule_work(&adapter->reset_task);
4722
4723	if (icr & E1000_ICR_DOUTSYNC) {
4724		/* HW is reporting DMA is out of sync */
4725		adapter->stats.doosync++;
4726		/* The DMA Out of Sync is also an indication of a spoof event
4727		 * in IOV mode. Check the Wrong VM Behavior register to
4728		 * see if it is really a spoof event. */
4729		igb_check_wvbr(adapter);
4730	}
4731
4732	/* Check for a mailbox event */
4733	if (icr & E1000_ICR_VMMB)
4734		igb_msg_task(adapter);
4735
4736	if (icr & E1000_ICR_LSC) {
4737		hw->mac.get_link_status = 1;
4738		/* guard against interrupt when we're going down */
4739		if (!test_bit(__IGB_DOWN, &adapter->state))
4740			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4741	}
4742
4743	wr32(E1000_EIMS, adapter->eims_other);
4744
4745	return IRQ_HANDLED;
4746}
4747
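/**
 * igb_write_itr - write a previously staged ITR value to the hardware
 * @q_vector: vector whose EITR register should be updated
 *
 * No-op unless set_itr is pending.  82575 expects the interval replicated
 * in the upper 16 bits; later MACs set the CNT_IGNR bit instead.
 **/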
4748static void igb_write_itr(struct igb_q_vector *q_vector)
4749{
4750	struct igb_adapter *adapter = q_vector->adapter;
4751	u32 itr_val = q_vector->itr_val & 0x7FFC;
4752
4753	if (!q_vector->set_itr)
4754		return;
4755
4756	if (!itr_val)
4757		itr_val = 0x4;
4758
4759	if (adapter->hw.mac.type == e1000_82575)
4760		itr_val |= itr_val << 16;
4761	else
4762		itr_val |= E1000_EITR_CNT_IGNR;
4763
4764	writel(itr_val, q_vector->itr_register);
4765	q_vector->set_itr = 0;
4766}
4767
4768static irqreturn_t igb_msix_ring(int irq, void *data)
4769{
4770	struct igb_q_vector *q_vector = data;
4771
4772	/* Write the ITR value calculated from the previous interrupt. */
4773	igb_write_itr(q_vector);
4774
4775	napi_schedule(&q_vector->napi);
4776
4777	return IRQ_HANDLED;
4778}
4779
4780#ifdef CONFIG_IGB_DCA
4781static void igb_update_dca(struct igb_q_vector *q_vector)
4782{
4783	struct igb_adapter *adapter = q_vector->adapter;
4784	struct e1000_hw *hw = &adapter->hw;
4785	int cpu = get_cpu();
4786
4787	if (q_vector->cpu == cpu)
4788		goto out_no_update;
4789
4790	if (q_vector->tx.ring) {
4791		int q = q_vector->tx.ring->reg_idx;
4792		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4793		if (hw->mac.type == e1000_82575) {
4794			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4795			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4796		} else {
4797			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4798			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4799			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4800		}
4801		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4802		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4803	}
4804	if (q_vector->rx.ring) {
4805		int q = q_vector->rx.ring->reg_idx;
4806		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4807		if (hw->mac.type == e1000_82575) {
4808			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4809			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4810		} else {
4811			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4812			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4813			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4814		}
4815		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4816		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4817		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4818		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4819	}
4820	q_vector->cpu = cpu;
4821out_no_update:
4822	put_cpu();
4823}
4824
4825static void igb_setup_dca(struct igb_adapter *adapter)
4826{
4827	struct e1000_hw *hw = &adapter->hw;
4828	int i;
4829
4830	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4831		return;
4832
4833	/* Always use CB2 mode, difference is masked in the CB driver. */
4834	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4835
4836	for (i = 0; i < adapter->num_q_vectors; i++) {
4837		adapter->q_vector[i]->cpu = -1;
4838		igb_update_dca(adapter->q_vector[i]);
4839	}
4840}
4841
4842static int __igb_notify_dca(struct device *dev, void *data)
4843{
4844	struct net_device *netdev = dev_get_drvdata(dev);
4845	struct igb_adapter *adapter = netdev_priv(netdev);
4846	struct pci_dev *pdev = adapter->pdev;
4847	struct e1000_hw *hw = &adapter->hw;
4848	unsigned long event = *(unsigned long *)data;
4849
4850	switch (event) {
4851	case DCA_PROVIDER_ADD:
4852		/* if already enabled, don't do it again */
4853		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4854			break;
4855		if (dca_add_requester(dev) == 0) {
4856			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4857			dev_info(&pdev->dev, "DCA enabled\n");
4858			igb_setup_dca(adapter);
4859			break;
4860		}
4861		/* Fall Through since DCA is disabled. */
4862	case DCA_PROVIDER_REMOVE:
4863		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4864			/* without this a class_device is left
4865			 * hanging around in the sysfs model */
4866			dca_remove_requester(dev);
4867			dev_info(&pdev->dev, "DCA disabled\n");
4868			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4869			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4870		}
4871		break;
4872	}
4873
4874	return 0;
4875}
4876
4877static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4878                          void *p)
4879{
4880	int ret_val;
4881
4882	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4883	                                 __igb_notify_dca);
4884
4885	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4886}
4887#endif /* CONFIG_IGB_DCA */
4888
4889#ifdef CONFIG_PCI_IOV
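/**
 * igb_vf_configure - assign a MAC address and locate the VF's pci_dev
 * @adapter: board private structure
 * @vf: VF index
 *
 * Generates a random MAC address for the VF and searches for its PCI
 * device using the PF devfn plus the per-MAC VF stride (2 on 82576,
 * 4 on i350).  Returns nonzero if the VF pci_dev was found.
 **/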
4890static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4891{
4892	unsigned char mac_addr[ETH_ALEN];
4893	struct pci_dev *pdev = adapter->pdev;
4894	struct e1000_hw *hw = &adapter->hw;
4895	struct pci_dev *pvfdev;
4896	unsigned int device_id;
4897	u16 thisvf_devfn;
4898
4899	random_ether_addr(mac_addr);
4900	igb_set_vf_mac(adapter, vf, mac_addr);
4901
4902	switch (adapter->hw.mac.type) {
4903	case e1000_82576:
4904		device_id = IGB_82576_VF_DEV_ID;
4905		/* VF Stride for 82576 is 2 */
4906		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4907			(pdev->devfn & 1);
4908		break;
4909	case e1000_i350:
4910		device_id = IGB_I350_VF_DEV_ID;
4911		/* VF Stride for I350 is 4 */
4912		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4913				(pdev->devfn & 3);
4914		break;
4915	default:
4916		device_id = 0;
4917		thisvf_devfn = 0;
4918		break;
4919	}
4920
4921	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4922	while (pvfdev) {
4923		if (pvfdev->devfn == thisvf_devfn)
4924			break;
4925		pvfdev = pci_get_device(hw->vendor_id,
4926					device_id, pvfdev);
4927	}
4928
4929	if (pvfdev)
4930		adapter->vf_data[vf].vfdev = pvfdev;
4931	else
4932		dev_err(&pdev->dev,
4933			"Couldn't find pci dev ptr for VF %4.4x\n",
4934			thisvf_devfn);
4935	return pvfdev != NULL;
4936}
4937
4938static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4939{
4940	struct e1000_hw *hw = &adapter->hw;
4941	struct pci_dev *pdev = adapter->pdev;
4942	struct pci_dev *pvfdev;
4943	u16 vf_devfn = 0;
4944	u16 vf_stride;
4945	unsigned int device_id;
4946	int vfs_found = 0;
4947
4948	switch (adapter->hw.mac.type) {
4949	case e1000_82576:
4950		device_id = IGB_82576_VF_DEV_ID;
4951		/* VF Stride for 82576 is 2 */
4952		vf_stride = 2;
4953		break;
4954	case e1000_i350:
4955		device_id = IGB_I350_VF_DEV_ID;
4956		/* VF Stride for I350 is 4 */
4957		vf_stride = 4;
4958		break;
4959	default:
4960		device_id = 0;
4961		vf_stride = 0;
4962		break;
4963	}
4964
4965	vf_devfn = pdev->devfn + 0x80;
4966	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4967	while (pvfdev) {
4968		if (pvfdev->devfn == vf_devfn)
4969			vfs_found++;
4970		vf_devfn += vf_stride;
4971		pvfdev = pci_get_device(hw->vendor_id,
4972					device_id, pvfdev);
4973	}
4974
4975	return vfs_found;
4976}
4977
4978static int igb_check_vf_assignment(struct igb_adapter *adapter)
4979{
4980	int i;
4981	for (i = 0; i < adapter->vfs_allocated_count; i++) {
4982		if (adapter->vf_data[i].vfdev) {
4983			if (adapter->vf_data[i].vfdev->dev_flags &
4984			    PCI_DEV_FLAGS_ASSIGNED)
4985				return true;
4986		}
4987	}
4988	return false;
4989}
4990
4991#endif
4992static void igb_ping_all_vfs(struct igb_adapter *adapter)
4993{
4994	struct e1000_hw *hw = &adapter->hw;
4995	u32 ping;
4996	int i;
4997
4998	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4999		ping = E1000_PF_CONTROL_MSG;
5000		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5001			ping |= E1000_VT_MSGTYPE_CTS;
5002		igb_write_mbx(hw, &ping, 1, i);
5003	}
5004}
5005
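/**
 * igb_set_vf_promisc - handle a VF promiscuous-mode request
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF index
 *
 * Only multicast promiscuous is honored.  When it is cleared the stored
 * multicast hashes are written back to the MTA (or MPME is kept if more
 * than 30 hashes exist).  Returns -EINVAL if unsupported flags remain.
 **/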
5006static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5007{
5008	struct e1000_hw *hw = &adapter->hw;
5009	u32 vmolr = rd32(E1000_VMOLR(vf));
5010	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5011
5012	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5013	                    IGB_VF_FLAG_MULTI_PROMISC);
5014	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5015
5016	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5017		vmolr |= E1000_VMOLR_MPME;
5018		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5019		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5020	} else {
5021		/*
5022		 * if we have hashes and we are clearing a multicast promisc
5023		 * flag we need to write the hashes to the MTA as this step
5024		 * was previously skipped
5025		 */
5026		if (vf_data->num_vf_mc_hashes > 30) {
5027			vmolr |= E1000_VMOLR_MPME;
5028		} else if (vf_data->num_vf_mc_hashes) {
5029			int j;
5030			vmolr |= E1000_VMOLR_ROMPE;
5031			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5032				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5033		}
5034	}
5035
5036	wr32(E1000_VMOLR(vf), vmolr);
5037
5038	/* there are flags left unprocessed, likely not supported */
5039	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5040		return -EINVAL;
5041
5042	return 0;
5043
5044}
5045
5046static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5047				  u32 *msgbuf, u32 vf)
5048{
5049	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5050	u16 *hash_list = (u16 *)&msgbuf[1];
5051	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5052	int i;
5053
5054	/* salt away the number of multicast addresses assigned
5055	 * to this VF for later use to restore when the PF multicast
5056	 * list changes
5057	 */
5058	vf_data->num_vf_mc_hashes = n;
5059
5060	/* only up to 30 hash values supported */
5061	if (n > 30)
5062		n = 30;
5063
5064	/* store the hashes for later use */
5065	for (i = 0; i < n; i++)
5066		vf_data->vf_mc_hashes[i] = hash_list[i];
5067
5068	/* Flush and reset the mta with the new values */
5069	igb_set_rx_mode(adapter->netdev);
5070
5071	return 0;
5072}
5073
5074static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5075{
5076	struct e1000_hw *hw = &adapter->hw;
5077	struct vf_data_storage *vf_data;
5078	int i, j;
5079
5080	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5081		u32 vmolr = rd32(E1000_VMOLR(i));
5082		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5083
5084		vf_data = &adapter->vf_data[i];
5085
5086		if ((vf_data->num_vf_mc_hashes > 30) ||
5087		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5088			vmolr |= E1000_VMOLR_MPME;
5089		} else if (vf_data->num_vf_mc_hashes) {
5090			vmolr |= E1000_VMOLR_ROMPE;
5091			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5092				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5093		}
5094		wr32(E1000_VMOLR(i), vmolr);
5095	}
5096}
5097
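/**
 * igb_clear_vf_vfta - remove a VF from all VLVF pool entries
 * @adapter: board private structure
 * @vf: VF index
 *
 * Clears the VF's pool bit in every VLVF register and, when a pool becomes
 * empty, removes the VLAN ID from the VFTA.  Resets vlans_enabled for the VF.
 **/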
5098static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5099{
5100	struct e1000_hw *hw = &adapter->hw;
5101	u32 pool_mask, reg, vid;
5102	int i;
5103
5104	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5105
5106	/* Find the vlan filter for this id */
5107	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5108		reg = rd32(E1000_VLVF(i));
5109
5110		/* remove the vf from the pool */
5111		reg &= ~pool_mask;
5112
5113		/* if pool is empty then remove entry from vfta */
5114		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5115		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5116			vid = reg & E1000_VLVF_VLANID_MASK;
5117			reg = 0;
5118			igb_vfta_set(hw, vid, false);
5119		}
5120
5121		wr32(E1000_VLVF(i), reg);
5122	}
5123
5124	adapter->vf_data[vf].vlans_enabled = 0;
5125}
5126
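/**
 * igb_vlvf_set - add or remove a pool member for a VLAN filter
 * @adapter: board private structure
 * @vid: VLAN ID
 * @add: true to add @vf to the pool, false to remove it
 * @vf: pool (VF) index
 *
 * Only used on 82576 and later when VFs are allocated.  When adding, a
 * free VLVF slot is claimed and the VID is added to the VFTA if needed;
 * when the last pool member is removed the VFTA entry is cleared.  The
 * VF's RLPML is grown or shrunk by 4 bytes as its first VLAN is added or
 * its last VLAN removed.
 **/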
5127static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5128{
5129	struct e1000_hw *hw = &adapter->hw;
5130	u32 reg, i;
5131
5132	/* The vlvf table only exists on 82576 hardware and newer */
5133	if (hw->mac.type < e1000_82576)
5134		return -1;
5135
5136	/* we only need to do this if VMDq is enabled */
5137	if (!adapter->vfs_allocated_count)
5138		return -1;
5139
5140	/* Find the vlan filter for this id */
5141	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5142		reg = rd32(E1000_VLVF(i));
5143		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5144		    vid == (reg & E1000_VLVF_VLANID_MASK))
5145			break;
5146	}
5147
5148	if (add) {
5149		if (i == E1000_VLVF_ARRAY_SIZE) {
5150			/* Did not find a matching VLAN ID entry that was
5151			 * enabled.  Search for a free filter entry, i.e.
5152			 * one without the enable bit set
5153			 */
5154			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5155				reg = rd32(E1000_VLVF(i));
5156				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5157					break;
5158			}
5159		}
5160		if (i < E1000_VLVF_ARRAY_SIZE) {
5161			/* Found an enabled/available entry */
5162			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5163
5164			/* if !enabled we need to set this up in vfta */
5165			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5166				/* add VID to filter table */
5167				igb_vfta_set(hw, vid, true);
5168				reg |= E1000_VLVF_VLANID_ENABLE;
5169			}
5170			reg &= ~E1000_VLVF_VLANID_MASK;
5171			reg |= vid;
5172			wr32(E1000_VLVF(i), reg);
5173
5174			/* do not modify RLPML for PF devices */
5175			if (vf >= adapter->vfs_allocated_count)
5176				return 0;
5177
5178			if (!adapter->vf_data[vf].vlans_enabled) {
5179				u32 size;
5180				reg = rd32(E1000_VMOLR(vf));
5181				size = reg & E1000_VMOLR_RLPML_MASK;
5182				size += 4;
5183				reg &= ~E1000_VMOLR_RLPML_MASK;
5184				reg |= size;
5185				wr32(E1000_VMOLR(vf), reg);
5186			}
5187
5188			adapter->vf_data[vf].vlans_enabled++;
5189		}
5190	} else {
5191		if (i < E1000_VLVF_ARRAY_SIZE) {
5192			/* remove vf from the pool */
5193			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5194			/* if pool is empty then remove entry from vfta */
5195			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5196				reg = 0;
5197				igb_vfta_set(hw, vid, false);
5198			}
5199			wr32(E1000_VLVF(i), reg);
5200
5201			/* do not modify RLPML for PF devices */
5202			if (vf >= adapter->vfs_allocated_count)
5203				return 0;
5204
5205			adapter->vf_data[vf].vlans_enabled--;
5206			if (!adapter->vf_data[vf].vlans_enabled) {
5207				u32 size;
5208				reg = rd32(E1000_VMOLR(vf));
5209				size = reg & E1000_VMOLR_RLPML_MASK;
5210				size -= 4;
5211				reg &= ~E1000_VMOLR_RLPML_MASK;
5212				reg |= size;
5213				wr32(E1000_VMOLR(vf), reg);
5214			}
5215		}
5216	}
5217	return 0;
5218}
5219
5220static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5221{
5222	struct e1000_hw *hw = &adapter->hw;
5223
5224	if (vid)
5225		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5226	else
5227		wr32(E1000_VMVIR(vf), 0);
5228}
5229
5230static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5231			       int vf, u16 vlan, u8 qos)
5232{
5233	int err = 0;
5234	struct igb_adapter *adapter = netdev_priv(netdev);
5235
5236	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5237		return -EINVAL;
5238	if (vlan || qos) {
5239		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5240		if (err)
5241			goto out;
5242		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5243		igb_set_vmolr(adapter, vf, !vlan);
5244		adapter->vf_data[vf].pf_vlan = vlan;
5245		adapter->vf_data[vf].pf_qos = qos;
5246		dev_info(&adapter->pdev->dev,
5247			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5248		if (test_bit(__IGB_DOWN, &adapter->state)) {
5249			dev_warn(&adapter->pdev->dev,
5250				 "The VF VLAN has been set,"
5251				 " but the PF device is not up.\n");
5252			dev_warn(&adapter->pdev->dev,
5253				 "Bring the PF device up before"
5254				 " attempting to use the VF device.\n");
5255		}
5256	} else {
5257		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5258				   false, vf);
5259		igb_set_vmvir(adapter, vlan, vf);
5260		igb_set_vmolr(adapter, vf, true);
5261		adapter->vf_data[vf].pf_vlan = 0;
5262		adapter->vf_data[vf].pf_qos = 0;
5263	}
5264out:
5265	return err;
5266}
5267
5268static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5269{
5270	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5271	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5272
5273	return igb_vlvf_set(adapter, vid, add, vf);
5274}
5275
5276static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5277{
5278	/* clear flags - except flag that indicates PF has set the MAC */
5279	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5280	adapter->vf_data[vf].last_nack = jiffies;
5281
5282	/* reset offloads to defaults */
5283	igb_set_vmolr(adapter, vf, true);
5284
5285	/* reset vlans for device */
5286	igb_clear_vf_vfta(adapter, vf);
5287	if (adapter->vf_data[vf].pf_vlan)
5288		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5289				    adapter->vf_data[vf].pf_vlan,
5290				    adapter->vf_data[vf].pf_qos);
5291	else
5292		igb_clear_vf_vfta(adapter, vf);
5293
5294	/* reset multicast table array for vf */
5295	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5296
5297	/* Flush and reset the mta with the new values */
5298	igb_set_rx_mode(adapter->netdev);
5299}
5300
5301static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5302{
5303	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5304
5305	/* generate a new mac address as we were hotplug removed/added */
5306	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5307		random_ether_addr(vf_mac);
5308
5309	/* process remaining reset events */
5310	igb_vf_reset(adapter, vf);
5311}
5312
5313static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5314{
5315	struct e1000_hw *hw = &adapter->hw;
5316	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5317	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5318	u32 reg, msgbuf[3];
5319	u8 *addr = (u8 *)(&msgbuf[1]);
5320
5321	/* process all the same items cleared in a function level reset */
5322	igb_vf_reset(adapter, vf);
5323
5324	/* set vf mac address */
5325	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5326
5327	/* enable transmit and receive for vf */
5328	reg = rd32(E1000_VFTE);
5329	wr32(E1000_VFTE, reg | (1 << vf));
5330	reg = rd32(E1000_VFRE);
5331	wr32(E1000_VFRE, reg | (1 << vf));
5332
5333	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5334
5335	/* reply to reset with ack and vf mac address */
5336	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5337	memcpy(addr, vf_mac, 6);
5338	igb_write_mbx(hw, msgbuf, 3, vf);
5339}
5340
5341static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5342{
5343	/*
5344	 * The VF MAC Address is stored in a packed array of bytes
5345	 * starting at the second 32 bit word of the msg array
5346	 */
5347	unsigned char *addr = (unsigned char *)&msg[1];
5348	int err = -1;
5349
5350	if (is_valid_ether_addr(addr))
5351		err = igb_set_vf_mac(adapter, vf, addr);
5352
5353	return err;
5354}
5355
5356static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5357{
5358	struct e1000_hw *hw = &adapter->hw;
5359	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5360	u32 msg = E1000_VT_MSGTYPE_NACK;
5361
5362	/* if device isn't clear to send it shouldn't be reading either */
5363	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5364	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5365		igb_write_mbx(hw, &msg, 1, vf);
5366		vf_data->last_nack = jiffies;
5367	}
5368}
5369
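/**
 * igb_rcv_msg_from_vf - read and dispatch a pending VF mailbox message
 * @adapter: board private structure
 * @vf: VF with a pending message
 *
 * Reads the mailbox and, once the VF has completed a reset, dispatches the
 * request (MAC, promiscuous, multicast, LPE or VLAN configuration).  The VF
 * is answered with an ACK or NACK depending on the result.
 **/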
5370static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5371{
5372	struct pci_dev *pdev = adapter->pdev;
5373	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5374	struct e1000_hw *hw = &adapter->hw;
5375	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5376	s32 retval;
5377
5378	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5379
5380	if (retval) {
5381		/* if receive failed revoke VF CTS stats and restart init */
5382		dev_err(&pdev->dev, "Error receiving message from VF\n");
5383		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5384		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5385			return;
5386		goto out;
5387	}
5388
5389	/* this is a message we already processed, do nothing */
5390	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5391		return;
5392
5393	/*
5394	 * until the vf completes a reset it should not be
5395	 * allowed to start any configuration.
5396	 */
5397
5398	if (msgbuf[0] == E1000_VF_RESET) {
5399		igb_vf_reset_msg(adapter, vf);
5400		return;
5401	}
5402
5403	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5404		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5405			return;
5406		retval = -1;
5407		goto out;
5408	}
5409
5410	switch ((msgbuf[0] & 0xFFFF)) {
5411	case E1000_VF_SET_MAC_ADDR:
5412		retval = -EINVAL;
5413		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5414			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5415		else
5416			dev_warn(&pdev->dev,
5417				 "VF %d attempted to override administratively "
5418				 "set MAC address\nReload the VF driver to "
5419				 "resume operations\n", vf);
5420		break;
5421	case E1000_VF_SET_PROMISC:
5422		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5423		break;
5424	case E1000_VF_SET_MULTICAST:
5425		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5426		break;
5427	case E1000_VF_SET_LPE:
5428		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5429		break;
5430	case E1000_VF_SET_VLAN:
5431		retval = -1;
5432		if (vf_data->pf_vlan)
5433			dev_warn(&pdev->dev,
5434				 "VF %d attempted to override administratively "
5435				 "set VLAN tag\nReload the VF driver to "
5436				 "resume operations\n", vf);
5437		else
5438			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5439		break;
5440	default:
5441		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5442		retval = -1;
5443		break;
5444	}
5445
5446	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5447out:
5448	/* notify the VF of the results of what it sent us */
5449	if (retval)
5450		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5451	else
5452		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5453
5454	igb_write_mbx(hw, msgbuf, 1, vf);
5455}
5456
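/**
 * igb_msg_task - service mailbox events for all VFs
 * @adapter: board private structure
 *
 * Checks every allocated VF for pending reset requests, messages and acks
 * and handles each in turn.
 **/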
5457static void igb_msg_task(struct igb_adapter *adapter)
5458{
5459	struct e1000_hw *hw = &adapter->hw;
5460	u32 vf;
5461
5462	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5463		/* process any reset requests */
5464		if (!igb_check_for_rst(hw, vf))
5465			igb_vf_reset_event(adapter, vf);
5466
5467		/* process any messages pending */
5468		if (!igb_check_for_msg(hw, vf))
5469			igb_rcv_msg_from_vf(adapter, vf);
5470
5471		/* process any acks */
5472		if (!igb_check_for_ack(hw, vf))
5473			igb_rcv_ack_from_vf(adapter, vf);
5474	}
5475}
5476
5477/**
5478 *  igb_set_uta - Set unicast filter table address
5479 *  @adapter: board private structure
5480 *
5481 *  The unicast table address is a register array of 32-bit registers.
5482 *  The table is meant to be used in a way similar to how the MTA is used
5483 *  however due to certain limitations in the hardware it is necessary to
5484 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5485 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5486 **/
5487static void igb_set_uta(struct igb_adapter *adapter)
5488{
5489	struct e1000_hw *hw = &adapter->hw;
5490	int i;
5491
5492	/* The UTA table only exists on 82576 hardware and newer */
5493	if (hw->mac.type < e1000_82576)
5494		return;
5495
5496	/* we only need to do this if VMDq is enabled */
5497	if (!adapter->vfs_allocated_count)
5498		return;
5499
5500	for (i = 0; i < hw->mac.uta_reg_count; i++)
5501		array_wr32(E1000_UTA, i, ~0);
5502}
5503
5504/**
5505 * igb_intr_msi - Interrupt Handler
5506 * @irq: interrupt number
5507 * @data: pointer to a network interface device structure
5508 **/
5509static irqreturn_t igb_intr_msi(int irq, void *data)
5510{
5511	struct igb_adapter *adapter = data;
5512	struct igb_q_vector *q_vector = adapter->q_vector[0];
5513	struct e1000_hw *hw = &adapter->hw;
5514	/* read ICR disables interrupts using IAM */
5515	u32 icr = rd32(E1000_ICR);
5516
5517	igb_write_itr(q_vector);
5518
5519	if (icr & E1000_ICR_DRSTA)
5520		schedule_work(&adapter->reset_task);
5521
5522	if (icr & E1000_ICR_DOUTSYNC) {
5523		/* HW is reporting DMA is out of sync */
5524		adapter->stats.doosync++;
5525	}
5526
5527	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5528		hw->mac.get_link_status = 1;
5529		if (!test_bit(__IGB_DOWN, &adapter->state))
5530			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5531	}
5532
5533	napi_schedule(&q_vector->napi);
5534
5535	return IRQ_HANDLED;
5536}
5537
5538/**
5539 * igb_intr - Legacy Interrupt Handler
5540 * @irq: interrupt number
5541 * @data: pointer to a network interface device structure
5542 **/
5543static irqreturn_t igb_intr(int irq, void *data)
5544{
5545	struct igb_adapter *adapter = data;
5546	struct igb_q_vector *q_vector = adapter->q_vector[0];
5547	struct e1000_hw *hw = &adapter->hw;
5548	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5549	 * need for the IMC write */
5550	u32 icr = rd32(E1000_ICR);
5551
5552	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5553	 * not set, then the adapter didn't send an interrupt */
5554	if (!(icr & E1000_ICR_INT_ASSERTED))
5555		return IRQ_NONE;
5556
5557	igb_write_itr(q_vector);
5558
5559	if (icr & E1000_ICR_DRSTA)
5560		schedule_work(&adapter->reset_task);
5561
5562	if (icr & E1000_ICR_DOUTSYNC) {
5563		/* HW is reporting DMA is out of sync */
5564		adapter->stats.doosync++;
5565	}
5566
5567	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5568		hw->mac.get_link_status = 1;
5569		/* guard against interrupt when we're going down */
5570		if (!test_bit(__IGB_DOWN, &adapter->state))
5571			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5572	}
5573
5574	napi_schedule(&q_vector->napi);
5575
5576	return IRQ_HANDLED;
5577}
5578
5579void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5580{
5581	struct igb_adapter *adapter = q_vector->adapter;
5582	struct e1000_hw *hw = &adapter->hw;
5583
5584	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5585	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5586		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5587			igb_set_itr(q_vector);
5588		else
5589			igb_update_ring_itr(q_vector);
5590	}
5591
5592	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5593		if (adapter->msix_entries)
5594			wr32(E1000_EIMS, q_vector->eims_value);
5595		else
5596			igb_irq_enable(adapter);
5597	}
5598}
5599
5600/**
5601 * igb_poll - NAPI Rx polling callback
5602 * @napi: napi polling structure
5603 * @budget: count of how many packets we should handle
5604 **/
5605static int igb_poll(struct napi_struct *napi, int budget)
5606{
5607	struct igb_q_vector *q_vector = container_of(napi,
5608	                                             struct igb_q_vector,
5609	                                             napi);
5610	bool clean_complete = true;
5611
5612#ifdef CONFIG_IGB_DCA
5613	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5614		igb_update_dca(q_vector);
5615#endif
5616	if (q_vector->tx.ring)
5617		clean_complete = igb_clean_tx_irq(q_vector);
5618
5619	if (q_vector->rx.ring)
5620		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5621
5622	/* If all work not completed, return budget and keep polling */
5623	if (!clean_complete)
5624		return budget;
5625
5626	/* If not enough Rx work done, exit the polling mode */
5627	napi_complete(napi);
5628	igb_ring_irq_enable(q_vector);
5629
5630	return 0;
5631}
5632
5633/**
5634 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5635 * @adapter: board private structure
5636 * @shhwtstamps: timestamp structure to update
5637 * @regval: unsigned 64bit system time value.
5638 *
5639 * We need to convert the system time value stored in the RX/TXSTMP registers
5640 * into a hwtstamp which can be used by the upper level timestamping functions
5641 */
5642static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5643                                   struct skb_shared_hwtstamps *shhwtstamps,
5644                                   u64 regval)
5645{
5646	u64 ns;
5647
5648	/*
5649	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up by
5650	 * 24 bits to match the clock shift we set up earlier.
5651	 */
5652	if (adapter->hw.mac.type >= e1000_82580)
5653		regval <<= IGB_82580_TSYNC_SHIFT;
5654
5655	ns = timecounter_cyc2time(&adapter->clock, regval);
5656	timecompare_update(&adapter->compare, ns);
5657	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5658	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5659	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5660}
5661
5662/**
5663 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5664 * @q_vector: pointer to q_vector containing needed info
5665 * @buffer: pointer to igb_tx_buffer structure
5666 *
5667 * If we were asked to do hardware stamping and such a time stamp is
5668 * available, then it must have been for this skb here because we only
5669 * allow one such packet into the queue.
5670 */
5671static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5672			    struct igb_tx_buffer *buffer_info)
5673{
5674	struct igb_adapter *adapter = q_vector->adapter;
5675	struct e1000_hw *hw = &adapter->hw;
5676	struct skb_shared_hwtstamps shhwtstamps;
5677	u64 regval;
5678
5679	/* if skb does not support hw timestamp or TX stamp not valid exit */
5680	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5681	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5682		return;
5683
5684	regval = rd32(E1000_TXSTMPL);
5685	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5686
5687	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5688	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5689}
5690
5691/**
5692 * igb_clean_tx_irq - Reclaim resources after transmit completes
5693 * @q_vector: pointer to q_vector containing needed info
5694 * returns true if ring is completely cleaned
5695 **/
5696static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5697{
5698	struct igb_adapter *adapter = q_vector->adapter;
5699	struct igb_ring *tx_ring = q_vector->tx.ring;
5700	struct igb_tx_buffer *tx_buffer;
5701	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5702	unsigned int total_bytes = 0, total_packets = 0;
5703	unsigned int budget = q_vector->tx.work_limit;
5704	unsigned int i = tx_ring->next_to_clean;
5705
5706	if (test_bit(__IGB_DOWN, &adapter->state))
5707		return true;
5708
5709	tx_buffer = &tx_ring->tx_buffer_info[i];
5710	tx_desc = IGB_TX_DESC(tx_ring, i);
5711	i -= tx_ring->count;
5712
5713	for (; budget; budget--) {
5714		eop_desc = tx_buffer->next_to_watch;
5715
5716		/* prevent any other reads prior to eop_desc */
5717		rmb();
5718
5719		/* if next_to_watch is not set then there is no work pending */
5720		if (!eop_desc)
5721			break;
5722
5723		/* if DD is not set pending work has not been completed */
5724		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5725			break;
5726
5727		/* clear next_to_watch to prevent false hangs */
5728		tx_buffer->next_to_watch = NULL;
5729
5730		/* update the statistics for this packet */
5731		total_bytes += tx_buffer->bytecount;
5732		total_packets += tx_buffer->gso_segs;
5733
5734		/* retrieve hardware timestamp */
5735		igb_tx_hwtstamp(q_vector, tx_buffer);
5736
5737		/* free the skb */
5738		dev_kfree_skb_any(tx_buffer->skb);
5739		tx_buffer->skb = NULL;
5740
5741		/* unmap skb header data */
5742		dma_unmap_single(tx_ring->dev,
5743				 tx_buffer->dma,
5744				 tx_buffer->length,
5745				 DMA_TO_DEVICE);
5746
5747		/* clear last DMA location and unmap remaining buffers */
5748		while (tx_desc != eop_desc) {
5749			tx_buffer->dma = 0;
5750
5751			tx_buffer++;
5752			tx_desc++;
5753			i++;
5754			if (unlikely(!i)) {
5755				i -= tx_ring->count;
5756				tx_buffer = tx_ring->tx_buffer_info;
5757				tx_desc = IGB_TX_DESC(tx_ring, 0);
5758			}
5759
5760			/* unmap any remaining paged data */
5761			if (tx_buffer->dma) {
5762				dma_unmap_page(tx_ring->dev,
5763					       tx_buffer->dma,
5764					       tx_buffer->length,
5765					       DMA_TO_DEVICE);
5766			}
5767		}
5768
5769		/* clear last DMA location */
5770		tx_buffer->dma = 0;
5771
5772		/* move us one more past the eop_desc for start of next pkt */
5773		tx_buffer++;
5774		tx_desc++;
5775		i++;
5776		if (unlikely(!i)) {
5777			i -= tx_ring->count;
5778			tx_buffer = tx_ring->tx_buffer_info;
5779			tx_desc = IGB_TX_DESC(tx_ring, 0);
5780		}
5781	}
5782
5783	i += tx_ring->count;
5784	tx_ring->next_to_clean = i;
5785	u64_stats_update_begin(&tx_ring->tx_syncp);
5786	tx_ring->tx_stats.bytes += total_bytes;
5787	tx_ring->tx_stats.packets += total_packets;
5788	u64_stats_update_end(&tx_ring->tx_syncp);
5789	q_vector->tx.total_bytes += total_bytes;
5790	q_vector->tx.total_packets += total_packets;
5791
5792	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5793		struct e1000_hw *hw = &adapter->hw;
5794
5795		eop_desc = tx_buffer->next_to_watch;
5796
5797		/* Detect a transmit hang in hardware, this serializes the
5798		 * check with the clearing of time_stamp and movement of i */
5799		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5800		if (eop_desc &&
5801		    time_after(jiffies, tx_buffer->time_stamp +
5802			       (adapter->tx_timeout_factor * HZ)) &&
5803		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5804
5805			/* detected Tx unit hang */
5806			dev_err(tx_ring->dev,
5807				"Detected Tx Unit Hang\n"
5808				"  Tx Queue             <%d>\n"
5809				"  TDH                  <%x>\n"
5810				"  TDT                  <%x>\n"
5811				"  next_to_use          <%x>\n"
5812				"  next_to_clean        <%x>\n"
5813				"buffer_info[next_to_clean]\n"
5814				"  time_stamp           <%lx>\n"
5815				"  next_to_watch        <%p>\n"
5816				"  jiffies              <%lx>\n"
5817				"  desc.status          <%x>\n",
5818				tx_ring->queue_index,
5819				rd32(E1000_TDH(tx_ring->reg_idx)),
5820				readl(tx_ring->tail),
5821				tx_ring->next_to_use,
5822				tx_ring->next_to_clean,
5823				tx_buffer->time_stamp,
5824				eop_desc,
5825				jiffies,
5826				eop_desc->wb.status);
5827			netif_stop_subqueue(tx_ring->netdev,
5828					    tx_ring->queue_index);
5829
5830			/* we are about to reset, no point in enabling stuff */
5831			return true;
5832		}
5833	}
5834
5835	if (unlikely(total_packets &&
5836		     netif_carrier_ok(tx_ring->netdev) &&
5837		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5838		/* Make sure that anybody stopping the queue after this
5839		 * sees the new next_to_clean.
5840		 */
5841		smp_mb();
5842		if (__netif_subqueue_stopped(tx_ring->netdev,
5843					     tx_ring->queue_index) &&
5844		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5845			netif_wake_subqueue(tx_ring->netdev,
5846					    tx_ring->queue_index);
5847
5848			u64_stats_update_begin(&tx_ring->tx_syncp);
5849			tx_ring->tx_stats.restart_queue++;
5850			u64_stats_update_end(&tx_ring->tx_syncp);
5851		}
5852	}
5853
5854	return !!budget;
5855}
5856
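/**
 * igb_rx_checksum - set the skb checksum state from the rx descriptor
 * @ring: rx ring the descriptor was taken from
 * @rx_desc: advanced rx descriptor written back by hardware
 * @skb: packet currently being processed
 **/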
5857static inline void igb_rx_checksum(struct igb_ring *ring,
5858				   union e1000_adv_rx_desc *rx_desc,
5859				   struct sk_buff *skb)
5860{
5861	skb_checksum_none_assert(skb);
5862
5863	/* Ignore Checksum bit is set */
5864	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5865		return;
5866
5867	/* Rx checksum disabled via ethtool */
5868	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5869		return;
5870
5871	/* TCP/UDP checksum error bit is set */
5872	if (igb_test_staterr(rx_desc,
5873			     E1000_RXDEXT_STATERR_TCPE |
5874			     E1000_RXDEXT_STATERR_IPE)) {
5875		/*
5876		 * work around errata with sctp packets where the TCPE aka
5877		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5878		 * packets (i.e. let the stack check the crc32c)
5879		 */
5880		if (!((skb->len == 60) &&
5881		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5882			u64_stats_update_begin(&ring->rx_syncp);
5883			ring->rx_stats.csum_err++;
5884			u64_stats_update_end(&ring->rx_syncp);
5885		}
5886		/* let the stack verify checksum errors */
5887		return;
5888	}
5889	/* It must be a TCP or UDP packet with a valid checksum */
5890	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5891				      E1000_RXD_STAT_UDPCS))
5892		skb->ip_summed = CHECKSUM_UNNECESSARY;
5893
5894	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5895		le32_to_cpu(rx_desc->wb.upper.status_error));
5896}
5897
5898static inline void igb_rx_hash(struct igb_ring *ring,
5899			       union e1000_adv_rx_desc *rx_desc,
5900			       struct sk_buff *skb)
5901{
5902	if (ring->netdev->features & NETIF_F_RXHASH)
5903		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5904}
5905
5906static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5907			    union e1000_adv_rx_desc *rx_desc,
5908			    struct sk_buff *skb)
5909{
5910	struct igb_adapter *adapter = q_vector->adapter;
5911	struct e1000_hw *hw = &adapter->hw;
5912	u64 regval;
5913
5914	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5915				       E1000_RXDADV_STAT_TS))
5916		return;
5917
5918	/*
5919	 * If this bit is set, then the RX registers contain the time stamp. No
5920	 * other packet will be time stamped until we read these registers, so
5921	 * read the registers to make them available again. Because only one
5922	 * packet can be time stamped at a time, we know that the register
5923	 * values must belong to this one here and therefore we don't need to
5924	 * compare any of the additional attributes stored for it.
5925	 *
5926	 * If nothing went wrong, then it should have a shared tx_flags that we
5927	 * can turn into a skb_shared_hwtstamps.
5928	 */
5929	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5930		u32 *stamp = (u32 *)skb->data;
5931		regval = le32_to_cpu(*(stamp + 2));
5932		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5933		skb_pull(skb, IGB_TS_HDR_LEN);
5934	} else {
5935		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5936			return;
5937
5938		regval = rd32(E1000_RXSTMPL);
5939		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5940	}
5941
5942	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5943}
5944
5945static void igb_rx_vlan(struct igb_ring *ring,
5946			union e1000_adv_rx_desc *rx_desc,
5947			struct sk_buff *skb)
5948{
5949	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5950		u16 vid;
5951		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5952		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5953			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5954		else
5955			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5956
5957		__vlan_hwaccel_put_tag(skb, vid);
5958	}
5959}
5960
5961static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5962{
5963	/* HW will not DMA in data larger than the given buffer, even if it
5964	 * parses the (NFS, of course) header to be larger.  In that case, it
5965	 * fills the header buffer and spills the rest into the page.
5966	 */
5967	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5968	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5969	if (hlen > IGB_RX_HDR_LEN)
5970		hlen = IGB_RX_HDR_LEN;
5971	return hlen;
5972}
5973
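/**
 * igb_clean_rx_irq - clean completed descriptors from the rx ring
 * @q_vector: q_vector the rx ring belongs to
 * @budget: maximum number of packets to process
 *
 * Walks the ring while the DD bit is set, assembling each packet from its
 * header buffer and page fragments, then hands it to the stack via GRO.
 * Returns true if the budget was not exhausted.
 **/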
5974static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5975{
5976	struct igb_ring *rx_ring = q_vector->rx.ring;
5977	union e1000_adv_rx_desc *rx_desc;
5978	const int current_node = numa_node_id();
5979	unsigned int total_bytes = 0, total_packets = 0;
5980	u16 cleaned_count = igb_desc_unused(rx_ring);
5981	u16 i = rx_ring->next_to_clean;
5982
5983	rx_desc = IGB_RX_DESC(rx_ring, i);
5984
5985	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5986		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5987		struct sk_buff *skb = buffer_info->skb;
5988		union e1000_adv_rx_desc *next_rxd;
5989
5990		buffer_info->skb = NULL;
5991		prefetch(skb->data);
5992
5993		i++;
5994		if (i == rx_ring->count)
5995			i = 0;
5996
5997		next_rxd = IGB_RX_DESC(rx_ring, i);
5998		prefetch(next_rxd);
5999
6000		/*
6001		 * This memory barrier is needed to keep us from reading
6002		 * any other fields out of the rx_desc until we know the
6003		 * RXD_STAT_DD bit is set
6004		 */
6005		rmb();
6006
6007		if (!skb_is_nonlinear(skb)) {
6008			__skb_put(skb, igb_get_hlen(rx_desc));
6009			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6010					 IGB_RX_HDR_LEN,
6011					 DMA_FROM_DEVICE);
6012			buffer_info->dma = 0;
6013		}
6014
6015		if (rx_desc->wb.upper.length) {
6016			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6017
6018			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6019						buffer_info->page,
6020						buffer_info->page_offset,
6021						length);
6022
6023			skb->len += length;
6024			skb->data_len += length;
6025			skb->truesize += PAGE_SIZE / 2;
6026
6027			if ((page_count(buffer_info->page) != 1) ||
6028			    (page_to_nid(buffer_info->page) != current_node))
6029				buffer_info->page = NULL;
6030			else
6031				get_page(buffer_info->page);
6032
6033			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6034				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6035			buffer_info->page_dma = 0;
6036		}
6037
6038		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6039			struct igb_rx_buffer *next_buffer;
6040			next_buffer = &rx_ring->rx_buffer_info[i];
6041			buffer_info->skb = next_buffer->skb;
6042			buffer_info->dma = next_buffer->dma;
6043			next_buffer->skb = skb;
6044			next_buffer->dma = 0;
6045			goto next_desc;
6046		}
6047
6048		if (igb_test_staterr(rx_desc,
6049				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6050			dev_kfree_skb_any(skb);
6051			goto next_desc;
6052		}
6053
6054		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6055		igb_rx_hash(rx_ring, rx_desc, skb);
6056		igb_rx_checksum(rx_ring, rx_desc, skb);
6057		igb_rx_vlan(rx_ring, rx_desc, skb);
6058
6059		total_bytes += skb->len;
6060		total_packets++;
6061
6062		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6063
6064		napi_gro_receive(&q_vector->napi, skb);
6065
6066		budget--;
6067next_desc:
6068		if (!budget)
6069			break;
6070
6071		cleaned_count++;
6072		/* return some buffers to hardware, one at a time is too slow */
6073		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6074			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6075			cleaned_count = 0;
6076		}
6077
6078		/* use prefetched values */
6079		rx_desc = next_rxd;
6080	}
6081
6082	rx_ring->next_to_clean = i;
6083	u64_stats_update_begin(&rx_ring->rx_syncp);
6084	rx_ring->rx_stats.packets += total_packets;
6085	rx_ring->rx_stats.bytes += total_bytes;
6086	u64_stats_update_end(&rx_ring->rx_syncp);
6087	q_vector->rx.total_packets += total_packets;
6088	q_vector->rx.total_bytes += total_bytes;
6089
6090	if (cleaned_count)
6091		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6092
6093	return !!budget;
6094}
6095
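/*
 * Allocate (if necessary) and DMA-map the header skb for a receive buffer.
 * On failure the alloc_failed counter is bumped and false is returned so the
 * caller stops refilling the ring.
 */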
6096static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6097				 struct igb_rx_buffer *bi)
6098{
6099	struct sk_buff *skb = bi->skb;
6100	dma_addr_t dma = bi->dma;
6101
6102	if (dma)
6103		return true;
6104
6105	if (likely(!skb)) {
6106		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6107						IGB_RX_HDR_LEN);
6108		bi->skb = skb;
6109		if (!skb) {
6110			rx_ring->rx_stats.alloc_failed++;
6111			return false;
6112		}
6113
6114		/* initialize skb for ring */
6115		skb_record_rx_queue(skb, rx_ring->queue_index);
6116	}
6117
6118	dma = dma_map_single(rx_ring->dev, skb->data,
6119			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6120
6121	if (dma_mapping_error(rx_ring->dev, dma)) {
6122		rx_ring->rx_stats.alloc_failed++;
6123		return false;
6124	}
6125
6126	bi->dma = dma;
6127	return true;
6128}
6129
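/*
 * Allocate (if necessary) and DMA-map the half page used for packet data.
 * The page is used in two halves; page_offset is flipped (XOR PAGE_SIZE/2)
 * relative to the previous mapping so both halves are used in turn.
 */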
6130static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6131				  struct igb_rx_buffer *bi)
6132{
6133	struct page *page = bi->page;
6134	dma_addr_t page_dma = bi->page_dma;
6135	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6136
6137	if (page_dma)
6138		return true;
6139
6140	if (!page) {
6141		page = netdev_alloc_page(rx_ring->netdev);
6142		bi->page = page;
6143		if (unlikely(!page)) {
6144			rx_ring->rx_stats.alloc_failed++;
6145			return false;
6146		}
6147	}
6148
6149	page_dma = dma_map_page(rx_ring->dev, page,
6150				page_offset, PAGE_SIZE / 2,
6151				DMA_FROM_DEVICE);
6152
6153	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6154		rx_ring->rx_stats.alloc_failed++;
6155		return false;
6156	}
6157
6158	bi->page_dma = page_dma;
6159	bi->page_offset = page_offset;
6160	return true;
6161}
6162
6163/**
6164 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6165 * @rx_ring: rx descriptor ring to place the new buffers on
 * @cleaned_count: number of buffers to replace
6166 **/
6167void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6168{
6169	union e1000_adv_rx_desc *rx_desc;
6170	struct igb_rx_buffer *bi;
6171	u16 i = rx_ring->next_to_use;
6172
6173	rx_desc = IGB_RX_DESC(rx_ring, i);
6174	bi = &rx_ring->rx_buffer_info[i];
6175	i -= rx_ring->count;
6176
6177	while (cleaned_count--) {
6178		if (!igb_alloc_mapped_skb(rx_ring, bi))
6179			break;
6180
6181		/* Refresh the desc even if buffer_addrs didn't change
6182		 * because each write-back erases this info. */
6183		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6184
6185		if (!igb_alloc_mapped_page(rx_ring, bi))
6186			break;
6187
6188		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6189
6190		rx_desc++;
6191		bi++;
6192		i++;
6193		if (unlikely(!i)) {
6194			rx_desc = IGB_RX_DESC(rx_ring, 0);
6195			bi = rx_ring->rx_buffer_info;
6196			i -= rx_ring->count;
6197		}
6198
6199		/* clear the hdr_addr for the next_to_use descriptor */
6200		rx_desc->read.hdr_addr = 0;
6201	}
6202
6203	i += rx_ring->count;
6204
6205	if (rx_ring->next_to_use != i) {
6206		rx_ring->next_to_use = i;
6207
6208		/* Force memory writes to complete before letting h/w
6209		 * know there are new descriptors to fetch.  (Only
6210		 * applicable for weak-ordered memory model archs,
6211		 * such as IA-64). */
6212		wmb();
6213		writel(i, rx_ring->tail);
6214	}
6215}
6216
6217/**
6218 * igb_mii_ioctl - handle MII ioctl requests
6219 * @netdev: network interface device structure
6220 * @ifr: interface request structure holding the MII data
6221 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG, SIOCSMIIREG)
6222 **/
6223static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6224{
6225	struct igb_adapter *adapter = netdev_priv(netdev);
6226	struct mii_ioctl_data *data = if_mii(ifr);
6227
6228	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6229		return -EOPNOTSUPP;
6230
6231	switch (cmd) {
6232	case SIOCGMIIPHY:
6233		data->phy_id = adapter->hw.phy.addr;
6234		break;
6235	case SIOCGMIIREG:
6236		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6237		                     &data->val_out))
6238			return -EIO;
6239		break;
6240	case SIOCSMIIREG:
6241	default:
6242		return -EOPNOTSUPP;
6243	}
6244	return 0;
6245}
6246
6247/**
6248 * igb_hwtstamp_ioctl - control hardware time stamping
6249 * @netdev: network interface device structure
6250 * @ifr: interface request structure holding the hwtstamp_config
6251 * @cmd: ioctl command (SIOCSHWTSTAMP)
6252 *
6253 * Outgoing time stamping can be enabled and disabled. Play nice and
6254 * disable it when requested, although it shouldn't cause any overhead
6255 * when no packet needs it. At most one packet in the queue may be
6256 * marked for time stamping, otherwise it would be impossible to tell
6257 * for sure to which packet the hardware time stamp belongs.
6258 *
6259 * Incoming time stamping has to be configured via the hardware
6260 * filters. Not all combinations are supported, in particular event
6261 * type has to be specified. Matching the kind of event packet is
6262 * not supported, with the exception of "all V2 events regardless of
6263 * level 2 or 4".
6264 *
6265 **/
6266static int igb_hwtstamp_ioctl(struct net_device *netdev,
6267			      struct ifreq *ifr, int cmd)
6268{
6269	struct igb_adapter *adapter = netdev_priv(netdev);
6270	struct e1000_hw *hw = &adapter->hw;
6271	struct hwtstamp_config config;
6272	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6273	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6274	u32 tsync_rx_cfg = 0;
6275	bool is_l4 = false;
6276	bool is_l2 = false;
6277	u32 regval;
6278
6279	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6280		return -EFAULT;
6281
6282	/* reserved for future extensions */
6283	if (config.flags)
6284		return -EINVAL;
6285
6286	switch (config.tx_type) {
6287	case HWTSTAMP_TX_OFF:
6288		tsync_tx_ctl = 0;
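		/* fall through */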
6289	case HWTSTAMP_TX_ON:
6290		break;
6291	default:
6292		return -ERANGE;
6293	}
6294
6295	switch (config.rx_filter) {
6296	case HWTSTAMP_FILTER_NONE:
6297		tsync_rx_ctl = 0;
6298		break;
6299	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6300	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6301	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6302	case HWTSTAMP_FILTER_ALL:
6303		/*
6304		 * register TSYNCRXCFG must be set, therefore it is not
6305		 * possible to time stamp both Sync and Delay_Req messages
6306		 * => fall back to time stamping all packets
6307		 */
6308		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6309		config.rx_filter = HWTSTAMP_FILTER_ALL;
6310		break;
6311	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6312		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6313		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6314		is_l4 = true;
6315		break;
6316	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6317		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6318		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6319		is_l4 = true;
6320		break;
6321	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6322	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6323		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6324		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6325		is_l2 = true;
6326		is_l4 = true;
6327		config.rx_filter = HWTSTAMP_FILTER_SOME;
6328		break;
6329	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6330	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6331		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6332		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6333		is_l2 = true;
6334		is_l4 = true;
6335		config.rx_filter = HWTSTAMP_FILTER_SOME;
6336		break;
6337	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6338	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6339	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6340		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6341		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6342		is_l2 = true;
6343		is_l4 = true;
6344		break;
6345	default:
6346		return -ERANGE;
6347	}
6348
6349	if (hw->mac.type == e1000_82575) {
6350		if (tsync_rx_ctl | tsync_tx_ctl)
6351			return -EINVAL;
6352		return 0;
6353	}
6354
6355	/*
6356	 * Per-packet timestamping only works if all packets are
6357	 * timestamped, so enable timestamping in all packets as
6358	 * long as one rx filter was configured.
6359	 */
6360	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6361		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6362		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6363	}
6364
6365	/* enable/disable TX */
6366	regval = rd32(E1000_TSYNCTXCTL);
6367	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6368	regval |= tsync_tx_ctl;
6369	wr32(E1000_TSYNCTXCTL, regval);
6370
6371	/* enable/disable RX */
6372	regval = rd32(E1000_TSYNCRXCTL);
6373	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6374	regval |= tsync_rx_ctl;
6375	wr32(E1000_TSYNCRXCTL, regval);
6376
6377	/* define which PTP packets are time stamped */
6378	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6379
6380	/* define ethertype filter for timestamped packets */
6381	if (is_l2)
6382		wr32(E1000_ETQF(3),
6383		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6384		                 E1000_ETQF_1588 | /* enable timestamping */
6385		                 ETH_P_1588));     /* 1588 eth protocol type */
6386	else
6387		wr32(E1000_ETQF(3), 0);
6388
6389#define PTP_PORT 319
6390	/* L4 Queue Filter[3]: filter by destination port and protocol */
6391	if (is_l4) {
6392		u32 ftqf = (IPPROTO_UDP /* UDP */
6393			| E1000_FTQF_VF_BP /* VF not compared */
6394			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6395			| E1000_FTQF_MASK); /* mask all inputs */
6396		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6397
6398		wr32(E1000_IMIR(3), htons(PTP_PORT));
6399		wr32(E1000_IMIREXT(3),
6400		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6401		if (hw->mac.type == e1000_82576) {
6402			/* enable source port check */
6403			wr32(E1000_SPQF(3), htons(PTP_PORT));
6404			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6405		}
6406		wr32(E1000_FTQF(3), ftqf);
6407	} else {
6408		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6409	}
6410	wrfl();
6411
6412	adapter->hwtstamp_config = config;
6413
6414	/* clear TX/RX time stamp registers, just to be sure */
6415	regval = rd32(E1000_TXSTMPH);
6416	regval = rd32(E1000_RXSTMPH);
6417
6418	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6419		-EFAULT : 0;
6420}
6421
6422/**
6423 * igb_ioctl - entry point for device-specific ioctl requests
6424 * @netdev: network interface device structure
6425 * @ifr: interface request structure
6426 * @cmd: ioctl command
6427 **/
6428static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6429{
6430	switch (cmd) {
6431	case SIOCGMIIPHY:
6432	case SIOCGMIIREG:
6433	case SIOCSMIIREG:
6434		return igb_mii_ioctl(netdev, ifr, cmd);
6435	case SIOCSHWTSTAMP:
6436		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6437	default:
6438		return -EOPNOTSUPP;
6439	}
6440}
6441
6442s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6443{
6444	struct igb_adapter *adapter = hw->back;
6445	u16 cap_offset;
6446
6447	cap_offset = adapter->pdev->pcie_cap;
6448	if (!cap_offset)
6449		return -E1000_ERR_CONFIG;
6450
6451	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6452
6453	return 0;
6454}
6455
6456s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6457{
6458	struct igb_adapter *adapter = hw->back;
6459	u16 cap_offset;
6460
6461	cap_offset = adapter->pdev->pcie_cap;
6462	if (!cap_offset)
6463		return -E1000_ERR_CONFIG;
6464
6465	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6466
6467	return 0;
6468}
6469
6470static void igb_vlan_mode(struct net_device *netdev, u32 features)
6471{
6472	struct igb_adapter *adapter = netdev_priv(netdev);
6473	struct e1000_hw *hw = &adapter->hw;
6474	u32 ctrl, rctl;
6475	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6476
6477	if (enable) {
6478		/* enable VLAN tag insert/strip */
6479		ctrl = rd32(E1000_CTRL);
6480		ctrl |= E1000_CTRL_VME;
6481		wr32(E1000_CTRL, ctrl);
6482
6483		/* Disable CFI check */
6484		rctl = rd32(E1000_RCTL);
6485		rctl &= ~E1000_RCTL_CFIEN;
6486		wr32(E1000_RCTL, rctl);
6487	} else {
6488		/* disable VLAN tag insert/strip */
6489		ctrl = rd32(E1000_CTRL);
6490		ctrl &= ~E1000_CTRL_VME;
6491		wr32(E1000_CTRL, ctrl);
6492	}
6493
6494	igb_rlpml_set(adapter);
6495}
6496
6497static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6498{
6499	struct igb_adapter *adapter = netdev_priv(netdev);
6500	struct e1000_hw *hw = &adapter->hw;
6501	int pf_id = adapter->vfs_allocated_count;
6502
6503	/* attempt to add filter to vlvf array */
6504	igb_vlvf_set(adapter, vid, true, pf_id);
6505
6506	/* add the filter since PF can receive vlans w/o entry in vlvf */
6507	igb_vfta_set(hw, vid, true);
6508
6509	set_bit(vid, adapter->active_vlans);
6510}
6511
6512static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6513{
6514	struct igb_adapter *adapter = netdev_priv(netdev);
6515	struct e1000_hw *hw = &adapter->hw;
6516	int pf_id = adapter->vfs_allocated_count;
6517	s32 err;
6518
6519	/* remove vlan from VLVF table array */
6520	err = igb_vlvf_set(adapter, vid, false, pf_id);
6521
6522	/* if vid was not present in VLVF just remove it from table */
6523	if (err)
6524		igb_vfta_set(hw, vid, false);
6525
6526	clear_bit(vid, adapter->active_vlans);
6527}
6528
6529static void igb_restore_vlan(struct igb_adapter *adapter)
6530{
6531	u16 vid;
6532
6533	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6534
6535	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6536		igb_vlan_rx_add_vid(adapter->netdev, vid);
6537}
6538
6539int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6540{
6541	struct pci_dev *pdev = adapter->pdev;
6542	struct e1000_mac_info *mac = &adapter->hw.mac;
6543
6544	mac->autoneg = 0;
6545
6546	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6547	 * for the switch() below to work */
6548	if ((spd & 1) || (dplx & ~1))
6549		goto err_inval;
6550
6551	/* Fiber NICs only allow 1000 Mbps full duplex */
6552	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6553	    (spd != SPEED_1000 ||
6554	     dplx != DUPLEX_FULL))
6555		goto err_inval;
6556
6557	switch (spd + dplx) {
6558	case SPEED_10 + DUPLEX_HALF:
6559		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6560		break;
6561	case SPEED_10 + DUPLEX_FULL:
6562		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6563		break;
6564	case SPEED_100 + DUPLEX_HALF:
6565		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6566		break;
6567	case SPEED_100 + DUPLEX_FULL:
6568		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6569		break;
6570	case SPEED_1000 + DUPLEX_FULL:
6571		mac->autoneg = 1;
6572		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6573		break;
6574	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6575	default:
6576		goto err_inval;
6577	}
6578	return 0;
6579
6580err_inval:
6581	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6582	return -EINVAL;
6583}
6584
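/*
 * __igb_shutdown - common suspend/shutdown path.  Stops the interface,
 * programs the wake-up filter control (WUC/WUFC) when Wake-on-LAN is
 * configured and reports through *enable_wake whether the caller should arm
 * PCI wakeup.
 */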
6585static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6586{
6587	struct net_device *netdev = pci_get_drvdata(pdev);
6588	struct igb_adapter *adapter = netdev_priv(netdev);
6589	struct e1000_hw *hw = &adapter->hw;
6590	u32 ctrl, rctl, status;
6591	u32 wufc = adapter->wol;
6592#ifdef CONFIG_PM
6593	int retval = 0;
6594#endif
6595
6596	netif_device_detach(netdev);
6597
6598	if (netif_running(netdev))
6599		igb_close(netdev);
6600
6601	igb_clear_interrupt_scheme(adapter);
6602
6603#ifdef CONFIG_PM
6604	retval = pci_save_state(pdev);
6605	if (retval)
6606		return retval;
6607#endif
6608
6609	status = rd32(E1000_STATUS);
6610	if (status & E1000_STATUS_LU)
6611		wufc &= ~E1000_WUFC_LNKC;
6612
6613	if (wufc) {
6614		igb_setup_rctl(adapter);
6615		igb_set_rx_mode(netdev);
6616
6617		/* turn on all-multi mode if wake on multicast is enabled */
6618		if (wufc & E1000_WUFC_MC) {
6619			rctl = rd32(E1000_RCTL);
6620			rctl |= E1000_RCTL_MPE;
6621			wr32(E1000_RCTL, rctl);
6622		}
6623
6624		ctrl = rd32(E1000_CTRL);
6625		/* advertise wake from D3Cold */
6626		#define E1000_CTRL_ADVD3WUC 0x00100000
6627		/* phy power management enable */
6628		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6629		ctrl |= E1000_CTRL_ADVD3WUC;
6630		wr32(E1000_CTRL, ctrl);
6631
6632		/* Allow time for pending master requests to run */
6633		igb_disable_pcie_master(hw);
6634
6635		wr32(E1000_WUC, E1000_WUC_PME_EN);
6636		wr32(E1000_WUFC, wufc);
6637	} else {
6638		wr32(E1000_WUC, 0);
6639		wr32(E1000_WUFC, 0);
6640	}
6641
6642	*enable_wake = wufc || adapter->en_mng_pt;
6643	if (!*enable_wake)
6644		igb_power_down_link(adapter);
6645	else
6646		igb_power_up_link(adapter);
6647
6648	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6649	 * would have already happened in close and is redundant. */
6650	igb_release_hw_control(adapter);
6651
6652	pci_disable_device(pdev);
6653
6654	return 0;
6655}
6656
6657#ifdef CONFIG_PM
6658static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6659{
6660	int retval;
6661	bool wake;
6662
6663	retval = __igb_shutdown(pdev, &wake);
6664	if (retval)
6665		return retval;
6666
6667	if (wake) {
6668		pci_prepare_to_sleep(pdev);
6669	} else {
6670		pci_wake_from_d3(pdev, false);
6671		pci_set_power_state(pdev, PCI_D3hot);
6672	}
6673
6674	return 0;
6675}
6676
6677static int igb_resume(struct pci_dev *pdev)
6678{
6679	struct net_device *netdev = pci_get_drvdata(pdev);
6680	struct igb_adapter *adapter = netdev_priv(netdev);
6681	struct e1000_hw *hw = &adapter->hw;
6682	u32 err;
6683
6684	pci_set_power_state(pdev, PCI_D0);
6685	pci_restore_state(pdev);
6686	pci_save_state(pdev);
6687
6688	err = pci_enable_device_mem(pdev);
6689	if (err) {
6690		dev_err(&pdev->dev,
6691			"igb: Cannot enable PCI device from suspend\n");
6692		return err;
6693	}
6694	pci_set_master(pdev);
6695
6696	pci_enable_wake(pdev, PCI_D3hot, 0);
6697	pci_enable_wake(pdev, PCI_D3cold, 0);
6698
6699	if (igb_init_interrupt_scheme(adapter)) {
6700		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6701		return -ENOMEM;
6702	}
6703
6704	igb_reset(adapter);
6705
6706	/* let the f/w know that the h/w is now under the control of the
6707	 * driver. */
6708	igb_get_hw_control(adapter);
6709
6710	wr32(E1000_WUS, ~0);
6711
6712	if (netif_running(netdev)) {
6713		err = igb_open(netdev);
6714		if (err)
6715			return err;
6716	}
6717
6718	netif_device_attach(netdev);
6719
6720	return 0;
6721}
6722#endif
6723
6724static void igb_shutdown(struct pci_dev *pdev)
6725{
6726	bool wake;
6727
6728	__igb_shutdown(pdev, &wake);
6729
6730	if (system_state == SYSTEM_POWER_OFF) {
6731		pci_wake_from_d3(pdev, wake);
6732		pci_set_power_state(pdev, PCI_D3hot);
6733	}
6734}
6735
6736#ifdef CONFIG_NET_POLL_CONTROLLER
6737/*
6738 * Polling 'interrupt' - used by things like netconsole to send skbs
6739 * without having to re-enable interrupts. It's not called while
6740 * the interrupt routine is executing.
6741 */
6742static void igb_netpoll(struct net_device *netdev)
6743{
6744	struct igb_adapter *adapter = netdev_priv(netdev);
6745	struct e1000_hw *hw = &adapter->hw;
6746	struct igb_q_vector *q_vector;
6747	int i;
6748
6749	for (i = 0; i < adapter->num_q_vectors; i++) {
6750		q_vector = adapter->q_vector[i];
6751		if (adapter->msix_entries)
6752			wr32(E1000_EIMC, q_vector->eims_value);
6753		else
6754			igb_irq_disable(adapter);
6755		napi_schedule(&q_vector->napi);
6756	}
6757}
6758#endif /* CONFIG_NET_POLL_CONTROLLER */
6759
6760/**
6761 * igb_io_error_detected - called when PCI error is detected
6762 * @pdev: Pointer to PCI device
6763 * @state: The current pci connection state
6764 *
6765 * This function is called after a PCI bus error affecting
6766 * this device has been detected.
6767 */
6768static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6769					      pci_channel_state_t state)
6770{
6771	struct net_device *netdev = pci_get_drvdata(pdev);
6772	struct igb_adapter *adapter = netdev_priv(netdev);
6773
6774	netif_device_detach(netdev);
6775
6776	if (state == pci_channel_io_perm_failure)
6777		return PCI_ERS_RESULT_DISCONNECT;
6778
6779	if (netif_running(netdev))
6780		igb_down(adapter);
6781	pci_disable_device(pdev);
6782
6783	/* Request a slot reset. */
6784	return PCI_ERS_RESULT_NEED_RESET;
6785}
6786
6787/**
6788 * igb_io_slot_reset - called after the pci bus has been reset.
6789 * @pdev: Pointer to PCI device
6790 *
6791 * Restart the card from scratch, as if from a cold-boot. Implementation
6792 * resembles the first-half of the igb_resume routine.
6793 */
6794static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6795{
6796	struct net_device *netdev = pci_get_drvdata(pdev);
6797	struct igb_adapter *adapter = netdev_priv(netdev);
6798	struct e1000_hw *hw = &adapter->hw;
6799	pci_ers_result_t result;
6800	int err;
6801
6802	if (pci_enable_device_mem(pdev)) {
6803		dev_err(&pdev->dev,
6804			"Cannot re-enable PCI device after reset.\n");
6805		result = PCI_ERS_RESULT_DISCONNECT;
6806	} else {
6807		pci_set_master(pdev);
6808		pci_restore_state(pdev);
6809		pci_save_state(pdev);
6810
6811		pci_enable_wake(pdev, PCI_D3hot, 0);
6812		pci_enable_wake(pdev, PCI_D3cold, 0);
6813
6814		igb_reset(adapter);
6815		wr32(E1000_WUS, ~0);
6816		result = PCI_ERS_RESULT_RECOVERED;
6817	}
6818
6819	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6820	if (err) {
6821		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6822		        "failed 0x%0x\n", err);
6823		/* non-fatal, continue */
6824	}
6825
6826	return result;
6827}
6828
6829/**
6830 * igb_io_resume - called when traffic can start flowing again.
6831 * @pdev: Pointer to PCI device
6832 *
6833 * This callback is called when the error recovery driver tells us that
6834 * it's OK to resume normal operation. Implementation resembles the
6835 * second-half of the igb_resume routine.
6836 */
6837static void igb_io_resume(struct pci_dev *pdev)
6838{
6839	struct net_device *netdev = pci_get_drvdata(pdev);
6840	struct igb_adapter *adapter = netdev_priv(netdev);
6841
6842	if (netif_running(netdev)) {
6843		if (igb_up(adapter)) {
6844			dev_err(&pdev->dev, "igb_up failed after reset\n");
6845			return;
6846		}
6847	}
6848
6849	netif_device_attach(netdev);
6850
6851	/* let the f/w know that the h/w is now under the control of the
6852	 * driver. */
6853	igb_get_hw_control(adapter);
6854}
6855
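/*
 * igb_rar_set_qsel - write a MAC address into the receive address registers
 * (RAL/RAH) at the given index, mark it valid and associate it with the pool
 * selected by qsel; 82575 encodes the pool as a multiple of E1000_RAH_POOL_1,
 * later parts as a bit position.
 */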
6856static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6857                             u8 qsel)
6858{
6859	u32 rar_low, rar_high;
6860	struct e1000_hw *hw = &adapter->hw;
6861
6862	/* HW expects these in little endian so we reverse the byte order
6863	 * from network order (big endian) to little endian
6864	 */
6865	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6866	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6867	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6868
6869	/* Indicate to hardware the Address is Valid. */
6870	rar_high |= E1000_RAH_AV;
6871
6872	if (hw->mac.type == e1000_82575)
6873		rar_high |= E1000_RAH_POOL_1 * qsel;
6874	else
6875		rar_high |= E1000_RAH_POOL_1 << qsel;
6876
6877	wr32(E1000_RAL(index), rar_low);
6878	wrfl();
6879	wr32(E1000_RAH(index), rar_high);
6880	wrfl();
6881}
6882
6883static int igb_set_vf_mac(struct igb_adapter *adapter,
6884                          int vf, unsigned char *mac_addr)
6885{
6886	struct e1000_hw *hw = &adapter->hw;
6887	/* VF MAC addresses start at the end of the receive addresses and move
6888	 * towards the first; as a result a collision should not be possible */
6889	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6890
6891	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6892
6893	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6894
6895	return 0;
6896}
6897
6898static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6899{
6900	struct igb_adapter *adapter = netdev_priv(netdev);
6901	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6902		return -EINVAL;
6903	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6904	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6905	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6906				      " change effective.");
6907	if (test_bit(__IGB_DOWN, &adapter->state)) {
6908		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6909			 " but the PF device is not up.\n");
6910		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6911			 " attempting to use the VF device.\n");
6912	}
6913	return igb_set_vf_mac(adapter, vf, mac);
6914}
6915
6916static int igb_link_mbps(int internal_link_speed)
6917{
6918	switch (internal_link_speed) {
6919	case SPEED_100:
6920		return 100;
6921	case SPEED_1000:
6922		return 1000;
6923	default:
6924		return 0;
6925	}
6926}
6927
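/*
 * igb_set_vf_rate_limit - program the 82576 per-VF tx rate limiter.  The
 * limiter uses a rate factor: RF_INT is the integer part of
 * link_speed / tx_rate and RF_DEC is the remainder scaled by
 * 2^E1000_RTTBCNRC_RF_INT_SHIFT (the fractional part in fixed point).
 * A tx_rate of 0 disables the limiter for that VF's queue.
 */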
6928static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6929				  int link_speed)
6930{
6931	int rf_dec, rf_int;
6932	u32 bcnrc_val;
6933
6934	if (tx_rate != 0) {
6935		/* Calculate the rate factor values to set */
6936		rf_int = link_speed / tx_rate;
6937		rf_dec = (link_speed - (rf_int * tx_rate));
6938		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6939
6940		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6941		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6942		               E1000_RTTBCNRC_RF_INT_MASK);
6943		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6944	} else {
6945		bcnrc_val = 0;
6946	}
6947
6948	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6949	wr32(E1000_RTTBCNRC, bcnrc_val);
6950}
6951
6952static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6953{
6954	int actual_link_speed, i;
6955	bool reset_rate = false;
6956
6957	/* VF TX rate limit was not set or not supported */
6958	if ((adapter->vf_rate_link_speed == 0) ||
6959	    (adapter->hw.mac.type != e1000_82576))
6960		return;
6961
6962	actual_link_speed = igb_link_mbps(adapter->link_speed);
6963	if (actual_link_speed != adapter->vf_rate_link_speed) {
6964		reset_rate = true;
6965		adapter->vf_rate_link_speed = 0;
6966		dev_info(&adapter->pdev->dev,
6967		         "Link speed has been changed. VF Transmit "
6968		         "rate is disabled\n");
6969	}
6970
6971	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6972		if (reset_rate)
6973			adapter->vf_data[i].tx_rate = 0;
6974
6975		igb_set_vf_rate_limit(&adapter->hw, i,
6976		                      adapter->vf_data[i].tx_rate,
6977		                      actual_link_speed);
6978	}
6979}
6980
6981static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6982{
6983	struct igb_adapter *adapter = netdev_priv(netdev);
6984	struct e1000_hw *hw = &adapter->hw;
6985	int actual_link_speed;
6986
6987	if (hw->mac.type != e1000_82576)
6988		return -EOPNOTSUPP;
6989
6990	actual_link_speed = igb_link_mbps(adapter->link_speed);
6991	if ((vf >= adapter->vfs_allocated_count) ||
6992	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6993	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6994		return -EINVAL;
6995
6996	adapter->vf_rate_link_speed = actual_link_speed;
6997	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6998	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6999
7000	return 0;
7001}
7002
7003static int igb_ndo_get_vf_config(struct net_device *netdev,
7004				 int vf, struct ifla_vf_info *ivi)
7005{
7006	struct igb_adapter *adapter = netdev_priv(netdev);
7007	if (vf >= adapter->vfs_allocated_count)
7008		return -EINVAL;
7009	ivi->vf = vf;
7010	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7011	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7012	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7013	ivi->qos = adapter->vf_data[vf].pf_qos;
7014	return 0;
7015}
7016
7017static void igb_vmm_control(struct igb_adapter *adapter)
7018{
7019	struct e1000_hw *hw = &adapter->hw;
7020	u32 reg;
7021
7022	switch (hw->mac.type) {
7023	case e1000_82575:
7024	default:
7025		/* replication is not supported for 82575 */
7026		return;
7027	case e1000_82576:
7028		/* notify HW that the MAC is adding vlan tags */
7029		reg = rd32(E1000_DTXCTL);
7030		reg |= E1000_DTXCTL_VLAN_ADDED;
7031		wr32(E1000_DTXCTL, reg);
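		/* fall through */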
7032	case e1000_82580:
7033		/* enable replication vlan tag stripping */
7034		reg = rd32(E1000_RPLOLR);
7035		reg |= E1000_RPLOLR_STRVLAN;
7036		wr32(E1000_RPLOLR, reg);
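		/* fall through */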
7037	case e1000_i350:
7038		/* none of the above registers are supported by i350 */
7039		break;
7040	}
7041
7042	if (adapter->vfs_allocated_count) {
7043		igb_vmdq_set_loopback_pf(hw, true);
7044		igb_vmdq_set_replication_pf(hw, true);
7045		igb_vmdq_set_anti_spoofing_pf(hw, true,
7046						adapter->vfs_allocated_count);
7047	} else {
7048		igb_vmdq_set_loopback_pf(hw, false);
7049		igb_vmdq_set_replication_pf(hw, false);
7050	}
7051}
7052
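/*
 * igb_init_dmac - configure DMA coalescing on parts newer than 82580 when
 * IGB_FLAG_DMAC is set: derive the coalescing threshold and high water mark
 * from the packet buffer allocation (pba) and let the DMA coalescer control
 * the low power state decision.  On 82580 the feature is explicitly disabled.
 */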
7053static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7054{
7055	struct e1000_hw *hw = &adapter->hw;
7056	u32 dmac_thr;
7057	u16 hwm;
7058
7059	if (hw->mac.type > e1000_82580) {
7060		if (adapter->flags & IGB_FLAG_DMAC) {
7061			u32 reg;
7062
7063			/* force threshold to 0. */
7064			wr32(E1000_DMCTXTH, 0);
7065
7066			/*
7067			 * DMA Coalescing high water mark needs to be higher
7068			 * than the RX threshold. set hwm to PBA -  2 * max
7069			 * frame size
7070			 */
7071			hwm = pba - (2 * adapter->max_frame_size);
7072			reg = rd32(E1000_DMACR);
7073			reg &= ~E1000_DMACR_DMACTHR_MASK;
7074			dmac_thr = pba - 4;
7075
7076			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7077				& E1000_DMACR_DMACTHR_MASK);
7078
7079			/* transition to L0x or L1 if available..*/
7080			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7081
7082			/* watchdog timer= +-1000 usec in 32usec intervals */
7083			reg |= (1000 >> 5);
7084			wr32(E1000_DMACR, reg);
7085
7086			/*
7087			 * no lower threshold to disable
7088			 * coalescing (smart fifo) - UTRESH=0
7089			 */
7090			wr32(E1000_DMCRTRH, 0);
7091			wr32(E1000_FCRTC, hwm);
7092
7093			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7094
7095			wr32(E1000_DMCTLX, reg);
7096
7097			/*
7098			 * free space in tx packet buffer to wake from
7099			 * DMA coal
7100			 */
7101			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7102			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7103
7104			/*
7105			 * make low power state decision controlled
7106			 * by DMA coal
7107			 */
7108			reg = rd32(E1000_PCIEMISC);
7109			reg &= ~E1000_PCIEMISC_LX_DECISION;
7110			wr32(E1000_PCIEMISC, reg);
7111		} /* endif adapter->dmac is not disabled */
7112	} else if (hw->mac.type == e1000_82580) {
7113		u32 reg = rd32(E1000_PCIEMISC);
7114		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7115		wr32(E1000_DMACR, 0);
7116	}
7117}
7118
7119/* igb_main.c */
7120