igb_main.c revision f00b0da776fda1abc481578e3932a668f603d72d
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 3
63#define MIN 4
64#define BUILD 7
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
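/* e.g. with MAJ = 3, MIN = 4, BUILD = 7 the macro expands to "3.4.7-k" */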
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
103	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
104	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
105	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
106	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
107	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
108	/* required last entry */
109	{0, }
110};
111
112MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
113
114void igb_reset(struct igb_adapter *);
115static int igb_setup_all_tx_resources(struct igb_adapter *);
116static int igb_setup_all_rx_resources(struct igb_adapter *);
117static void igb_free_all_tx_resources(struct igb_adapter *);
118static void igb_free_all_rx_resources(struct igb_adapter *);
119static void igb_setup_mrqc(struct igb_adapter *);
120static int igb_probe(struct pci_dev *, const struct pci_device_id *);
121static void __devexit igb_remove(struct pci_dev *pdev);
122static int igb_sw_init(struct igb_adapter *);
123static int igb_open(struct net_device *);
124static int igb_close(struct net_device *);
125static void igb_configure_tx(struct igb_adapter *);
126static void igb_configure_rx(struct igb_adapter *);
127static void igb_clean_all_tx_rings(struct igb_adapter *);
128static void igb_clean_all_rx_rings(struct igb_adapter *);
129static void igb_clean_tx_ring(struct igb_ring *);
130static void igb_clean_rx_ring(struct igb_ring *);
131static void igb_set_rx_mode(struct net_device *);
132static void igb_update_phy_info(unsigned long);
133static void igb_watchdog(unsigned long);
134static void igb_watchdog_task(struct work_struct *);
135static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
136static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
137						 struct rtnl_link_stats64 *stats);
138static int igb_change_mtu(struct net_device *, int);
139static int igb_set_mac(struct net_device *, void *);
140static void igb_set_uta(struct igb_adapter *adapter);
141static irqreturn_t igb_intr(int irq, void *);
142static irqreturn_t igb_intr_msi(int irq, void *);
143static irqreturn_t igb_msix_other(int irq, void *);
144static irqreturn_t igb_msix_ring(int irq, void *);
145#ifdef CONFIG_IGB_DCA
146static void igb_update_dca(struct igb_q_vector *);
147static void igb_setup_dca(struct igb_adapter *);
148#endif /* CONFIG_IGB_DCA */
149static int igb_poll(struct napi_struct *, int);
150static bool igb_clean_tx_irq(struct igb_q_vector *);
151static bool igb_clean_rx_irq(struct igb_q_vector *, int);
152static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
153static void igb_tx_timeout(struct net_device *);
154static void igb_reset_task(struct work_struct *);
155static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
156static int igb_vlan_rx_add_vid(struct net_device *, u16);
157static int igb_vlan_rx_kill_vid(struct net_device *, u16);
158static void igb_restore_vlan(struct igb_adapter *);
159static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
160static void igb_ping_all_vfs(struct igb_adapter *);
161static void igb_msg_task(struct igb_adapter *);
162static void igb_vmm_control(struct igb_adapter *);
163static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
164static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
165static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
166static int igb_ndo_set_vf_vlan(struct net_device *netdev,
167			       int vf, u16 vlan, u8 qos);
168static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
169static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
170				 struct ifla_vf_info *ivi);
171static void igb_check_vf_rate_limit(struct igb_adapter *);
172
173#ifdef CONFIG_PCI_IOV
174static int igb_vf_configure(struct igb_adapter *adapter, int vf);
175static int igb_find_enabled_vfs(struct igb_adapter *adapter);
176static int igb_check_vf_assignment(struct igb_adapter *adapter);
177#endif
178
179#ifdef CONFIG_PM
180#ifdef CONFIG_PM_SLEEP
181static int igb_suspend(struct device *);
182#endif
183static int igb_resume(struct device *);
184#ifdef CONFIG_PM_RUNTIME
185static int igb_runtime_suspend(struct device *dev);
186static int igb_runtime_resume(struct device *dev);
187static int igb_runtime_idle(struct device *dev);
188#endif
189static const struct dev_pm_ops igb_pm_ops = {
190	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
191	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
192			igb_runtime_idle)
193};
194#endif
195static void igb_shutdown(struct pci_dev *);
196#ifdef CONFIG_IGB_DCA
197static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
198static struct notifier_block dca_notifier = {
199	.notifier_call	= igb_notify_dca,
200	.next		= NULL,
201	.priority	= 0
202};
203#endif
204#ifdef CONFIG_NET_POLL_CONTROLLER
205/* for netdump / net console */
206static void igb_netpoll(struct net_device *);
207#endif
208#ifdef CONFIG_PCI_IOV
209static unsigned int max_vfs = 0;
210module_param(max_vfs, uint, 0);
211MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
212                 "per physical function");
213#endif /* CONFIG_PCI_IOV */
214
215static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
216		     pci_channel_state_t);
217static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
218static void igb_io_resume(struct pci_dev *);
219
220static struct pci_error_handlers igb_err_handler = {
221	.error_detected = igb_io_error_detected,
222	.slot_reset = igb_io_slot_reset,
223	.resume = igb_io_resume,
224};
225
226static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
227
228static struct pci_driver igb_driver = {
229	.name     = igb_driver_name,
230	.id_table = igb_pci_tbl,
231	.probe    = igb_probe,
232	.remove   = __devexit_p(igb_remove),
233#ifdef CONFIG_PM
234	.driver.pm = &igb_pm_ops,
235#endif
236	.shutdown = igb_shutdown,
237	.err_handler = &igb_err_handler
238};
239
240MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
241MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
242MODULE_LICENSE("GPL");
243MODULE_VERSION(DRV_VERSION);
244
245#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
246static int debug = -1;
247module_param(debug, int, 0);
248MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
249
250struct igb_reg_info {
251	u32 ofs;
252	char *name;
253};
254
255static const struct igb_reg_info igb_reg_info_tbl[] = {
256
257	/* General Registers */
258	{E1000_CTRL, "CTRL"},
259	{E1000_STATUS, "STATUS"},
260	{E1000_CTRL_EXT, "CTRL_EXT"},
261
262	/* Interrupt Registers */
263	{E1000_ICR, "ICR"},
264
265	/* RX Registers */
266	{E1000_RCTL, "RCTL"},
267	{E1000_RDLEN(0), "RDLEN"},
268	{E1000_RDH(0), "RDH"},
269	{E1000_RDT(0), "RDT"},
270	{E1000_RXDCTL(0), "RXDCTL"},
271	{E1000_RDBAL(0), "RDBAL"},
272	{E1000_RDBAH(0), "RDBAH"},
273
274	/* TX Registers */
275	{E1000_TCTL, "TCTL"},
276	{E1000_TDBAL(0), "TDBAL"},
277	{E1000_TDBAH(0), "TDBAH"},
278	{E1000_TDLEN(0), "TDLEN"},
279	{E1000_TDH(0), "TDH"},
280	{E1000_TDT(0), "TDT"},
281	{E1000_TXDCTL(0), "TXDCTL"},
282	{E1000_TDFH, "TDFH"},
283	{E1000_TDFT, "TDFT"},
284	{E1000_TDFHS, "TDFHS"},
285	{E1000_TDFPC, "TDFPC"},
286
287	/* List Terminator */
288	{}
289};
290
291/*
292 * igb_regdump - register printout routine
293 */
294static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
295{
296	int n = 0;
297	char rname[16];
298	u32 regs[8];
299
300	switch (reginfo->ofs) {
301	case E1000_RDLEN(0):
302		for (n = 0; n < 4; n++)
303			regs[n] = rd32(E1000_RDLEN(n));
304		break;
305	case E1000_RDH(0):
306		for (n = 0; n < 4; n++)
307			regs[n] = rd32(E1000_RDH(n));
308		break;
309	case E1000_RDT(0):
310		for (n = 0; n < 4; n++)
311			regs[n] = rd32(E1000_RDT(n));
312		break;
313	case E1000_RXDCTL(0):
314		for (n = 0; n < 4; n++)
315			regs[n] = rd32(E1000_RXDCTL(n));
316		break;
317	case E1000_RDBAL(0):
318		for (n = 0; n < 4; n++)
319			regs[n] = rd32(E1000_RDBAL(n));
320		break;
321	case E1000_RDBAH(0):
322		for (n = 0; n < 4; n++)
323			regs[n] = rd32(E1000_RDBAH(n));
324		break;
325	case E1000_TDBAL(0):
326		for (n = 0; n < 4; n++)
327			regs[n] = rd32(E1000_TDBAL(n));
328		break;
329	case E1000_TDBAH(0):
330		for (n = 0; n < 4; n++)
331			regs[n] = rd32(E1000_TDBAH(n));
332		break;
333	case E1000_TDLEN(0):
334		for (n = 0; n < 4; n++)
335			regs[n] = rd32(E1000_TDLEN(n));
336		break;
337	case E1000_TDH(0):
338		for (n = 0; n < 4; n++)
339			regs[n] = rd32(E1000_TDH(n));
340		break;
341	case E1000_TDT(0):
342		for (n = 0; n < 4; n++)
343			regs[n] = rd32(E1000_TDT(n));
344		break;
345	case E1000_TXDCTL(0):
346		for (n = 0; n < 4; n++)
347			regs[n] = rd32(E1000_TXDCTL(n));
348		break;
349	default:
350		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
351		return;
352	}
353
354	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
355	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
356		regs[2], regs[3]);
357}
358
359/*
360 * igb_dump - Print registers, tx-rings and rx-rings
361 */
362static void igb_dump(struct igb_adapter *adapter)
363{
364	struct net_device *netdev = adapter->netdev;
365	struct e1000_hw *hw = &adapter->hw;
366	struct igb_reg_info *reginfo;
367	struct igb_ring *tx_ring;
368	union e1000_adv_tx_desc *tx_desc;
369	struct my_u0 { u64 a; u64 b; } *u0;
370	struct igb_ring *rx_ring;
371	union e1000_adv_rx_desc *rx_desc;
372	u32 staterr;
373	u16 i, n;
374
375	if (!netif_msg_hw(adapter))
376		return;
377
378	/* Print netdevice Info */
379	if (netdev) {
380		dev_info(&adapter->pdev->dev, "Net device Info\n");
381		pr_info("Device Name     state            trans_start      "
382			"last_rx\n");
383		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
384			netdev->state, netdev->trans_start, netdev->last_rx);
385	}
386
387	/* Print Registers */
388	dev_info(&adapter->pdev->dev, "Register Dump\n");
389	pr_info(" Register Name   Value\n");
390	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
391	     reginfo->name; reginfo++) {
392		igb_regdump(hw, reginfo);
393	}
394
395	/* Print TX Ring Summary */
396	if (!netdev || !netif_running(netdev))
397		goto exit;
398
399	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
400	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
401	for (n = 0; n < adapter->num_tx_queues; n++) {
402		struct igb_tx_buffer *buffer_info;
403		tx_ring = adapter->tx_ring[n];
404		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
405		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
406			n, tx_ring->next_to_use, tx_ring->next_to_clean,
407			(u64)buffer_info->dma,
408			buffer_info->length,
409			buffer_info->next_to_watch,
410			(u64)buffer_info->time_stamp);
411	}
412
413	/* Print TX Rings */
414	if (!netif_msg_tx_done(adapter))
415		goto rx_ring_summary;
416
417	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
418
419	/* Transmit Descriptor Formats
420	 *
421	 * Advanced Transmit Descriptor
422	 *   +--------------------------------------------------------------+
423	 * 0 |         Buffer Address [63:0]                                |
424	 *   +--------------------------------------------------------------+
425	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
426	 *   +--------------------------------------------------------------+
427	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
428	 */
429
430	for (n = 0; n < adapter->num_tx_queues; n++) {
431		tx_ring = adapter->tx_ring[n];
432		pr_info("------------------------------------\n");
433		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
434		pr_info("------------------------------------\n");
435		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
436			"[bi->dma       ] leng  ntw timestamp        "
437			"bi->skb\n");
438
439		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
440			const char *next_desc;
441			struct igb_tx_buffer *buffer_info;
442			tx_desc = IGB_TX_DESC(tx_ring, i);
443			buffer_info = &tx_ring->tx_buffer_info[i];
444			u0 = (struct my_u0 *)tx_desc;
445			if (i == tx_ring->next_to_use &&
446			    i == tx_ring->next_to_clean)
447				next_desc = " NTC/U";
448			else if (i == tx_ring->next_to_use)
449				next_desc = " NTU";
450			else if (i == tx_ring->next_to_clean)
451				next_desc = " NTC";
452			else
453				next_desc = "";
454
455			pr_info("T [0x%03X]    %016llX %016llX %016llX"
456				" %04X  %p %016llX %p%s\n", i,
457				le64_to_cpu(u0->a),
458				le64_to_cpu(u0->b),
459				(u64)buffer_info->dma,
460				buffer_info->length,
461				buffer_info->next_to_watch,
462				(u64)buffer_info->time_stamp,
463				buffer_info->skb, next_desc);
464
465			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
466				print_hex_dump(KERN_INFO, "",
467					DUMP_PREFIX_ADDRESS,
468					16, 1, phys_to_virt(buffer_info->dma),
469					buffer_info->length, true);
470		}
471	}
472
473	/* Print RX Rings Summary */
474rx_ring_summary:
475	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
476	pr_info("Queue [NTU] [NTC]\n");
477	for (n = 0; n < adapter->num_rx_queues; n++) {
478		rx_ring = adapter->rx_ring[n];
479		pr_info(" %5d %5X %5X\n",
480			n, rx_ring->next_to_use, rx_ring->next_to_clean);
481	}
482
483	/* Print RX Rings */
484	if (!netif_msg_rx_status(adapter))
485		goto exit;
486
487	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
488
489	/* Advanced Receive Descriptor (Read) Format
490	 *    63                                           1        0
491	 *    +-----------------------------------------------------+
492	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
493	 *    +----------------------------------------------+------+
494	 *  8 |       Header Buffer Address [63:1]           |  DD  |
495	 *    +-----------------------------------------------------+
496	 *
497	 *
498	 * Advanced Receive Descriptor (Write-Back) Format
499	 *
500	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
501	 *   +------------------------------------------------------+
502	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
503	 *   | Checksum   Ident  |   |           |    | Type | Type |
504	 *   +------------------------------------------------------+
505	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
506	 *   +------------------------------------------------------+
507	 *   63       48 47    32 31            20 19               0
508	 */
509
510	for (n = 0; n < adapter->num_rx_queues; n++) {
511		rx_ring = adapter->rx_ring[n];
512		pr_info("------------------------------------\n");
513		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
514		pr_info("------------------------------------\n");
515		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
516			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
517		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
518			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
519
520		for (i = 0; i < rx_ring->count; i++) {
521			const char *next_desc;
522			struct igb_rx_buffer *buffer_info;
523			buffer_info = &rx_ring->rx_buffer_info[i];
524			rx_desc = IGB_RX_DESC(rx_ring, i);
525			u0 = (struct my_u0 *)rx_desc;
526			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
527
528			if (i == rx_ring->next_to_use)
529				next_desc = " NTU";
530			else if (i == rx_ring->next_to_clean)
531				next_desc = " NTC";
532			else
533				next_desc = "";
534
535			if (staterr & E1000_RXD_STAT_DD) {
536				/* Descriptor Done */
537				pr_info("%s[0x%03X]     %016llX %016llX -------"
538					"--------- %p%s\n", "RWB", i,
539					le64_to_cpu(u0->a),
540					le64_to_cpu(u0->b),
541					buffer_info->skb, next_desc);
542			} else {
543				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
544					" %p%s\n", "R  ", i,
545					le64_to_cpu(u0->a),
546					le64_to_cpu(u0->b),
547					(u64)buffer_info->dma,
548					buffer_info->skb, next_desc);
549
550				if (netif_msg_pktdata(adapter)) {
551					print_hex_dump(KERN_INFO, "",
552						DUMP_PREFIX_ADDRESS,
553						16, 1,
554						phys_to_virt(buffer_info->dma),
555						IGB_RX_HDR_LEN, true);
556					print_hex_dump(KERN_INFO, "",
557					  DUMP_PREFIX_ADDRESS,
558					  16, 1,
559					  phys_to_virt(
560					    buffer_info->page_dma +
561					    buffer_info->page_offset),
562					  PAGE_SIZE/2, true);
563				}
564			}
565		}
566	}
567
568exit:
569	return;
570}
571
572/**
573 * igb_get_hw_dev - return the net_device backing this hardware structure
574 * used by the hardware layer to print debugging information
575 **/
576struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577{
578	struct igb_adapter *adapter = hw->back;
579	return adapter->netdev;
580}
581
582/**
583 * igb_init_module - Driver Registration Routine
584 *
585 * igb_init_module is the first routine called when the driver is
586 * loaded. All it does is register with the PCI subsystem.
587 **/
588static int __init igb_init_module(void)
589{
590	int ret;
591	pr_info("%s - version %s\n",
592	       igb_driver_string, igb_driver_version);
593
594	pr_info("%s\n", igb_copyright);
595
596#ifdef CONFIG_IGB_DCA
597	dca_register_notify(&dca_notifier);
598#endif
599	ret = pci_register_driver(&igb_driver);
600	return ret;
601}
602
603module_init(igb_init_module);
604
605/**
606 * igb_exit_module - Driver Exit Cleanup Routine
607 *
608 * igb_exit_module is called just before the driver is removed
609 * from memory.
610 **/
611static void __exit igb_exit_module(void)
612{
613#ifdef CONFIG_IGB_DCA
614	dca_unregister_notify(&dca_notifier);
615#endif
616	pci_unregister_driver(&igb_driver);
617}
618
619module_exit(igb_exit_module);
620
621#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
622/**
623 * igb_cache_ring_register - Descriptor ring to register mapping
624 * @adapter: board private structure to initialize
625 *
626 * Once we know the feature-set enabled for the device, we'll cache
627 * the register offset the descriptor ring is assigned to.
628 **/
629static void igb_cache_ring_register(struct igb_adapter *adapter)
630{
631	int i = 0, j = 0;
632	u32 rbase_offset = adapter->vfs_allocated_count;
633
634	switch (adapter->hw.mac.type) {
635	case e1000_82576:
636		/* The queues are allocated for virtualization such that VF 0
637		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638		 * In order to avoid collision we start at the first free queue
639		 * and continue consuming queues in the same sequence
640		 */
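		/* Q_IDX_82576(i) yields 0, 8, 1, 9, 2, 10, ... for i = 0, 1, 2, ...,
		 * so the PF's queues continue the (q, q + 8) pairing established
		 * for the VFs, shifted up by rbase_offset.
		 */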
641		if (adapter->vfs_allocated_count) {
642			for (; i < adapter->rss_queues; i++)
643				adapter->rx_ring[i]->reg_idx = rbase_offset +
644				                               Q_IDX_82576(i);
645		}
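		/* fall through: any queues not claimed above get the 1:1 mapping below */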
646	case e1000_82575:
647	case e1000_82580:
648	case e1000_i350:
649	case e1000_i210:
650	case e1000_i211:
651	default:
652		for (; i < adapter->num_rx_queues; i++)
653			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654		for (; j < adapter->num_tx_queues; j++)
655			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
656		break;
657	}
658}
659
660static void igb_free_queues(struct igb_adapter *adapter)
661{
662	int i;
663
664	for (i = 0; i < adapter->num_tx_queues; i++) {
665		kfree(adapter->tx_ring[i]);
666		adapter->tx_ring[i] = NULL;
667	}
668	for (i = 0; i < adapter->num_rx_queues; i++) {
669		kfree(adapter->rx_ring[i]);
670		adapter->rx_ring[i] = NULL;
671	}
672	adapter->num_rx_queues = 0;
673	adapter->num_tx_queues = 0;
674}
675
676/**
677 * igb_alloc_queues - Allocate memory for all rings
678 * @adapter: board private structure to initialize
679 *
680 * We allocate one ring per queue at run-time since we don't know the
681 * number of queues at compile-time.
682 **/
683static int igb_alloc_queues(struct igb_adapter *adapter)
684{
685	struct igb_ring *ring;
686	int i;
687	int orig_node = adapter->node;
688
689	for (i = 0; i < adapter->num_tx_queues; i++) {
690		if (orig_node == -1) {
691			int cur_node = next_online_node(adapter->node);
692			if (cur_node == MAX_NUMNODES)
693				cur_node = first_online_node;
694			adapter->node = cur_node;
695		}
696		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
697				    adapter->node);
698		if (!ring)
699			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
700		if (!ring)
701			goto err;
702		ring->count = adapter->tx_ring_count;
703		ring->queue_index = i;
704		ring->dev = &adapter->pdev->dev;
705		ring->netdev = adapter->netdev;
706		ring->numa_node = adapter->node;
707		/* For 82575, context index must be unique per ring. */
708		if (adapter->hw.mac.type == e1000_82575)
709			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
710		adapter->tx_ring[i] = ring;
711	}
712	/* Restore the adapter's original node */
713	adapter->node = orig_node;
714
715	for (i = 0; i < adapter->num_rx_queues; i++) {
716		if (orig_node == -1) {
717			int cur_node = next_online_node(adapter->node);
718			if (cur_node == MAX_NUMNODES)
719				cur_node = first_online_node;
720			adapter->node = cur_node;
721		}
722		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
723				    adapter->node);
724		if (!ring)
725			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
726		if (!ring)
727			goto err;
728		ring->count = adapter->rx_ring_count;
729		ring->queue_index = i;
730		ring->dev = &adapter->pdev->dev;
731		ring->netdev = adapter->netdev;
732		ring->numa_node = adapter->node;
733		/* set flag indicating ring supports SCTP checksum offload */
734		if (adapter->hw.mac.type >= e1000_82576)
735			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
736
737		/*
738		 * On i350, i210, and i211, loopback VLAN packets
739		 * have the tag byte-swapped.
740		 */
741		if (adapter->hw.mac.type >= e1000_i350)
742			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
743
744		adapter->rx_ring[i] = ring;
745	}
746	/* Restore the adapter's original node */
747	adapter->node = orig_node;
748
749	igb_cache_ring_register(adapter);
750
751	return 0;
752
753err:
754	/* Restore the adapter's original node */
755	adapter->node = orig_node;
756	igb_free_queues(adapter);
757
758	return -ENOMEM;
759}
760
761/**
762 *  igb_write_ivar - configure ivar for given MSI-X vector
763 *  @hw: pointer to the HW structure
764 *  @msix_vector: vector number we are allocating to a given ring
765 *  @index: row index of IVAR register to write within IVAR table
766 *  @offset: column offset into IVAR, should be a multiple of 8
767 *
768 *  This function is intended to handle the writing of the IVAR register
769 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
770 *  each containing a cause allocation for an Rx and Tx ring, and a
771 *  variable number of rows depending on the number of queues supported.
772 **/
773static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
774			   int index, int offset)
775{
776	u32 ivar = array_rd32(E1000_IVAR0, index);
777
778	/* clear any bits that are currently set */
779	ivar &= ~((u32)0xFF << offset);
780
781	/* write vector and valid bit */
782	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
783
784	array_wr32(E1000_IVAR0, index, ivar);
785}
786
787#define IGB_N0_QUEUE -1
788static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
789{
790	struct igb_adapter *adapter = q_vector->adapter;
791	struct e1000_hw *hw = &adapter->hw;
792	int rx_queue = IGB_N0_QUEUE;
793	int tx_queue = IGB_N0_QUEUE;
794	u32 msixbm = 0;
795
796	if (q_vector->rx.ring)
797		rx_queue = q_vector->rx.ring->reg_idx;
798	if (q_vector->tx.ring)
799		tx_queue = q_vector->tx.ring->reg_idx;
800
801	switch (hw->mac.type) {
802	case e1000_82575:
803		/* The 82575 assigns vectors using a bitmask, which matches the
804		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
805		   or more queues to a vector, we write the appropriate bits
806		   into the MSIXBM register for that vector. */
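		/* e.g. rx_queue 2 sets the E1000_EICR_RX_QUEUE0 << 2 bit and
		 * tx_queue 2 ORs in E1000_EICR_TX_QUEUE0 << 2 */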
807		if (rx_queue > IGB_N0_QUEUE)
808			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
809		if (tx_queue > IGB_N0_QUEUE)
810			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
811		if (!adapter->msix_entries && msix_vector == 0)
812			msixbm |= E1000_EIMS_OTHER;
813		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
814		q_vector->eims_value = msixbm;
815		break;
816	case e1000_82576:
817		/*
818		 * 82576 uses a table that essentially consists of 2 columns
819		 * with 8 rows.  The ordering is column-major so we use the
820		 * lower 3 bits as the row index, and the 4th bit as the
821		 * column offset.
822		 */
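		/* e.g. rx_queue 10 maps to row 10 & 0x7 = 2 at column offset
		 * (10 & 0x8) << 1 = 16; the matching Tx entry adds 8 to the offset */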
823		if (rx_queue > IGB_N0_QUEUE)
824			igb_write_ivar(hw, msix_vector,
825				       rx_queue & 0x7,
826				       (rx_queue & 0x8) << 1);
827		if (tx_queue > IGB_N0_QUEUE)
828			igb_write_ivar(hw, msix_vector,
829				       tx_queue & 0x7,
830				       ((tx_queue & 0x8) << 1) + 8);
831		q_vector->eims_value = 1 << msix_vector;
832		break;
833	case e1000_82580:
834	case e1000_i350:
835	case e1000_i210:
836	case e1000_i211:
837		/*
838		 * On 82580 and newer adapters the scheme is similar to 82576
839		 * however instead of ordering column-major we have things
840		 * ordered row-major.  So we traverse the table by using
841		 * bit 0 as the column offset, and the remaining bits as the
842		 * row index.
843		 */
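		/* e.g. rx_queue 5 maps to row 5 >> 1 = 2 at column offset
		 * (5 & 0x1) << 4 = 16; the matching Tx entry adds 8 to the offset */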
844		if (rx_queue > IGB_N0_QUEUE)
845			igb_write_ivar(hw, msix_vector,
846				       rx_queue >> 1,
847				       (rx_queue & 0x1) << 4);
848		if (tx_queue > IGB_N0_QUEUE)
849			igb_write_ivar(hw, msix_vector,
850				       tx_queue >> 1,
851				       ((tx_queue & 0x1) << 4) + 8);
852		q_vector->eims_value = 1 << msix_vector;
853		break;
854	default:
855		BUG();
856		break;
857	}
858
859	/* add q_vector eims value to global eims_enable_mask */
860	adapter->eims_enable_mask |= q_vector->eims_value;
861
862	/* configure q_vector to set itr on first interrupt */
863	q_vector->set_itr = 1;
864}
865
866/**
867 * igb_configure_msix - Configure MSI-X hardware
868 *
869 * igb_configure_msix sets up the hardware to properly
870 * generate MSI-X interrupts.
871 **/
872static void igb_configure_msix(struct igb_adapter *adapter)
873{
874	u32 tmp;
875	int i, vector = 0;
876	struct e1000_hw *hw = &adapter->hw;
877
878	adapter->eims_enable_mask = 0;
879
880	/* set vector for other causes, i.e. link changes */
881	switch (hw->mac.type) {
882	case e1000_82575:
883		tmp = rd32(E1000_CTRL_EXT);
884		/* enable MSI-X PBA support */
885		tmp |= E1000_CTRL_EXT_PBA_CLR;
886
887		/* Auto-Mask interrupts upon ICR read. */
888		tmp |= E1000_CTRL_EXT_EIAME;
889		tmp |= E1000_CTRL_EXT_IRCA;
890
891		wr32(E1000_CTRL_EXT, tmp);
892
893		/* enable msix_other interrupt */
894		array_wr32(E1000_MSIXBM(0), vector++,
895		                      E1000_EIMS_OTHER);
896		adapter->eims_other = E1000_EIMS_OTHER;
897
898		break;
899
900	case e1000_82576:
901	case e1000_82580:
902	case e1000_i350:
903	case e1000_i210:
904	case e1000_i211:
905		/* Turn on MSI-X capability first, or our settings
906		 * won't stick.  And it will take days to debug. */
907		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
908		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
909		                E1000_GPIE_NSICR);
910
911		/* enable msix_other interrupt */
912		adapter->eims_other = 1 << vector;
913		tmp = (vector++ | E1000_IVAR_VALID) << 8;
914
915		wr32(E1000_IVAR_MISC, tmp);
916		break;
917	default:
918		/* do nothing, since nothing else supports MSI-X */
919		break;
920	} /* switch (hw->mac.type) */
921
922	adapter->eims_enable_mask |= adapter->eims_other;
923
924	for (i = 0; i < adapter->num_q_vectors; i++)
925		igb_assign_vector(adapter->q_vector[i], vector++);
926
927	wrfl();
928}
929
930/**
931 * igb_request_msix - Initialize MSI-X interrupts
932 *
933 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
934 * kernel.
935 **/
936static int igb_request_msix(struct igb_adapter *adapter)
937{
938	struct net_device *netdev = adapter->netdev;
939	struct e1000_hw *hw = &adapter->hw;
940	int i, err = 0, vector = 0;
941
942	err = request_irq(adapter->msix_entries[vector].vector,
943	                  igb_msix_other, 0, netdev->name, adapter);
944	if (err)
945		goto out;
946	vector++;
947
948	for (i = 0; i < adapter->num_q_vectors; i++) {
949		struct igb_q_vector *q_vector = adapter->q_vector[i];
950
951		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
952
953		if (q_vector->rx.ring && q_vector->tx.ring)
954			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
955				q_vector->rx.ring->queue_index);
956		else if (q_vector->tx.ring)
957			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
958				q_vector->tx.ring->queue_index);
959		else if (q_vector->rx.ring)
960			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
961				q_vector->rx.ring->queue_index);
962		else
963			sprintf(q_vector->name, "%s-unused", netdev->name);
964
965		err = request_irq(adapter->msix_entries[vector].vector,
966		                  igb_msix_ring, 0, q_vector->name,
967		                  q_vector);
968		if (err)
969			goto out;
970		vector++;
971	}
972
973	igb_configure_msix(adapter);
974	return 0;
975out:
976	return err;
977}
978
979static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
980{
981	if (adapter->msix_entries) {
982		pci_disable_msix(adapter->pdev);
983		kfree(adapter->msix_entries);
984		adapter->msix_entries = NULL;
985	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
986		pci_disable_msi(adapter->pdev);
987	}
988}
989
990/**
991 * igb_free_q_vectors - Free memory allocated for interrupt vectors
992 * @adapter: board private structure to initialize
993 *
994 * This function frees the memory allocated to the q_vectors.  In addition if
995 * NAPI is enabled it will delete any references to the NAPI struct prior
996 * to freeing the q_vector.
997 **/
998static void igb_free_q_vectors(struct igb_adapter *adapter)
999{
1000	int v_idx;
1001
1002	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1003		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1004		adapter->q_vector[v_idx] = NULL;
1005		if (!q_vector)
1006			continue;
1007		netif_napi_del(&q_vector->napi);
1008		kfree(q_vector);
1009	}
1010	adapter->num_q_vectors = 0;
1011}
1012
1013/**
1014 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1015 *
1016 * This function resets the device so that it has 0 rx queues, tx queues, and
1017 * MSI-X interrupts allocated.
1018 */
1019static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1020{
1021	igb_free_queues(adapter);
1022	igb_free_q_vectors(adapter);
1023	igb_reset_interrupt_capability(adapter);
1024}
1025
1026/**
1027 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1028 *
1029 * Attempt to configure interrupts using the best available
1030 * capabilities of the hardware and kernel.
1031 **/
1032static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1033{
1034	int err;
1035	int numvecs, i;
1036
1037	/* Number of supported queues. */
1038	adapter->num_rx_queues = adapter->rss_queues;
1039	if (adapter->vfs_allocated_count)
1040		adapter->num_tx_queues = 1;
1041	else
1042		adapter->num_tx_queues = adapter->rss_queues;
1043
1044	/* start with one vector for every rx queue */
1045	numvecs = adapter->num_rx_queues;
1046
1047	/* if tx handler is separate add 1 for every tx queue */
1048	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1049		numvecs += adapter->num_tx_queues;
1050
1051	/* i210 and i211 can only have 4 MSIX vectors for rx/tx queues. */
1052	if ((adapter->hw.mac.type == e1000_i210)
1053		|| (adapter->hw.mac.type == e1000_i211))
1054		numvecs = 4;
1055
1056	/* store the number of vectors reserved for queues */
1057	adapter->num_q_vectors = numvecs;
1058
1059	/* add 1 vector for link status interrupts */
1060	numvecs++;
1061	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1062					GFP_KERNEL);
1063
1064	if (!adapter->msix_entries)
1065		goto msi_only;
1066
1067	for (i = 0; i < numvecs; i++)
1068		adapter->msix_entries[i].entry = i;
1069
1070	err = pci_enable_msix(adapter->pdev,
1071			      adapter->msix_entries,
1072			      numvecs);
1073	if (err == 0)
1074		goto out;
1075
1076	igb_reset_interrupt_capability(adapter);
1077
1078	/* If we can't do MSI-X, try MSI */
1079msi_only:
1080#ifdef CONFIG_PCI_IOV
1081	/* disable SR-IOV for non MSI-X configurations */
1082	if (adapter->vf_data) {
1083		struct e1000_hw *hw = &adapter->hw;
1084		/* disable iov and allow time for transactions to clear */
1085		pci_disable_sriov(adapter->pdev);
1086		msleep(500);
1087
1088		kfree(adapter->vf_data);
1089		adapter->vf_data = NULL;
1090		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1091		wrfl();
1092		msleep(100);
1093		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1094	}
1095#endif
1096	adapter->vfs_allocated_count = 0;
1097	adapter->rss_queues = 1;
1098	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1099	adapter->num_rx_queues = 1;
1100	adapter->num_tx_queues = 1;
1101	adapter->num_q_vectors = 1;
1102	if (!pci_enable_msi(adapter->pdev))
1103		adapter->flags |= IGB_FLAG_HAS_MSI;
1104out:
1105	/* Notify the stack of the (possibly) reduced queue counts. */
1106	rtnl_lock();
1107	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1108	err = netif_set_real_num_rx_queues(adapter->netdev,
1109		adapter->num_rx_queues);
1110	rtnl_unlock();
1111	return err;
1112}
1113
1114/**
1115 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1116 * @adapter: board private structure to initialize
1117 *
1118 * We allocate one q_vector per queue interrupt.  If allocation fails we
1119 * return -ENOMEM.
1120 **/
1121static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1122{
1123	struct igb_q_vector *q_vector;
1124	struct e1000_hw *hw = &adapter->hw;
1125	int v_idx;
1126	int orig_node = adapter->node;
1127
1128	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1129		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1130						adapter->num_tx_queues)) &&
1131		    (adapter->num_rx_queues == v_idx))
1132			adapter->node = orig_node;
1133		if (orig_node == -1) {
1134			int cur_node = next_online_node(adapter->node);
1135			if (cur_node == MAX_NUMNODES)
1136				cur_node = first_online_node;
1137			adapter->node = cur_node;
1138		}
1139		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1140					adapter->node);
1141		if (!q_vector)
1142			q_vector = kzalloc(sizeof(struct igb_q_vector),
1143					   GFP_KERNEL);
1144		if (!q_vector)
1145			goto err_out;
1146		q_vector->adapter = adapter;
1147		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1148		q_vector->itr_val = IGB_START_ITR;
1149		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1150		adapter->q_vector[v_idx] = q_vector;
1151	}
1152	/* Restore the adapter's original node */
1153	adapter->node = orig_node;
1154
1155	return 0;
1156
1157err_out:
1158	/* Restore the adapter's original node */
1159	adapter->node = orig_node;
1160	igb_free_q_vectors(adapter);
1161	return -ENOMEM;
1162}
1163
1164static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1165                                      int ring_idx, int v_idx)
1166{
1167	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1168
1169	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1170	q_vector->rx.ring->q_vector = q_vector;
1171	q_vector->rx.count++;
1172	q_vector->itr_val = adapter->rx_itr_setting;
1173	if (q_vector->itr_val && q_vector->itr_val <= 3)
1174		q_vector->itr_val = IGB_START_ITR;
1175}
1176
1177static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1178                                      int ring_idx, int v_idx)
1179{
1180	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1181
1182	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1183	q_vector->tx.ring->q_vector = q_vector;
1184	q_vector->tx.count++;
1185	q_vector->itr_val = adapter->tx_itr_setting;
1186	q_vector->tx.work_limit = adapter->tx_work_limit;
1187	if (q_vector->itr_val && q_vector->itr_val <= 3)
1188		q_vector->itr_val = IGB_START_ITR;
1189}
1190
1191/**
1192 * igb_map_ring_to_vector - maps allocated queues to vectors
1193 *
1194 * This function maps the recently allocated queues to vectors.
1195 **/
1196static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1197{
1198	int i;
1199	int v_idx = 0;
1200
1201	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1202	    (adapter->num_q_vectors < adapter->num_tx_queues))
1203		return -ENOMEM;
1204
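	/* With one vector per ring, Rx rings take vectors 0..num_rx_queues-1
	 * and Tx rings take the remainder.  With fewer vectors (queue pairs),
	 * vector i services both rx_ring[i] and tx_ring[i]; e.g. 4 Rx and
	 * 4 Tx queues on 4 vectors pair Rx0/Tx0 on vector 0, Rx1/Tx1 on
	 * vector 1, and so on.
	 */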
1205	if (adapter->num_q_vectors >=
1206	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1207		for (i = 0; i < adapter->num_rx_queues; i++)
1208			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209		for (i = 0; i < adapter->num_tx_queues; i++)
1210			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1211	} else {
1212		for (i = 0; i < adapter->num_rx_queues; i++) {
1213			if (i < adapter->num_tx_queues)
1214				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1215			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1216		}
1217		for (; i < adapter->num_tx_queues; i++)
1218			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1219	}
1220	return 0;
1221}
1222
1223/**
1224 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1225 *
1226 * This function initializes the interrupts and allocates all of the queues.
1227 **/
1228static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1229{
1230	struct pci_dev *pdev = adapter->pdev;
1231	int err;
1232
1233	err = igb_set_interrupt_capability(adapter);
1234	if (err)
1235		return err;
1236
1237	err = igb_alloc_q_vectors(adapter);
1238	if (err) {
1239		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1240		goto err_alloc_q_vectors;
1241	}
1242
1243	err = igb_alloc_queues(adapter);
1244	if (err) {
1245		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1246		goto err_alloc_queues;
1247	}
1248
1249	err = igb_map_ring_to_vector(adapter);
1250	if (err) {
1251		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1252		goto err_map_queues;
1253	}
1254
1255
1256	return 0;
1257err_map_queues:
1258	igb_free_queues(adapter);
1259err_alloc_queues:
1260	igb_free_q_vectors(adapter);
1261err_alloc_q_vectors:
1262	igb_reset_interrupt_capability(adapter);
1263	return err;
1264}
1265
1266/**
1267 * igb_request_irq - initialize interrupts
1268 *
1269 * Attempts to configure interrupts using the best available
1270 * capabilities of the hardware and kernel.
1271 **/
1272static int igb_request_irq(struct igb_adapter *adapter)
1273{
1274	struct net_device *netdev = adapter->netdev;
1275	struct pci_dev *pdev = adapter->pdev;
1276	int err = 0;
1277
1278	if (adapter->msix_entries) {
1279		err = igb_request_msix(adapter);
1280		if (!err)
1281			goto request_done;
1282		/* fall back to MSI */
1283		igb_clear_interrupt_scheme(adapter);
1284		if (!pci_enable_msi(pdev))
1285			adapter->flags |= IGB_FLAG_HAS_MSI;
1286		igb_free_all_tx_resources(adapter);
1287		igb_free_all_rx_resources(adapter);
1288		adapter->num_tx_queues = 1;
1289		adapter->num_rx_queues = 1;
1290		adapter->num_q_vectors = 1;
1291		err = igb_alloc_q_vectors(adapter);
1292		if (err) {
1293			dev_err(&pdev->dev,
1294			        "Unable to allocate memory for vectors\n");
1295			goto request_done;
1296		}
1297		err = igb_alloc_queues(adapter);
1298		if (err) {
1299			dev_err(&pdev->dev,
1300			        "Unable to allocate memory for queues\n");
1301			igb_free_q_vectors(adapter);
1302			goto request_done;
1303		}
1304		igb_setup_all_tx_resources(adapter);
1305		igb_setup_all_rx_resources(adapter);
1306	}
1307
1308	igb_assign_vector(adapter->q_vector[0], 0);
1309
1310	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1311		err = request_irq(pdev->irq, igb_intr_msi, 0,
1312				  netdev->name, adapter);
1313		if (!err)
1314			goto request_done;
1315
1316		/* fall back to legacy interrupts */
1317		igb_reset_interrupt_capability(adapter);
1318		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1319	}
1320
1321	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1322			  netdev->name, adapter);
1323
1324	if (err)
1325		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1326			err);
1327
1328request_done:
1329	return err;
1330}
1331
1332static void igb_free_irq(struct igb_adapter *adapter)
1333{
1334	if (adapter->msix_entries) {
1335		int vector = 0, i;
1336
1337		free_irq(adapter->msix_entries[vector++].vector, adapter);
1338
1339		for (i = 0; i < adapter->num_q_vectors; i++)
1340			free_irq(adapter->msix_entries[vector++].vector,
1341				 adapter->q_vector[i]);
1342	} else {
1343		free_irq(adapter->pdev->irq, adapter);
1344	}
1345}
1346
1347/**
1348 * igb_irq_disable - Mask off interrupt generation on the NIC
1349 * @adapter: board private structure
1350 **/
1351static void igb_irq_disable(struct igb_adapter *adapter)
1352{
1353	struct e1000_hw *hw = &adapter->hw;
1354
1355	/*
1356	 * we need to be careful when disabling interrupts.  The VFs are also
1357	 * mapped into these registers and so clearing the bits can cause
1358	 * issues on the VF drivers so we only need to clear what we set
1359	 */
1360	if (adapter->msix_entries) {
1361		u32 regval = rd32(E1000_EIAM);
1362		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1363		wr32(E1000_EIMC, adapter->eims_enable_mask);
1364		regval = rd32(E1000_EIAC);
1365		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1366	}
1367
1368	wr32(E1000_IAM, 0);
1369	wr32(E1000_IMC, ~0);
1370	wrfl();
1371	if (adapter->msix_entries) {
1372		int i;
1373		for (i = 0; i < adapter->num_q_vectors; i++)
1374			synchronize_irq(adapter->msix_entries[i].vector);
1375	} else {
1376		synchronize_irq(adapter->pdev->irq);
1377	}
1378}
1379
1380/**
1381 * igb_irq_enable - Enable default interrupt generation settings
1382 * @adapter: board private structure
1383 **/
1384static void igb_irq_enable(struct igb_adapter *adapter)
1385{
1386	struct e1000_hw *hw = &adapter->hw;
1387
1388	if (adapter->msix_entries) {
1389		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1390		u32 regval = rd32(E1000_EIAC);
1391		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1392		regval = rd32(E1000_EIAM);
1393		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1394		wr32(E1000_EIMS, adapter->eims_enable_mask);
1395		if (adapter->vfs_allocated_count) {
1396			wr32(E1000_MBVFIMR, 0xFF);
1397			ims |= E1000_IMS_VMMB;
1398		}
1399		wr32(E1000_IMS, ims);
1400	} else {
1401		wr32(E1000_IMS, IMS_ENABLE_MASK |
1402				E1000_IMS_DRSTA);
1403		wr32(E1000_IAM, IMS_ENABLE_MASK |
1404				E1000_IMS_DRSTA);
1405	}
1406}
1407
1408static void igb_update_mng_vlan(struct igb_adapter *adapter)
1409{
1410	struct e1000_hw *hw = &adapter->hw;
1411	u16 vid = adapter->hw.mng_cookie.vlan_id;
1412	u16 old_vid = adapter->mng_vlan_id;
1413
1414	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1415		/* add VID to filter table */
1416		igb_vfta_set(hw, vid, true);
1417		adapter->mng_vlan_id = vid;
1418	} else {
1419		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1420	}
1421
1422	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1423	    (vid != old_vid) &&
1424	    !test_bit(old_vid, adapter->active_vlans)) {
1425		/* remove VID from filter table */
1426		igb_vfta_set(hw, old_vid, false);
1427	}
1428}
1429
1430/**
1431 * igb_release_hw_control - release control of the h/w to f/w
1432 * @adapter: address of board private structure
1433 *
1434 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1435 * For ASF and Pass Through versions of f/w this means that the
1436 * driver is no longer loaded.
1437 *
1438 **/
1439static void igb_release_hw_control(struct igb_adapter *adapter)
1440{
1441	struct e1000_hw *hw = &adapter->hw;
1442	u32 ctrl_ext;
1443
1444	/* Let firmware take over control of h/w */
1445	ctrl_ext = rd32(E1000_CTRL_EXT);
1446	wr32(E1000_CTRL_EXT,
1447			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1448}
1449
1450/**
1451 * igb_get_hw_control - get control of the h/w from f/w
1452 * @adapter: address of board private structure
1453 *
1454 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1455 * For ASF and Pass Through versions of f/w this means that
1456 * the driver is loaded.
1457 *
1458 **/
1459static void igb_get_hw_control(struct igb_adapter *adapter)
1460{
1461	struct e1000_hw *hw = &adapter->hw;
1462	u32 ctrl_ext;
1463
1464	/* Let firmware know the driver has taken over */
1465	ctrl_ext = rd32(E1000_CTRL_EXT);
1466	wr32(E1000_CTRL_EXT,
1467			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1468}
1469
1470/**
1471 * igb_configure - configure the hardware for RX and TX
1472 * @adapter: private board structure
1473 **/
1474static void igb_configure(struct igb_adapter *adapter)
1475{
1476	struct net_device *netdev = adapter->netdev;
1477	int i;
1478
1479	igb_get_hw_control(adapter);
1480	igb_set_rx_mode(netdev);
1481
1482	igb_restore_vlan(adapter);
1483
1484	igb_setup_tctl(adapter);
1485	igb_setup_mrqc(adapter);
1486	igb_setup_rctl(adapter);
1487
1488	igb_configure_tx(adapter);
1489	igb_configure_rx(adapter);
1490
1491	igb_rx_fifo_flush_82575(&adapter->hw);
1492
1493	/* call igb_desc_unused which always leaves
1494	 * at least 1 descriptor unused to make sure
1495	 * next_to_use != next_to_clean */
1496	for (i = 0; i < adapter->num_rx_queues; i++) {
1497		struct igb_ring *ring = adapter->rx_ring[i];
1498		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1499	}
1500}
1501
1502/**
1503 * igb_power_up_link - Power up the phy/serdes link
1504 * @adapter: address of board private structure
1505 **/
1506void igb_power_up_link(struct igb_adapter *adapter)
1507{
1508	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1509		igb_power_up_phy_copper(&adapter->hw);
1510	else
1511		igb_power_up_serdes_link_82575(&adapter->hw);
1512	igb_reset_phy(&adapter->hw);
1513}
1514
1515/**
1516 * igb_power_down_link - Power down the phy/serdes link
1517 * @adapter: address of board private structure
1518 */
1519static void igb_power_down_link(struct igb_adapter *adapter)
1520{
1521	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1522		igb_power_down_phy_copper_82575(&adapter->hw);
1523	else
1524		igb_shutdown_serdes_link_82575(&adapter->hw);
1525}
1526
1527/**
1528 * igb_up - Open the interface and prepare it to handle traffic
1529 * @adapter: board private structure
1530 **/
1531int igb_up(struct igb_adapter *adapter)
1532{
1533	struct e1000_hw *hw = &adapter->hw;
1534	int i;
1535
1536	/* hardware has been reset, we need to reload some things */
1537	igb_configure(adapter);
1538
1539	clear_bit(__IGB_DOWN, &adapter->state);
1540
1541	for (i = 0; i < adapter->num_q_vectors; i++)
1542		napi_enable(&(adapter->q_vector[i]->napi));
1543
1544	if (adapter->msix_entries)
1545		igb_configure_msix(adapter);
1546	else
1547		igb_assign_vector(adapter->q_vector[0], 0);
1548
1549	/* Clear any pending interrupts. */
1550	rd32(E1000_ICR);
1551	igb_irq_enable(adapter);
1552
1553	/* notify VFs that reset has been completed */
1554	if (adapter->vfs_allocated_count) {
1555		u32 reg_data = rd32(E1000_CTRL_EXT);
1556		reg_data |= E1000_CTRL_EXT_PFRSTD;
1557		wr32(E1000_CTRL_EXT, reg_data);
1558	}
1559
1560	netif_tx_start_all_queues(adapter->netdev);
1561
1562	/* start the watchdog. */
1563	hw->mac.get_link_status = 1;
1564	schedule_work(&adapter->watchdog_task);
1565
1566	return 0;
1567}
1568
1569void igb_down(struct igb_adapter *adapter)
1570{
1571	struct net_device *netdev = adapter->netdev;
1572	struct e1000_hw *hw = &adapter->hw;
1573	u32 tctl, rctl;
1574	int i;
1575
1576	/* signal that we're down so the interrupt handler does not
1577	 * reschedule our watchdog timer */
1578	set_bit(__IGB_DOWN, &adapter->state);
1579
1580	/* disable receives in the hardware */
1581	rctl = rd32(E1000_RCTL);
1582	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1583	/* flush and sleep below */
1584
1585	netif_tx_stop_all_queues(netdev);
1586
1587	/* disable transmits in the hardware */
1588	tctl = rd32(E1000_TCTL);
1589	tctl &= ~E1000_TCTL_EN;
1590	wr32(E1000_TCTL, tctl);
1591	/* flush both disables and wait for them to finish */
1592	wrfl();
1593	msleep(10);
1594
1595	for (i = 0; i < adapter->num_q_vectors; i++)
1596		napi_disable(&(adapter->q_vector[i]->napi));
1597
1598	igb_irq_disable(adapter);
1599
1600	del_timer_sync(&adapter->watchdog_timer);
1601	del_timer_sync(&adapter->phy_info_timer);
1602
1603	netif_carrier_off(netdev);
1604
1605	/* record the stats before reset*/
1606	spin_lock(&adapter->stats64_lock);
1607	igb_update_stats(adapter, &adapter->stats64);
1608	spin_unlock(&adapter->stats64_lock);
1609
1610	adapter->link_speed = 0;
1611	adapter->link_duplex = 0;
1612
1613	if (!pci_channel_offline(adapter->pdev))
1614		igb_reset(adapter);
1615	igb_clean_all_tx_rings(adapter);
1616	igb_clean_all_rx_rings(adapter);
1617#ifdef CONFIG_IGB_DCA
1618
1619	/* since we reset the hardware DCA settings were cleared */
1620	igb_setup_dca(adapter);
1621#endif
1622}
1623
1624void igb_reinit_locked(struct igb_adapter *adapter)
1625{
1626	WARN_ON(in_interrupt());
1627	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1628		msleep(1);
1629	igb_down(adapter);
1630	igb_up(adapter);
1631	clear_bit(__IGB_RESETTING, &adapter->state);
1632}
1633
1634void igb_reset(struct igb_adapter *adapter)
1635{
1636	struct pci_dev *pdev = adapter->pdev;
1637	struct e1000_hw *hw = &adapter->hw;
1638	struct e1000_mac_info *mac = &hw->mac;
1639	struct e1000_fc_info *fc = &hw->fc;
1640	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1641	u16 hwm;
1642
1643	/* Repartition the packet buffer (PBA) for MTUs greater than 9k.
1644	 * CTRL.RST is required for the change to take effect.
1645	 */
1646	switch (mac->type) {
1647	case e1000_i350:
1648	case e1000_82580:
1649		pba = rd32(E1000_RXPBS);
1650		pba = igb_rxpbs_adjust_82580(pba);
1651		break;
1652	case e1000_82576:
1653		pba = rd32(E1000_RXPBS);
1654		pba &= E1000_RXPBS_SIZE_MASK_82576;
1655		break;
1656	case e1000_82575:
1657	case e1000_i210:
1658	case e1000_i211:
1659	default:
1660		pba = E1000_PBA_34K;
1661		break;
1662	}
1663
1664	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1665	    (mac->type < e1000_82576)) {
1666		/* adjust PBA for jumbo frames */
1667		wr32(E1000_PBA, pba);
1668
1669		/* To maintain wire speed transmits, the Tx FIFO should be
1670		 * large enough to accommodate two full transmit packets,
1671		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1672		 * the Rx FIFO should be large enough to accommodate at least
1673		 * one full receive packet and is similarly rounded up and
1674		 * expressed in KB. */
1675		pba = rd32(E1000_PBA);
1676		/* upper 16 bits has Tx packet buffer allocation size in KB */
1677		tx_space = pba >> 16;
1678		/* lower 16 bits has Rx packet buffer allocation size in KB */
1679		pba &= 0xffff;
1680		/* the Tx FIFO also stores 16 bytes of information about the Tx packet,
1681		 * but don't include the Ethernet FCS because hardware appends it */
1682		min_tx_space = (adapter->max_frame_size +
1683				sizeof(union e1000_adv_tx_desc) -
1684				ETH_FCS_LEN) * 2;
1685		min_tx_space = ALIGN(min_tx_space, 1024);
1686		min_tx_space >>= 10;
1687		/* software strips receive CRC, so leave room for it */
1688		min_rx_space = adapter->max_frame_size;
1689		min_rx_space = ALIGN(min_rx_space, 1024);
1690		min_rx_space >>= 10;
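		/* e.g. assuming the default 1522-byte max_frame_size and 16-byte
		 * advanced descriptors: min_tx_space = (1522 + 16 - 4) * 2 = 3068,
		 * rounded up to 3 KB; min_rx_space = 1522 rounded up to 2 KB. */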
1691
1692		/* If current Tx allocation is less than the min Tx FIFO size,
1693		 * and the min Tx FIFO size is less than the current Rx FIFO
1694		 * allocation, take space away from current Rx allocation */
1695		if (tx_space < min_tx_space &&
1696		    ((min_tx_space - tx_space) < pba)) {
1697			pba = pba - (min_tx_space - tx_space);
1698
1699			/* if short on rx space, rx wins and must trump tx
1700			 * adjustment */
1701			if (pba < min_rx_space)
1702				pba = min_rx_space;
1703		}
1704		wr32(E1000_PBA, pba);
1705	}
1706
1707	/* flow control settings */
1708	/* The high water mark must be low enough to fit one full frame
1709	 * (or the size used for early receive) above it in the Rx FIFO.
1710	 * Set it to the lower of:
1711	 * - 90% of the Rx FIFO size, or
1712	 * - the full Rx FIFO size minus one full frame */
1713	hwm = min(((pba << 10) * 9 / 10),
1714			((pba << 10) - 2 * adapter->max_frame_size));
1715
1716	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1717	fc->low_water = fc->high_water - 16;
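	/* e.g. with the default 34 KB PBA and a 1522-byte max frame:
	 * min(34816 * 9 / 10, 34816 - 2 * 1522) = min(31334, 31772) = 31334,
	 * giving high_water = 0x7A60 (31328) and low_water = 31312. */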
1718	fc->pause_time = 0xFFFF;
1719	fc->send_xon = 1;
1720	fc->current_mode = fc->requested_mode;
1721
1722	/* disable receive for all VFs and wait one second */
1723	if (adapter->vfs_allocated_count) {
1724		int i;
1725		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1726			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1727
1728		/* ping all the active vfs to let them know we are going down */
1729		igb_ping_all_vfs(adapter);
1730
1731		/* disable transmits and receives */
1732		wr32(E1000_VFRE, 0);
1733		wr32(E1000_VFTE, 0);
1734	}
1735
1736	/* Allow time for pending master requests to run */
1737	hw->mac.ops.reset_hw(hw);
1738	wr32(E1000_WUC, 0);
1739
1740	if (hw->mac.ops.init_hw(hw))
1741		dev_err(&pdev->dev, "Hardware Error\n");
1742
1743	/*
1744	 * Flow control settings reset on hardware reset, so guarantee flow
1745	 * control is off when forcing speed.
1746	 */
1747	if (!hw->mac.autoneg)
1748		igb_force_mac_fc(hw);
1749
1750	igb_init_dmac(adapter, pba);
1751	if (!netif_running(adapter->netdev))
1752		igb_power_down_link(adapter);
1753
1754	igb_update_mng_vlan(adapter);
1755
1756	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1757	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1758
1759	igb_get_phy_info(hw);
1760}
1761
1762static netdev_features_t igb_fix_features(struct net_device *netdev,
1763	netdev_features_t features)
1764{
1765	/*
1766	 * Since there is no support for separate rx/tx vlan accel
1767	 * enable/disable make sure tx flag is always in same state as rx.
1768	 */
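	/* For example, "ethtool -K <dev> rxvlan off" also clears the Tx VLAN
	 * acceleration flag here, keeping the two offloads in lockstep.
	 */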
1769	if (features & NETIF_F_HW_VLAN_RX)
1770		features |= NETIF_F_HW_VLAN_TX;
1771	else
1772		features &= ~NETIF_F_HW_VLAN_TX;
1773
1774	return features;
1775}
1776
1777static int igb_set_features(struct net_device *netdev,
1778	netdev_features_t features)
1779{
1780	netdev_features_t changed = netdev->features ^ features;
1781	struct igb_adapter *adapter = netdev_priv(netdev);
1782
1783	if (changed & NETIF_F_HW_VLAN_RX)
1784		igb_vlan_mode(netdev, features);
1785
1786	if (!(changed & NETIF_F_RXALL))
1787		return 0;
1788
1789	netdev->features = features;
1790
1791	if (netif_running(netdev))
1792		igb_reinit_locked(adapter);
1793	else
1794		igb_reset(adapter);
1795
1796	return 0;
1797}
1798
1799static const struct net_device_ops igb_netdev_ops = {
1800	.ndo_open		= igb_open,
1801	.ndo_stop		= igb_close,
1802	.ndo_start_xmit		= igb_xmit_frame,
1803	.ndo_get_stats64	= igb_get_stats64,
1804	.ndo_set_rx_mode	= igb_set_rx_mode,
1805	.ndo_set_mac_address	= igb_set_mac,
1806	.ndo_change_mtu		= igb_change_mtu,
1807	.ndo_do_ioctl		= igb_ioctl,
1808	.ndo_tx_timeout		= igb_tx_timeout,
1809	.ndo_validate_addr	= eth_validate_addr,
1810	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1811	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1812	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1813	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1814	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1815	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1816#ifdef CONFIG_NET_POLL_CONTROLLER
1817	.ndo_poll_controller	= igb_netpoll,
1818#endif
1819	.ndo_fix_features	= igb_fix_features,
1820	.ndo_set_features	= igb_set_features,
1821};
1822
1823/**
1824 * igb_probe - Device Initialization Routine
1825 * @pdev: PCI device information struct
1826 * @ent: entry in igb_pci_tbl
1827 *
1828 * Returns 0 on success, negative on failure
1829 *
1830 * igb_probe initializes an adapter identified by a pci_dev structure.
1831 * The OS initialization, configuring of the adapter private structure,
1832 * and a hardware reset occur.
1833 **/
1834static int __devinit igb_probe(struct pci_dev *pdev,
1835			       const struct pci_device_id *ent)
1836{
1837	struct net_device *netdev;
1838	struct igb_adapter *adapter;
1839	struct e1000_hw *hw;
1840	u16 eeprom_data = 0;
1841	s32 ret_val;
1842	static int global_quad_port_a; /* global quad port a indication */
1843	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1844	unsigned long mmio_start, mmio_len;
1845	int err, pci_using_dac;
1846	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1847	u8 part_str[E1000_PBANUM_LENGTH];
1848
1849	/* Catch broken hardware that put the wrong VF device ID in
1850	 * the PCIe SR-IOV capability.
1851	 */
1852	if (pdev->is_virtfn) {
1853		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1854			pci_name(pdev), pdev->vendor, pdev->device);
1855		return -EINVAL;
1856	}
1857
1858	err = pci_enable_device_mem(pdev);
1859	if (err)
1860		return err;
1861
1862	pci_using_dac = 0;
1863	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1864	if (!err) {
1865		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1866		if (!err)
1867			pci_using_dac = 1;
1868	} else {
1869		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1870		if (err) {
1871			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1872			if (err) {
1873				dev_err(&pdev->dev, "No usable DMA "
1874					"configuration, aborting\n");
1875				goto err_dma;
1876			}
1877		}
1878	}
1879
1880	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1881	                                   IORESOURCE_MEM),
1882	                                   igb_driver_name);
1883	if (err)
1884		goto err_pci_reg;
1885
1886	pci_enable_pcie_error_reporting(pdev);
1887
1888	pci_set_master(pdev);
1889	pci_save_state(pdev);
1890
1891	err = -ENOMEM;
1892	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1893				   IGB_MAX_TX_QUEUES);
1894	if (!netdev)
1895		goto err_alloc_etherdev;
1896
1897	SET_NETDEV_DEV(netdev, &pdev->dev);
1898
1899	pci_set_drvdata(pdev, netdev);
1900	adapter = netdev_priv(netdev);
1901	adapter->netdev = netdev;
1902	adapter->pdev = pdev;
1903	hw = &adapter->hw;
1904	hw->back = adapter;
1905	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1906
1907	mmio_start = pci_resource_start(pdev, 0);
1908	mmio_len = pci_resource_len(pdev, 0);
1909
1910	err = -EIO;
1911	hw->hw_addr = ioremap(mmio_start, mmio_len);
1912	if (!hw->hw_addr)
1913		goto err_ioremap;
1914
1915	netdev->netdev_ops = &igb_netdev_ops;
1916	igb_set_ethtool_ops(netdev);
1917	netdev->watchdog_timeo = 5 * HZ;
1918
1919	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1920
1921	netdev->mem_start = mmio_start;
1922	netdev->mem_end = mmio_start + mmio_len;
1923
1924	/* PCI config space info */
1925	hw->vendor_id = pdev->vendor;
1926	hw->device_id = pdev->device;
1927	hw->revision_id = pdev->revision;
1928	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1929	hw->subsystem_device_id = pdev->subsystem_device;
1930
1931	/* Copy the default MAC, PHY and NVM function pointers */
1932	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1933	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1934	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1935	/* Initialize skew-specific constants */
1936	err = ei->get_invariants(hw);
1937	if (err)
1938		goto err_sw_init;
1939
1940	/* setup the private structure */
1941	err = igb_sw_init(adapter);
1942	if (err)
1943		goto err_sw_init;
1944
1945	igb_get_bus_info_pcie(hw);
1946
1947	hw->phy.autoneg_wait_to_complete = false;
1948
1949	/* Copper options */
1950	if (hw->phy.media_type == e1000_media_type_copper) {
1951		hw->phy.mdix = AUTO_ALL_MODES;
1952		hw->phy.disable_polarity_correction = false;
1953		hw->phy.ms_type = e1000_ms_hw_default;
1954	}
1955
1956	if (igb_check_reset_block(hw))
1957		dev_info(&pdev->dev,
1958			"PHY reset is blocked due to SOL/IDER session.\n");
1959
1960	/*
1961	 * features is initialized to 0 at allocation; it might already have
1962	 * bits set by igb_sw_init, so use an OR instead of an
1963	 * assignment.
1964	 */
1965	netdev->features |= NETIF_F_SG |
1966			    NETIF_F_IP_CSUM |
1967			    NETIF_F_IPV6_CSUM |
1968			    NETIF_F_TSO |
1969			    NETIF_F_TSO6 |
1970			    NETIF_F_RXHASH |
1971			    NETIF_F_RXCSUM |
1972			    NETIF_F_HW_VLAN_RX |
1973			    NETIF_F_HW_VLAN_TX;
1974
1975	/* copy netdev features into list of user selectable features */
1976	netdev->hw_features |= netdev->features;
1977	netdev->hw_features |= NETIF_F_RXALL;
1978
1979	/* set this bit last since it cannot be part of hw_features */
1980	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1981
1982	netdev->vlan_features |= NETIF_F_TSO |
1983				 NETIF_F_TSO6 |
1984				 NETIF_F_IP_CSUM |
1985				 NETIF_F_IPV6_CSUM |
1986				 NETIF_F_SG;
1987
1988	netdev->priv_flags |= IFF_SUPP_NOFCS;
1989
1990	if (pci_using_dac) {
1991		netdev->features |= NETIF_F_HIGHDMA;
1992		netdev->vlan_features |= NETIF_F_HIGHDMA;
1993	}
1994
1995	if (hw->mac.type >= e1000_82576) {
1996		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1997		netdev->features |= NETIF_F_SCTP_CSUM;
1998	}
1999
2000	netdev->priv_flags |= IFF_UNICAST_FLT;
2001
2002	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2003
2004	/* before reading the NVM, reset the controller to put the device in a
2005	 * known good starting state */
2006	hw->mac.ops.reset_hw(hw);
2007
2008	/*
2009	 * make sure the NVM is good; i211 parts have a special NVM that
2010	 * doesn't contain a checksum
2011	 */
2012	if (hw->mac.type != e1000_i211) {
2013		if (hw->nvm.ops.validate(hw) < 0) {
2014			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2015			err = -EIO;
2016			goto err_eeprom;
2017		}
2018	}
2019
2020	/* copy the MAC address out of the NVM */
2021	if (hw->mac.ops.read_mac_addr(hw))
2022		dev_err(&pdev->dev, "NVM Read Error\n");
2023
2024	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2025	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2026
2027	if (!is_valid_ether_addr(netdev->perm_addr)) {
2028		dev_err(&pdev->dev, "Invalid MAC Address\n");
2029		err = -EIO;
2030		goto err_eeprom;
2031	}
2032
2033	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2034	            (unsigned long) adapter);
2035	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2036	            (unsigned long) adapter);
2037
2038	INIT_WORK(&adapter->reset_task, igb_reset_task);
2039	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2040
2041	/* Initialize link properties that are user-changeable */
2042	adapter->fc_autoneg = true;
2043	hw->mac.autoneg = true;
2044	hw->phy.autoneg_advertised = 0x2f;
2045
2046	hw->fc.requested_mode = e1000_fc_default;
2047	hw->fc.current_mode = e1000_fc_default;
2048
2049	igb_validate_mdi_setting(hw);
2050
2051	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2052	 * enable the ACPI Magic Packet filter
2053	 */
2054
2055	if (hw->bus.func == 0)
2056		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2057	else if (hw->mac.type >= e1000_82580)
2058		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2059		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2060		                 &eeprom_data);
2061	else if (hw->bus.func == 1)
2062		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2063
2064	if (eeprom_data & eeprom_apme_mask)
2065		adapter->eeprom_wol |= E1000_WUFC_MAG;
2066
2067	/* now that we have the eeprom settings, apply the special cases where
2068	 * the eeprom may be wrong or the board simply won't support wake on
2069	 * lan on a particular port */
2070	switch (pdev->device) {
2071	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2072		adapter->eeprom_wol = 0;
2073		break;
2074	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2075	case E1000_DEV_ID_82576_FIBER:
2076	case E1000_DEV_ID_82576_SERDES:
2077		/* Wake events only supported on port A for dual fiber
2078		 * regardless of eeprom setting */
2079		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2080			adapter->eeprom_wol = 0;
2081		break;
2082	case E1000_DEV_ID_82576_QUAD_COPPER:
2083	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2084		/* if quad port adapter, disable WoL on all but port A */
2085		if (global_quad_port_a != 0)
2086			adapter->eeprom_wol = 0;
2087		else
2088			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2089		/* Reset for multiple quad port adapters */
2090		if (++global_quad_port_a == 4)
2091			global_quad_port_a = 0;
2092		break;
2093	}
2094
2095	/* initialize the wol settings based on the eeprom settings */
2096	adapter->wol = adapter->eeprom_wol;
2097	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2098
2099	/* reset the hardware with the new settings */
2100	igb_reset(adapter);
2101
2102	/* let the f/w know that the h/w is now under the control of the
2103	 * driver. */
2104	igb_get_hw_control(adapter);
2105
2106	strcpy(netdev->name, "eth%d");
2107	err = register_netdev(netdev);
2108	if (err)
2109		goto err_register;
2110
2111	/* carrier off reporting is important to ethtool even BEFORE open */
2112	netif_carrier_off(netdev);
2113
2114#ifdef CONFIG_IGB_DCA
2115	if (dca_add_requester(&pdev->dev) == 0) {
2116		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2117		dev_info(&pdev->dev, "DCA enabled\n");
2118		igb_setup_dca(adapter);
2119	}
2120
2121#endif
2122#ifdef CONFIG_IGB_PTP
2123	/* do hw tstamp init after resetting */
2124	igb_ptp_init(adapter);
2125
2126#endif
2127	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2128	/* print bus type/speed/width info */
2129	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2130		 netdev->name,
2131		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2132		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2133		                                            "unknown"),
2134		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2135		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2136		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2137		   "unknown"),
2138		 netdev->dev_addr);
2139
2140	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2141	if (ret_val)
2142		strcpy(part_str, "Unknown");
2143	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2144	dev_info(&pdev->dev,
2145		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2146		adapter->msix_entries ? "MSI-X" :
2147		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2148		adapter->num_rx_queues, adapter->num_tx_queues);
2149	switch (hw->mac.type) {
2150	case e1000_i350:
2151	case e1000_i210:
2152	case e1000_i211:
2153		igb_set_eee_i350(hw);
2154		break;
2155	default:
2156		break;
2157	}
2158
2159	pm_runtime_put_noidle(&pdev->dev);
2160	return 0;
2161
2162err_register:
2163	igb_release_hw_control(adapter);
2164err_eeprom:
2165	if (!igb_check_reset_block(hw))
2166		igb_reset_phy(hw);
2167
2168	if (hw->flash_address)
2169		iounmap(hw->flash_address);
2170err_sw_init:
2171	igb_clear_interrupt_scheme(adapter);
2172	iounmap(hw->hw_addr);
2173err_ioremap:
2174	free_netdev(netdev);
2175err_alloc_etherdev:
2176	pci_release_selected_regions(pdev,
2177	                             pci_select_bars(pdev, IORESOURCE_MEM));
2178err_pci_reg:
2179err_dma:
2180	pci_disable_device(pdev);
2181	return err;
2182}
2183
2184/**
2185 * igb_remove - Device Removal Routine
2186 * @pdev: PCI device information struct
2187 *
2188 * igb_remove is called by the PCI subsystem to alert the driver
2189 * that it should release a PCI device.  This could be caused by a
2190 * Hot-Plug event, or because the driver is going to be removed from
2191 * memory.
2192 **/
2193static void __devexit igb_remove(struct pci_dev *pdev)
2194{
2195	struct net_device *netdev = pci_get_drvdata(pdev);
2196	struct igb_adapter *adapter = netdev_priv(netdev);
2197	struct e1000_hw *hw = &adapter->hw;
2198
2199	pm_runtime_get_noresume(&pdev->dev);
2200#ifdef CONFIG_IGB_PTP
2201	igb_ptp_remove(adapter);
2202
2203#endif
2204	/*
2205	 * The watchdog timer may be rescheduled, so explicitly
2206	 * disable it from being rescheduled.
2207	 */
2208	set_bit(__IGB_DOWN, &adapter->state);
2209	del_timer_sync(&adapter->watchdog_timer);
2210	del_timer_sync(&adapter->phy_info_timer);
2211
2212	cancel_work_sync(&adapter->reset_task);
2213	cancel_work_sync(&adapter->watchdog_task);
2214
2215#ifdef CONFIG_IGB_DCA
2216	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2217		dev_info(&pdev->dev, "DCA disabled\n");
2218		dca_remove_requester(&pdev->dev);
2219		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2220		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2221	}
2222#endif
2223
2224	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2225	 * would have already happened in close and is redundant. */
2226	igb_release_hw_control(adapter);
2227
2228	unregister_netdev(netdev);
2229
2230	igb_clear_interrupt_scheme(adapter);
2231
2232#ifdef CONFIG_PCI_IOV
2233	/* reclaim resources allocated to VFs */
2234	if (adapter->vf_data) {
2235		/* disable iov and allow time for transactions to clear */
2236		if (!igb_check_vf_assignment(adapter)) {
2237			pci_disable_sriov(pdev);
2238			msleep(500);
2239		} else {
2240			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2241		}
2242
2243		kfree(adapter->vf_data);
2244		adapter->vf_data = NULL;
2245		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2246		wrfl();
2247		msleep(100);
2248		dev_info(&pdev->dev, "IOV Disabled\n");
2249	}
2250#endif
2251
2252	iounmap(hw->hw_addr);
2253	if (hw->flash_address)
2254		iounmap(hw->flash_address);
2255	pci_release_selected_regions(pdev,
2256	                             pci_select_bars(pdev, IORESOURCE_MEM));
2257
2258	kfree(adapter->shadow_vfta);
2259	free_netdev(netdev);
2260
2261	pci_disable_pcie_error_reporting(pdev);
2262
2263	pci_disable_device(pdev);
2264}
2265
2266/**
2267 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2268 * @adapter: board private structure to initialize
2269 *
2270 * This function initializes the vf specific data storage and then attempts to
2271 * allocate the VFs.  The reason for ordering it this way is because it is much
2272 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2273 * the memory for the VFs.
2274 **/
2275static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2276{
2277#ifdef CONFIG_PCI_IOV
2278	struct pci_dev *pdev = adapter->pdev;
2279	struct e1000_hw *hw = &adapter->hw;
2280	int old_vfs = igb_find_enabled_vfs(adapter);
2281	int i;
2282
2283	/* Virtualization features not supported on i210 family. */
2284	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2285		return;
2286
2287	if (old_vfs) {
2288		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2289			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2290		adapter->vfs_allocated_count = old_vfs;
2291	}
2292
2293	if (!adapter->vfs_allocated_count)
2294		return;
2295
2296	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2297				sizeof(struct vf_data_storage), GFP_KERNEL);
2298
2299	/* if allocation failed then we do not support SR-IOV */
2300	if (!adapter->vf_data) {
2301		adapter->vfs_allocated_count = 0;
2302		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2303			"Data Storage\n");
2304		goto out;
2305	}
2306
2307	if (!old_vfs) {
2308		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2309			goto err_out;
2310	}
2311	dev_info(&pdev->dev, "%d VFs allocated\n",
2312		 adapter->vfs_allocated_count);
2313	for (i = 0; i < adapter->vfs_allocated_count; i++)
2314		igb_vf_configure(adapter, i);
2315
2316	/* DMA Coalescing is not supported in IOV mode. */
2317	adapter->flags &= ~IGB_FLAG_DMAC;
2318	goto out;
2319err_out:
2320	kfree(adapter->vf_data);
2321	adapter->vf_data = NULL;
2322	adapter->vfs_allocated_count = 0;
2323out:
2324	return;
2325#endif /* CONFIG_PCI_IOV */
2326}
2327
2328/**
2329 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2330 * @adapter: board private structure to initialize
2331 *
2332 * igb_sw_init initializes the Adapter private data structure.
2333 * Fields are initialized based on PCI device information and
2334 * OS network device settings (MTU size).
2335 **/
2336static int __devinit igb_sw_init(struct igb_adapter *adapter)
2337{
2338	struct e1000_hw *hw = &adapter->hw;
2339	struct net_device *netdev = adapter->netdev;
2340	struct pci_dev *pdev = adapter->pdev;
2341
2342	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2343
2344	/* set default ring sizes */
2345	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2346	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2347
2348	/* set default ITR values */
2349	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2350	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2351
2352	/* set default work limits */
2353	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2354
2355	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2356				  VLAN_HLEN;
2357	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
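	/* Example: the default 1500-byte MTU yields a max_frame_size of
	 * 1500 + 14 + 4 + 4 = 1522 bytes and a min_frame_size of 64 bytes.
	 */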
2358
2359	adapter->node = -1;
2360
2361	spin_lock_init(&adapter->stats64_lock);
2362#ifdef CONFIG_PCI_IOV
2363	switch (hw->mac.type) {
2364	case e1000_82576:
2365	case e1000_i350:
2366		if (max_vfs > 7) {
2367			dev_warn(&pdev->dev,
2368				 "Maximum of 7 VFs per PF, using max\n");
2369			adapter->vfs_allocated_count = 7;
2370		} else
2371			adapter->vfs_allocated_count = max_vfs;
2372		break;
2373	case e1000_i210:
2374	case e1000_i211:
2375		adapter->vfs_allocated_count = 0;
2376		break;
2377	default:
2378		break;
2379	}
2380#endif /* CONFIG_PCI_IOV */
2381	switch (hw->mac.type) {
2382	case e1000_i210:
2383		adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES_I210,
2384			num_online_cpus());
2385		break;
2386	case e1000_i211:
2387		adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES_I211,
2388			num_online_cpus());
2389		break;
2390	default:
2391		adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES,
2392		num_online_cpus());
2393		break;
2394	}
2395	/* i350 cannot do RSS and SR-IOV at the same time */
2396	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2397		adapter->rss_queues = 1;
2398
2399	/*
2400	 * if rss_queues > 4, or if more than 6 VFs are going to be allocated
2401	 * while rss_queues > 1, combine the queues into queue pairs in order
2402	 * to conserve the limited supply of interrupt vectors
2403	 */
2404	if ((adapter->rss_queues > 4) ||
2405	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2406		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
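	/* Example: 8 RSS queues would otherwise need one vector per Tx ring
	 * and one per Rx ring; pairing each Tx ring with an Rx ring on a
	 * shared vector roughly halves the number of interrupts required.
	 */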
2407
2408	/* Setup and initialize a copy of the hw vlan table array */
2409	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2410				E1000_VLAN_FILTER_TBL_SIZE,
2411				GFP_ATOMIC);
2412
2413	/* This call may decrease the number of queues */
2414	if (igb_init_interrupt_scheme(adapter)) {
2415		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2416		return -ENOMEM;
2417	}
2418
2419	igb_probe_vfs(adapter);
2420
2421	/* Explicitly disable IRQ since the NIC can be in any state. */
2422	igb_irq_disable(adapter);
2423
2424	if (hw->mac.type >= e1000_i350)
2425		adapter->flags &= ~IGB_FLAG_DMAC;
2426
2427	set_bit(__IGB_DOWN, &adapter->state);
2428	return 0;
2429}
2430
2431/**
2432 * igb_open - Called when a network interface is made active
2433 * @netdev: network interface device structure
2434 *
2435 * Returns 0 on success, negative value on failure
2436 *
2437 * The open entry point is called when a network interface is made
2438 * active by the system (IFF_UP).  At this point all resources needed
2439 * for transmit and receive operations are allocated, the interrupt
2440 * handler is registered with the OS, the watchdog timer is started,
2441 * and the stack is notified that the interface is ready.
2442 **/
2443static int __igb_open(struct net_device *netdev, bool resuming)
2444{
2445	struct igb_adapter *adapter = netdev_priv(netdev);
2446	struct e1000_hw *hw = &adapter->hw;
2447	struct pci_dev *pdev = adapter->pdev;
2448	int err;
2449	int i;
2450
2451	/* disallow open during test */
2452	if (test_bit(__IGB_TESTING, &adapter->state)) {
2453		WARN_ON(resuming);
2454		return -EBUSY;
2455	}
2456
2457	if (!resuming)
2458		pm_runtime_get_sync(&pdev->dev);
2459
2460	netif_carrier_off(netdev);
2461
2462	/* allocate transmit descriptors */
2463	err = igb_setup_all_tx_resources(adapter);
2464	if (err)
2465		goto err_setup_tx;
2466
2467	/* allocate receive descriptors */
2468	err = igb_setup_all_rx_resources(adapter);
2469	if (err)
2470		goto err_setup_rx;
2471
2472	igb_power_up_link(adapter);
2473
2474	/* before we allocate an interrupt, we must be ready to handle it.
2475	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2476	 * as soon as we call request_irq, so we have to set up our
2477	 * clean_rx handler before we do so.  */
2478	igb_configure(adapter);
2479
2480	err = igb_request_irq(adapter);
2481	if (err)
2482		goto err_req_irq;
2483
2484	/* From here on the code is the same as igb_up() */
2485	clear_bit(__IGB_DOWN, &adapter->state);
2486
2487	for (i = 0; i < adapter->num_q_vectors; i++)
2488		napi_enable(&(adapter->q_vector[i]->napi));
2489
2490	/* Clear any pending interrupts. */
2491	rd32(E1000_ICR);
2492
2493	igb_irq_enable(adapter);
2494
2495	/* notify VFs that reset has been completed */
2496	if (adapter->vfs_allocated_count) {
2497		u32 reg_data = rd32(E1000_CTRL_EXT);
2498		reg_data |= E1000_CTRL_EXT_PFRSTD;
2499		wr32(E1000_CTRL_EXT, reg_data);
2500	}
2501
2502	netif_tx_start_all_queues(netdev);
2503
2504	if (!resuming)
2505		pm_runtime_put(&pdev->dev);
2506
2507	/* start the watchdog. */
2508	hw->mac.get_link_status = 1;
2509	schedule_work(&adapter->watchdog_task);
2510
2511	return 0;
2512
2513err_req_irq:
2514	igb_release_hw_control(adapter);
2515	igb_power_down_link(adapter);
2516	igb_free_all_rx_resources(adapter);
2517err_setup_rx:
2518	igb_free_all_tx_resources(adapter);
2519err_setup_tx:
2520	igb_reset(adapter);
2521	if (!resuming)
2522		pm_runtime_put(&pdev->dev);
2523
2524	return err;
2525}
2526
2527static int igb_open(struct net_device *netdev)
2528{
2529	return __igb_open(netdev, false);
2530}
2531
2532/**
2533 * igb_close - Disables a network interface
2534 * @netdev: network interface device structure
2535 *
2536 * Returns 0, this is not allowed to fail
2537 *
2538 * The close entry point is called when an interface is de-activated
2539 * by the OS.  The hardware is still under the driver's control, but
2540 * needs to be disabled.  A global MAC reset is issued to stop the
2541 * hardware, and all transmit and receive resources are freed.
2542 **/
2543static int __igb_close(struct net_device *netdev, bool suspending)
2544{
2545	struct igb_adapter *adapter = netdev_priv(netdev);
2546	struct pci_dev *pdev = adapter->pdev;
2547
2548	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2549
2550	if (!suspending)
2551		pm_runtime_get_sync(&pdev->dev);
2552
2553	igb_down(adapter);
2554	igb_free_irq(adapter);
2555
2556	igb_free_all_tx_resources(adapter);
2557	igb_free_all_rx_resources(adapter);
2558
2559	if (!suspending)
2560		pm_runtime_put_sync(&pdev->dev);
2561	return 0;
2562}
2563
2564static int igb_close(struct net_device *netdev)
2565{
2566	return __igb_close(netdev, false);
2567}
2568
2569/**
2570 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2571 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2572 *
2573 * Return 0 on success, negative on failure
2574 **/
2575int igb_setup_tx_resources(struct igb_ring *tx_ring)
2576{
2577	struct device *dev = tx_ring->dev;
2578	int orig_node = dev_to_node(dev);
2579	int size;
2580
2581	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2582	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2583	if (!tx_ring->tx_buffer_info)
2584		tx_ring->tx_buffer_info = vzalloc(size);
2585	if (!tx_ring->tx_buffer_info)
2586		goto err;
2587
2588	/* round up to nearest 4K */
2589	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2590	tx_ring->size = ALIGN(tx_ring->size, 4096);
2591
2592	set_dev_node(dev, tx_ring->numa_node);
2593	tx_ring->desc = dma_alloc_coherent(dev,
2594					   tx_ring->size,
2595					   &tx_ring->dma,
2596					   GFP_KERNEL);
2597	set_dev_node(dev, orig_node);
2598	if (!tx_ring->desc)
2599		tx_ring->desc = dma_alloc_coherent(dev,
2600						   tx_ring->size,
2601						   &tx_ring->dma,
2602						   GFP_KERNEL);
2603
2604	if (!tx_ring->desc)
2605		goto err;
2606
2607	tx_ring->next_to_use = 0;
2608	tx_ring->next_to_clean = 0;
2609
2610	return 0;
2611
2612err:
2613	vfree(tx_ring->tx_buffer_info);
2614	dev_err(dev,
2615		"Unable to allocate memory for the transmit descriptor ring\n");
2616	return -ENOMEM;
2617}
2618
2619/**
2620 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2621 *				  (Descriptors) for all queues
2622 * @adapter: board private structure
2623 *
2624 * Return 0 on success, negative on failure
2625 **/
2626static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2627{
2628	struct pci_dev *pdev = adapter->pdev;
2629	int i, err = 0;
2630
2631	for (i = 0; i < adapter->num_tx_queues; i++) {
2632		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2633		if (err) {
2634			dev_err(&pdev->dev,
2635				"Allocation for Tx Queue %u failed\n", i);
2636			for (i--; i >= 0; i--)
2637				igb_free_tx_resources(adapter->tx_ring[i]);
2638			break;
2639		}
2640	}
2641
2642	return err;
2643}
2644
2645/**
2646 * igb_setup_tctl - configure the transmit control registers
2647 * @adapter: Board private structure
2648 **/
2649void igb_setup_tctl(struct igb_adapter *adapter)
2650{
2651	struct e1000_hw *hw = &adapter->hw;
2652	u32 tctl;
2653
2654	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2655	wr32(E1000_TXDCTL(0), 0);
2656
2657	/* Program the Transmit Control Register */
2658	tctl = rd32(E1000_TCTL);
2659	tctl &= ~E1000_TCTL_CT;
2660	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2661		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2662
2663	igb_config_collision_dist(hw);
2664
2665	/* Enable transmits */
2666	tctl |= E1000_TCTL_EN;
2667
2668	wr32(E1000_TCTL, tctl);
2669}
2670
2671/**
2672 * igb_configure_tx_ring - Configure transmit ring after Reset
2673 * @adapter: board private structure
2674 * @ring: tx ring to configure
2675 *
2676 * Configure a transmit ring after a reset.
2677 **/
2678void igb_configure_tx_ring(struct igb_adapter *adapter,
2679                           struct igb_ring *ring)
2680{
2681	struct e1000_hw *hw = &adapter->hw;
2682	u32 txdctl = 0;
2683	u64 tdba = ring->dma;
2684	int reg_idx = ring->reg_idx;
2685
2686	/* disable the queue */
2687	wr32(E1000_TXDCTL(reg_idx), 0);
2688	wrfl();
2689	mdelay(10);
2690
2691	wr32(E1000_TDLEN(reg_idx),
2692	                ring->count * sizeof(union e1000_adv_tx_desc));
2693	wr32(E1000_TDBAL(reg_idx),
2694	                tdba & 0x00000000ffffffffULL);
2695	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2696
2697	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2698	wr32(E1000_TDH(reg_idx), 0);
2699	writel(0, ring->tail);
2700
2701	txdctl |= IGB_TX_PTHRESH;
2702	txdctl |= IGB_TX_HTHRESH << 8;
2703	txdctl |= IGB_TX_WTHRESH << 16;
2704
2705	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2706	wr32(E1000_TXDCTL(reg_idx), txdctl);
2707}
2708
2709/**
2710 * igb_configure_tx - Configure transmit Unit after Reset
2711 * @adapter: board private structure
2712 *
2713 * Configure the Tx unit of the MAC after a reset.
2714 **/
2715static void igb_configure_tx(struct igb_adapter *adapter)
2716{
2717	int i;
2718
2719	for (i = 0; i < adapter->num_tx_queues; i++)
2720		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2721}
2722
2723/**
2724 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2725 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2726 *
2727 * Returns 0 on success, negative on failure
2728 **/
2729int igb_setup_rx_resources(struct igb_ring *rx_ring)
2730{
2731	struct device *dev = rx_ring->dev;
2732	int orig_node = dev_to_node(dev);
2733	int size, desc_len;
2734
2735	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2736	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2737	if (!rx_ring->rx_buffer_info)
2738		rx_ring->rx_buffer_info = vzalloc(size);
2739	if (!rx_ring->rx_buffer_info)
2740		goto err;
2741
2742	desc_len = sizeof(union e1000_adv_rx_desc);
2743
2744	/* Round up to nearest 4K */
2745	rx_ring->size = rx_ring->count * desc_len;
2746	rx_ring->size = ALIGN(rx_ring->size, 4096);
2747
2748	set_dev_node(dev, rx_ring->numa_node);
2749	rx_ring->desc = dma_alloc_coherent(dev,
2750					   rx_ring->size,
2751					   &rx_ring->dma,
2752					   GFP_KERNEL);
2753	set_dev_node(dev, orig_node);
2754	if (!rx_ring->desc)
2755		rx_ring->desc = dma_alloc_coherent(dev,
2756						   rx_ring->size,
2757						   &rx_ring->dma,
2758						   GFP_KERNEL);
2759
2760	if (!rx_ring->desc)
2761		goto err;
2762
2763	rx_ring->next_to_clean = 0;
2764	rx_ring->next_to_use = 0;
2765
2766	return 0;
2767
2768err:
2769	vfree(rx_ring->rx_buffer_info);
2770	rx_ring->rx_buffer_info = NULL;
2771	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2772		" ring\n");
2773	return -ENOMEM;
2774}
2775
2776/**
2777 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2778 *				  (Descriptors) for all queues
2779 * @adapter: board private structure
2780 *
2781 * Return 0 on success, negative on failure
2782 **/
2783static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2784{
2785	struct pci_dev *pdev = adapter->pdev;
2786	int i, err = 0;
2787
2788	for (i = 0; i < adapter->num_rx_queues; i++) {
2789		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2790		if (err) {
2791			dev_err(&pdev->dev,
2792				"Allocation for Rx Queue %u failed\n", i);
2793			for (i--; i >= 0; i--)
2794				igb_free_rx_resources(adapter->rx_ring[i]);
2795			break;
2796		}
2797	}
2798
2799	return err;
2800}
2801
2802/**
2803 * igb_setup_mrqc - configure the multiple receive queue control registers
2804 * @adapter: Board private structure
2805 **/
2806static void igb_setup_mrqc(struct igb_adapter *adapter)
2807{
2808	struct e1000_hw *hw = &adapter->hw;
2809	u32 mrqc, rxcsum;
2810	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2811	union e1000_reta {
2812		u32 dword;
2813		u8  bytes[4];
2814	} reta;
2815	static const u8 rsshash[40] = {
2816		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2817		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2818		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2819		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2820
2821	/* Fill out hash function seeds */
2822	for (j = 0; j < 10; j++) {
2823		u32 rsskey = rsshash[(j * 4)];
2824		rsskey |= rsshash[(j * 4) + 1] << 8;
2825		rsskey |= rsshash[(j * 4) + 2] << 16;
2826		rsskey |= rsshash[(j * 4) + 3] << 24;
2827		array_wr32(E1000_RSSRK(0), j, rsskey);
2828	}
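	/* Example: the first iteration packs rsshash[0..3] (0x6d 0x5a 0x56
	 * 0xda) into RSSRK(0) as the little-endian word 0xda565a6d.
	 */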
2829
2830	num_rx_queues = adapter->rss_queues;
2831
2832	if (adapter->vfs_allocated_count) {
2833		/* 82575 and 82576 support 2 RSS queues for VMDq */
2834		switch (hw->mac.type) {
2835		case e1000_i350:
2836		case e1000_82580:
2837			num_rx_queues = 1;
2838			shift = 0;
2839			break;
2840		case e1000_82576:
2841			shift = 3;
2842			num_rx_queues = 2;
2843			break;
2844		case e1000_82575:
2845			shift = 2;
2846			shift2 = 6;
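			/* Fall through */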
2847		default:
2848			break;
2849		}
2850	} else {
2851		if (hw->mac.type == e1000_82575)
2852			shift = 6;
2853	}
2854
2855	for (j = 0; j < (32 * 4); j++) {
2856		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2857		if (shift2)
2858			reta.bytes[j & 3] |= num_rx_queues << shift2;
2859		if ((j & 3) == 3)
2860			wr32(E1000_RETA(j >> 2), reta.dword);
2861	}
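	/* Example: with no VFs and num_rx_queues = 4 (shift = 0), the four
	 * bytes of each RETA register cycle through queues 0-3, spreading the
	 * 128 indirection-table entries evenly across the Rx queues.
	 */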
2862
2863	/*
2864	 * Disable raw packet checksumming so that RSS hash is placed in
2865	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2866	 * offloads as they are enabled by default
2867	 */
2868	rxcsum = rd32(E1000_RXCSUM);
2869	rxcsum |= E1000_RXCSUM_PCSD;
2870
2871	if (adapter->hw.mac.type >= e1000_82576)
2872		/* Enable Receive Checksum Offload for SCTP */
2873		rxcsum |= E1000_RXCSUM_CRCOFL;
2874
2875	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2876	wr32(E1000_RXCSUM, rxcsum);
2877	/*
2878	 * Generate RSS hash based on TCP port numbers and/or
2879	 * IPv4/v6 src and dst addresses since UDP cannot be
2880	 * hashed reliably due to IP fragmentation
2881	 */
2882
2883	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2884	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
2885	       E1000_MRQC_RSS_FIELD_IPV6 |
2886	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
2887	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2888
2889	/* If VMDq is enabled then we set the appropriate mode for that, else
2890	 * we default to RSS so that an RSS hash is calculated per packet even
2891	 * if we are only using one queue */
2892	if (adapter->vfs_allocated_count) {
2893		if (hw->mac.type > e1000_82575) {
2894			/* Set the default pool for the PF's first queue */
2895			u32 vtctl = rd32(E1000_VT_CTL);
2896			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2897				   E1000_VT_CTL_DISABLE_DEF_POOL);
2898			vtctl |= adapter->vfs_allocated_count <<
2899				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2900			wr32(E1000_VT_CTL, vtctl);
2901		}
2902		if (adapter->rss_queues > 1)
2903			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2904		else
2905			mrqc |= E1000_MRQC_ENABLE_VMDQ;
2906	} else {
2907		if (hw->mac.type != e1000_i211)
2908			mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
2909	}
2910	igb_vmm_control(adapter);
2911
2912	wr32(E1000_MRQC, mrqc);
2913}
2914
2915/**
2916 * igb_setup_rctl - configure the receive control registers
2917 * @adapter: Board private structure
2918 **/
2919void igb_setup_rctl(struct igb_adapter *adapter)
2920{
2921	struct e1000_hw *hw = &adapter->hw;
2922	u32 rctl;
2923
2924	rctl = rd32(E1000_RCTL);
2925
2926	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2927	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2928
2929	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2930		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2931
2932	/*
2933	 * enable stripping of CRC. It's unlikely this will break BMC
2934	 * redirection as it did with e1000. Newer features require
2935	 * that the HW strips the CRC.
2936	 */
2937	rctl |= E1000_RCTL_SECRC;
2938
2939	/* disable store bad packets and clear size bits. */
2940	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2941
2942	/* enable LPE to prevent packets larger than max_frame_size */
2943	rctl |= E1000_RCTL_LPE;
2944
2945	/* disable queue 0 to prevent tail write w/o re-config */
2946	wr32(E1000_RXDCTL(0), 0);
2947
2948	/* Attention!!!  For SR-IOV PF driver operations you must enable
2949	 * queue drop for all VF and PF queues to prevent head of line blocking
2950	 * if an un-trusted VF does not provide descriptors to hardware.
2951	 */
2952	if (adapter->vfs_allocated_count) {
2953		/* set all queue drop enable bits */
2954		wr32(E1000_QDE, ALL_QUEUES);
2955	}
2956
2957	/* This is useful for sniffing bad packets. */
2958	if (adapter->netdev->features & NETIF_F_RXALL) {
2959		/* UPE and MPE will be handled by normal PROMISC logic
2960		 * in igb_set_rx_mode */
2961		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
2962			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
2963			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
2964
2965		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
2966			  E1000_RCTL_DPF | /* Allow filtered pause */
2967			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
2968		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
2969		 * and that breaks VLANs.
2970		 */
2971	}
2972
2973	wr32(E1000_RCTL, rctl);
2974}
2975
2976static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2977                                   int vfn)
2978{
2979	struct e1000_hw *hw = &adapter->hw;
2980	u32 vmolr;
2981
2982	/* if this is a VF pool, check whether VLANs are enabled for it and,
2983	 * if so, increase the size to make room for the VLAN tag */
2984	if (vfn < adapter->vfs_allocated_count &&
2985	    adapter->vf_data[vfn].vlans_enabled)
2986		size += VLAN_TAG_SIZE;
2987
2988	vmolr = rd32(E1000_VMOLR(vfn));
2989	vmolr &= ~E1000_VMOLR_RLPML_MASK;
2990	vmolr |= size | E1000_VMOLR_LPE;
2991	wr32(E1000_VMOLR(vfn), vmolr);
2992
2993	return 0;
2994}
2995
2996/**
2997 * igb_rlpml_set - set maximum receive packet size
2998 * @adapter: board private structure
2999 *
3000 * Configure maximum receivable packet size.
3001 **/
3002static void igb_rlpml_set(struct igb_adapter *adapter)
3003{
3004	u32 max_frame_size = adapter->max_frame_size;
3005	struct e1000_hw *hw = &adapter->hw;
3006	u16 pf_id = adapter->vfs_allocated_count;
3007
3008	if (pf_id) {
3009		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3010		/*
3011		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3012		 * to our max jumbo frame size, in case we need to enable
3013		 * jumbo frames on one of the rings later.
3014		 * This will not pass over-length frames into the default
3015		 * queue because it's gated by the VMOLR.RLPML.
3016		 */
3017		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3018	}
3019
3020	wr32(E1000_RLPML, max_frame_size);
3021}
3022
3023static inline void igb_set_vmolr(struct igb_adapter *adapter,
3024				 int vfn, bool aupe)
3025{
3026	struct e1000_hw *hw = &adapter->hw;
3027	u32 vmolr;
3028
3029	/*
3030	 * This register only exists on 82576 and newer, so on older parts
3031	 * exit and do nothing
3032	 */
3033	if (hw->mac.type < e1000_82576)
3034		return;
3035
3036	vmolr = rd32(E1000_VMOLR(vfn));
3037	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3038	if (aupe)
3039		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3040	else
3041		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3042
3043	/* clear all bits that might not be set */
3044	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3045
3046	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3047		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3048	/*
3049	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3050	 * multicast packets
3051	 */
3052	if (vfn <= adapter->vfs_allocated_count)
3053		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3054
3055	wr32(E1000_VMOLR(vfn), vmolr);
3056}
3057
3058/**
3059 * igb_configure_rx_ring - Configure a receive ring after Reset
3060 * @adapter: board private structure
3061 * @ring: receive ring to be configured
3062 *
3063 * Configure the Rx unit of the MAC after a reset.
3064 **/
3065void igb_configure_rx_ring(struct igb_adapter *adapter,
3066                           struct igb_ring *ring)
3067{
3068	struct e1000_hw *hw = &adapter->hw;
3069	u64 rdba = ring->dma;
3070	int reg_idx = ring->reg_idx;
3071	u32 srrctl = 0, rxdctl = 0;
3072
3073	/* disable the queue */
3074	wr32(E1000_RXDCTL(reg_idx), 0);
3075
3076	/* Set DMA base address registers */
3077	wr32(E1000_RDBAL(reg_idx),
3078	     rdba & 0x00000000ffffffffULL);
3079	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3080	wr32(E1000_RDLEN(reg_idx),
3081	               ring->count * sizeof(union e1000_adv_rx_desc));
3082
3083	/* initialize head and tail */
3084	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3085	wr32(E1000_RDH(reg_idx), 0);
3086	writel(0, ring->tail);
3087
3088	/* set descriptor configuration */
3089	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3090#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3091	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3092#else
3093	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3094#endif
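	/* Example: assuming 4 KiB pages, each descriptor gets a half-page
	 * (2 KiB) packet buffer plus an IGB_RX_HDR_LEN header buffer used
	 * for header split.
	 */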
3095	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3096	if (hw->mac.type >= e1000_82580)
3097		srrctl |= E1000_SRRCTL_TIMESTAMP;
3098	/* Only set Drop Enable if we are supporting multiple queues */
3099	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3100		srrctl |= E1000_SRRCTL_DROP_EN;
3101
3102	wr32(E1000_SRRCTL(reg_idx), srrctl);
3103
3104	/* set filtering for VMDQ pools */
3105	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3106
3107	rxdctl |= IGB_RX_PTHRESH;
3108	rxdctl |= IGB_RX_HTHRESH << 8;
3109	rxdctl |= IGB_RX_WTHRESH << 16;
3110
3111	/* enable receive descriptor fetching */
3112	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3113	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3114}
3115
3116/**
3117 * igb_configure_rx - Configure receive Unit after Reset
3118 * @adapter: board private structure
3119 *
3120 * Configure the Rx unit of the MAC after a reset.
3121 **/
3122static void igb_configure_rx(struct igb_adapter *adapter)
3123{
3124	int i;
3125
3126	/* set UTA to appropriate mode */
3127	igb_set_uta(adapter);
3128
3129	/* set the correct pool for the PF default MAC address in entry 0 */
3130	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3131	                 adapter->vfs_allocated_count);
3132
3133	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3134	 * the Base and Length of the Rx Descriptor Ring */
3135	for (i = 0; i < adapter->num_rx_queues; i++)
3136		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3137}
3138
3139/**
3140 * igb_free_tx_resources - Free Tx Resources per Queue
3141 * @tx_ring: Tx descriptor ring for a specific queue
3142 *
3143 * Free all transmit software resources
3144 **/
3145void igb_free_tx_resources(struct igb_ring *tx_ring)
3146{
3147	igb_clean_tx_ring(tx_ring);
3148
3149	vfree(tx_ring->tx_buffer_info);
3150	tx_ring->tx_buffer_info = NULL;
3151
3152	/* if not set, then don't free */
3153	if (!tx_ring->desc)
3154		return;
3155
3156	dma_free_coherent(tx_ring->dev, tx_ring->size,
3157			  tx_ring->desc, tx_ring->dma);
3158
3159	tx_ring->desc = NULL;
3160}
3161
3162/**
3163 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3164 * @adapter: board private structure
3165 *
3166 * Free all transmit software resources
3167 **/
3168static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3169{
3170	int i;
3171
3172	for (i = 0; i < adapter->num_tx_queues; i++)
3173		igb_free_tx_resources(adapter->tx_ring[i]);
3174}
3175
3176void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3177				    struct igb_tx_buffer *tx_buffer)
3178{
3179	if (tx_buffer->skb) {
3180		dev_kfree_skb_any(tx_buffer->skb);
3181		if (tx_buffer->dma)
3182			dma_unmap_single(ring->dev,
3183					 tx_buffer->dma,
3184					 tx_buffer->length,
3185					 DMA_TO_DEVICE);
3186	} else if (tx_buffer->dma) {
3187		dma_unmap_page(ring->dev,
3188			       tx_buffer->dma,
3189			       tx_buffer->length,
3190			       DMA_TO_DEVICE);
3191	}
3192	tx_buffer->next_to_watch = NULL;
3193	tx_buffer->skb = NULL;
3194	tx_buffer->dma = 0;
3195	/* buffer_info must be completely set up in the transmit path */
3196}
3197
3198/**
3199 * igb_clean_tx_ring - Free Tx Buffers
3200 * @tx_ring: ring to be cleaned
3201 **/
3202static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3203{
3204	struct igb_tx_buffer *buffer_info;
3205	unsigned long size;
3206	u16 i;
3207
3208	if (!tx_ring->tx_buffer_info)
3209		return;
3210	/* Free all the Tx ring sk_buffs */
3211
3212	for (i = 0; i < tx_ring->count; i++) {
3213		buffer_info = &tx_ring->tx_buffer_info[i];
3214		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3215	}
3216
3217	netdev_tx_reset_queue(txring_txq(tx_ring));
3218
3219	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3220	memset(tx_ring->tx_buffer_info, 0, size);
3221
3222	/* Zero out the descriptor ring */
3223	memset(tx_ring->desc, 0, tx_ring->size);
3224
3225	tx_ring->next_to_use = 0;
3226	tx_ring->next_to_clean = 0;
3227}
3228
3229/**
3230 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3231 * @adapter: board private structure
3232 **/
3233static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3234{
3235	int i;
3236
3237	for (i = 0; i < adapter->num_tx_queues; i++)
3238		igb_clean_tx_ring(adapter->tx_ring[i]);
3239}
3240
3241/**
3242 * igb_free_rx_resources - Free Rx Resources
3243 * @rx_ring: ring to clean the resources from
3244 *
3245 * Free all receive software resources
3246 **/
3247void igb_free_rx_resources(struct igb_ring *rx_ring)
3248{
3249	igb_clean_rx_ring(rx_ring);
3250
3251	vfree(rx_ring->rx_buffer_info);
3252	rx_ring->rx_buffer_info = NULL;
3253
3254	/* if not set, then don't free */
3255	if (!rx_ring->desc)
3256		return;
3257
3258	dma_free_coherent(rx_ring->dev, rx_ring->size,
3259			  rx_ring->desc, rx_ring->dma);
3260
3261	rx_ring->desc = NULL;
3262}
3263
3264/**
3265 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3266 * @adapter: board private structure
3267 *
3268 * Free all receive software resources
3269 **/
3270static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3271{
3272	int i;
3273
3274	for (i = 0; i < adapter->num_rx_queues; i++)
3275		igb_free_rx_resources(adapter->rx_ring[i]);
3276}
3277
3278/**
3279 * igb_clean_rx_ring - Free Rx Buffers per Queue
3280 * @rx_ring: ring to free buffers from
3281 **/
3282static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3283{
3284	unsigned long size;
3285	u16 i;
3286
3287	if (!rx_ring->rx_buffer_info)
3288		return;
3289
3290	/* Free all the Rx ring sk_buffs */
3291	for (i = 0; i < rx_ring->count; i++) {
3292		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3293		if (buffer_info->dma) {
3294			dma_unmap_single(rx_ring->dev,
3295			                 buffer_info->dma,
3296					 IGB_RX_HDR_LEN,
3297					 DMA_FROM_DEVICE);
3298			buffer_info->dma = 0;
3299		}
3300
3301		if (buffer_info->skb) {
3302			dev_kfree_skb(buffer_info->skb);
3303			buffer_info->skb = NULL;
3304		}
3305		if (buffer_info->page_dma) {
3306			dma_unmap_page(rx_ring->dev,
3307			               buffer_info->page_dma,
3308				       PAGE_SIZE / 2,
3309				       DMA_FROM_DEVICE);
3310			buffer_info->page_dma = 0;
3311		}
3312		if (buffer_info->page) {
3313			put_page(buffer_info->page);
3314			buffer_info->page = NULL;
3315			buffer_info->page_offset = 0;
3316		}
3317	}
3318
3319	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3320	memset(rx_ring->rx_buffer_info, 0, size);
3321
3322	/* Zero out the descriptor ring */
3323	memset(rx_ring->desc, 0, rx_ring->size);
3324
3325	rx_ring->next_to_clean = 0;
3326	rx_ring->next_to_use = 0;
3327}
3328
3329/**
3330 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3331 * @adapter: board private structure
3332 **/
3333static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3334{
3335	int i;
3336
3337	for (i = 0; i < adapter->num_rx_queues; i++)
3338		igb_clean_rx_ring(adapter->rx_ring[i]);
3339}
3340
3341/**
3342 * igb_set_mac - Change the Ethernet Address of the NIC
3343 * @netdev: network interface device structure
3344 * @p: pointer to an address structure
3345 *
3346 * Returns 0 on success, negative on failure
3347 **/
3348static int igb_set_mac(struct net_device *netdev, void *p)
3349{
3350	struct igb_adapter *adapter = netdev_priv(netdev);
3351	struct e1000_hw *hw = &adapter->hw;
3352	struct sockaddr *addr = p;
3353
3354	if (!is_valid_ether_addr(addr->sa_data))
3355		return -EADDRNOTAVAIL;
3356
3357	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3358	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3359
3360	/* set the correct pool for the new PF MAC address in entry 0 */
3361	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3362	                 adapter->vfs_allocated_count);
3363
3364	return 0;
3365}
3366
3367/**
3368 * igb_write_mc_addr_list - write multicast addresses to MTA
3369 * @netdev: network interface device structure
3370 *
3371 * Writes multicast address list to the MTA hash table.
3372 * Returns: -ENOMEM on failure
3373 *                0 on no addresses written
3374 *                X on writing X addresses to MTA
3375 **/
3376static int igb_write_mc_addr_list(struct net_device *netdev)
3377{
3378	struct igb_adapter *adapter = netdev_priv(netdev);
3379	struct e1000_hw *hw = &adapter->hw;
3380	struct netdev_hw_addr *ha;
3381	u8  *mta_list;
3382	int i;
3383
3384	if (netdev_mc_empty(netdev)) {
3385		/* nothing to program, so clear mc list */
3386		igb_update_mc_addr_list(hw, NULL, 0);
3387		igb_restore_vf_multicasts(adapter);
3388		return 0;
3389	}
3390
3391	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3392	if (!mta_list)
3393		return -ENOMEM;
3394
3395	/* The shared function expects a packed array of only addresses. */
3396	i = 0;
3397	netdev_for_each_mc_addr(ha, netdev)
3398		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3399
3400	igb_update_mc_addr_list(hw, mta_list, i);
3401	kfree(mta_list);
3402
3403	return netdev_mc_count(netdev);
3404}
3405
3406/**
3407 * igb_write_uc_addr_list - write unicast addresses to RAR table
3408 * @netdev: network interface device structure
3409 *
3410 * Writes unicast address list to the RAR table.
3411 * Returns: -ENOMEM on failure/insufficient address space
3412 *                0 on no addresses written
3413 *                X on writing X addresses to the RAR table
3414 **/
3415static int igb_write_uc_addr_list(struct net_device *netdev)
3416{
3417	struct igb_adapter *adapter = netdev_priv(netdev);
3418	struct e1000_hw *hw = &adapter->hw;
3419	unsigned int vfn = adapter->vfs_allocated_count;
3420	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
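	/* Example: on a part with 24 receive address registers and 7 VFs,
	 * 24 - (7 + 1) = 16 entries remain for additional unicast filters.
	 */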
3421	int count = 0;
3422
3423	/* return ENOMEM indicating insufficient memory for addresses */
3424	if (netdev_uc_count(netdev) > rar_entries)
3425		return -ENOMEM;
3426
3427	if (!netdev_uc_empty(netdev) && rar_entries) {
3428		struct netdev_hw_addr *ha;
3429
3430		netdev_for_each_uc_addr(ha, netdev) {
3431			if (!rar_entries)
3432				break;
3433			igb_rar_set_qsel(adapter, ha->addr,
3434			                 rar_entries--,
3435			                 vfn);
3436			count++;
3437		}
3438	}
3439	/* write the addresses in reverse order to avoid write combining */
3440	for (; rar_entries > 0 ; rar_entries--) {
3441		wr32(E1000_RAH(rar_entries), 0);
3442		wr32(E1000_RAL(rar_entries), 0);
3443	}
3444	wrfl();
3445
3446	return count;
3447}
3448
3449/**
3450 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3451 * @netdev: network interface device structure
3452 *
3453 * The set_rx_mode entry point is called whenever the unicast or multicast
3454 * address lists or the network interface flags are updated.  This routine is
3455 * responsible for configuring the hardware for proper unicast, multicast,
3456 * promiscuous mode, and all-multi behavior.
3457 **/
3458static void igb_set_rx_mode(struct net_device *netdev)
3459{
3460	struct igb_adapter *adapter = netdev_priv(netdev);
3461	struct e1000_hw *hw = &adapter->hw;
3462	unsigned int vfn = adapter->vfs_allocated_count;
3463	u32 rctl, vmolr = 0;
3464	int count;
3465
3466	/* Check for Promiscuous and All Multicast modes */
3467	rctl = rd32(E1000_RCTL);
3468
3469	/* clear the effected bits */
3470	/* clear the affected bits */
3471
3472	if (netdev->flags & IFF_PROMISC) {
3473		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3474		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3475	} else {
3476		if (netdev->flags & IFF_ALLMULTI) {
3477			rctl |= E1000_RCTL_MPE;
3478			vmolr |= E1000_VMOLR_MPME;
3479		} else {
3480			/*
3481			 * Write addresses to the MTA; if the attempt fails,
3482			 * just turn on multicast promiscuous mode so that we
3483			 * can at least receive multicast traffic
3484			 */
3485			count = igb_write_mc_addr_list(netdev);
3486			if (count < 0) {
3487				rctl |= E1000_RCTL_MPE;
3488				vmolr |= E1000_VMOLR_MPME;
3489			} else if (count) {
3490				vmolr |= E1000_VMOLR_ROMPE;
3491			}
3492		}
3493		/*
3494		 * Write addresses to the available RAR registers; if there is
3495		 * not sufficient space to store all the addresses, enable
3496		 * unicast promiscuous mode
3497		 */
3498		count = igb_write_uc_addr_list(netdev);
3499		if (count < 0) {
3500			rctl |= E1000_RCTL_UPE;
3501			vmolr |= E1000_VMOLR_ROPE;
3502		}
3503		rctl |= E1000_RCTL_VFE;
3504	}
3505	wr32(E1000_RCTL, rctl);
3506
3507	/*
3508	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3509	 * the VMOLR to enable the appropriate modes.  Without this workaround
3510	 * we will have issues with VLAN tag stripping not being done for frames
3511	 * that are only arriving because we are the default pool
3512	 */
3513	if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3514		return;
3515
3516	vmolr |= rd32(E1000_VMOLR(vfn)) &
3517	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3518	wr32(E1000_VMOLR(vfn), vmolr);
3519	igb_restore_vf_multicasts(adapter);
3520}
3521
3522static void igb_check_wvbr(struct igb_adapter *adapter)
3523{
3524	struct e1000_hw *hw = &adapter->hw;
3525	u32 wvbr = 0;
3526
3527	switch (hw->mac.type) {
3528	case e1000_82576:
3529	case e1000_i350:
3530		if (!(wvbr = rd32(E1000_WVBR)))
3531			return;
3532		break;
3533	default:
3534		break;
3535	}
3536
3537	adapter->wvbr |= wvbr;
3538}
3539
3540#define IGB_STAGGERED_QUEUE_OFFSET 8
3541
3542static void igb_spoof_check(struct igb_adapter *adapter)
3543{
3544	int j;
3545
3546	if (!adapter->wvbr)
3547		return;
3548
3549	for(j = 0; j < adapter->vfs_allocated_count; j++) {
3550		if (adapter->wvbr & (1 << j) ||
3551		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3552			dev_warn(&adapter->pdev->dev,
3553				"Spoof event(s) detected on VF %d\n", j);
3554			adapter->wvbr &=
3555				~((1 << j) |
3556				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3557		}
3558	}
3559}
3560
3561/* Need to wait a few seconds after link up to get diagnostic information from
3562 * the phy */
3563static void igb_update_phy_info(unsigned long data)
3564{
3565	struct igb_adapter *adapter = (struct igb_adapter *) data;
3566	igb_get_phy_info(&adapter->hw);
3567}
3568
3569/**
3570 * igb_has_link - check shared code for link and determine up/down
3571 * @adapter: pointer to driver private info
3572 **/
3573bool igb_has_link(struct igb_adapter *adapter)
3574{
3575	struct e1000_hw *hw = &adapter->hw;
3576	bool link_active = false;
3577	s32 ret_val = 0;
3578
3579	/* get_link_status is set on LSC (link status) interrupt or
3580	 * rx sequence error interrupt.  get_link_status will stay
3581	 * true until e1000_check_for_link establishes link
3582	 * for copper adapters ONLY
3583	 */
3584	switch (hw->phy.media_type) {
3585	case e1000_media_type_copper:
3586		if (hw->mac.get_link_status) {
3587			ret_val = hw->mac.ops.check_for_link(hw);
3588			link_active = !hw->mac.get_link_status;
3589		} else {
3590			link_active = true;
3591		}
3592		break;
3593	case e1000_media_type_internal_serdes:
3594		ret_val = hw->mac.ops.check_for_link(hw);
3595		link_active = hw->mac.serdes_has_link;
3596		break;
3597	default:
3598	case e1000_media_type_unknown:
3599		break;
3600	}
3601
3602	return link_active;
3603}
3604
3605static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3606{
3607	bool ret = false;
3608	u32 ctrl_ext, thstat;
3609
3610	/* check for thermal sensor event on i350 copper only */
3611	if (hw->mac.type == e1000_i350) {
3612		thstat = rd32(E1000_THSTAT);
3613		ctrl_ext = rd32(E1000_CTRL_EXT);
3614
3615		if ((hw->phy.media_type == e1000_media_type_copper) &&
3616		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3617			ret = !!(thstat & event);
3618		}
3619	}
3620
3621	return ret;
3622}
3623
3624/**
3625 * igb_watchdog - Timer Call-back
3626 * @data: pointer to adapter cast into an unsigned long
3627 **/
3628static void igb_watchdog(unsigned long data)
3629{
3630	struct igb_adapter *adapter = (struct igb_adapter *)data;
3631	/* Do the rest outside of interrupt context */
3632	schedule_work(&adapter->watchdog_task);
3633}
3634
3635static void igb_watchdog_task(struct work_struct *work)
3636{
3637	struct igb_adapter *adapter = container_of(work,
3638	                                           struct igb_adapter,
3639                                                   watchdog_task);
3640	struct e1000_hw *hw = &adapter->hw;
3641	struct net_device *netdev = adapter->netdev;
3642	u32 link;
3643	int i;
3644
3645	link = igb_has_link(adapter);
3646	if (link) {
3647		/* Cancel scheduled suspend requests. */
3648		pm_runtime_resume(netdev->dev.parent);
3649
3650		if (!netif_carrier_ok(netdev)) {
3651			u32 ctrl;
3652			hw->mac.ops.get_speed_and_duplex(hw,
3653			                                 &adapter->link_speed,
3654			                                 &adapter->link_duplex);
3655
3656			ctrl = rd32(E1000_CTRL);
3657			/* Link status message must follow this format */
3658			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3659			       "Duplex, Flow Control: %s\n",
3660			       netdev->name,
3661			       adapter->link_speed,
3662			       adapter->link_duplex == FULL_DUPLEX ?
3663			       "Full" : "Half",
3664			       (ctrl & E1000_CTRL_TFCE) &&
3665			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3666			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3667			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3668
3669			/* check for thermal sensor event */
3670			if (igb_thermal_sensor_event(hw,
3671			    E1000_THSTAT_LINK_THROTTLE)) {
3672				netdev_info(netdev, "The network adapter link "
3673					    "speed was downshifted because it "
3674					    "overheated\n");
3675			}
3676
3677			/* adjust timeout factor according to speed/duplex */
3678			adapter->tx_timeout_factor = 1;
3679			switch (adapter->link_speed) {
3680			case SPEED_10:
3681				adapter->tx_timeout_factor = 14;
3682				break;
3683			case SPEED_100:
3684				/* maybe add some timeout factor ? */
3685				break;
3686			}
3687
3688			netif_carrier_on(netdev);
3689
3690			igb_ping_all_vfs(adapter);
3691			igb_check_vf_rate_limit(adapter);
3692
3693			/* link state has changed, schedule phy info update */
3694			if (!test_bit(__IGB_DOWN, &adapter->state))
3695				mod_timer(&adapter->phy_info_timer,
3696					  round_jiffies(jiffies + 2 * HZ));
3697		}
3698	} else {
3699		if (netif_carrier_ok(netdev)) {
3700			adapter->link_speed = 0;
3701			adapter->link_duplex = 0;
3702
3703			/* check for thermal sensor event */
3704			if (igb_thermal_sensor_event(hw,
3705			    E1000_THSTAT_PWR_DOWN)) {
3706				netdev_err(netdev, "The network adapter was "
3707					   "stopped because it overheated\n");
3708			}
3709
3710			/* Link status message must follow this format */
3711			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3712			       netdev->name);
3713			netif_carrier_off(netdev);
3714
3715			igb_ping_all_vfs(adapter);
3716
3717			/* link state has changed, schedule phy info update */
3718			if (!test_bit(__IGB_DOWN, &adapter->state))
3719				mod_timer(&adapter->phy_info_timer,
3720					  round_jiffies(jiffies + 2 * HZ));
3721
3722			pm_schedule_suspend(netdev->dev.parent,
3723					    MSEC_PER_SEC * 5);
3724		}
3725	}
3726
3727	spin_lock(&adapter->stats64_lock);
3728	igb_update_stats(adapter, &adapter->stats64);
3729	spin_unlock(&adapter->stats64_lock);
3730
3731	for (i = 0; i < adapter->num_tx_queues; i++) {
3732		struct igb_ring *tx_ring = adapter->tx_ring[i];
3733		if (!netif_carrier_ok(netdev)) {
3734			/* We've lost link, so the controller stops DMA,
3735			 * but we've got queued Tx work that's never going
3736			 * to get done, so reset controller to flush Tx.
3737			 * (Do the reset outside of interrupt context). */
3738			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3739				adapter->tx_timeout_count++;
3740				schedule_work(&adapter->reset_task);
3741				/* return immediately since reset is imminent */
3742				return;
3743			}
3744		}
3745
3746		/* Force detection of hung controller every watchdog period */
3747		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3748	}
3749
3750	/* Cause software interrupt to ensure rx ring is cleaned */
3751	if (adapter->msix_entries) {
3752		u32 eics = 0;
3753		for (i = 0; i < adapter->num_q_vectors; i++)
3754			eics |= adapter->q_vector[i]->eims_value;
3755		wr32(E1000_EICS, eics);
3756	} else {
3757		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3758	}
3759
3760	igb_spoof_check(adapter);
3761
3762	/* Reset the timer */
3763	if (!test_bit(__IGB_DOWN, &adapter->state))
3764		mod_timer(&adapter->watchdog_timer,
3765			  round_jiffies(jiffies + 2 * HZ));
3766}
3767
3768enum latency_range {
3769	lowest_latency = 0,
3770	low_latency = 1,
3771	bulk_latency = 2,
3772	latency_invalid = 255
3773};
3774
3775/**
3776 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3777 *
3778 *      Stores a new ITR value based strictly on packet size.  This
3779 *      algorithm is less sophisticated than that used in igb_update_itr,
3780 *      due to the difficulty of synchronizing statistics across multiple
3781 *      receive rings.  The divisors and thresholds used by this function
3782 *      were determined based on theoretical maximum wire speed and testing
3783 *      data, in order to minimize response time while increasing bulk
3784 *      throughput.
3785 *      This functionality is controlled by the InterruptThrottleRate module
3786 *      parameter (see igb_param.c)
3787 *      NOTE:  This function is called only when operating in a multiqueue
3788 *             receive environment.
3789 * @q_vector: pointer to q_vector
3790 **/
3791static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3792{
3793	int new_val = q_vector->itr_val;
3794	int avg_wire_size = 0;
3795	struct igb_adapter *adapter = q_vector->adapter;
3796	unsigned int packets;
3797
3798	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3799	 * ints/sec - ITR timer value of 120 ticks.
3800	 */
3801	if (adapter->link_speed != SPEED_1000) {
3802		new_val = IGB_4K_ITR;
3803		goto set_itr_val;
3804	}
3805
3806	packets = q_vector->rx.total_packets;
3807	if (packets)
3808		avg_wire_size = q_vector->rx.total_bytes / packets;
3809
3810	packets = q_vector->tx.total_packets;
3811	if (packets)
3812		avg_wire_size = max_t(u32, avg_wire_size,
3813				      q_vector->tx.total_bytes / packets);
3814
3815	/* if avg_wire_size isn't set no work was done */
3816	if (!avg_wire_size)
3817		goto clear_counts;
3818
3819	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3820	avg_wire_size += 24;
3821
3822	/* Don't starve jumbo frames */
3823	avg_wire_size = min(avg_wire_size, 3000);
3824
3825	/* Give a little boost to mid-size frames */
3826	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3827		new_val = avg_wire_size / 3;
3828	else
3829		new_val = avg_wire_size / 2;
3830
3831	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3832	if (new_val < IGB_20K_ITR &&
3833	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3834	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3835		new_val = IGB_20K_ITR;
3836
3837set_itr_val:
3838	if (new_val != q_vector->itr_val) {
3839		q_vector->itr_val = new_val;
3840		q_vector->set_itr = 1;
3841	}
3842clear_counts:
3843	q_vector->rx.total_bytes = 0;
3844	q_vector->rx.total_packets = 0;
3845	q_vector->tx.total_bytes = 0;
3846	q_vector->tx.total_packets = 0;
3847}
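
/*
 * Worked example for the sizing logic above (illustrative numbers only):
 * an average payload of 1000 bytes becomes avg_wire_size = 1024 after the
 * 24 byte CRC/preamble/gap adjustment, lands in the 300-1200 "mid-size"
 * band and yields new_val = 1024 / 3 = 341.  Minimum-sized 60 byte frames
 * give (60 + 24) / 2 = 42, i.e. a much shorter interrupt interval, unless
 * conservative mode (itr 3) raises the result back up to IGB_20K_ITR.
 */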
3848
3849/**
3850 * igb_update_itr - update the dynamic ITR value based on statistics
3851 *      Stores a new ITR value based on packets and byte
3852 *      counts during the last interrupt.  The advantage of per interrupt
3853 *      computation is faster updates and more accurate ITR for the current
3854 *      traffic pattern.  Constants in this function were computed
3855 *      based on theoretical maximum wire speed and thresholds were set based
3856 *      on testing data as well as attempting to minimize response time
3857 *      while increasing bulk throughput.
3858 *      This functionality is controlled by the InterruptThrottleRate module
3859 *      parameter (see igb_param.c)
3860 *      NOTE:  These calculations are only valid when operating in a single-
3861 *             queue environment.
3862 * @q_vector: pointer to q_vector
3863 * @ring_container: ring info to update the itr for
3864 **/
3865static void igb_update_itr(struct igb_q_vector *q_vector,
3866			   struct igb_ring_container *ring_container)
3867{
3868	unsigned int packets = ring_container->total_packets;
3869	unsigned int bytes = ring_container->total_bytes;
3870	u8 itrval = ring_container->itr;
3871
3872	/* no packets, exit with status unchanged */
3873	if (packets == 0)
3874		return;
3875
3876	switch (itrval) {
3877	case lowest_latency:
3878		/* handle TSO and jumbo frames */
3879		if (bytes/packets > 8000)
3880			itrval = bulk_latency;
3881		else if ((packets < 5) && (bytes > 512))
3882			itrval = low_latency;
3883		break;
3884	case low_latency:  /* 50 usec aka 20000 ints/s */
3885		if (bytes > 10000) {
3886			/* this if handles the TSO accounting */
3887			if (bytes/packets > 8000) {
3888				itrval = bulk_latency;
3889			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3890				itrval = bulk_latency;
3891			} else if (packets > 35) {
3892				itrval = lowest_latency;
3893			}
3894		} else if (bytes/packets > 2000) {
3895			itrval = bulk_latency;
3896		} else if (packets <= 2 && bytes < 512) {
3897			itrval = lowest_latency;
3898		}
3899		break;
3900	case bulk_latency: /* 250 usec aka 4000 ints/s */
3901		if (bytes > 25000) {
3902			if (packets > 35)
3903				itrval = low_latency;
3904		} else if (bytes < 1500) {
3905			itrval = low_latency;
3906		}
3907		break;
3908	}
3909
3910	/* clear work counters since we have the values we need */
3911	ring_container->total_bytes = 0;
3912	ring_container->total_packets = 0;
3913
3914	/* write updated itr to ring container */
3915	ring_container->itr = itrval;
3916}
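
/*
 * Worked example for the thresholds above (illustrative numbers only): a
 * ring container in low_latency that saw 20 packets / 30000 bytes has
 * bytes > 10000 and bytes/packets = 1500 > 1200, so it is demoted to
 * bulk_latency; one that saw 50 packets / 15000 bytes instead takes the
 * "packets > 35" branch and is promoted to lowest_latency.
 */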
3917
3918static void igb_set_itr(struct igb_q_vector *q_vector)
3919{
3920	struct igb_adapter *adapter = q_vector->adapter;
3921	u32 new_itr = q_vector->itr_val;
3922	u8 current_itr = 0;
3923
3924	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3925	if (adapter->link_speed != SPEED_1000) {
3926		current_itr = 0;
3927		new_itr = IGB_4K_ITR;
3928		goto set_itr_now;
3929	}
3930
3931	igb_update_itr(q_vector, &q_vector->tx);
3932	igb_update_itr(q_vector, &q_vector->rx);
3933
3934	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3935
3936	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3937	if (current_itr == lowest_latency &&
3938	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3939	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3940		current_itr = low_latency;
3941
3942	switch (current_itr) {
3943	/* counts and packets in update_itr are dependent on these numbers */
3944	case lowest_latency:
3945		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3946		break;
3947	case low_latency:
3948		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3949		break;
3950	case bulk_latency:
3951		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3952		break;
3953	default:
3954		break;
3955	}
3956
3957set_itr_now:
3958	if (new_itr != q_vector->itr_val) {
3959		/* this attempts to bias the interrupt rate towards Bulk
3960		 * by adding intermediate steps when interrupt rate is
3961		 * increasing */
3962		new_itr = new_itr > q_vector->itr_val ?
3963		             max((new_itr * q_vector->itr_val) /
3964		                 (new_itr + (q_vector->itr_val >> 2)),
3965				 new_itr) :
3966			     new_itr;
3967		/* Don't write the value here; it resets the adapter's
3968		 * internal timer, and causes us to delay far longer than
3969		 * we should between interrupts.  Instead, we write the ITR
3970		 * value at the beginning of the next interrupt so the timing
3971		 * ends up being correct.
3972		 */
3973		q_vector->itr_val = new_itr;
3974		q_vector->set_itr = 1;
3975	}
3976}
3977
3978static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3979			    u32 type_tucmd, u32 mss_l4len_idx)
3980{
3981	struct e1000_adv_tx_context_desc *context_desc;
3982	u16 i = tx_ring->next_to_use;
3983
3984	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3985
3986	i++;
3987	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3988
3989	/* set bits to identify this as an advanced context descriptor */
3990	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3991
3992	/* For 82575, context index must be unique per ring. */
3993	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3994		mss_l4len_idx |= tx_ring->reg_idx << 4;
3995
3996	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
3997	context_desc->seqnum_seed	= 0;
3998	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
3999	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4000}
4001
4002static int igb_tso(struct igb_ring *tx_ring,
4003		   struct igb_tx_buffer *first,
4004		   u8 *hdr_len)
4005{
4006	struct sk_buff *skb = first->skb;
4007	u32 vlan_macip_lens, type_tucmd;
4008	u32 mss_l4len_idx, l4len;
4009
4010	if (!skb_is_gso(skb))
4011		return 0;
4012
4013	if (skb_header_cloned(skb)) {
4014		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4015		if (err)
4016			return err;
4017	}
4018
4019	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4020	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4021
4022	if (first->protocol == __constant_htons(ETH_P_IP)) {
4023		struct iphdr *iph = ip_hdr(skb);
4024		iph->tot_len = 0;
4025		iph->check = 0;
4026		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4027							 iph->daddr, 0,
4028							 IPPROTO_TCP,
4029							 0);
4030		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4031		first->tx_flags |= IGB_TX_FLAGS_TSO |
4032				   IGB_TX_FLAGS_CSUM |
4033				   IGB_TX_FLAGS_IPV4;
4034	} else if (skb_is_gso_v6(skb)) {
4035		ipv6_hdr(skb)->payload_len = 0;
4036		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4037						       &ipv6_hdr(skb)->daddr,
4038						       0, IPPROTO_TCP, 0);
4039		first->tx_flags |= IGB_TX_FLAGS_TSO |
4040				   IGB_TX_FLAGS_CSUM;
4041	}
4042
4043	/* compute header lengths */
4044	l4len = tcp_hdrlen(skb);
4045	*hdr_len = skb_transport_offset(skb) + l4len;
4046
4047	/* update gso size and bytecount with header size */
4048	first->gso_segs = skb_shinfo(skb)->gso_segs;
4049	first->bytecount += (first->gso_segs - 1) * *hdr_len;
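	/*
	 * Illustrative example (hypothetical numbers): a TSO skb that splits
	 * into gso_segs = 4 segments with hdr_len = 54 bytes of
	 * Ethernet/IP/TCP headers has its header replicated three more times
	 * on the wire, so bytecount grows by 3 * 54 = 162 bytes here.
	 */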
4050
4051	/* MSS L4LEN IDX */
4052	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4053	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4054
4055	/* VLAN MACLEN IPLEN */
4056	vlan_macip_lens = skb_network_header_len(skb);
4057	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4058	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4059
4060	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4061
4062	return 1;
4063}
4064
4065static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4066{
4067	struct sk_buff *skb = first->skb;
4068	u32 vlan_macip_lens = 0;
4069	u32 mss_l4len_idx = 0;
4070	u32 type_tucmd = 0;
4071
4072	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4073		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4074			return;
4075	} else {
4076		u8 l4_hdr = 0;
4077		switch (first->protocol) {
4078		case __constant_htons(ETH_P_IP):
4079			vlan_macip_lens |= skb_network_header_len(skb);
4080			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4081			l4_hdr = ip_hdr(skb)->protocol;
4082			break;
4083		case __constant_htons(ETH_P_IPV6):
4084			vlan_macip_lens |= skb_network_header_len(skb);
4085			l4_hdr = ipv6_hdr(skb)->nexthdr;
4086			break;
4087		default:
4088			if (unlikely(net_ratelimit())) {
4089				dev_warn(tx_ring->dev,
4090				 "partial checksum but proto=%x!\n",
4091				 first->protocol);
4092			}
4093			break;
4094		}
4095
4096		switch (l4_hdr) {
4097		case IPPROTO_TCP:
4098			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4099			mss_l4len_idx = tcp_hdrlen(skb) <<
4100					E1000_ADVTXD_L4LEN_SHIFT;
4101			break;
4102		case IPPROTO_SCTP:
4103			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4104			mss_l4len_idx = sizeof(struct sctphdr) <<
4105					E1000_ADVTXD_L4LEN_SHIFT;
4106			break;
4107		case IPPROTO_UDP:
4108			mss_l4len_idx = sizeof(struct udphdr) <<
4109					E1000_ADVTXD_L4LEN_SHIFT;
4110			break;
4111		default:
4112			if (unlikely(net_ratelimit())) {
4113				dev_warn(tx_ring->dev,
4114				 "partial checksum but l4 proto=%x!\n",
4115				 l4_hdr);
4116			}
4117			break;
4118		}
4119
4120		/* update TX checksum flag */
4121		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4122	}
4123
4124	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4125	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4126
4127	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4128}
4129
4130static __le32 igb_tx_cmd_type(u32 tx_flags)
4131{
4132	/* set type for advanced descriptor with frame checksum insertion */
4133	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4134				      E1000_ADVTXD_DCMD_IFCS |
4135				      E1000_ADVTXD_DCMD_DEXT);
4136
4137	/* set HW vlan bit if vlan is present */
4138	if (tx_flags & IGB_TX_FLAGS_VLAN)
4139		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4140
4141	/* set timestamp bit if present */
4142	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4143		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4144
4145	/* set segmentation bits for TSO */
4146	if (tx_flags & IGB_TX_FLAGS_TSO)
4147		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4148
4149	return cmd_type;
4150}
4151
4152static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4153				 union e1000_adv_tx_desc *tx_desc,
4154				 u32 tx_flags, unsigned int paylen)
4155{
4156	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4157
4158	/* 82575 requires a unique index per ring if any offload is enabled */
4159	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4160	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4161		olinfo_status |= tx_ring->reg_idx << 4;
4162
4163	/* insert L4 checksum */
4164	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4165		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4166
4167		/* insert IPv4 checksum */
4168		if (tx_flags & IGB_TX_FLAGS_IPV4)
4169			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4170	}
4171
4172	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4173}
4174
4175/*
4176 * The largest size we can write to the descriptor is 65535.  In order to
4177 * maintain a power of two alignment we have to limit ourselves to 32K.
4178 */
4179#define IGB_MAX_TXD_PWR	15
4180#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
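
/*
 * Illustrative example: a 45 KB (46080 byte) linear buffer is therefore
 * split across two data descriptors by igb_tx_map() below -- one carrying
 * IGB_MAX_DATA_PER_TXD (32768) bytes and one carrying the remaining 13312
 * bytes -- even though a single descriptor could nominally address 65535.
 */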
4181
4182static void igb_tx_map(struct igb_ring *tx_ring,
4183		       struct igb_tx_buffer *first,
4184		       const u8 hdr_len)
4185{
4186	struct sk_buff *skb = first->skb;
4187	struct igb_tx_buffer *tx_buffer_info;
4188	union e1000_adv_tx_desc *tx_desc;
4189	dma_addr_t dma;
4190	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4191	unsigned int data_len = skb->data_len;
4192	unsigned int size = skb_headlen(skb);
4193	unsigned int paylen = skb->len - hdr_len;
4194	__le32 cmd_type;
4195	u32 tx_flags = first->tx_flags;
4196	u16 i = tx_ring->next_to_use;
4197
4198	tx_desc = IGB_TX_DESC(tx_ring, i);
4199
4200	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4201	cmd_type = igb_tx_cmd_type(tx_flags);
4202
4203	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4204	if (dma_mapping_error(tx_ring->dev, dma))
4205		goto dma_error;
4206
4207	/* record length, and DMA address */
4208	first->length = size;
4209	first->dma = dma;
4210	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4211
4212	for (;;) {
4213		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4214			tx_desc->read.cmd_type_len =
4215				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4216
4217			i++;
4218			tx_desc++;
4219			if (i == tx_ring->count) {
4220				tx_desc = IGB_TX_DESC(tx_ring, 0);
4221				i = 0;
4222			}
4223
4224			dma += IGB_MAX_DATA_PER_TXD;
4225			size -= IGB_MAX_DATA_PER_TXD;
4226
4227			tx_desc->read.olinfo_status = 0;
4228			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4229		}
4230
4231		if (likely(!data_len))
4232			break;
4233
4234		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4235
4236		i++;
4237		tx_desc++;
4238		if (i == tx_ring->count) {
4239			tx_desc = IGB_TX_DESC(tx_ring, 0);
4240			i = 0;
4241		}
4242
4243		size = skb_frag_size(frag);
4244		data_len -= size;
4245
4246		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4247				   size, DMA_TO_DEVICE);
4248		if (dma_mapping_error(tx_ring->dev, dma))
4249			goto dma_error;
4250
4251		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4252		tx_buffer_info->length = size;
4253		tx_buffer_info->dma = dma;
4254
4255		tx_desc->read.olinfo_status = 0;
4256		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4257
4258		frag++;
4259	}
4260
4261	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4262
4263	/* write last descriptor with RS and EOP bits */
4264	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4265	if (unlikely(skb->no_fcs))
4266		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4267	tx_desc->read.cmd_type_len = cmd_type;
4268
4269	/* set the timestamp */
4270	first->time_stamp = jiffies;
4271
4272	/*
4273	 * Force memory writes to complete before letting h/w know there
4274	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4275	 * memory model archs, such as IA-64).
4276	 *
4277	 * We also need this memory barrier to make certain all of the
4278	 * status bits have been updated before next_to_watch is written.
4279	 */
4280	wmb();
4281
4282	/* set next_to_watch value indicating a packet is present */
4283	first->next_to_watch = tx_desc;
4284
4285	i++;
4286	if (i == tx_ring->count)
4287		i = 0;
4288
4289	tx_ring->next_to_use = i;
4290
4291	writel(i, tx_ring->tail);
4292
4293	/* we need this if more than one processor can write to our tail
4294	 * at a time; it synchronizes IO on IA64/Altix systems */
4295	mmiowb();
4296
4297	return;
4298
4299dma_error:
4300	dev_err(tx_ring->dev, "TX DMA map failed\n");
4301
4302	/* clear dma mappings for failed tx_buffer_info map */
4303	for (;;) {
4304		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4305		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4306		if (tx_buffer_info == first)
4307			break;
4308		if (i == 0)
4309			i = tx_ring->count;
4310		i--;
4311	}
4312
4313	tx_ring->next_to_use = i;
4314}
4315
4316static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4317{
4318	struct net_device *netdev = tx_ring->netdev;
4319
4320	netif_stop_subqueue(netdev, tx_ring->queue_index);
4321
4322	/* Herbert's original patch had:
4323	 *  smp_mb__after_netif_stop_queue();
4324	 * but since that doesn't exist yet, just open code it. */
4325	smp_mb();
4326
4327	/* We need to check again in case another CPU has just
4328	 * made room available. */
4329	if (igb_desc_unused(tx_ring) < size)
4330		return -EBUSY;
4331
4332	/* A reprieve! */
4333	netif_wake_subqueue(netdev, tx_ring->queue_index);
4334
4335	u64_stats_update_begin(&tx_ring->tx_syncp2);
4336	tx_ring->tx_stats.restart_queue2++;
4337	u64_stats_update_end(&tx_ring->tx_syncp2);
4338
4339	return 0;
4340}
4341
4342static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4343{
4344	if (igb_desc_unused(tx_ring) >= size)
4345		return 0;
4346	return __igb_maybe_stop_tx(tx_ring, size);
4347}
4348
4349netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4350				struct igb_ring *tx_ring)
4351{
4352	struct igb_tx_buffer *first;
4353	int tso;
4354	u32 tx_flags = 0;
4355	__be16 protocol = vlan_get_protocol(skb);
4356	u8 hdr_len = 0;
4357
4358	/* need: 1 descriptor per page,
4359	 *       + 2 desc gap to keep tail from touching head,
4360	 *       + 1 desc for skb->data,
4361	 *       + 1 desc for context descriptor,
4362	 * otherwise try next time */
4363	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4364		/* this is a hard error */
4365		return NETDEV_TX_BUSY;
4366	}
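
	/*
	 * Worked example of the reservation above (illustrative): an skb
	 * with 3 page fragments asks for 3 + 4 = 7 free descriptors -- one
	 * per fragment, one for skb->data, one for a possible context
	 * descriptor, and a two descriptor gap between tail and head.
	 */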
4367
4368	/* record the location of the first descriptor for this packet */
4369	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4370	first->skb = skb;
4371	first->bytecount = skb->len;
4372	first->gso_segs = 1;
4373
4374	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4375		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4376		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4377	}
4378
4379	if (vlan_tx_tag_present(skb)) {
4380		tx_flags |= IGB_TX_FLAGS_VLAN;
4381		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4382	}
4383
4384	/* record initial flags and protocol */
4385	first->tx_flags = tx_flags;
4386	first->protocol = protocol;
4387
4388	tso = igb_tso(tx_ring, first, &hdr_len);
4389	if (tso < 0)
4390		goto out_drop;
4391	else if (!tso)
4392		igb_tx_csum(tx_ring, first);
4393
4394	igb_tx_map(tx_ring, first, hdr_len);
4395
4396	/* Make sure there is space in the ring for the next send. */
4397	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4398
4399	return NETDEV_TX_OK;
4400
4401out_drop:
4402	igb_unmap_and_free_tx_resource(tx_ring, first);
4403
4404	return NETDEV_TX_OK;
4405}
4406
4407static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4408						    struct sk_buff *skb)
4409{
4410	unsigned int r_idx = skb->queue_mapping;
4411
4412	if (r_idx >= adapter->num_tx_queues)
4413		r_idx = r_idx % adapter->num_tx_queues;
4414
4415	return adapter->tx_ring[r_idx];
4416}
4417
4418static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4419				  struct net_device *netdev)
4420{
4421	struct igb_adapter *adapter = netdev_priv(netdev);
4422
4423	if (test_bit(__IGB_DOWN, &adapter->state)) {
4424		dev_kfree_skb_any(skb);
4425		return NETDEV_TX_OK;
4426	}
4427
4428	if (skb->len <= 0) {
4429		dev_kfree_skb_any(skb);
4430		return NETDEV_TX_OK;
4431	}
4432
4433	/*
4434	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4435	 * in order to meet this minimum size requirement.
4436	 */
4437	if (skb->len < 17) {
4438		if (skb_padto(skb, 17))
4439			return NETDEV_TX_OK;
4440		skb->len = 17;
4441	}
4442
4443	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4444}
4445
4446/**
4447 * igb_tx_timeout - Respond to a Tx Hang
4448 * @netdev: network interface device structure
4449 **/
4450static void igb_tx_timeout(struct net_device *netdev)
4451{
4452	struct igb_adapter *adapter = netdev_priv(netdev);
4453	struct e1000_hw *hw = &adapter->hw;
4454
4455	/* Do the reset outside of interrupt context */
4456	adapter->tx_timeout_count++;
4457
4458	if (hw->mac.type >= e1000_82580)
4459		hw->dev_spec._82575.global_device_reset = true;
4460
4461	schedule_work(&adapter->reset_task);
4462	wr32(E1000_EICS,
4463	     (adapter->eims_enable_mask & ~adapter->eims_other));
4464}
4465
4466static void igb_reset_task(struct work_struct *work)
4467{
4468	struct igb_adapter *adapter;
4469	adapter = container_of(work, struct igb_adapter, reset_task);
4470
4471	igb_dump(adapter);
4472	netdev_err(adapter->netdev, "Reset adapter\n");
4473	igb_reinit_locked(adapter);
4474}
4475
4476/**
4477 * igb_get_stats64 - Get System Network Statistics
4478 * @netdev: network interface device structure
4479 * @stats: rtnl_link_stats64 pointer
4480 *
4481 **/
4482static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4483						 struct rtnl_link_stats64 *stats)
4484{
4485	struct igb_adapter *adapter = netdev_priv(netdev);
4486
4487	spin_lock(&adapter->stats64_lock);
4488	igb_update_stats(adapter, &adapter->stats64);
4489	memcpy(stats, &adapter->stats64, sizeof(*stats));
4490	spin_unlock(&adapter->stats64_lock);
4491
4492	return stats;
4493}
4494
4495/**
4496 * igb_change_mtu - Change the Maximum Transfer Unit
4497 * @netdev: network interface device structure
4498 * @new_mtu: new value for maximum frame size
4499 *
4500 * Returns 0 on success, negative on failure
4501 **/
4502static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4503{
4504	struct igb_adapter *adapter = netdev_priv(netdev);
4505	struct pci_dev *pdev = adapter->pdev;
4506	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4507
4508	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4509		dev_err(&pdev->dev, "Invalid MTU setting\n");
4510		return -EINVAL;
4511	}
4512
4513#define MAX_STD_JUMBO_FRAME_SIZE 9238
4514	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4515		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4516		return -EINVAL;
4517	}
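
	/*
	 * Illustrative arithmetic: the headroom added to new_mtu above is
	 * ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) = 22 bytes, so a
	 * standard 1500 byte MTU gives max_frame = 1522, and
	 * MAX_STD_JUMBO_FRAME_SIZE (9238) corresponds to the 9216 byte MTU
	 * limit quoted in the error message above.
	 */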
4518
4519	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4520		msleep(1);
4521
4522	/* igb_down has a dependency on max_frame_size */
4523	adapter->max_frame_size = max_frame;
4524
4525	if (netif_running(netdev))
4526		igb_down(adapter);
4527
4528	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4529		 netdev->mtu, new_mtu);
4530	netdev->mtu = new_mtu;
4531
4532	if (netif_running(netdev))
4533		igb_up(adapter);
4534	else
4535		igb_reset(adapter);
4536
4537	clear_bit(__IGB_RESETTING, &adapter->state);
4538
4539	return 0;
4540}
4541
4542/**
4543 * igb_update_stats - Update the board statistics counters
4544 * @adapter: board private structure
4545 **/
4546
4547void igb_update_stats(struct igb_adapter *adapter,
4548		      struct rtnl_link_stats64 *net_stats)
4549{
4550	struct e1000_hw *hw = &adapter->hw;
4551	struct pci_dev *pdev = adapter->pdev;
4552	u32 reg, mpc;
4553	u16 phy_tmp;
4554	int i;
4555	u64 bytes, packets;
4556	unsigned int start;
4557	u64 _bytes, _packets;
4558
4559#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4560
4561	/*
4562	 * Prevent stats update while adapter is being reset, or if the pci
4563	 * connection is down.
4564	 */
4565	if (adapter->link_speed == 0)
4566		return;
4567	if (pci_channel_offline(pdev))
4568		return;
4569
4570	bytes = 0;
4571	packets = 0;
4572	for (i = 0; i < adapter->num_rx_queues; i++) {
4573		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4574		struct igb_ring *ring = adapter->rx_ring[i];
4575
4576		ring->rx_stats.drops += rqdpc_tmp;
4577		net_stats->rx_fifo_errors += rqdpc_tmp;
4578
4579		do {
4580			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4581			_bytes = ring->rx_stats.bytes;
4582			_packets = ring->rx_stats.packets;
4583		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4584		bytes += _bytes;
4585		packets += _packets;
4586	}
4587
4588	net_stats->rx_bytes = bytes;
4589	net_stats->rx_packets = packets;
4590
4591	bytes = 0;
4592	packets = 0;
4593	for (i = 0; i < adapter->num_tx_queues; i++) {
4594		struct igb_ring *ring = adapter->tx_ring[i];
4595		do {
4596			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4597			_bytes = ring->tx_stats.bytes;
4598			_packets = ring->tx_stats.packets;
4599		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4600		bytes += _bytes;
4601		packets += _packets;
4602	}
4603	net_stats->tx_bytes = bytes;
4604	net_stats->tx_packets = packets;
4605
4606	/* read stats registers */
4607	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4608	adapter->stats.gprc += rd32(E1000_GPRC);
4609	adapter->stats.gorc += rd32(E1000_GORCL);
4610	rd32(E1000_GORCH); /* clear GORCL */
4611	adapter->stats.bprc += rd32(E1000_BPRC);
4612	adapter->stats.mprc += rd32(E1000_MPRC);
4613	adapter->stats.roc += rd32(E1000_ROC);
4614
4615	adapter->stats.prc64 += rd32(E1000_PRC64);
4616	adapter->stats.prc127 += rd32(E1000_PRC127);
4617	adapter->stats.prc255 += rd32(E1000_PRC255);
4618	adapter->stats.prc511 += rd32(E1000_PRC511);
4619	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4620	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4621	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4622	adapter->stats.sec += rd32(E1000_SEC);
4623
4624	mpc = rd32(E1000_MPC);
4625	adapter->stats.mpc += mpc;
4626	net_stats->rx_fifo_errors += mpc;
4627	adapter->stats.scc += rd32(E1000_SCC);
4628	adapter->stats.ecol += rd32(E1000_ECOL);
4629	adapter->stats.mcc += rd32(E1000_MCC);
4630	adapter->stats.latecol += rd32(E1000_LATECOL);
4631	adapter->stats.dc += rd32(E1000_DC);
4632	adapter->stats.rlec += rd32(E1000_RLEC);
4633	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4634	adapter->stats.xontxc += rd32(E1000_XONTXC);
4635	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4636	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4637	adapter->stats.fcruc += rd32(E1000_FCRUC);
4638	adapter->stats.gptc += rd32(E1000_GPTC);
4639	adapter->stats.gotc += rd32(E1000_GOTCL);
4640	rd32(E1000_GOTCH); /* clear GOTCL */
4641	adapter->stats.rnbc += rd32(E1000_RNBC);
4642	adapter->stats.ruc += rd32(E1000_RUC);
4643	adapter->stats.rfc += rd32(E1000_RFC);
4644	adapter->stats.rjc += rd32(E1000_RJC);
4645	adapter->stats.tor += rd32(E1000_TORH);
4646	adapter->stats.tot += rd32(E1000_TOTH);
4647	adapter->stats.tpr += rd32(E1000_TPR);
4648
4649	adapter->stats.ptc64 += rd32(E1000_PTC64);
4650	adapter->stats.ptc127 += rd32(E1000_PTC127);
4651	adapter->stats.ptc255 += rd32(E1000_PTC255);
4652	adapter->stats.ptc511 += rd32(E1000_PTC511);
4653	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4654	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4655
4656	adapter->stats.mptc += rd32(E1000_MPTC);
4657	adapter->stats.bptc += rd32(E1000_BPTC);
4658
4659	adapter->stats.tpt += rd32(E1000_TPT);
4660	adapter->stats.colc += rd32(E1000_COLC);
4661
4662	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4663	/* read internal phy specific stats */
4664	reg = rd32(E1000_CTRL_EXT);
4665	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4666		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4667		adapter->stats.tncrs += rd32(E1000_TNCRS);
4668	}
4669
4670	adapter->stats.tsctc += rd32(E1000_TSCTC);
4671	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4672
4673	adapter->stats.iac += rd32(E1000_IAC);
4674	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4675	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4676	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4677	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4678	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4679	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4680	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4681	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4682
4683	/* Fill out the OS statistics structure */
4684	net_stats->multicast = adapter->stats.mprc;
4685	net_stats->collisions = adapter->stats.colc;
4686
4687	/* Rx Errors */
4688
4689	/* RLEC on some newer hardware can be incorrect so build
4690	 * our own version based on RUC and ROC */
4691	net_stats->rx_errors = adapter->stats.rxerrc +
4692		adapter->stats.crcerrs + adapter->stats.algnerrc +
4693		adapter->stats.ruc + adapter->stats.roc +
4694		adapter->stats.cexterr;
4695	net_stats->rx_length_errors = adapter->stats.ruc +
4696				      adapter->stats.roc;
4697	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4698	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4699	net_stats->rx_missed_errors = adapter->stats.mpc;
4700
4701	/* Tx Errors */
4702	net_stats->tx_errors = adapter->stats.ecol +
4703			       adapter->stats.latecol;
4704	net_stats->tx_aborted_errors = adapter->stats.ecol;
4705	net_stats->tx_window_errors = adapter->stats.latecol;
4706	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4707
4708	/* Tx Dropped needs to be maintained elsewhere */
4709
4710	/* Phy Stats */
4711	if (hw->phy.media_type == e1000_media_type_copper) {
4712		if ((adapter->link_speed == SPEED_1000) &&
4713		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4714			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4715			adapter->phy_stats.idle_errors += phy_tmp;
4716		}
4717	}
4718
4719	/* Management Stats */
4720	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4721	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4722	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4723
4724	/* OS2BMC Stats */
4725	reg = rd32(E1000_MANC);
4726	if (reg & E1000_MANC_EN_BMC2OS) {
4727		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4728		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4729		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4730		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4731	}
4732}
4733
4734static irqreturn_t igb_msix_other(int irq, void *data)
4735{
4736	struct igb_adapter *adapter = data;
4737	struct e1000_hw *hw = &adapter->hw;
4738	u32 icr = rd32(E1000_ICR);
4739	/* reading ICR causes bit 31 of EICR to be cleared */
4740
4741	if (icr & E1000_ICR_DRSTA)
4742		schedule_work(&adapter->reset_task);
4743
4744	if (icr & E1000_ICR_DOUTSYNC) {
4745		/* HW is reporting DMA is out of sync */
4746		adapter->stats.doosync++;
4747		/* The DMA Out of Sync is also an indication of a spoof event
4748		 * in IOV mode. Check the Wrong VM Behavior register to
4749		 * see if it is really a spoof event. */
4750		igb_check_wvbr(adapter);
4751	}
4752
4753	/* Check for a mailbox event */
4754	if (icr & E1000_ICR_VMMB)
4755		igb_msg_task(adapter);
4756
4757	if (icr & E1000_ICR_LSC) {
4758		hw->mac.get_link_status = 1;
4759		/* guard against interrupt when we're going down */
4760		if (!test_bit(__IGB_DOWN, &adapter->state))
4761			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4762	}
4763
4764	wr32(E1000_EIMS, adapter->eims_other);
4765
4766	return IRQ_HANDLED;
4767}
4768
4769static void igb_write_itr(struct igb_q_vector *q_vector)
4770{
4771	struct igb_adapter *adapter = q_vector->adapter;
4772	u32 itr_val = q_vector->itr_val & 0x7FFC;
4773
4774	if (!q_vector->set_itr)
4775		return;
4776
4777	if (!itr_val)
4778		itr_val = 0x4;
4779
4780	if (adapter->hw.mac.type == e1000_82575)
4781		itr_val |= itr_val << 16;
4782	else
4783		itr_val |= E1000_EITR_CNT_IGNR;
4784
4785	writel(itr_val, q_vector->itr_register);
4786	q_vector->set_itr = 0;
4787}
4788
4789static irqreturn_t igb_msix_ring(int irq, void *data)
4790{
4791	struct igb_q_vector *q_vector = data;
4792
4793	/* Write the ITR value calculated from the previous interrupt. */
4794	igb_write_itr(q_vector);
4795
4796	napi_schedule(&q_vector->napi);
4797
4798	return IRQ_HANDLED;
4799}
4800
4801#ifdef CONFIG_IGB_DCA
4802static void igb_update_dca(struct igb_q_vector *q_vector)
4803{
4804	struct igb_adapter *adapter = q_vector->adapter;
4805	struct e1000_hw *hw = &adapter->hw;
4806	int cpu = get_cpu();
4807
4808	if (q_vector->cpu == cpu)
4809		goto out_no_update;
4810
4811	if (q_vector->tx.ring) {
4812		int q = q_vector->tx.ring->reg_idx;
4813		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4814		if (hw->mac.type == e1000_82575) {
4815			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4816			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4817		} else {
4818			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4819			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4820			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4821		}
4822		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4823		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4824	}
4825	if (q_vector->rx.ring) {
4826		int q = q_vector->rx.ring->reg_idx;
4827		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4828		if (hw->mac.type == e1000_82575) {
4829			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4830			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4831		} else {
4832			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4833			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4834			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4835		}
4836		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4837		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4838		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4839		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4840	}
4841	q_vector->cpu = cpu;
4842out_no_update:
4843	put_cpu();
4844}
4845
4846static void igb_setup_dca(struct igb_adapter *adapter)
4847{
4848	struct e1000_hw *hw = &adapter->hw;
4849	int i;
4850
4851	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4852		return;
4853
4854	/* Always use CB2 mode; the difference is masked in the CB driver. */
4855	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4856
4857	for (i = 0; i < adapter->num_q_vectors; i++) {
4858		adapter->q_vector[i]->cpu = -1;
4859		igb_update_dca(adapter->q_vector[i]);
4860	}
4861}
4862
4863static int __igb_notify_dca(struct device *dev, void *data)
4864{
4865	struct net_device *netdev = dev_get_drvdata(dev);
4866	struct igb_adapter *adapter = netdev_priv(netdev);
4867	struct pci_dev *pdev = adapter->pdev;
4868	struct e1000_hw *hw = &adapter->hw;
4869	unsigned long event = *(unsigned long *)data;
4870
4871	switch (event) {
4872	case DCA_PROVIDER_ADD:
4873		/* if already enabled, don't do it again */
4874		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4875			break;
4876		if (dca_add_requester(dev) == 0) {
4877			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4878			dev_info(&pdev->dev, "DCA enabled\n");
4879			igb_setup_dca(adapter);
4880			break;
4881		}
4882		/* Fall Through since DCA is disabled. */
4883	case DCA_PROVIDER_REMOVE:
4884		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4885			/* without this a class_device is left
4886			 * hanging around in the sysfs model */
4887			dca_remove_requester(dev);
4888			dev_info(&pdev->dev, "DCA disabled\n");
4889			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4890			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4891		}
4892		break;
4893	}
4894
4895	return 0;
4896}
4897
4898static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4899                          void *p)
4900{
4901	int ret_val;
4902
4903	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4904	                                 __igb_notify_dca);
4905
4906	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4907}
4908#endif /* CONFIG_IGB_DCA */
4909
4910#ifdef CONFIG_PCI_IOV
4911static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4912{
4913	unsigned char mac_addr[ETH_ALEN];
4914	struct pci_dev *pdev = adapter->pdev;
4915	struct e1000_hw *hw = &adapter->hw;
4916	struct pci_dev *pvfdev;
4917	unsigned int device_id;
4918	u16 thisvf_devfn;
4919
4920	random_ether_addr(mac_addr);
4921	igb_set_vf_mac(adapter, vf, mac_addr);
4922
4923	switch (adapter->hw.mac.type) {
4924	case e1000_82576:
4925		device_id = IGB_82576_VF_DEV_ID;
4926		/* VF Stride for 82576 is 2 */
4927		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4928			(pdev->devfn & 1);
4929		break;
4930	case e1000_i350:
4931		device_id = IGB_I350_VF_DEV_ID;
4932		/* VF Stride for I350 is 4 */
4933		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4934				(pdev->devfn & 3);
4935		break;
4936	default:
4937		device_id = 0;
4938		thisvf_devfn = 0;
4939		break;
4940	}
4941
4942	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4943	while (pvfdev) {
4944		if (pvfdev->devfn == thisvf_devfn)
4945			break;
4946		pvfdev = pci_get_device(hw->vendor_id,
4947					device_id, pvfdev);
4948	}
4949
4950	if (pvfdev)
4951		adapter->vf_data[vf].vfdev = pvfdev;
4952	else
4953		dev_err(&pdev->dev,
4954			"Couldn't find pci dev ptr for VF %4.4x\n",
4955			thisvf_devfn);
4956	return pvfdev != NULL;
4957}
4958
4959static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4960{
4961	struct e1000_hw *hw = &adapter->hw;
4962	struct pci_dev *pdev = adapter->pdev;
4963	struct pci_dev *pvfdev;
4964	u16 vf_devfn = 0;
4965	u16 vf_stride;
4966	unsigned int device_id;
4967	int vfs_found = 0;
4968
4969	switch (adapter->hw.mac.type) {
4970	case e1000_82576:
4971		device_id = IGB_82576_VF_DEV_ID;
4972		/* VF Stride for 82576 is 2 */
4973		vf_stride = 2;
4974		break;
4975	case e1000_i350:
4976		device_id = IGB_I350_VF_DEV_ID;
4977		/* VF Stride for I350 is 4 */
4978		vf_stride = 4;
4979		break;
4980	default:
4981		device_id = 0;
4982		vf_stride = 0;
4983		break;
4984	}
4985
4986	vf_devfn = pdev->devfn + 0x80;
4987	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4988	while (pvfdev) {
4989		if (pvfdev->devfn == vf_devfn &&
4990		    (pvfdev->bus->number >= pdev->bus->number))
4991			vfs_found++;
4992		vf_devfn += vf_stride;
4993		pvfdev = pci_get_device(hw->vendor_id,
4994					device_id, pvfdev);
4995	}
4996
4997	return vfs_found;
4998}
4999
5000static int igb_check_vf_assignment(struct igb_adapter *adapter)
5001{
5002	int i;
5003	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5004		if (adapter->vf_data[i].vfdev) {
5005			if (adapter->vf_data[i].vfdev->dev_flags &
5006			    PCI_DEV_FLAGS_ASSIGNED)
5007				return true;
5008		}
5009	}
5010	return false;
5011}
5012
5013#endif
5014static void igb_ping_all_vfs(struct igb_adapter *adapter)
5015{
5016	struct e1000_hw *hw = &adapter->hw;
5017	u32 ping;
5018	int i;
5019
5020	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5021		ping = E1000_PF_CONTROL_MSG;
5022		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5023			ping |= E1000_VT_MSGTYPE_CTS;
5024		igb_write_mbx(hw, &ping, 1, i);
5025	}
5026}
5027
5028static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5029{
5030	struct e1000_hw *hw = &adapter->hw;
5031	u32 vmolr = rd32(E1000_VMOLR(vf));
5032	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5033
5034	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5035	                    IGB_VF_FLAG_MULTI_PROMISC);
5036	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5037
5038	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5039		vmolr |= E1000_VMOLR_MPME;
5040		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5041		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5042	} else {
5043		/*
5044		 * if we have hashes and we are clearing a multicast promisc
5045		 * flag we need to write the hashes to the MTA as this step
5046		 * was previously skipped
5047		 */
5048		if (vf_data->num_vf_mc_hashes > 30) {
5049			vmolr |= E1000_VMOLR_MPME;
5050		} else if (vf_data->num_vf_mc_hashes) {
5051			int j;
5052			vmolr |= E1000_VMOLR_ROMPE;
5053			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5054				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5055		}
5056	}
5057
5058	wr32(E1000_VMOLR(vf), vmolr);
5059
5060	/* there are flags left unprocessed, likely not supported */
5061	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5062		return -EINVAL;
5063
5064	return 0;
5065
5066}
5067
5068static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5069				  u32 *msgbuf, u32 vf)
5070{
5071	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5072	u16 *hash_list = (u16 *)&msgbuf[1];
5073	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5074	int i;
5075
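	/*
	 * Layout note (an assumption, shown for a little-endian host): since
	 * hash_list overlays the 32-bit msgbuf array as 16-bit entries,
	 * n = 3 hashes would occupy the low and high halves of msgbuf[1]
	 * plus the low half of msgbuf[2].
	 */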
5076	/* salt away the number of multicast addresses assigned
5077	 * to this VF for later use to restore when the PF multicast
5078	 * list changes
5079	 */
5080	vf_data->num_vf_mc_hashes = n;
5081
5082	/* only up to 30 hash values supported */
5083	if (n > 30)
5084		n = 30;
5085
5086	/* store the hashes for later use */
5087	for (i = 0; i < n; i++)
5088		vf_data->vf_mc_hashes[i] = hash_list[i];
5089
5090	/* Flush and reset the mta with the new values */
5091	igb_set_rx_mode(adapter->netdev);
5092
5093	return 0;
5094}
5095
5096static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5097{
5098	struct e1000_hw *hw = &adapter->hw;
5099	struct vf_data_storage *vf_data;
5100	int i, j;
5101
5102	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5103		u32 vmolr = rd32(E1000_VMOLR(i));
5104		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5105
5106		vf_data = &adapter->vf_data[i];
5107
5108		if ((vf_data->num_vf_mc_hashes > 30) ||
5109		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5110			vmolr |= E1000_VMOLR_MPME;
5111		} else if (vf_data->num_vf_mc_hashes) {
5112			vmolr |= E1000_VMOLR_ROMPE;
5113			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5114				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5115		}
5116		wr32(E1000_VMOLR(i), vmolr);
5117	}
5118}
5119
5120static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5121{
5122	struct e1000_hw *hw = &adapter->hw;
5123	u32 pool_mask, reg, vid;
5124	int i;
5125
5126	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5127
5128	/* Find the vlan filter for this id */
5129	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5130		reg = rd32(E1000_VLVF(i));
5131
5132		/* remove the vf from the pool */
5133		reg &= ~pool_mask;
5134
5135		/* if pool is empty then remove entry from vfta */
5136		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5137		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5138			vid = reg & E1000_VLVF_VLANID_MASK;
5139			reg = 0;
5140			igb_vfta_set(hw, vid, false);
5141		}
5142
5143		wr32(E1000_VLVF(i), reg);
5144	}
5145
5146	adapter->vf_data[vf].vlans_enabled = 0;
5147}
5148
5149static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5150{
5151	struct e1000_hw *hw = &adapter->hw;
5152	u32 reg, i;
5153
5154	/* The vlvf table only exists on 82576 hardware and newer */
5155	if (hw->mac.type < e1000_82576)
5156		return -1;
5157
5158	/* we only need to do this if VMDq is enabled */
5159	if (!adapter->vfs_allocated_count)
5160		return -1;
5161
5162	/* Find the vlan filter for this id */
5163	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5164		reg = rd32(E1000_VLVF(i));
5165		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5166		    vid == (reg & E1000_VLVF_VLANID_MASK))
5167			break;
5168	}
5169
5170	if (add) {
5171		if (i == E1000_VLVF_ARRAY_SIZE) {
5172			/* Did not find a matching VLAN ID entry that was
5173			 * enabled.  Search for a free filter entry, i.e.
5174			 * one without the enable bit set
5175			 */
5176			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5177				reg = rd32(E1000_VLVF(i));
5178				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5179					break;
5180			}
5181		}
5182		if (i < E1000_VLVF_ARRAY_SIZE) {
5183			/* Found an enabled/available entry */
5184			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5185
5186			/* if !enabled we need to set this up in vfta */
5187			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5188				/* add VID to filter table */
5189				igb_vfta_set(hw, vid, true);
5190				reg |= E1000_VLVF_VLANID_ENABLE;
5191			}
5192			reg &= ~E1000_VLVF_VLANID_MASK;
5193			reg |= vid;
5194			wr32(E1000_VLVF(i), reg);
5195
5196			/* do not modify RLPML for PF devices */
5197			if (vf >= adapter->vfs_allocated_count)
5198				return 0;
5199
5200			if (!adapter->vf_data[vf].vlans_enabled) {
5201				u32 size;
5202				reg = rd32(E1000_VMOLR(vf));
5203				size = reg & E1000_VMOLR_RLPML_MASK;
5204				size += 4;
5205				reg &= ~E1000_VMOLR_RLPML_MASK;
5206				reg |= size;
5207				wr32(E1000_VMOLR(vf), reg);
5208			}
5209
5210			adapter->vf_data[vf].vlans_enabled++;
5211		}
5212	} else {
5213		if (i < E1000_VLVF_ARRAY_SIZE) {
5214			/* remove vf from the pool */
5215			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5216			/* if pool is empty then remove entry from vfta */
5217			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5218				reg = 0;
5219				igb_vfta_set(hw, vid, false);
5220			}
5221			wr32(E1000_VLVF(i), reg);
5222
5223			/* do not modify RLPML for PF devices */
5224			if (vf >= adapter->vfs_allocated_count)
5225				return 0;
5226
5227			adapter->vf_data[vf].vlans_enabled--;
5228			if (!adapter->vf_data[vf].vlans_enabled) {
5229				u32 size;
5230				reg = rd32(E1000_VMOLR(vf));
5231				size = reg & E1000_VMOLR_RLPML_MASK;
5232				size -= 4;
5233				reg &= ~E1000_VMOLR_RLPML_MASK;
5234				reg |= size;
5235				wr32(E1000_VMOLR(vf), reg);
5236			}
5237		}
5238	}
5239	return 0;
5240}
5241
5242static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5243{
5244	struct e1000_hw *hw = &adapter->hw;
5245
5246	if (vid)
5247		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5248	else
5249		wr32(E1000_VMVIR(vf), 0);
5250}
5251
5252static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5253			       int vf, u16 vlan, u8 qos)
5254{
5255	int err = 0;
5256	struct igb_adapter *adapter = netdev_priv(netdev);
5257
5258	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5259		return -EINVAL;
5260	if (vlan || qos) {
5261		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5262		if (err)
5263			goto out;
5264		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5265		igb_set_vmolr(adapter, vf, !vlan);
5266		adapter->vf_data[vf].pf_vlan = vlan;
5267		adapter->vf_data[vf].pf_qos = qos;
5268		dev_info(&adapter->pdev->dev,
5269			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5270		if (test_bit(__IGB_DOWN, &adapter->state)) {
5271			dev_warn(&adapter->pdev->dev,
5272				 "The VF VLAN has been set,"
5273				 " but the PF device is not up.\n");
5274			dev_warn(&adapter->pdev->dev,
5275				 "Bring the PF device up before"
5276				 " attempting to use the VF device.\n");
5277		}
5278	} else {
5279		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5280				   false, vf);
5281		igb_set_vmvir(adapter, vlan, vf);
5282		igb_set_vmolr(adapter, vf, true);
5283		adapter->vf_data[vf].pf_vlan = 0;
5284		adapter->vf_data[vf].pf_qos = 0;
5285	}
5286out:
5287	return err;
5288}
5289
5290static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5291{
5292	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5293	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5294
5295	return igb_vlvf_set(adapter, vid, add, vf);
5296}
5297
5298static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5299{
5300	/* clear flags - except flag that indicates PF has set the MAC */
5301	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5302	adapter->vf_data[vf].last_nack = jiffies;
5303
5304	/* reset offloads to defaults */
5305	igb_set_vmolr(adapter, vf, true);
5306
5307	/* reset vlans for device */
5308	igb_clear_vf_vfta(adapter, vf);
5309	if (adapter->vf_data[vf].pf_vlan)
5310		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5311				    adapter->vf_data[vf].pf_vlan,
5312				    adapter->vf_data[vf].pf_qos);
5313	else
5314		igb_clear_vf_vfta(adapter, vf);
5315
5316	/* reset multicast table array for vf */
5317	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5318
5319	/* Flush and reset the mta with the new values */
5320	igb_set_rx_mode(adapter->netdev);
5321}
5322
5323static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5324{
5325	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5326
5327	/* generate a new mac address as we were hotplug removed/added */
5328	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5329		random_ether_addr(vf_mac);
5330
5331	/* process remaining reset events */
5332	igb_vf_reset(adapter, vf);
5333}
5334
5335static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5336{
5337	struct e1000_hw *hw = &adapter->hw;
5338	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5339	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5340	u32 reg, msgbuf[3];
5341	u8 *addr = (u8 *)(&msgbuf[1]);
5342
5343	/* process all the same items cleared in a function level reset */
5344	igb_vf_reset(adapter, vf);
5345
5346	/* set vf mac address */
5347	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5348
5349	/* enable transmit and receive for vf */
5350	reg = rd32(E1000_VFTE);
5351	wr32(E1000_VFTE, reg | (1 << vf));
5352	reg = rd32(E1000_VFRE);
5353	wr32(E1000_VFRE, reg | (1 << vf));
5354
5355	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5356
5357	/* reply to reset with ack and vf mac address */
5358	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5359	memcpy(addr, vf_mac, 6);
5360	igb_write_mbx(hw, msgbuf, 3, vf);
5361}
5362
5363static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5364{
5365	/*
5366	 * The VF MAC Address is stored in a packed array of bytes
5367	 * starting at the second 32 bit word of the msg array
5368	 */
5369	unsigned char *addr = (unsigned char *)&msg[1];
5370	int err = -1;
5371
5372	if (is_valid_ether_addr(addr))
5373		err = igb_set_vf_mac(adapter, vf, addr);
5374
5375	return err;
5376}
5377
5378static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5379{
5380	struct e1000_hw *hw = &adapter->hw;
5381	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5382	u32 msg = E1000_VT_MSGTYPE_NACK;
5383
5384	/* if device isn't clear to send it shouldn't be reading either */
5385	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5386	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5387		igb_write_mbx(hw, &msg, 1, vf);
5388		vf_data->last_nack = jiffies;
5389	}
5390}
5391
5392static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5393{
5394	struct pci_dev *pdev = adapter->pdev;
5395	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5396	struct e1000_hw *hw = &adapter->hw;
5397	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5398	s32 retval;
5399
5400	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5401
5402	if (retval) {
5403		/* if receive failed revoke VF CTS status and restart init */
5404		dev_err(&pdev->dev, "Error receiving message from VF\n");
5405		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5406		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5407			return;
5408		goto out;
5409	}
5410
5411	/* this is a message we already processed, do nothing */
5412	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5413		return;
5414
5415	/*
5416	 * until the vf completes a reset it should not be
5417	 * allowed to start any configuration.
5418	 */
5419
5420	if (msgbuf[0] == E1000_VF_RESET) {
5421		igb_vf_reset_msg(adapter, vf);
5422		return;
5423	}
5424
5425	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5426		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5427			return;
5428		retval = -1;
5429		goto out;
5430	}
5431
5432	switch ((msgbuf[0] & 0xFFFF)) {
5433	case E1000_VF_SET_MAC_ADDR:
5434		retval = -EINVAL;
5435		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5436			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5437		else
5438			dev_warn(&pdev->dev,
5439				 "VF %d attempted to override administratively "
5440				 "set MAC address\nReload the VF driver to "
5441				 "resume operations\n", vf);
5442		break;
5443	case E1000_VF_SET_PROMISC:
5444		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5445		break;
5446	case E1000_VF_SET_MULTICAST:
5447		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5448		break;
5449	case E1000_VF_SET_LPE:
5450		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5451		break;
5452	case E1000_VF_SET_VLAN:
5453		retval = -1;
5454		if (vf_data->pf_vlan)
5455			dev_warn(&pdev->dev,
5456				 "VF %d attempted to override administratively "
5457				 "set VLAN tag\nReload the VF driver to "
5458				 "resume operations\n", vf);
5459		else
5460			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5461		break;
5462	default:
5463		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5464		retval = -1;
5465		break;
5466	}
5467
5468	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5469out:
5470	/* notify the VF of the results of what it sent us */
5471	if (retval)
5472		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5473	else
5474		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5475
5476	igb_write_mbx(hw, msgbuf, 1, vf);
5477}
5478
5479static void igb_msg_task(struct igb_adapter *adapter)
5480{
5481	struct e1000_hw *hw = &adapter->hw;
5482	u32 vf;
5483
5484	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5485		/* process any reset requests */
5486		if (!igb_check_for_rst(hw, vf))
5487			igb_vf_reset_event(adapter, vf);
5488
5489		/* process any messages pending */
5490		if (!igb_check_for_msg(hw, vf))
5491			igb_rcv_msg_from_vf(adapter, vf);
5492
5493		/* process any acks */
5494		if (!igb_check_for_ack(hw, vf))
5495			igb_rcv_ack_from_vf(adapter, vf);
5496	}
5497}
5498
5499/**
5500 *  igb_set_uta - Set unicast filter table address
5501 *  @adapter: board private structure
5502 *
5503 *  The unicast table address is a register array of 32-bit registers.
5504 *  The table is meant to be used in a way similar to how the MTA is used;
5505 *  however, due to certain limitations in the hardware it is necessary to
5506 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5507 *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
5508 **/
5509static void igb_set_uta(struct igb_adapter *adapter)
5510{
5511	struct e1000_hw *hw = &adapter->hw;
5512	int i;
5513
5514	/* The UTA table only exists on 82576 hardware and newer */
5515	if (hw->mac.type < e1000_82576)
5516		return;
5517
5518	/* we only need to do this if VMDq is enabled */
5519	if (!adapter->vfs_allocated_count)
5520		return;
5521
5522	for (i = 0; i < hw->mac.uta_reg_count; i++)
5523		array_wr32(E1000_UTA, i, ~0);
5524}
5525
5526/**
5527 * igb_intr_msi - Interrupt Handler
5528 * @irq: interrupt number
5529 * @data: pointer to a network interface device structure
5530 **/
5531static irqreturn_t igb_intr_msi(int irq, void *data)
5532{
5533	struct igb_adapter *adapter = data;
5534	struct igb_q_vector *q_vector = adapter->q_vector[0];
5535	struct e1000_hw *hw = &adapter->hw;
5536	/* read ICR disables interrupts using IAM */
5537	u32 icr = rd32(E1000_ICR);
5538
5539	igb_write_itr(q_vector);
5540
5541	if (icr & E1000_ICR_DRSTA)
5542		schedule_work(&adapter->reset_task);
5543
5544	if (icr & E1000_ICR_DOUTSYNC) {
5545		/* HW is reporting DMA is out of sync */
5546		adapter->stats.doosync++;
5547	}
5548
5549	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5550		hw->mac.get_link_status = 1;
5551		if (!test_bit(__IGB_DOWN, &adapter->state))
5552			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5553	}
5554
5555	napi_schedule(&q_vector->napi);
5556
5557	return IRQ_HANDLED;
5558}
5559
5560/**
5561 * igb_intr - Legacy Interrupt Handler
5562 * @irq: interrupt number
5563 * @data: pointer to a network interface device structure
5564 **/
5565static irqreturn_t igb_intr(int irq, void *data)
5566{
5567	struct igb_adapter *adapter = data;
5568	struct igb_q_vector *q_vector = adapter->q_vector[0];
5569	struct e1000_hw *hw = &adapter->hw;
5570	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5571	 * need for the IMC write */
5572	u32 icr = rd32(E1000_ICR);
5573
5574	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5575	 * not set, then the adapter didn't send an interrupt */
5576	if (!(icr & E1000_ICR_INT_ASSERTED))
5577		return IRQ_NONE;
5578
5579	igb_write_itr(q_vector);
5580
5581	if (icr & E1000_ICR_DRSTA)
5582		schedule_work(&adapter->reset_task);
5583
5584	if (icr & E1000_ICR_DOUTSYNC) {
5585		/* HW is reporting DMA is out of sync */
5586		adapter->stats.doosync++;
5587	}
5588
5589	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5590		hw->mac.get_link_status = 1;
5591		/* guard against interrupt when we're going down */
5592		if (!test_bit(__IGB_DOWN, &adapter->state))
5593			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5594	}
5595
5596	napi_schedule(&q_vector->napi);
5597
5598	return IRQ_HANDLED;
5599}
5600
5601static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5602{
5603	struct igb_adapter *adapter = q_vector->adapter;
5604	struct e1000_hw *hw = &adapter->hw;
5605
5606	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5607	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5608		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5609			igb_set_itr(q_vector);
5610		else
5611			igb_update_ring_itr(q_vector);
5612	}
5613
5614	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5615		if (adapter->msix_entries)
5616			wr32(E1000_EIMS, q_vector->eims_value);
5617		else
5618			igb_irq_enable(adapter);
5619	}
5620}
5621
5622/**
5623 * igb_poll - NAPI Rx polling callback
5624 * @napi: napi polling structure
5625 * @budget: count of how many packets we should handle
5626 **/
5627static int igb_poll(struct napi_struct *napi, int budget)
5628{
5629	struct igb_q_vector *q_vector = container_of(napi,
5630	                                             struct igb_q_vector,
5631	                                             napi);
5632	bool clean_complete = true;
5633
5634#ifdef CONFIG_IGB_DCA
5635	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5636		igb_update_dca(q_vector);
5637#endif
5638	if (q_vector->tx.ring)
5639		clean_complete = igb_clean_tx_irq(q_vector);
5640
5641	if (q_vector->rx.ring)
5642		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5643
5644	/* If all work not completed, return budget and keep polling */
5645	if (!clean_complete)
5646		return budget;
5647
5648	/* If not enough Rx work done, exit the polling mode */
5649	napi_complete(napi);
5650	igb_ring_irq_enable(q_vector);
5651
5652	return 0;
5653}
5654
5655#ifdef CONFIG_IGB_PTP
5656/**
5657 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5658 * @q_vector: pointer to q_vector containing needed info
5659 * @buffer_info: pointer to igb_tx_buffer structure
5660 *
5661 * If we were asked to do hardware stamping and such a time stamp is
5662 * available, then it must have been for this skb here because we only
5663 * allow one such packet into the queue.
5664 */
5665static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5666			    struct igb_tx_buffer *buffer_info)
5667{
5668	struct igb_adapter *adapter = q_vector->adapter;
5669	struct e1000_hw *hw = &adapter->hw;
5670	struct skb_shared_hwtstamps shhwtstamps;
5671	u64 regval;
5672
5673	/* if skb does not support hw timestamp or TX stamp not valid exit */
5674	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5675	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5676		return;
5677
5678	regval = rd32(E1000_TXSTMPL);
5679	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5680
5681	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5682	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5683}
5684
5685#endif
5686/**
5687 * igb_clean_tx_irq - Reclaim resources after transmit completes
5688 * @q_vector: pointer to q_vector containing needed info
5689 * returns true if ring is completely cleaned
5690 **/
5691static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5692{
5693	struct igb_adapter *adapter = q_vector->adapter;
5694	struct igb_ring *tx_ring = q_vector->tx.ring;
5695	struct igb_tx_buffer *tx_buffer;
5696	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5697	unsigned int total_bytes = 0, total_packets = 0;
5698	unsigned int budget = q_vector->tx.work_limit;
5699	unsigned int i = tx_ring->next_to_clean;
5700
5701	if (test_bit(__IGB_DOWN, &adapter->state))
5702		return true;
5703
5704	tx_buffer = &tx_ring->tx_buffer_info[i];
5705	tx_desc = IGB_TX_DESC(tx_ring, i);
5706	i -= tx_ring->count;
5707
5708	for (; budget; budget--) {
5709		eop_desc = tx_buffer->next_to_watch;
5710
5711		/* prevent any other reads prior to eop_desc */
5712		rmb();
5713
5714		/* if next_to_watch is not set then there is no work pending */
5715		if (!eop_desc)
5716			break;
5717
5718		/* if DD is not set pending work has not been completed */
5719		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5720			break;
5721
5722		/* clear next_to_watch to prevent false hangs */
5723		tx_buffer->next_to_watch = NULL;
5724
5725		/* update the statistics for this packet */
5726		total_bytes += tx_buffer->bytecount;
5727		total_packets += tx_buffer->gso_segs;
5728
5729#ifdef CONFIG_IGB_PTP
5730		/* retrieve hardware timestamp */
5731		igb_tx_hwtstamp(q_vector, tx_buffer);
5732
5733#endif
5734		/* free the skb */
5735		dev_kfree_skb_any(tx_buffer->skb);
5736		tx_buffer->skb = NULL;
5737
5738		/* unmap skb header data */
5739		dma_unmap_single(tx_ring->dev,
5740				 tx_buffer->dma,
5741				 tx_buffer->length,
5742				 DMA_TO_DEVICE);
5743
5744		/* clear last DMA location and unmap remaining buffers */
5745		while (tx_desc != eop_desc) {
5746			tx_buffer->dma = 0;
5747
5748			tx_buffer++;
5749			tx_desc++;
5750			i++;
5751			if (unlikely(!i)) {
5752				i -= tx_ring->count;
5753				tx_buffer = tx_ring->tx_buffer_info;
5754				tx_desc = IGB_TX_DESC(tx_ring, 0);
5755			}
5756
5757			/* unmap any remaining paged data */
5758			if (tx_buffer->dma) {
5759				dma_unmap_page(tx_ring->dev,
5760					       tx_buffer->dma,
5761					       tx_buffer->length,
5762					       DMA_TO_DEVICE);
5763			}
5764		}
5765
5766		/* clear last DMA location */
5767		tx_buffer->dma = 0;
5768
5769		/* move us one more past the eop_desc for start of next pkt */
5770		tx_buffer++;
5771		tx_desc++;
5772		i++;
5773		if (unlikely(!i)) {
5774			i -= tx_ring->count;
5775			tx_buffer = tx_ring->tx_buffer_info;
5776			tx_desc = IGB_TX_DESC(tx_ring, 0);
5777		}
5778	}
5779
5780	netdev_tx_completed_queue(txring_txq(tx_ring),
5781				  total_packets, total_bytes);
5782	i += tx_ring->count;
5783	tx_ring->next_to_clean = i;
5784	u64_stats_update_begin(&tx_ring->tx_syncp);
5785	tx_ring->tx_stats.bytes += total_bytes;
5786	tx_ring->tx_stats.packets += total_packets;
5787	u64_stats_update_end(&tx_ring->tx_syncp);
5788	q_vector->tx.total_bytes += total_bytes;
5789	q_vector->tx.total_packets += total_packets;
5790
5791	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5792		struct e1000_hw *hw = &adapter->hw;
5793
5794		eop_desc = tx_buffer->next_to_watch;
5795
5796		/* Detect a transmit hang in hardware; this serializes the
5797		 * check with the clearing of time_stamp and movement of i */
5798		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5799		if (eop_desc &&
5800		    time_after(jiffies, tx_buffer->time_stamp +
5801			       (adapter->tx_timeout_factor * HZ)) &&
5802		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5803
5804			/* detected Tx unit hang */
5805			dev_err(tx_ring->dev,
5806				"Detected Tx Unit Hang\n"
5807				"  Tx Queue             <%d>\n"
5808				"  TDH                  <%x>\n"
5809				"  TDT                  <%x>\n"
5810				"  next_to_use          <%x>\n"
5811				"  next_to_clean        <%x>\n"
5812				"buffer_info[next_to_clean]\n"
5813				"  time_stamp           <%lx>\n"
5814				"  next_to_watch        <%p>\n"
5815				"  jiffies              <%lx>\n"
5816				"  desc.status          <%x>\n",
5817				tx_ring->queue_index,
5818				rd32(E1000_TDH(tx_ring->reg_idx)),
5819				readl(tx_ring->tail),
5820				tx_ring->next_to_use,
5821				tx_ring->next_to_clean,
5822				tx_buffer->time_stamp,
5823				eop_desc,
5824				jiffies,
5825				eop_desc->wb.status);
5826			netif_stop_subqueue(tx_ring->netdev,
5827					    tx_ring->queue_index);
5828
5829			/* we are about to reset, no point in enabling stuff */
5830			return true;
5831		}
5832	}
5833
5834	if (unlikely(total_packets &&
5835		     netif_carrier_ok(tx_ring->netdev) &&
5836		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5837		/* Make sure that anybody stopping the queue after this
5838		 * sees the new next_to_clean.
5839		 */
5840		smp_mb();
5841		if (__netif_subqueue_stopped(tx_ring->netdev,
5842					     tx_ring->queue_index) &&
5843		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5844			netif_wake_subqueue(tx_ring->netdev,
5845					    tx_ring->queue_index);
5846
5847			u64_stats_update_begin(&tx_ring->tx_syncp);
5848			tx_ring->tx_stats.restart_queue++;
5849			u64_stats_update_end(&tx_ring->tx_syncp);
5850		}
5851	}
5852
5853	return !!budget;
5854}
5855
5856static inline void igb_rx_checksum(struct igb_ring *ring,
5857				   union e1000_adv_rx_desc *rx_desc,
5858				   struct sk_buff *skb)
5859{
5860	skb_checksum_none_assert(skb);
5861
5862	/* Ignore Checksum bit is set */
5863	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5864		return;
5865
5866	/* Rx checksum disabled via ethtool */
5867	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5868		return;
5869
5870	/* TCP/UDP checksum error bit is set */
5871	if (igb_test_staterr(rx_desc,
5872			     E1000_RXDEXT_STATERR_TCPE |
5873			     E1000_RXDEXT_STATERR_IPE)) {
5874		/*
5875		 * work around errata with sctp packets where the TCPE aka
5876		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5877		 * packets; skip the error count and let the stack check the crc32c
5878		 */
5879		if (!((skb->len == 60) &&
5880		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5881			u64_stats_update_begin(&ring->rx_syncp);
5882			ring->rx_stats.csum_err++;
5883			u64_stats_update_end(&ring->rx_syncp);
5884		}
5885		/* let the stack verify checksum errors */
5886		return;
5887	}
5888	/* It must be a TCP or UDP packet with a valid checksum */
5889	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5890				      E1000_RXD_STAT_UDPCS))
5891		skb->ip_summed = CHECKSUM_UNNECESSARY;
5892
5893	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5894		le32_to_cpu(rx_desc->wb.upper.status_error));
5895}
5896
5897static inline void igb_rx_hash(struct igb_ring *ring,
5898			       union e1000_adv_rx_desc *rx_desc,
5899			       struct sk_buff *skb)
5900{
5901	if (ring->netdev->features & NETIF_F_RXHASH)
5902		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5903}
5904
5905#ifdef CONFIG_IGB_PTP
5906static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5907			    union e1000_adv_rx_desc *rx_desc,
5908			    struct sk_buff *skb)
5909{
5910	struct igb_adapter *adapter = q_vector->adapter;
5911	struct e1000_hw *hw = &adapter->hw;
5912	u64 regval;
5913
5914	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5915				       E1000_RXDADV_STAT_TS))
5916		return;
5917
5918	/*
5919	 * If this bit is set, then the RX registers contain the time stamp. No
5920	 * other packet will be time stamped until we read these registers, so
5921	 * read the registers to make them available again. Because only one
5922	 * packet can be time stamped at a time, we know that the register
5923	 * values must belong to this one here and therefore we don't need to
5924	 * compare any of the additional attributes stored for it.
5925	 *
5926	 * If nothing went wrong, then it should have a shared tx_flags that we
5927	 * can turn into a skb_shared_hwtstamps.
5928	 */
5929	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5930		u32 *stamp = (u32 *)skb->data;
5931		regval = le32_to_cpu(*(stamp + 2));
5932		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5933		skb_pull(skb, IGB_TS_HDR_LEN);
5934	} else {
5935		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5936			return;
5937
5938		regval = rd32(E1000_RXSTMPL);
5939		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5940	}
5941
5942	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5943}
5944
5945#endif
5946static void igb_rx_vlan(struct igb_ring *ring,
5947			union e1000_adv_rx_desc *rx_desc,
5948			struct sk_buff *skb)
5949{
5950	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5951		u16 vid;
5952		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5953		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5954			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5955		else
5956			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5957
5958		__vlan_hwaccel_put_tag(skb, vid);
5959	}
5960}
5961
5962static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5963{
5964	/* HW will not DMA in data larger than the given buffer, even if it
5965	 * parses the (NFS, of course) header to be larger.  In that case, it
5966	 * fills the header buffer and spills the rest into the page.
5967	 */
5968	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5969	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5970	if (hlen > IGB_RX_HDR_LEN)
5971		hlen = IGB_RX_HDR_LEN;
5972	return hlen;
5973}
5974
5975static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5976{
5977	struct igb_ring *rx_ring = q_vector->rx.ring;
5978	union e1000_adv_rx_desc *rx_desc;
5979	const int current_node = numa_node_id();
5980	unsigned int total_bytes = 0, total_packets = 0;
5981	u16 cleaned_count = igb_desc_unused(rx_ring);
5982	u16 i = rx_ring->next_to_clean;
5983
5984	rx_desc = IGB_RX_DESC(rx_ring, i);
5985
5986	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5987		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5988		struct sk_buff *skb = buffer_info->skb;
5989		union e1000_adv_rx_desc *next_rxd;
5990
5991		buffer_info->skb = NULL;
5992		prefetch(skb->data);
5993
5994		i++;
5995		if (i == rx_ring->count)
5996			i = 0;
5997
5998		next_rxd = IGB_RX_DESC(rx_ring, i);
5999		prefetch(next_rxd);
6000
6001		/*
6002		 * This memory barrier is needed to keep us from reading
6003		 * any other fields out of the rx_desc until we know the
6004		 * RXD_STAT_DD bit is set
6005		 */
6006		rmb();
6007
6008		if (!skb_is_nonlinear(skb)) {
6009			__skb_put(skb, igb_get_hlen(rx_desc));
6010			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6011					 IGB_RX_HDR_LEN,
6012					 DMA_FROM_DEVICE);
6013			buffer_info->dma = 0;
6014		}
6015
6016		if (rx_desc->wb.upper.length) {
6017			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6018
6019			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6020						buffer_info->page,
6021						buffer_info->page_offset,
6022						length);
6023
6024			skb->len += length;
6025			skb->data_len += length;
6026			skb->truesize += PAGE_SIZE / 2;
6027
6028			if ((page_count(buffer_info->page) != 1) ||
6029			    (page_to_nid(buffer_info->page) != current_node))
6030				buffer_info->page = NULL;
6031			else
6032				get_page(buffer_info->page);
6033
6034			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6035				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6036			buffer_info->page_dma = 0;
6037		}
6038
6039		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6040			struct igb_rx_buffer *next_buffer;
6041			next_buffer = &rx_ring->rx_buffer_info[i];
6042			buffer_info->skb = next_buffer->skb;
6043			buffer_info->dma = next_buffer->dma;
6044			next_buffer->skb = skb;
6045			next_buffer->dma = 0;
6046			goto next_desc;
6047		}
6048
6049		if (unlikely((igb_test_staterr(rx_desc,
6050					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6051			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6052			dev_kfree_skb_any(skb);
6053			goto next_desc;
6054		}
6055
6056#ifdef CONFIG_IGB_PTP
6057		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6058#endif
6059		igb_rx_hash(rx_ring, rx_desc, skb);
6060		igb_rx_checksum(rx_ring, rx_desc, skb);
6061		igb_rx_vlan(rx_ring, rx_desc, skb);
6062
6063		total_bytes += skb->len;
6064		total_packets++;
6065
6066		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6067
6068		napi_gro_receive(&q_vector->napi, skb);
6069
6070		budget--;
6071next_desc:
6072		if (!budget)
6073			break;
6074
6075		cleaned_count++;
6076		/* return some buffers to hardware, one at a time is too slow */
6077		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6078			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6079			cleaned_count = 0;
6080		}
6081
6082		/* use prefetched values */
6083		rx_desc = next_rxd;
6084	}
6085
6086	rx_ring->next_to_clean = i;
6087	u64_stats_update_begin(&rx_ring->rx_syncp);
6088	rx_ring->rx_stats.packets += total_packets;
6089	rx_ring->rx_stats.bytes += total_bytes;
6090	u64_stats_update_end(&rx_ring->rx_syncp);
6091	q_vector->rx.total_packets += total_packets;
6092	q_vector->rx.total_bytes += total_bytes;
6093
6094	if (cleaned_count)
6095		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6096
6097	return !!budget;
6098}
6099
6100static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6101				 struct igb_rx_buffer *bi)
6102{
6103	struct sk_buff *skb = bi->skb;
6104	dma_addr_t dma = bi->dma;
6105
6106	if (dma)
6107		return true;
6108
6109	if (likely(!skb)) {
6110		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6111						IGB_RX_HDR_LEN);
6112		bi->skb = skb;
6113		if (!skb) {
6114			rx_ring->rx_stats.alloc_failed++;
6115			return false;
6116		}
6117
6118		/* initialize skb for ring */
6119		skb_record_rx_queue(skb, rx_ring->queue_index);
6120	}
6121
6122	dma = dma_map_single(rx_ring->dev, skb->data,
6123			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6124
6125	if (dma_mapping_error(rx_ring->dev, dma)) {
6126		rx_ring->rx_stats.alloc_failed++;
6127		return false;
6128	}
6129
6130	bi->dma = dma;
6131	return true;
6132}
6133
6134static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6135				  struct igb_rx_buffer *bi)
6136{
6137	struct page *page = bi->page;
6138	dma_addr_t page_dma = bi->page_dma;
6139	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6140
6141	if (page_dma)
6142		return true;
6143
6144	if (!page) {
6145		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6146		bi->page = page;
6147		if (unlikely(!page)) {
6148			rx_ring->rx_stats.alloc_failed++;
6149			return false;
6150		}
6151	}
6152
6153	page_dma = dma_map_page(rx_ring->dev, page,
6154				page_offset, PAGE_SIZE / 2,
6155				DMA_FROM_DEVICE);
6156
6157	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6158		rx_ring->rx_stats.alloc_failed++;
6159		return false;
6160	}
6161
6162	bi->page_dma = page_dma;
6163	bi->page_offset = page_offset;
6164	return true;
6165}
6166
6167/**
6168 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6169 * @rx_ring: rx descriptor ring to allocate new receive buffers for
 * @cleaned_count: number of buffers to replace
6170 **/
6171void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6172{
6173	union e1000_adv_rx_desc *rx_desc;
6174	struct igb_rx_buffer *bi;
6175	u16 i = rx_ring->next_to_use;
6176
6177	rx_desc = IGB_RX_DESC(rx_ring, i);
6178	bi = &rx_ring->rx_buffer_info[i];
6179	i -= rx_ring->count;
6180
6181	while (cleaned_count--) {
6182		if (!igb_alloc_mapped_skb(rx_ring, bi))
6183			break;
6184
6185		/* Refresh the desc even if buffer_addrs didn't change
6186		 * because each write-back erases this info. */
6187		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6188
6189		if (!igb_alloc_mapped_page(rx_ring, bi))
6190			break;
6191
6192		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6193
6194		rx_desc++;
6195		bi++;
6196		i++;
6197		if (unlikely(!i)) {
6198			rx_desc = IGB_RX_DESC(rx_ring, 0);
6199			bi = rx_ring->rx_buffer_info;
6200			i -= rx_ring->count;
6201		}
6202
6203		/* clear the hdr_addr for the next_to_use descriptor */
6204		rx_desc->read.hdr_addr = 0;
6205	}
6206
6207	i += rx_ring->count;
6208
6209	if (rx_ring->next_to_use != i) {
6210		rx_ring->next_to_use = i;
6211
6212		/* Force memory writes to complete before letting h/w
6213		 * know there are new descriptors to fetch.  (Only
6214		 * applicable for weak-ordered memory model archs,
6215		 * such as IA-64). */
6216		wmb();
6217		writel(i, rx_ring->tail);
6218	}
6219}
6220
6221/**
6222 * igb_mii_ioctl - MII register access via ioctl
6223 * @netdev: network interface device structure
6224 * @ifr: pointer to interface request structure
6225 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6226 **/
6227static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6228{
6229	struct igb_adapter *adapter = netdev_priv(netdev);
6230	struct mii_ioctl_data *data = if_mii(ifr);
6231
6232	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6233		return -EOPNOTSUPP;
6234
6235	switch (cmd) {
6236	case SIOCGMIIPHY:
6237		data->phy_id = adapter->hw.phy.addr;
6238		break;
6239	case SIOCGMIIREG:
6240		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6241		                     &data->val_out))
6242			return -EIO;
6243		break;
6244	case SIOCSMIIREG:
6245	default:
6246		return -EOPNOTSUPP;
6247	}
6248	return 0;
6249}
6250
6251/**
6252 * igb_hwtstamp_ioctl - control hardware time stamping
6253 * @netdev: network interface device structure
6254 * @ifr: pointer to interface request structure
6255 * @cmd: ioctl command (SIOCSHWTSTAMP)
6256 *
6257 * Outgoing time stamping can be enabled and disabled. Play nice and
6258 * disable it when requested, although it shouldn't cause any overhead
6259 * when no packet needs it. At most one packet in the queue may be
6260 * marked for time stamping, otherwise it would be impossible to tell
6261 * for sure to which packet the hardware time stamp belongs.
6262 *
6263 * Incoming time stamping has to be configured via the hardware
6264 * filters. Not all combinations are supported, in particular event
6265 * type has to be specified. Matching the kind of event packet is
6266 * not supported, with the exception of "all V2 events regardless of
6267 * level 2 or 4".
6268 *
6269 **/
6270static int igb_hwtstamp_ioctl(struct net_device *netdev,
6271			      struct ifreq *ifr, int cmd)
6272{
6273	struct igb_adapter *adapter = netdev_priv(netdev);
6274	struct e1000_hw *hw = &adapter->hw;
6275	struct hwtstamp_config config;
6276	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6277	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6278	u32 tsync_rx_cfg = 0;
6279	bool is_l4 = false;
6280	bool is_l2 = false;
6281	u32 regval;
6282
6283	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6284		return -EFAULT;
6285
6286	/* reserved for future extensions */
6287	if (config.flags)
6288		return -EINVAL;
6289
6290	switch (config.tx_type) {
6291	case HWTSTAMP_TX_OFF:
6292		tsync_tx_ctl = 0;
		/* fall through */
6293	case HWTSTAMP_TX_ON:
6294		break;
6295	default:
6296		return -ERANGE;
6297	}
6298
6299	switch (config.rx_filter) {
6300	case HWTSTAMP_FILTER_NONE:
6301		tsync_rx_ctl = 0;
6302		break;
6303	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6304	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6305	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6306	case HWTSTAMP_FILTER_ALL:
6307		/*
6308		 * register TSYNCRXCFG must be set, therefore it is not
6309		 * possible to time stamp both Sync and Delay_Req messages
6310		 * => fall back to time stamping all packets
6311		 */
6312		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6313		config.rx_filter = HWTSTAMP_FILTER_ALL;
6314		break;
6315	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6316		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6317		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6318		is_l4 = true;
6319		break;
6320	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6321		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6322		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6323		is_l4 = true;
6324		break;
6325	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6326	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6327		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6328		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6329		is_l2 = true;
6330		is_l4 = true;
6331		config.rx_filter = HWTSTAMP_FILTER_SOME;
6332		break;
6333	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6334	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6335		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6336		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6337		is_l2 = true;
6338		is_l4 = true;
6339		config.rx_filter = HWTSTAMP_FILTER_SOME;
6340		break;
6341	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6342	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6343	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6344		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6345		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6346		is_l2 = true;
6347		is_l4 = true;
6348		break;
6349	default:
6350		return -ERANGE;
6351	}
6352
6353	if (hw->mac.type == e1000_82575) {
6354		if (tsync_rx_ctl | tsync_tx_ctl)
6355			return -EINVAL;
6356		return 0;
6357	}
6358
6359	/*
6360	 * Per-packet timestamping only works if all packets are
6361	 * timestamped, so enable timestamping in all packets as
6362	 * long as one rx filter was configured.
6363	 */
6364	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6365		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6366		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6367	}
6368
6369	/* enable/disable TX */
6370	regval = rd32(E1000_TSYNCTXCTL);
6371	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6372	regval |= tsync_tx_ctl;
6373	wr32(E1000_TSYNCTXCTL, regval);
6374
6375	/* enable/disable RX */
6376	regval = rd32(E1000_TSYNCRXCTL);
6377	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6378	regval |= tsync_rx_ctl;
6379	wr32(E1000_TSYNCRXCTL, regval);
6380
6381	/* define which PTP packets are time stamped */
6382	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6383
6384	/* define ethertype filter for timestamped packets */
6385	if (is_l2)
6386		wr32(E1000_ETQF(3),
6387		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6388		                 E1000_ETQF_1588 | /* enable timestamping */
6389		                 ETH_P_1588));     /* 1588 eth protocol type */
6390	else
6391		wr32(E1000_ETQF(3), 0);
6392
6393#define PTP_PORT 319
6394	/* L4 Queue Filter[3]: filter by destination port and protocol */
6395	if (is_l4) {
6396		u32 ftqf = (IPPROTO_UDP /* UDP */
6397			| E1000_FTQF_VF_BP /* VF not compared */
6398			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6399			| E1000_FTQF_MASK); /* mask all inputs */
6400		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6401
6402		wr32(E1000_IMIR(3), htons(PTP_PORT));
6403		wr32(E1000_IMIREXT(3),
6404		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6405		if (hw->mac.type == e1000_82576) {
6406			/* enable source port check */
6407			wr32(E1000_SPQF(3), htons(PTP_PORT));
6408			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6409		}
6410		wr32(E1000_FTQF(3), ftqf);
6411	} else {
6412		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6413	}
6414	wrfl();
6415
6416	adapter->hwtstamp_config = config;
6417
6418	/* clear TX/RX time stamp registers, just to be sure */
6419	regval = rd32(E1000_TXSTMPH);
6420	regval = rd32(E1000_RXSTMPH);
6421
6422	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6423		-EFAULT : 0;
6424}
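
/*
 * Illustrative user-space sketch of how igb_hwtstamp_ioctl() is typically
 * exercised (not part of the driver; the interface name "eth0" and the
 * chosen filter are assumptions):
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On success the (possibly adjusted) config is copied back to user space,
 * e.g. rx_filter may have been widened to HWTSTAMP_FILTER_ALL as above.
 */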
6425
6426/**
6427 * igb_ioctl - handle ioctl calls for the net device
6428 * @netdev: network interface device structure
6429 * @ifr: pointer to interface request structure
6430 * @cmd: ioctl command
6431 **/
6432static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6433{
6434	switch (cmd) {
6435	case SIOCGMIIPHY:
6436	case SIOCGMIIREG:
6437	case SIOCSMIIREG:
6438		return igb_mii_ioctl(netdev, ifr, cmd);
6439	case SIOCSHWTSTAMP:
6440		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6441	default:
6442		return -EOPNOTSUPP;
6443	}
6444}
6445
6446s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6447{
6448	struct igb_adapter *adapter = hw->back;
6449	u16 cap_offset;
6450
6451	cap_offset = adapter->pdev->pcie_cap;
6452	if (!cap_offset)
6453		return -E1000_ERR_CONFIG;
6454
6455	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6456
6457	return 0;
6458}
6459
6460s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6461{
6462	struct igb_adapter *adapter = hw->back;
6463	u16 cap_offset;
6464
6465	cap_offset = adapter->pdev->pcie_cap;
6466	if (!cap_offset)
6467		return -E1000_ERR_CONFIG;
6468
6469	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6470
6471	return 0;
6472}
6473
6474static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6475{
6476	struct igb_adapter *adapter = netdev_priv(netdev);
6477	struct e1000_hw *hw = &adapter->hw;
6478	u32 ctrl, rctl;
6479	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6480
6481	if (enable) {
6482		/* enable VLAN tag insert/strip */
6483		ctrl = rd32(E1000_CTRL);
6484		ctrl |= E1000_CTRL_VME;
6485		wr32(E1000_CTRL, ctrl);
6486
6487		/* Disable CFI check */
6488		rctl = rd32(E1000_RCTL);
6489		rctl &= ~E1000_RCTL_CFIEN;
6490		wr32(E1000_RCTL, rctl);
6491	} else {
6492		/* disable VLAN tag insert/strip */
6493		ctrl = rd32(E1000_CTRL);
6494		ctrl &= ~E1000_CTRL_VME;
6495		wr32(E1000_CTRL, ctrl);
6496	}
6497
6498	igb_rlpml_set(adapter);
6499}
6500
6501static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6502{
6503	struct igb_adapter *adapter = netdev_priv(netdev);
6504	struct e1000_hw *hw = &adapter->hw;
6505	int pf_id = adapter->vfs_allocated_count;
6506
6507	/* attempt to add filter to vlvf array */
6508	igb_vlvf_set(adapter, vid, true, pf_id);
6509
6510	/* add the filter since PF can receive vlans w/o entry in vlvf */
6511	igb_vfta_set(hw, vid, true);
6512
6513	set_bit(vid, adapter->active_vlans);
6514
6515	return 0;
6516}
6517
6518static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6519{
6520	struct igb_adapter *adapter = netdev_priv(netdev);
6521	struct e1000_hw *hw = &adapter->hw;
6522	int pf_id = adapter->vfs_allocated_count;
6523	s32 err;
6524
6525	/* remove vlan from VLVF table array */
6526	err = igb_vlvf_set(adapter, vid, false, pf_id);
6527
6528	/* if vid was not present in VLVF just remove it from table */
6529	if (err)
6530		igb_vfta_set(hw, vid, false);
6531
6532	clear_bit(vid, adapter->active_vlans);
6533
6534	return 0;
6535}
6536
6537static void igb_restore_vlan(struct igb_adapter *adapter)
6538{
6539	u16 vid;
6540
6541	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6542
6543	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6544		igb_vlan_rx_add_vid(adapter->netdev, vid);
6545}
6546
6547int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6548{
6549	struct pci_dev *pdev = adapter->pdev;
6550	struct e1000_mac_info *mac = &adapter->hw.mac;
6551
6552	mac->autoneg = 0;
6553
6554	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6555	 * for the switch() below to work */
6556	if ((spd & 1) || (dplx & ~1))
6557		goto err_inval;
6558
6559	/* Fiber NICs only allow 1000 Mbps full duplex */
6560	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6561	    (spd != SPEED_1000 ||
6562	     dplx != DUPLEX_FULL))
6563		goto err_inval;
6564
6565	switch (spd + dplx) {
6566	case SPEED_10 + DUPLEX_HALF:
6567		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6568		break;
6569	case SPEED_10 + DUPLEX_FULL:
6570		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6571		break;
6572	case SPEED_100 + DUPLEX_HALF:
6573		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6574		break;
6575	case SPEED_100 + DUPLEX_FULL:
6576		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6577		break;
6578	case SPEED_1000 + DUPLEX_FULL:
6579		mac->autoneg = 1;
6580		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6581		break;
6582	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6583	default:
6584		goto err_inval;
6585	}
6586	return 0;
6587
6588err_inval:
6589	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6590	return -EINVAL;
6591}
6592
6593static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6594			  bool runtime)
6595{
6596	struct net_device *netdev = pci_get_drvdata(pdev);
6597	struct igb_adapter *adapter = netdev_priv(netdev);
6598	struct e1000_hw *hw = &adapter->hw;
6599	u32 ctrl, rctl, status;
6600	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6601#ifdef CONFIG_PM
6602	int retval = 0;
6603#endif
6604
6605	netif_device_detach(netdev);
6606
6607	if (netif_running(netdev))
6608		__igb_close(netdev, true);
6609
6610	igb_clear_interrupt_scheme(adapter);
6611
6612#ifdef CONFIG_PM
6613	retval = pci_save_state(pdev);
6614	if (retval)
6615		return retval;
6616#endif
6617
6618	status = rd32(E1000_STATUS);
6619	if (status & E1000_STATUS_LU)
6620		wufc &= ~E1000_WUFC_LNKC;
6621
6622	if (wufc) {
6623		igb_setup_rctl(adapter);
6624		igb_set_rx_mode(netdev);
6625
6626		/* turn on all-multi mode if wake on multicast is enabled */
6627		if (wufc & E1000_WUFC_MC) {
6628			rctl = rd32(E1000_RCTL);
6629			rctl |= E1000_RCTL_MPE;
6630			wr32(E1000_RCTL, rctl);
6631		}
6632
6633		ctrl = rd32(E1000_CTRL);
6634		/* advertise wake from D3Cold */
6635		#define E1000_CTRL_ADVD3WUC 0x00100000
6636		/* phy power management enable */
6637		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6638		ctrl |= E1000_CTRL_ADVD3WUC;
6639		wr32(E1000_CTRL, ctrl);
6640
6641		/* Allow time for pending master requests to run */
6642		igb_disable_pcie_master(hw);
6643
6644		wr32(E1000_WUC, E1000_WUC_PME_EN);
6645		wr32(E1000_WUFC, wufc);
6646	} else {
6647		wr32(E1000_WUC, 0);
6648		wr32(E1000_WUFC, 0);
6649	}
6650
6651	*enable_wake = wufc || adapter->en_mng_pt;
6652	if (!*enable_wake)
6653		igb_power_down_link(adapter);
6654	else
6655		igb_power_up_link(adapter);
6656
6657	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6658	 * would have already happened in close and is redundant. */
6659	igb_release_hw_control(adapter);
6660
6661	pci_disable_device(pdev);
6662
6663	return 0;
6664}
6665
6666#ifdef CONFIG_PM
6667#ifdef CONFIG_PM_SLEEP
6668static int igb_suspend(struct device *dev)
6669{
6670	int retval;
6671	bool wake;
6672	struct pci_dev *pdev = to_pci_dev(dev);
6673
6674	retval = __igb_shutdown(pdev, &wake, 0);
6675	if (retval)
6676		return retval;
6677
6678	if (wake) {
6679		pci_prepare_to_sleep(pdev);
6680	} else {
6681		pci_wake_from_d3(pdev, false);
6682		pci_set_power_state(pdev, PCI_D3hot);
6683	}
6684
6685	return 0;
6686}
6687#endif /* CONFIG_PM_SLEEP */
6688
6689static int igb_resume(struct device *dev)
6690{
6691	struct pci_dev *pdev = to_pci_dev(dev);
6692	struct net_device *netdev = pci_get_drvdata(pdev);
6693	struct igb_adapter *adapter = netdev_priv(netdev);
6694	struct e1000_hw *hw = &adapter->hw;
6695	u32 err;
6696
6697	pci_set_power_state(pdev, PCI_D0);
6698	pci_restore_state(pdev);
6699	pci_save_state(pdev);
6700
6701	err = pci_enable_device_mem(pdev);
6702	if (err) {
6703		dev_err(&pdev->dev,
6704			"igb: Cannot enable PCI device from suspend\n");
6705		return err;
6706	}
6707	pci_set_master(pdev);
6708
6709	pci_enable_wake(pdev, PCI_D3hot, 0);
6710	pci_enable_wake(pdev, PCI_D3cold, 0);
6711
6712	if (igb_init_interrupt_scheme(adapter)) {
6713		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6714		return -ENOMEM;
6715	}
6716
6717	igb_reset(adapter);
6718
6719	/* let the f/w know that the h/w is now under the control of the
6720	 * driver. */
6721	igb_get_hw_control(adapter);
6722
6723	wr32(E1000_WUS, ~0);
6724
6725	if (netdev->flags & IFF_UP) {
6726		err = __igb_open(netdev, true);
6727		if (err)
6728			return err;
6729	}
6730
6731	netif_device_attach(netdev);
6732	return 0;
6733}
6734
6735#ifdef CONFIG_PM_RUNTIME
6736static int igb_runtime_idle(struct device *dev)
6737{
6738	struct pci_dev *pdev = to_pci_dev(dev);
6739	struct net_device *netdev = pci_get_drvdata(pdev);
6740	struct igb_adapter *adapter = netdev_priv(netdev);
6741
6742	if (!igb_has_link(adapter))
6743		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6744
6745	return -EBUSY;
6746}
6747
6748static int igb_runtime_suspend(struct device *dev)
6749{
6750	struct pci_dev *pdev = to_pci_dev(dev);
6751	int retval;
6752	bool wake;
6753
6754	retval = __igb_shutdown(pdev, &wake, 1);
6755	if (retval)
6756		return retval;
6757
6758	if (wake) {
6759		pci_prepare_to_sleep(pdev);
6760	} else {
6761		pci_wake_from_d3(pdev, false);
6762		pci_set_power_state(pdev, PCI_D3hot);
6763	}
6764
6765	return 0;
6766}
6767
6768static int igb_runtime_resume(struct device *dev)
6769{
6770	return igb_resume(dev);
6771}
6772#endif /* CONFIG_PM_RUNTIME */
6773#endif
6774
6775static void igb_shutdown(struct pci_dev *pdev)
6776{
6777	bool wake;
6778
6779	__igb_shutdown(pdev, &wake, 0);
6780
6781	if (system_state == SYSTEM_POWER_OFF) {
6782		pci_wake_from_d3(pdev, wake);
6783		pci_set_power_state(pdev, PCI_D3hot);
6784	}
6785}
6786
6787#ifdef CONFIG_NET_POLL_CONTROLLER
6788/*
6789 * Polling 'interrupt' - used by things like netconsole to send skbs
6790 * without having to re-enable interrupts. It's not called while
6791 * the interrupt routine is executing.
6792 */
6793static void igb_netpoll(struct net_device *netdev)
6794{
6795	struct igb_adapter *adapter = netdev_priv(netdev);
6796	struct e1000_hw *hw = &adapter->hw;
6797	struct igb_q_vector *q_vector;
6798	int i;
6799
6800	for (i = 0; i < adapter->num_q_vectors; i++) {
6801		q_vector = adapter->q_vector[i];
6802		if (adapter->msix_entries)
6803			wr32(E1000_EIMC, q_vector->eims_value);
6804		else
6805			igb_irq_disable(adapter);
6806		napi_schedule(&q_vector->napi);
6807	}
6808}
6809#endif /* CONFIG_NET_POLL_CONTROLLER */
6810
6811/**
6812 * igb_io_error_detected - called when PCI error is detected
6813 * @pdev: Pointer to PCI device
6814 * @state: The current pci connection state
6815 *
6816 * This function is called after a PCI bus error affecting
6817 * this device has been detected.
6818 */
6819static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6820					      pci_channel_state_t state)
6821{
6822	struct net_device *netdev = pci_get_drvdata(pdev);
6823	struct igb_adapter *adapter = netdev_priv(netdev);
6824
6825	netif_device_detach(netdev);
6826
6827	if (state == pci_channel_io_perm_failure)
6828		return PCI_ERS_RESULT_DISCONNECT;
6829
6830	if (netif_running(netdev))
6831		igb_down(adapter);
6832	pci_disable_device(pdev);
6833
6834	/* Request a slot reset. */
6835	return PCI_ERS_RESULT_NEED_RESET;
6836}
6837
6838/**
6839 * igb_io_slot_reset - called after the pci bus has been reset.
6840 * @pdev: Pointer to PCI device
6841 *
6842 * Restart the card from scratch, as if from a cold-boot. Implementation
6843 * resembles the first-half of the igb_resume routine.
6844 */
6845static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6846{
6847	struct net_device *netdev = pci_get_drvdata(pdev);
6848	struct igb_adapter *adapter = netdev_priv(netdev);
6849	struct e1000_hw *hw = &adapter->hw;
6850	pci_ers_result_t result;
6851	int err;
6852
6853	if (pci_enable_device_mem(pdev)) {
6854		dev_err(&pdev->dev,
6855			"Cannot re-enable PCI device after reset.\n");
6856		result = PCI_ERS_RESULT_DISCONNECT;
6857	} else {
6858		pci_set_master(pdev);
6859		pci_restore_state(pdev);
6860		pci_save_state(pdev);
6861
6862		pci_enable_wake(pdev, PCI_D3hot, 0);
6863		pci_enable_wake(pdev, PCI_D3cold, 0);
6864
6865		igb_reset(adapter);
6866		wr32(E1000_WUS, ~0);
6867		result = PCI_ERS_RESULT_RECOVERED;
6868	}
6869
6870	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6871	if (err) {
6872		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6873		        "failed 0x%0x\n", err);
6874		/* non-fatal, continue */
6875	}
6876
6877	return result;
6878}
6879
6880/**
6881 * igb_io_resume - called when traffic can start flowing again.
6882 * @pdev: Pointer to PCI device
6883 *
6884 * This callback is called when the error recovery driver tells us that
6885 * it's OK to resume normal operation. Implementation resembles the
6886 * second-half of the igb_resume routine.
6887 */
6888static void igb_io_resume(struct pci_dev *pdev)
6889{
6890	struct net_device *netdev = pci_get_drvdata(pdev);
6891	struct igb_adapter *adapter = netdev_priv(netdev);
6892
6893	if (netif_running(netdev)) {
6894		if (igb_up(adapter)) {
6895			dev_err(&pdev->dev, "igb_up failed after reset\n");
6896			return;
6897		}
6898	}
6899
6900	netif_device_attach(netdev);
6901
6902	/* let the f/w know that the h/w is now under the control of the
6903	 * driver. */
6904	igb_get_hw_control(adapter);
6905}
6906
6907static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6908                             u8 qsel)
6909{
6910	u32 rar_low, rar_high;
6911	struct e1000_hw *hw = &adapter->hw;
6912
6913	/* HW expects these in little endian so we reverse the byte order
6914	 * from network order (big endian) to little endian
6915	 */
6916	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6917	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6918	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
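	/*
	 * Worked example with a hypothetical address 00:1b:21:aa:bb:cc:
	 * the packing above yields rar_low = 0xaa211b00 and
	 * rar_high = 0x0000ccbb before the valid/pool bits are OR'ed in.
	 */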
6919
6920	/* Indicate to hardware the Address is Valid. */
6921	rar_high |= E1000_RAH_AV;
6922
6923	if (hw->mac.type == e1000_82575)
6924		rar_high |= E1000_RAH_POOL_1 * qsel;
6925	else
6926		rar_high |= E1000_RAH_POOL_1 << qsel;
6927
6928	wr32(E1000_RAL(index), rar_low);
6929	wrfl();
6930	wr32(E1000_RAH(index), rar_high);
6931	wrfl();
6932}
6933
6934static int igb_set_vf_mac(struct igb_adapter *adapter,
6935                          int vf, unsigned char *mac_addr)
6936{
6937	struct e1000_hw *hw = &adapter->hw;
6938	/* VF MAC addresses start at the end of the receive addresses and move
6939	 * towards the first; as a result a collision should not be possible */
6940	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6941
6942	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6943
6944	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6945
6946	return 0;
6947}
6948
6949static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6950{
6951	struct igb_adapter *adapter = netdev_priv(netdev);
6952	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6953		return -EINVAL;
6954	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6955	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6956	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6957				      " change effective.");
6958	if (test_bit(__IGB_DOWN, &adapter->state)) {
6959		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6960			 " but the PF device is not up.\n");
6961		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6962			 " attempting to use the VF device.\n");
6963	}
6964	return igb_set_vf_mac(adapter, vf, mac);
6965}
6966
6967static int igb_link_mbps(int internal_link_speed)
6968{
6969	switch (internal_link_speed) {
6970	case SPEED_100:
6971		return 100;
6972	case SPEED_1000:
6973		return 1000;
6974	default:
6975		return 0;
6976	}
6977}
6978
6979static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6980				  int link_speed)
6981{
6982	int rf_dec, rf_int;
6983	u32 bcnrc_val;
6984
6985	if (tx_rate != 0) {
6986		/* Calculate the rate factor values to set */
6987		rf_int = link_speed / tx_rate;
6988		rf_dec = (link_speed - (rf_int * tx_rate));
6989		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
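		/*
		 * Worked example (illustrative; assumes RF_INT_SHIFT is 14):
		 * with link_speed = 1000 and tx_rate = 300,
		 *	rf_int = 1000 / 300 = 3
		 *	rf_dec = 1000 - 3 * 300 = 100
		 *	rf_dec = 100 * 16384 / 300 = 5461
		 * so the programmed factor is 3 + 5461/16384, i.e. roughly
		 * 1000/300 as intended.
		 */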
6990
6991		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6992		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6993		               E1000_RTTBCNRC_RF_INT_MASK);
6994		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6995	} else {
6996		bcnrc_val = 0;
6997	}
6998
6999	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7000	/*
7001	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
7002	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
7003	 */
7004	wr32(E1000_RTTBCNRM, 0x14);
7005	wr32(E1000_RTTBCNRC, bcnrc_val);
7006}
7007
7008static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7009{
7010	int actual_link_speed, i;
7011	bool reset_rate = false;
7012
7013	/* VF TX rate limit was not set or not supported */
7014	if ((adapter->vf_rate_link_speed == 0) ||
7015	    (adapter->hw.mac.type != e1000_82576))
7016		return;
7017
7018	actual_link_speed = igb_link_mbps(adapter->link_speed);
7019	if (actual_link_speed != adapter->vf_rate_link_speed) {
7020		reset_rate = true;
7021		adapter->vf_rate_link_speed = 0;
7022		dev_info(&adapter->pdev->dev,
7023		         "Link speed has been changed. VF Transmit "
7024		         "rate is disabled\n");
7025	}
7026
7027	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7028		if (reset_rate)
7029			adapter->vf_data[i].tx_rate = 0;
7030
7031		igb_set_vf_rate_limit(&adapter->hw, i,
7032		                      adapter->vf_data[i].tx_rate,
7033		                      actual_link_speed);
7034	}
7035}
7036
7037static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7038{
7039	struct igb_adapter *adapter = netdev_priv(netdev);
7040	struct e1000_hw *hw = &adapter->hw;
7041	int actual_link_speed;
7042
7043	if (hw->mac.type != e1000_82576)
7044		return -EOPNOTSUPP;
7045
7046	actual_link_speed = igb_link_mbps(adapter->link_speed);
7047	if ((vf >= adapter->vfs_allocated_count) ||
7048	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7049	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7050		return -EINVAL;
7051
7052	adapter->vf_rate_link_speed = actual_link_speed;
7053	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7054	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7055
7056	return 0;
7057}
7058
7059static int igb_ndo_get_vf_config(struct net_device *netdev,
7060				 int vf, struct ifla_vf_info *ivi)
7061{
7062	struct igb_adapter *adapter = netdev_priv(netdev);
7063	if (vf >= adapter->vfs_allocated_count)
7064		return -EINVAL;
7065	ivi->vf = vf;
7066	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7067	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7068	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7069	ivi->qos = adapter->vf_data[vf].pf_qos;
7070	return 0;
7071}
7072
7073static void igb_vmm_control(struct igb_adapter *adapter)
7074{
7075	struct e1000_hw *hw = &adapter->hw;
7076	u32 reg;
7077
7078	switch (hw->mac.type) {
7079	case e1000_82575:
7080	case e1000_i210:
7081	case e1000_i211:
7082	default:
7083		/* replication is not supported for 82575, i210 and i211 */
7084		return;
7085	case e1000_82576:
7086		/* notify HW that the MAC is adding vlan tags */
7087		reg = rd32(E1000_DTXCTL);
7088		reg |= E1000_DTXCTL_VLAN_ADDED;
7089		wr32(E1000_DTXCTL, reg);
		/* fall through */
7090	case e1000_82580:
7091		/* enable replication vlan tag stripping */
7092		reg = rd32(E1000_RPLOLR);
7093		reg |= E1000_RPLOLR_STRVLAN;
7094		wr32(E1000_RPLOLR, reg);
		/* fall through */
7095	case e1000_i350:
7096		/* none of the above registers are supported by i350 */
7097		break;
7098	}
7099
7100	if (adapter->vfs_allocated_count) {
7101		igb_vmdq_set_loopback_pf(hw, true);
7102		igb_vmdq_set_replication_pf(hw, true);
7103		igb_vmdq_set_anti_spoofing_pf(hw, true,
7104						adapter->vfs_allocated_count);
7105	} else {
7106		igb_vmdq_set_loopback_pf(hw, false);
7107		igb_vmdq_set_replication_pf(hw, false);
7108	}
7109}
7110
7111static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7112{
7113	struct e1000_hw *hw = &adapter->hw;
7114	u32 dmac_thr;
7115	u16 hwm;
7116
7117	if (hw->mac.type > e1000_82580) {
7118		if (adapter->flags & IGB_FLAG_DMAC) {
7119			u32 reg;
7120
7121			/* force threshold to 0. */
7122			wr32(E1000_DMCTXTH, 0);
7123
7124			/*
7125			 * DMA Coalescing high water mark needs to be greater
7126			 * than the Rx threshold. Set hwm to PBA - max frame
7127			 * size in 16B units, capping it at PBA - 6KB.
7128			 */
7129			hwm = 64 * pba - adapter->max_frame_size / 16;
7130			if (hwm < 64 * (pba - 6))
7131				hwm = 64 * (pba - 6);
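			/*
			 * Illustrative numbers only: if pba were 34 (KB) and
			 * max_frame_size 1522 bytes, then
			 * hwm = 64 * 34 - 1522 / 16 = 2176 - 95 = 2081
			 * (16-byte units), which is above the
			 * 64 * (34 - 6) = 1792 floor and is used as-is.
			 */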
7132			reg = rd32(E1000_FCRTC);
7133			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7134			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7135				& E1000_FCRTC_RTH_COAL_MASK);
7136			wr32(E1000_FCRTC, reg);
7137
7138			/*
7139			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7140			 * frame size, capping it at PBA - 10KB.
7141			 */
7142			dmac_thr = pba - adapter->max_frame_size / 512;
7143			if (dmac_thr < pba - 10)
7144				dmac_thr = pba - 10;
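			/*
			 * Continuing the illustrative numbers above:
			 * dmac_thr = 34 - 1522 / 512 = 34 - 2 = 32 (KB),
			 * above the 34 - 10 = 24 floor, so it is kept.
			 */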
7145			reg = rd32(E1000_DMACR);
7146			reg &= ~E1000_DMACR_DMACTHR_MASK;
7147			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7148				& E1000_DMACR_DMACTHR_MASK);
7149
7150			/* transition to L0s or L1 if available */
7151			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7152
7153			/* watchdog timer = ~1000 usec, in 32 usec intervals */
7154			reg |= (1000 >> 5);
7155
7156			/* Disable BMC-to-OS Watchdog Enable */
7157			reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7158			wr32(E1000_DMACR, reg);
7159
7160			/*
7161			 * no lower threshold to disable
7162			 * coalescing (smart FIFO) - UTRESH=0
7163			 */
7164			wr32(E1000_DMCRTRH, 0);
7165
7166			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7167
7168			wr32(E1000_DMCTLX, reg);
7169
7170			/*
7171			 * free space in tx packet buffer to wake from
7172			 * DMA coal
7173			 */
7174			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7175			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7176
7177			/*
7178			 * make low power state decision controlled
7179			 * by DMA coal
7180			 */
7181			reg = rd32(E1000_PCIEMISC);
7182			reg &= ~E1000_PCIEMISC_LX_DECISION;
7183			wr32(E1000_PCIEMISC, reg);
7184		} /* endif adapter->dmac is not disabled */
7185	} else if (hw->mac.type == e1000_82580) {
7186		u32 reg = rd32(E1000_PCIEMISC);
7187		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7188		wr32(E1000_DMACR, 0);
7189	}
7190}
7191
7192/* igb_main.c */
7193