igb_main.c revision d9dd966d7fc088a6bed991c2b1e2fba4485e0a31
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 3
63#define MIN 2
64#define BUILD 10
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
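/* i.e. DRV_VERSION expands to "3.2.10-k" */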
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103	/* required last entry */
104	{0, }
105};
106
107MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108
109void igb_reset(struct igb_adapter *);
110static int igb_setup_all_tx_resources(struct igb_adapter *);
111static int igb_setup_all_rx_resources(struct igb_adapter *);
112static void igb_free_all_tx_resources(struct igb_adapter *);
113static void igb_free_all_rx_resources(struct igb_adapter *);
114static void igb_setup_mrqc(struct igb_adapter *);
115static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116static void __devexit igb_remove(struct pci_dev *pdev);
117static void igb_init_hw_timer(struct igb_adapter *adapter);
118static int igb_sw_init(struct igb_adapter *);
119static int igb_open(struct net_device *);
120static int igb_close(struct net_device *);
121static void igb_configure_tx(struct igb_adapter *);
122static void igb_configure_rx(struct igb_adapter *);
123static void igb_clean_all_tx_rings(struct igb_adapter *);
124static void igb_clean_all_rx_rings(struct igb_adapter *);
125static void igb_clean_tx_ring(struct igb_ring *);
126static void igb_clean_rx_ring(struct igb_ring *);
127static void igb_set_rx_mode(struct net_device *);
128static void igb_update_phy_info(unsigned long);
129static void igb_watchdog(unsigned long);
130static void igb_watchdog_task(struct work_struct *);
131static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133						 struct rtnl_link_stats64 *stats);
134static int igb_change_mtu(struct net_device *, int);
135static int igb_set_mac(struct net_device *, void *);
136static void igb_set_uta(struct igb_adapter *adapter);
137static irqreturn_t igb_intr(int irq, void *);
138static irqreturn_t igb_intr_msi(int irq, void *);
139static irqreturn_t igb_msix_other(int irq, void *);
140static irqreturn_t igb_msix_ring(int irq, void *);
141#ifdef CONFIG_IGB_DCA
142static void igb_update_dca(struct igb_q_vector *);
143static void igb_setup_dca(struct igb_adapter *);
144#endif /* CONFIG_IGB_DCA */
145static int igb_poll(struct napi_struct *, int);
146static bool igb_clean_tx_irq(struct igb_q_vector *);
147static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149static void igb_tx_timeout(struct net_device *);
150static void igb_reset_task(struct work_struct *);
151static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152static int igb_vlan_rx_add_vid(struct net_device *, u16);
153static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154static void igb_restore_vlan(struct igb_adapter *);
155static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
156static void igb_ping_all_vfs(struct igb_adapter *);
157static void igb_msg_task(struct igb_adapter *);
158static void igb_vmm_control(struct igb_adapter *);
159static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163			       int vf, u16 vlan, u8 qos);
164static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166				 struct ifla_vf_info *ivi);
167static void igb_check_vf_rate_limit(struct igb_adapter *);
168
169#ifdef CONFIG_PCI_IOV
170static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172static int igb_check_vf_assignment(struct igb_adapter *adapter);
173#endif
174
175#ifdef CONFIG_PM
176#ifdef CONFIG_PM_SLEEP
177static int igb_suspend(struct device *);
178#endif
179static int igb_resume(struct device *);
180#ifdef CONFIG_PM_RUNTIME
181static int igb_runtime_suspend(struct device *dev);
182static int igb_runtime_resume(struct device *dev);
183static int igb_runtime_idle(struct device *dev);
184#endif
185static const struct dev_pm_ops igb_pm_ops = {
186	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
187	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
188			igb_runtime_idle)
189};
190#endif
191static void igb_shutdown(struct pci_dev *);
192#ifdef CONFIG_IGB_DCA
193static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
194static struct notifier_block dca_notifier = {
195	.notifier_call	= igb_notify_dca,
196	.next		= NULL,
197	.priority	= 0
198};
199#endif
200#ifdef CONFIG_NET_POLL_CONTROLLER
201/* for netdump / net console */
202static void igb_netpoll(struct net_device *);
203#endif
204#ifdef CONFIG_PCI_IOV
205static unsigned int max_vfs = 0;
206module_param(max_vfs, uint, 0);
207MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
208                 "per physical function");
209#endif /* CONFIG_PCI_IOV */
210
211static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212		     pci_channel_state_t);
213static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214static void igb_io_resume(struct pci_dev *);
215
216static struct pci_error_handlers igb_err_handler = {
217	.error_detected = igb_io_error_detected,
218	.slot_reset = igb_io_slot_reset,
219	.resume = igb_io_resume,
220};
221
222static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
223
224static struct pci_driver igb_driver = {
225	.name     = igb_driver_name,
226	.id_table = igb_pci_tbl,
227	.probe    = igb_probe,
228	.remove   = __devexit_p(igb_remove),
229#ifdef CONFIG_PM
230	.driver.pm = &igb_pm_ops,
231#endif
232	.shutdown = igb_shutdown,
233	.err_handler = &igb_err_handler
234};
235
236MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238MODULE_LICENSE("GPL");
239MODULE_VERSION(DRV_VERSION);
240
241struct igb_reg_info {
242	u32 ofs;
243	char *name;
244};
245
246static const struct igb_reg_info igb_reg_info_tbl[] = {
247
248	/* General Registers */
249	{E1000_CTRL, "CTRL"},
250	{E1000_STATUS, "STATUS"},
251	{E1000_CTRL_EXT, "CTRL_EXT"},
252
253	/* Interrupt Registers */
254	{E1000_ICR, "ICR"},
255
256	/* RX Registers */
257	{E1000_RCTL, "RCTL"},
258	{E1000_RDLEN(0), "RDLEN"},
259	{E1000_RDH(0), "RDH"},
260	{E1000_RDT(0), "RDT"},
261	{E1000_RXDCTL(0), "RXDCTL"},
262	{E1000_RDBAL(0), "RDBAL"},
263	{E1000_RDBAH(0), "RDBAH"},
264
265	/* TX Registers */
266	{E1000_TCTL, "TCTL"},
267	{E1000_TDBAL(0), "TDBAL"},
268	{E1000_TDBAH(0), "TDBAH"},
269	{E1000_TDLEN(0), "TDLEN"},
270	{E1000_TDH(0), "TDH"},
271	{E1000_TDT(0), "TDT"},
272	{E1000_TXDCTL(0), "TXDCTL"},
273	{E1000_TDFH, "TDFH"},
274	{E1000_TDFT, "TDFT"},
275	{E1000_TDFHS, "TDFHS"},
276	{E1000_TDFPC, "TDFPC"},
277
278	/* List Terminator */
279	{}
280};
281
282/*
283 * igb_regdump - register printout routine
284 */
285static void igb_regdump(struct e1000_hw *hw, const struct igb_reg_info *reginfo)
286{
287	int n = 0;
288	char rname[16];
289	u32 regs[8];
290
291	switch (reginfo->ofs) {
292	case E1000_RDLEN(0):
293		for (n = 0; n < 4; n++)
294			regs[n] = rd32(E1000_RDLEN(n));
295		break;
296	case E1000_RDH(0):
297		for (n = 0; n < 4; n++)
298			regs[n] = rd32(E1000_RDH(n));
299		break;
300	case E1000_RDT(0):
301		for (n = 0; n < 4; n++)
302			regs[n] = rd32(E1000_RDT(n));
303		break;
304	case E1000_RXDCTL(0):
305		for (n = 0; n < 4; n++)
306			regs[n] = rd32(E1000_RXDCTL(n));
307		break;
308	case E1000_RDBAL(0):
309		for (n = 0; n < 4; n++)
310			regs[n] = rd32(E1000_RDBAL(n));
311		break;
312	case E1000_RDBAH(0):
313		for (n = 0; n < 4; n++)
314			regs[n] = rd32(E1000_RDBAH(n));
315		break;
316	case E1000_TDBAL(0):
317		for (n = 0; n < 4; n++)
318			regs[n] = rd32(E1000_TDBAL(n));
319		break;
320	case E1000_TDBAH(0):
321		for (n = 0; n < 4; n++)
322			regs[n] = rd32(E1000_TDBAH(n));
323		break;
324	case E1000_TDLEN(0):
325		for (n = 0; n < 4; n++)
326			regs[n] = rd32(E1000_TDLEN(n));
327		break;
328	case E1000_TDH(0):
329		for (n = 0; n < 4; n++)
330			regs[n] = rd32(E1000_TDH(n));
331		break;
332	case E1000_TDT(0):
333		for (n = 0; n < 4; n++)
334			regs[n] = rd32(E1000_TDT(n));
335		break;
336	case E1000_TXDCTL(0):
337		for (n = 0; n < 4; n++)
338			regs[n] = rd32(E1000_TXDCTL(n));
339		break;
340	default:
341		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
342		return;
343	}
344
345	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
346	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
347		regs[2], regs[3]);
348}
349
350/*
351 * igb_dump - Print registers, tx-rings and rx-rings
352 */
353static void igb_dump(struct igb_adapter *adapter)
354{
355	struct net_device *netdev = adapter->netdev;
356	struct e1000_hw *hw = &adapter->hw;
357	const struct igb_reg_info *reginfo;
358	struct igb_ring *tx_ring;
359	union e1000_adv_tx_desc *tx_desc;
360	struct my_u0 { u64 a; u64 b; } *u0;
361	struct igb_ring *rx_ring;
362	union e1000_adv_rx_desc *rx_desc;
363	u32 staterr;
364	u16 i, n;
365
366	if (!netif_msg_hw(adapter))
367		return;
368
369	/* Print netdevice Info */
370	if (netdev) {
371		dev_info(&adapter->pdev->dev, "Net device Info\n");
372		pr_info("Device Name     state            trans_start      "
373			"last_rx\n");
374		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
375			netdev->state, netdev->trans_start, netdev->last_rx);
376	}
377
378	/* Print Registers */
379	dev_info(&adapter->pdev->dev, "Register Dump\n");
380	pr_info(" Register Name   Value\n");
381	for (reginfo = igb_reg_info_tbl;
382	     reginfo->name; reginfo++) {
383		igb_regdump(hw, reginfo);
384	}
385
386	/* Print TX Ring Summary */
387	if (!netdev || !netif_running(netdev))
388		goto exit;
389
390	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
391	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
392	for (n = 0; n < adapter->num_tx_queues; n++) {
393		struct igb_tx_buffer *buffer_info;
394		tx_ring = adapter->tx_ring[n];
395		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
396		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
397			n, tx_ring->next_to_use, tx_ring->next_to_clean,
398			(u64)buffer_info->dma,
399			buffer_info->length,
400			buffer_info->next_to_watch,
401			(u64)buffer_info->time_stamp);
402	}
403
404	/* Print TX Rings */
405	if (!netif_msg_tx_done(adapter))
406		goto rx_ring_summary;
407
408	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
409
410	/* Transmit Descriptor Formats
411	 *
412	 * Advanced Transmit Descriptor
413	 *   +--------------------------------------------------------------+
414	 * 0 |         Buffer Address [63:0]                                |
415	 *   +--------------------------------------------------------------+
416	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
417	 *   +--------------------------------------------------------------+
418	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
419	 */
420
421	for (n = 0; n < adapter->num_tx_queues; n++) {
422		tx_ring = adapter->tx_ring[n];
423		pr_info("------------------------------------\n");
424		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
425		pr_info("------------------------------------\n");
426		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
427			"[bi->dma       ] leng  ntw timestamp        "
428			"bi->skb\n");
429
430		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
431			const char *next_desc;
432			struct igb_tx_buffer *buffer_info;
433			tx_desc = IGB_TX_DESC(tx_ring, i);
434			buffer_info = &tx_ring->tx_buffer_info[i];
435			u0 = (struct my_u0 *)tx_desc;
436			if (i == tx_ring->next_to_use &&
437			    i == tx_ring->next_to_clean)
438				next_desc = " NTC/U";
439			else if (i == tx_ring->next_to_use)
440				next_desc = " NTU";
441			else if (i == tx_ring->next_to_clean)
442				next_desc = " NTC";
443			else
444				next_desc = "";
445
446			pr_info("T [0x%03X]    %016llX %016llX %016llX"
447				" %04X  %p %016llX %p%s\n", i,
448				le64_to_cpu(u0->a),
449				le64_to_cpu(u0->b),
450				(u64)buffer_info->dma,
451				buffer_info->length,
452				buffer_info->next_to_watch,
453				(u64)buffer_info->time_stamp,
454				buffer_info->skb, next_desc);
455
456			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
457				print_hex_dump(KERN_INFO, "",
458					DUMP_PREFIX_ADDRESS,
459					16, 1, phys_to_virt(buffer_info->dma),
460					buffer_info->length, true);
461		}
462	}
463
464	/* Print RX Rings Summary */
465rx_ring_summary:
466	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
467	pr_info("Queue [NTU] [NTC]\n");
468	for (n = 0; n < adapter->num_rx_queues; n++) {
469		rx_ring = adapter->rx_ring[n];
470		pr_info(" %5d %5X %5X\n",
471			n, rx_ring->next_to_use, rx_ring->next_to_clean);
472	}
473
474	/* Print RX Rings */
475	if (!netif_msg_rx_status(adapter))
476		goto exit;
477
478	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
479
480	/* Advanced Receive Descriptor (Read) Format
481	 *    63                                           1        0
482	 *    +-----------------------------------------------------+
483	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
484	 *    +----------------------------------------------+------+
485	 *  8 |       Header Buffer Address [63:1]           |  DD  |
486	 *    +-----------------------------------------------------+
487	 *
488	 *
489	 * Advanced Receive Descriptor (Write-Back) Format
490	 *
491	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
492	 *   +------------------------------------------------------+
493	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
494	 *   | Checksum   Ident  |   |           |    | Type | Type |
495	 *   +------------------------------------------------------+
496	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
497	 *   +------------------------------------------------------+
498	 *   63       48 47    32 31            20 19               0
499	 */
500
501	for (n = 0; n < adapter->num_rx_queues; n++) {
502		rx_ring = adapter->rx_ring[n];
503		pr_info("------------------------------------\n");
504		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
505		pr_info("------------------------------------\n");
506		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
507			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
508		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
509			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
510
511		for (i = 0; i < rx_ring->count; i++) {
512			const char *next_desc;
513			struct igb_rx_buffer *buffer_info;
514			buffer_info = &rx_ring->rx_buffer_info[i];
515			rx_desc = IGB_RX_DESC(rx_ring, i);
516			u0 = (struct my_u0 *)rx_desc;
517			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
518
519			if (i == rx_ring->next_to_use)
520				next_desc = " NTU";
521			else if (i == rx_ring->next_to_clean)
522				next_desc = " NTC";
523			else
524				next_desc = "";
525
526			if (staterr & E1000_RXD_STAT_DD) {
527				/* Descriptor Done */
528				pr_info("%s[0x%03X]     %016llX %016llX -------"
529					"--------- %p%s\n", "RWB", i,
530					le64_to_cpu(u0->a),
531					le64_to_cpu(u0->b),
532					buffer_info->skb, next_desc);
533			} else {
534				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
535					" %p%s\n", "R  ", i,
536					le64_to_cpu(u0->a),
537					le64_to_cpu(u0->b),
538					(u64)buffer_info->dma,
539					buffer_info->skb, next_desc);
540
541				if (netif_msg_pktdata(adapter)) {
542					print_hex_dump(KERN_INFO, "",
543						DUMP_PREFIX_ADDRESS,
544						16, 1,
545						phys_to_virt(buffer_info->dma),
546						IGB_RX_HDR_LEN, true);
547					print_hex_dump(KERN_INFO, "",
548					  DUMP_PREFIX_ADDRESS,
549					  16, 1,
550					  phys_to_virt(
551					    buffer_info->page_dma +
552					    buffer_info->page_offset),
553					  PAGE_SIZE/2, true);
554				}
555			}
556		}
557	}
558
559exit:
560	return;
561}
562
563
564/**
565 * igb_read_clock - read raw cycle counter (to be used by time counter)
566 */
567static cycle_t igb_read_clock(const struct cyclecounter *tc)
568{
569	struct igb_adapter *adapter =
570		container_of(tc, struct igb_adapter, cycles);
571	struct e1000_hw *hw = &adapter->hw;
572	u64 stamp = 0;
573	int shift = 0;
574
575	/*
576	 * The timestamp is latched on the lowest register read.  For the 82580
577	 * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
578	 * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
579	 */
580	if (hw->mac.type >= e1000_82580) {
581		stamp = rd32(E1000_SYSTIMR) >> 8;
582		shift = IGB_82580_TSYNC_SHIFT;
583	}
584
585	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
586	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
587	return stamp;
588}
589
590/**
591 * igb_get_hw_dev - return device
592 * used by hardware layer to print debugging information
593 **/
594struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
595{
596	struct igb_adapter *adapter = hw->back;
597	return adapter->netdev;
598}
599
600/**
601 * igb_init_module - Driver Registration Routine
602 *
603 * igb_init_module is the first routine called when the driver is
604 * loaded. All it does is register with the PCI subsystem.
605 **/
606static int __init igb_init_module(void)
607{
608	int ret;
609	pr_info("%s - version %s\n",
610	       igb_driver_string, igb_driver_version);
611
612	pr_info("%s\n", igb_copyright);
613
614#ifdef CONFIG_IGB_DCA
615	dca_register_notify(&dca_notifier);
616#endif
617	ret = pci_register_driver(&igb_driver);
618	return ret;
619}
620
621module_init(igb_init_module);
622
623/**
624 * igb_exit_module - Driver Exit Cleanup Routine
625 *
626 * igb_exit_module is called just before the driver is removed
627 * from memory.
628 **/
629static void __exit igb_exit_module(void)
630{
631#ifdef CONFIG_IGB_DCA
632	dca_unregister_notify(&dca_notifier);
633#endif
634	pci_unregister_driver(&igb_driver);
635}
636
637module_exit(igb_exit_module);
638
639#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
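/* e.g. Q_IDX_82576 maps i = 0,1,2,3,4,5 to 0,8,1,9,2,10, interleaving the
 * PF rings around the queue pairs reserved for the VFs (VF n owns queues
 * n and n + 8, see igb_cache_ring_register() below).
 */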
640/**
641 * igb_cache_ring_register - Descriptor ring to register mapping
642 * @adapter: board private structure to initialize
643 *
644 * Once we know the feature-set enabled for the device, we'll cache
645 * the register offset the descriptor ring is assigned to.
646 **/
647static void igb_cache_ring_register(struct igb_adapter *adapter)
648{
649	int i = 0, j = 0;
650	u32 rbase_offset = adapter->vfs_allocated_count;
651
652	switch (adapter->hw.mac.type) {
653	case e1000_82576:
654		/* The queues are allocated for virtualization such that VF 0
655		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
656		 * In order to avoid collision we start at the first free queue
657		 * and continue consuming queues in the same sequence
658		 */
659		if (adapter->vfs_allocated_count) {
660			for (; i < adapter->rss_queues; i++)
661				adapter->rx_ring[i]->reg_idx = rbase_offset +
662				                               Q_IDX_82576(i);
663		}
664	case e1000_82575:
665	case e1000_82580:
666	case e1000_i350:
667	default:
668		for (; i < adapter->num_rx_queues; i++)
669			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
670		for (; j < adapter->num_tx_queues; j++)
671			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
672		break;
673	}
674}
675
676static void igb_free_queues(struct igb_adapter *adapter)
677{
678	int i;
679
680	for (i = 0; i < adapter->num_tx_queues; i++) {
681		kfree(adapter->tx_ring[i]);
682		adapter->tx_ring[i] = NULL;
683	}
684	for (i = 0; i < adapter->num_rx_queues; i++) {
685		kfree(adapter->rx_ring[i]);
686		adapter->rx_ring[i] = NULL;
687	}
688	adapter->num_rx_queues = 0;
689	adapter->num_tx_queues = 0;
690}
691
692/**
693 * igb_alloc_queues - Allocate memory for all rings
694 * @adapter: board private structure to initialize
695 *
696 * We allocate one ring per queue at run-time since we don't know the
697 * number of queues at compile-time.
698 **/
699static int igb_alloc_queues(struct igb_adapter *adapter)
700{
701	struct igb_ring *ring;
702	int i;
703	int orig_node = adapter->node;
704
705	for (i = 0; i < adapter->num_tx_queues; i++) {
706		if (orig_node == -1) {
707			int cur_node = next_online_node(adapter->node);
708			if (cur_node == MAX_NUMNODES)
709				cur_node = first_online_node;
710			adapter->node = cur_node;
711		}
712		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
713				    adapter->node);
714		if (!ring)
715			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
716		if (!ring)
717			goto err;
718		ring->count = adapter->tx_ring_count;
719		ring->queue_index = i;
720		ring->dev = &adapter->pdev->dev;
721		ring->netdev = adapter->netdev;
722		ring->numa_node = adapter->node;
723		/* For 82575, context index must be unique per ring. */
724		if (adapter->hw.mac.type == e1000_82575)
725			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
726		adapter->tx_ring[i] = ring;
727	}
728	/* Restore the adapter's original node */
729	adapter->node = orig_node;
730
731	for (i = 0; i < adapter->num_rx_queues; i++) {
732		if (orig_node == -1) {
733			int cur_node = next_online_node(adapter->node);
734			if (cur_node == MAX_NUMNODES)
735				cur_node = first_online_node;
736			adapter->node = cur_node;
737		}
738		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
739				    adapter->node);
740		if (!ring)
741			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
742		if (!ring)
743			goto err;
744		ring->count = adapter->rx_ring_count;
745		ring->queue_index = i;
746		ring->dev = &adapter->pdev->dev;
747		ring->netdev = adapter->netdev;
748		ring->numa_node = adapter->node;
749		/* set flag indicating ring supports SCTP checksum offload */
750		if (adapter->hw.mac.type >= e1000_82576)
751			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
752
753		/* On i350, loopback VLAN packets have the tag byte-swapped. */
754		if (adapter->hw.mac.type == e1000_i350)
755			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
756
757		adapter->rx_ring[i] = ring;
758	}
759	/* Restore the adapter's original node */
760	adapter->node = orig_node;
761
762	igb_cache_ring_register(adapter);
763
764	return 0;
765
766err:
767	/* Restore the adapter's original node */
768	adapter->node = orig_node;
769	igb_free_queues(adapter);
770
771	return -ENOMEM;
772}
773
774/**
775 *  igb_write_ivar - configure ivar for given MSI-X vector
776 *  @hw: pointer to the HW structure
777 *  @msix_vector: vector number we are allocating to a given ring
778 *  @index: row index of IVAR register to write within IVAR table
779 *  @offset: column offset within IVAR, should be a multiple of 8
780 *
781 *  This function is intended to handle the writing of the IVAR register
782 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
783 *  each containing a cause allocation for an Rx and Tx ring, and a
784 *  variable number of rows depending on the number of queues supported.
785 **/
786static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
787			   int index, int offset)
788{
789	u32 ivar = array_rd32(E1000_IVAR0, index);
790
791	/* clear any bits that are currently set */
792	ivar &= ~((u32)0xFF << offset);
793
794	/* write vector and valid bit */
795	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
796
797	array_wr32(E1000_IVAR0, index, ivar);
798}
799
800#define IGB_N0_QUEUE -1
801static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
802{
803	struct igb_adapter *adapter = q_vector->adapter;
804	struct e1000_hw *hw = &adapter->hw;
805	int rx_queue = IGB_N0_QUEUE;
806	int tx_queue = IGB_N0_QUEUE;
807	u32 msixbm = 0;
808
809	if (q_vector->rx.ring)
810		rx_queue = q_vector->rx.ring->reg_idx;
811	if (q_vector->tx.ring)
812		tx_queue = q_vector->tx.ring->reg_idx;
813
814	switch (hw->mac.type) {
815	case e1000_82575:
816		/* The 82575 assigns vectors using a bitmask, which matches the
817		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
818		   or more queues to a vector, we write the appropriate bits
819		   into the MSIXBM register for that vector. */
820		if (rx_queue > IGB_N0_QUEUE)
821			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
822		if (tx_queue > IGB_N0_QUEUE)
823			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
824		if (!adapter->msix_entries && msix_vector == 0)
825			msixbm |= E1000_EIMS_OTHER;
826		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
827		q_vector->eims_value = msixbm;
828		break;
829	case e1000_82576:
830		/*
831		 * 82576 uses a table that essentially consists of 2 columns
832		 * with 8 rows.  The ordering is column-major so we use the
833		 * lower 3 bits as the row index, and the 4th bit as the
834		 * column offset.
835		 */
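		/* e.g. queue 10 maps to IVAR row 2 (10 & 0x7) at bit offset 16
		 * for the Rx cause and bit offset 24 for the paired Tx cause.
		 */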
836		if (rx_queue > IGB_N0_QUEUE)
837			igb_write_ivar(hw, msix_vector,
838				       rx_queue & 0x7,
839				       (rx_queue & 0x8) << 1);
840		if (tx_queue > IGB_N0_QUEUE)
841			igb_write_ivar(hw, msix_vector,
842				       tx_queue & 0x7,
843				       ((tx_queue & 0x8) << 1) + 8);
844		q_vector->eims_value = 1 << msix_vector;
845		break;
846	case e1000_82580:
847	case e1000_i350:
848		/*
849		 * On 82580 and newer adapters the scheme is similar to 82576
850		 * however instead of ordering column-major we have things
851		 * ordered row-major.  So we traverse the table by using
852		 * bit 0 as the column offset, and the remaining bits as the
853		 * row index.
854		 */
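		/* e.g. queue 10 maps to IVAR row 5 (10 >> 1) at bit offset 0
		 * for the Rx cause and bit offset 8 for the paired Tx cause.
		 */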
855		if (rx_queue > IGB_N0_QUEUE)
856			igb_write_ivar(hw, msix_vector,
857				       rx_queue >> 1,
858				       (rx_queue & 0x1) << 4);
859		if (tx_queue > IGB_N0_QUEUE)
860			igb_write_ivar(hw, msix_vector,
861				       tx_queue >> 1,
862				       ((tx_queue & 0x1) << 4) + 8);
863		q_vector->eims_value = 1 << msix_vector;
864		break;
865	default:
866		BUG();
867		break;
868	}
869
870	/* add q_vector eims value to global eims_enable_mask */
871	adapter->eims_enable_mask |= q_vector->eims_value;
872
873	/* configure q_vector to set itr on first interrupt */
874	q_vector->set_itr = 1;
875}
876
877/**
878 * igb_configure_msix - Configure MSI-X hardware
879 *
880 * igb_configure_msix sets up the hardware to properly
881 * generate MSI-X interrupts.
882 **/
883static void igb_configure_msix(struct igb_adapter *adapter)
884{
885	u32 tmp;
886	int i, vector = 0;
887	struct e1000_hw *hw = &adapter->hw;
888
889	adapter->eims_enable_mask = 0;
890
891	/* set vector for other causes, i.e. link changes */
892	switch (hw->mac.type) {
893	case e1000_82575:
894		tmp = rd32(E1000_CTRL_EXT);
895		/* enable MSI-X PBA support*/
896		tmp |= E1000_CTRL_EXT_PBA_CLR;
897
898		/* Auto-Mask interrupts upon ICR read. */
899		tmp |= E1000_CTRL_EXT_EIAME;
900		tmp |= E1000_CTRL_EXT_IRCA;
901
902		wr32(E1000_CTRL_EXT, tmp);
903
904		/* enable msix_other interrupt */
905		array_wr32(E1000_MSIXBM(0), vector++,
906		                      E1000_EIMS_OTHER);
907		adapter->eims_other = E1000_EIMS_OTHER;
908
909		break;
910
911	case e1000_82576:
912	case e1000_82580:
913	case e1000_i350:
914		/* Turn on MSI-X capability first, or our settings
915		 * won't stick.  And it will take days to debug. */
916		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
917		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
918		                E1000_GPIE_NSICR);
919
920		/* enable msix_other interrupt */
921		adapter->eims_other = 1 << vector;
922		tmp = (vector++ | E1000_IVAR_VALID) << 8;
923
924		wr32(E1000_IVAR_MISC, tmp);
925		break;
926	default:
927		/* do nothing, since nothing else supports MSI-X */
928		break;
929	} /* switch (hw->mac.type) */
930
931	adapter->eims_enable_mask |= adapter->eims_other;
932
933	for (i = 0; i < adapter->num_q_vectors; i++)
934		igb_assign_vector(adapter->q_vector[i], vector++);
935
936	wrfl();
937}
938
939/**
940 * igb_request_msix - Initialize MSI-X interrupts
941 *
942 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
943 * kernel.
944 **/
945static int igb_request_msix(struct igb_adapter *adapter)
946{
947	struct net_device *netdev = adapter->netdev;
948	struct e1000_hw *hw = &adapter->hw;
949	int i, err = 0, vector = 0;
950
951	err = request_irq(adapter->msix_entries[vector].vector,
952	                  igb_msix_other, 0, netdev->name, adapter);
953	if (err)
954		goto out;
955	vector++;
956
957	for (i = 0; i < adapter->num_q_vectors; i++) {
958		struct igb_q_vector *q_vector = adapter->q_vector[i];
959
960		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
961
962		if (q_vector->rx.ring && q_vector->tx.ring)
963			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
964				q_vector->rx.ring->queue_index);
965		else if (q_vector->tx.ring)
966			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
967				q_vector->tx.ring->queue_index);
968		else if (q_vector->rx.ring)
969			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
970				q_vector->rx.ring->queue_index);
971		else
972			sprintf(q_vector->name, "%s-unused", netdev->name);
973
974		err = request_irq(adapter->msix_entries[vector].vector,
975		                  igb_msix_ring, 0, q_vector->name,
976		                  q_vector);
977		if (err)
978			goto out;
979		vector++;
980	}
981
982	igb_configure_msix(adapter);
983	return 0;
984out:
985	return err;
986}
987
988static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
989{
990	if (adapter->msix_entries) {
991		pci_disable_msix(adapter->pdev);
992		kfree(adapter->msix_entries);
993		adapter->msix_entries = NULL;
994	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
995		pci_disable_msi(adapter->pdev);
996	}
997}
998
999/**
1000 * igb_free_q_vectors - Free memory allocated for interrupt vectors
1001 * @adapter: board private structure to initialize
1002 *
1003 * This function frees the memory allocated to the q_vectors.  In addition if
1004 * NAPI is enabled it will delete any references to the NAPI struct prior
1005 * to freeing the q_vector.
1006 **/
1007static void igb_free_q_vectors(struct igb_adapter *adapter)
1008{
1009	int v_idx;
1010
1011	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1012		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1013		adapter->q_vector[v_idx] = NULL;
1014		if (!q_vector)
1015			continue;
1016		netif_napi_del(&q_vector->napi);
1017		kfree(q_vector);
1018	}
1019	adapter->num_q_vectors = 0;
1020}
1021
1022/**
1023 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1024 *
1025 * This function resets the device so that it has 0 rx queues, tx queues, and
1026 * MSI-X interrupts allocated.
1027 */
1028static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1029{
1030	igb_free_queues(adapter);
1031	igb_free_q_vectors(adapter);
1032	igb_reset_interrupt_capability(adapter);
1033}
1034
1035/**
1036 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1037 *
1038 * Attempt to configure interrupts using the best available
1039 * capabilities of the hardware and kernel.
1040 **/
1041static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1042{
1043	int err;
1044	int numvecs, i;
1045
1046	/* Number of supported queues. */
1047	adapter->num_rx_queues = adapter->rss_queues;
1048	if (adapter->vfs_allocated_count)
1049		adapter->num_tx_queues = 1;
1050	else
1051		adapter->num_tx_queues = adapter->rss_queues;
1052
1053	/* start with one vector for every rx queue */
1054	numvecs = adapter->num_rx_queues;
1055
1056	/* if tx handler is separate add 1 for every tx queue */
1057	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1058		numvecs += adapter->num_tx_queues;
1059
1060	/* store the number of vectors reserved for queues */
1061	adapter->num_q_vectors = numvecs;
1062
1063	/* add 1 vector for link status interrupts */
1064	numvecs++;
1065	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1066					GFP_KERNEL);
1067	if (!adapter->msix_entries)
1068		goto msi_only;
1069
1070	for (i = 0; i < numvecs; i++)
1071		adapter->msix_entries[i].entry = i;
1072
1073	err = pci_enable_msix(adapter->pdev,
1074			      adapter->msix_entries,
1075			      numvecs);
1076	if (err == 0)
1077		goto out;
1078
1079	igb_reset_interrupt_capability(adapter);
1080
1081	/* If we can't do MSI-X, try MSI */
1082msi_only:
1083#ifdef CONFIG_PCI_IOV
1084	/* disable SR-IOV for non MSI-X configurations */
1085	if (adapter->vf_data) {
1086		struct e1000_hw *hw = &adapter->hw;
1087		/* disable iov and allow time for transactions to clear */
1088		pci_disable_sriov(adapter->pdev);
1089		msleep(500);
1090
1091		kfree(adapter->vf_data);
1092		adapter->vf_data = NULL;
1093		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1094		wrfl();
1095		msleep(100);
1096		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1097	}
1098#endif
1099	adapter->vfs_allocated_count = 0;
1100	adapter->rss_queues = 1;
1101	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1102	adapter->num_rx_queues = 1;
1103	adapter->num_tx_queues = 1;
1104	adapter->num_q_vectors = 1;
1105	if (!pci_enable_msi(adapter->pdev))
1106		adapter->flags |= IGB_FLAG_HAS_MSI;
1107out:
1108	/* Notify the stack of the (possibly) reduced queue counts. */
1109	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1110	return netif_set_real_num_rx_queues(adapter->netdev,
1111					    adapter->num_rx_queues);
1112}
1113
1114/**
1115 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1116 * @adapter: board private structure to initialize
1117 *
1118 * We allocate one q_vector per queue interrupt.  If allocation fails we
1119 * return -ENOMEM.
1120 **/
1121static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1122{
1123	struct igb_q_vector *q_vector;
1124	struct e1000_hw *hw = &adapter->hw;
1125	int v_idx;
1126	int orig_node = adapter->node;
1127
1128	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1129		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1130						adapter->num_tx_queues)) &&
1131		    (adapter->num_rx_queues == v_idx))
1132			adapter->node = orig_node;
1133		if (orig_node == -1) {
1134			int cur_node = next_online_node(adapter->node);
1135			if (cur_node == MAX_NUMNODES)
1136				cur_node = first_online_node;
1137			adapter->node = cur_node;
1138		}
1139		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1140					adapter->node);
1141		if (!q_vector)
1142			q_vector = kzalloc(sizeof(struct igb_q_vector),
1143					   GFP_KERNEL);
1144		if (!q_vector)
1145			goto err_out;
1146		q_vector->adapter = adapter;
1147		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1148		q_vector->itr_val = IGB_START_ITR;
1149		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1150		adapter->q_vector[v_idx] = q_vector;
1151	}
1152	/* Restore the adapter's original node */
1153	adapter->node = orig_node;
1154
1155	return 0;
1156
1157err_out:
1158	/* Restore the adapter's original node */
1159	adapter->node = orig_node;
1160	igb_free_q_vectors(adapter);
1161	return -ENOMEM;
1162}
1163
1164static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1165                                      int ring_idx, int v_idx)
1166{
1167	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1168
1169	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1170	q_vector->rx.ring->q_vector = q_vector;
1171	q_vector->rx.count++;
1172	q_vector->itr_val = adapter->rx_itr_setting;
1173	if (q_vector->itr_val && q_vector->itr_val <= 3)
1174		q_vector->itr_val = IGB_START_ITR;
1175}
1176
1177static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1178                                      int ring_idx, int v_idx)
1179{
1180	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1181
1182	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1183	q_vector->tx.ring->q_vector = q_vector;
1184	q_vector->tx.count++;
1185	q_vector->itr_val = adapter->tx_itr_setting;
1186	q_vector->tx.work_limit = adapter->tx_work_limit;
1187	if (q_vector->itr_val && q_vector->itr_val <= 3)
1188		q_vector->itr_val = IGB_START_ITR;
1189}
1190
1191/**
1192 * igb_map_ring_to_vector - maps allocated queues to vectors
1193 *
1194 * This function maps the recently allocated queues to vectors.
1195 **/
1196static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1197{
1198	int i;
1199	int v_idx = 0;
1200
1201	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1202	    (adapter->num_q_vectors < adapter->num_tx_queues))
1203		return -ENOMEM;
1204
1205	if (adapter->num_q_vectors >=
1206	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1207		for (i = 0; i < adapter->num_rx_queues; i++)
1208			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209		for (i = 0; i < adapter->num_tx_queues; i++)
1210			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1211	} else {
1212		for (i = 0; i < adapter->num_rx_queues; i++) {
1213			if (i < adapter->num_tx_queues)
1214				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1215			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1216		}
1217		for (; i < adapter->num_tx_queues; i++)
1218			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1219	}
1220	return 0;
1221}
1222
1223/**
1224 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1225 *
1226 * This function initializes the interrupts and allocates all of the queues.
1227 **/
1228static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1229{
1230	struct pci_dev *pdev = adapter->pdev;
1231	int err;
1232
1233	err = igb_set_interrupt_capability(adapter);
1234	if (err)
1235		return err;
1236
1237	err = igb_alloc_q_vectors(adapter);
1238	if (err) {
1239		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1240		goto err_alloc_q_vectors;
1241	}
1242
1243	err = igb_alloc_queues(adapter);
1244	if (err) {
1245		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1246		goto err_alloc_queues;
1247	}
1248
1249	err = igb_map_ring_to_vector(adapter);
1250	if (err) {
1251		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1252		goto err_map_queues;
1253	}
1254
1255
1256	return 0;
1257err_map_queues:
1258	igb_free_queues(adapter);
1259err_alloc_queues:
1260	igb_free_q_vectors(adapter);
1261err_alloc_q_vectors:
1262	igb_reset_interrupt_capability(adapter);
1263	return err;
1264}
1265
1266/**
1267 * igb_request_irq - initialize interrupts
1268 *
1269 * Attempts to configure interrupts using the best available
1270 * capabilities of the hardware and kernel.
1271 **/
1272static int igb_request_irq(struct igb_adapter *adapter)
1273{
1274	struct net_device *netdev = adapter->netdev;
1275	struct pci_dev *pdev = adapter->pdev;
1276	int err = 0;
1277
1278	if (adapter->msix_entries) {
1279		err = igb_request_msix(adapter);
1280		if (!err)
1281			goto request_done;
1282		/* fall back to MSI */
1283		igb_clear_interrupt_scheme(adapter);
1284		if (!pci_enable_msi(pdev))
1285			adapter->flags |= IGB_FLAG_HAS_MSI;
1286		igb_free_all_tx_resources(adapter);
1287		igb_free_all_rx_resources(adapter);
1288		adapter->num_tx_queues = 1;
1289		adapter->num_rx_queues = 1;
1290		adapter->num_q_vectors = 1;
1291		err = igb_alloc_q_vectors(adapter);
1292		if (err) {
1293			dev_err(&pdev->dev,
1294			        "Unable to allocate memory for vectors\n");
1295			goto request_done;
1296		}
1297		err = igb_alloc_queues(adapter);
1298		if (err) {
1299			dev_err(&pdev->dev,
1300			        "Unable to allocate memory for queues\n");
1301			igb_free_q_vectors(adapter);
1302			goto request_done;
1303		}
1304		igb_setup_all_tx_resources(adapter);
1305		igb_setup_all_rx_resources(adapter);
1306	}
1307
1308	igb_assign_vector(adapter->q_vector[0], 0);
1309
1310	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1311		err = request_irq(pdev->irq, igb_intr_msi, 0,
1312				  netdev->name, adapter);
1313		if (!err)
1314			goto request_done;
1315
1316		/* fall back to legacy interrupts */
1317		igb_reset_interrupt_capability(adapter);
1318		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1319	}
1320
1321	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1322			  netdev->name, adapter);
1323
1324	if (err)
1325		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1326			err);
1327
1328request_done:
1329	return err;
1330}
1331
1332static void igb_free_irq(struct igb_adapter *adapter)
1333{
1334	if (adapter->msix_entries) {
1335		int vector = 0, i;
1336
1337		free_irq(adapter->msix_entries[vector++].vector, adapter);
1338
1339		for (i = 0; i < adapter->num_q_vectors; i++)
1340			free_irq(adapter->msix_entries[vector++].vector,
1341				 adapter->q_vector[i]);
1342	} else {
1343		free_irq(adapter->pdev->irq, adapter);
1344	}
1345}
1346
1347/**
1348 * igb_irq_disable - Mask off interrupt generation on the NIC
1349 * @adapter: board private structure
1350 **/
1351static void igb_irq_disable(struct igb_adapter *adapter)
1352{
1353	struct e1000_hw *hw = &adapter->hw;
1354
1355	/*
1356	 * We need to be careful when disabling interrupts.  The VFs are also
1357	 * mapped into these registers, so clearing the bits can cause issues
1358	 * for the VF drivers; we only clear the bits we set.
1359	 */
1360	if (adapter->msix_entries) {
1361		u32 regval = rd32(E1000_EIAM);
1362		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1363		wr32(E1000_EIMC, adapter->eims_enable_mask);
1364		regval = rd32(E1000_EIAC);
1365		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1366	}
1367
1368	wr32(E1000_IAM, 0);
1369	wr32(E1000_IMC, ~0);
1370	wrfl();
1371	if (adapter->msix_entries) {
1372		int i;
1373		for (i = 0; i < adapter->num_q_vectors; i++)
1374			synchronize_irq(adapter->msix_entries[i].vector);
1375	} else {
1376		synchronize_irq(adapter->pdev->irq);
1377	}
1378}
1379
1380/**
1381 * igb_irq_enable - Enable default interrupt generation settings
1382 * @adapter: board private structure
1383 **/
1384static void igb_irq_enable(struct igb_adapter *adapter)
1385{
1386	struct e1000_hw *hw = &adapter->hw;
1387
1388	if (adapter->msix_entries) {
1389		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1390		u32 regval = rd32(E1000_EIAC);
1391		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1392		regval = rd32(E1000_EIAM);
1393		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1394		wr32(E1000_EIMS, adapter->eims_enable_mask);
1395		if (adapter->vfs_allocated_count) {
1396			wr32(E1000_MBVFIMR, 0xFF);
1397			ims |= E1000_IMS_VMMB;
1398		}
1399		wr32(E1000_IMS, ims);
1400	} else {
1401		wr32(E1000_IMS, IMS_ENABLE_MASK |
1402				E1000_IMS_DRSTA);
1403		wr32(E1000_IAM, IMS_ENABLE_MASK |
1404				E1000_IMS_DRSTA);
1405	}
1406}
1407
1408static void igb_update_mng_vlan(struct igb_adapter *adapter)
1409{
1410	struct e1000_hw *hw = &adapter->hw;
1411	u16 vid = adapter->hw.mng_cookie.vlan_id;
1412	u16 old_vid = adapter->mng_vlan_id;
1413
1414	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1415		/* add VID to filter table */
1416		igb_vfta_set(hw, vid, true);
1417		adapter->mng_vlan_id = vid;
1418	} else {
1419		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1420	}
1421
1422	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1423	    (vid != old_vid) &&
1424	    !test_bit(old_vid, adapter->active_vlans)) {
1425		/* remove VID from filter table */
1426		igb_vfta_set(hw, old_vid, false);
1427	}
1428}
1429
1430/**
1431 * igb_release_hw_control - release control of the h/w to f/w
1432 * @adapter: address of board private structure
1433 *
1434 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1435 * For ASF and Pass Through versions of f/w this means that the
1436 * driver is no longer loaded.
1437 *
1438 **/
1439static void igb_release_hw_control(struct igb_adapter *adapter)
1440{
1441	struct e1000_hw *hw = &adapter->hw;
1442	u32 ctrl_ext;
1443
1444	/* Let firmware take over control of h/w */
1445	ctrl_ext = rd32(E1000_CTRL_EXT);
1446	wr32(E1000_CTRL_EXT,
1447			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1448}
1449
1450/**
1451 * igb_get_hw_control - get control of the h/w from f/w
1452 * @adapter: address of board private structure
1453 *
1454 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1455 * For ASF and Pass Through versions of f/w this means that
1456 * the driver is loaded.
1457 *
1458 **/
1459static void igb_get_hw_control(struct igb_adapter *adapter)
1460{
1461	struct e1000_hw *hw = &adapter->hw;
1462	u32 ctrl_ext;
1463
1464	/* Let firmware know the driver has taken over */
1465	ctrl_ext = rd32(E1000_CTRL_EXT);
1466	wr32(E1000_CTRL_EXT,
1467			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1468}
1469
1470/**
1471 * igb_configure - configure the hardware for RX and TX
1472 * @adapter: private board structure
1473 **/
1474static void igb_configure(struct igb_adapter *adapter)
1475{
1476	struct net_device *netdev = adapter->netdev;
1477	int i;
1478
1479	igb_get_hw_control(adapter);
1480	igb_set_rx_mode(netdev);
1481
1482	igb_restore_vlan(adapter);
1483
1484	igb_setup_tctl(adapter);
1485	igb_setup_mrqc(adapter);
1486	igb_setup_rctl(adapter);
1487
1488	igb_configure_tx(adapter);
1489	igb_configure_rx(adapter);
1490
1491	igb_rx_fifo_flush_82575(&adapter->hw);
1492
1493	/* call igb_desc_unused which always leaves
1494	 * at least 1 descriptor unused to make sure
1495	 * next_to_use != next_to_clean */
1496	for (i = 0; i < adapter->num_rx_queues; i++) {
1497		struct igb_ring *ring = adapter->rx_ring[i];
1498		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1499	}
1500}
1501
1502/**
1503 * igb_power_up_link - Power up the phy/serdes link
1504 * @adapter: address of board private structure
1505 **/
1506void igb_power_up_link(struct igb_adapter *adapter)
1507{
1508	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1509		igb_power_up_phy_copper(&adapter->hw);
1510	else
1511		igb_power_up_serdes_link_82575(&adapter->hw);
1512	igb_reset_phy(&adapter->hw);
1513}
1514
1515/**
1516 * igb_power_down_link - Power down the phy/serdes link
1517 * @adapter: address of board private structure
1518 */
1519static void igb_power_down_link(struct igb_adapter *adapter)
1520{
1521	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1522		igb_power_down_phy_copper_82575(&adapter->hw);
1523	else
1524		igb_shutdown_serdes_link_82575(&adapter->hw);
1525}
1526
1527/**
1528 * igb_up - Open the interface and prepare it to handle traffic
1529 * @adapter: board private structure
1530 **/
1531int igb_up(struct igb_adapter *adapter)
1532{
1533	struct e1000_hw *hw = &adapter->hw;
1534	int i;
1535
1536	/* hardware has been reset, we need to reload some things */
1537	igb_configure(adapter);
1538
1539	clear_bit(__IGB_DOWN, &adapter->state);
1540
1541	for (i = 0; i < adapter->num_q_vectors; i++)
1542		napi_enable(&(adapter->q_vector[i]->napi));
1543
1544	if (adapter->msix_entries)
1545		igb_configure_msix(adapter);
1546	else
1547		igb_assign_vector(adapter->q_vector[0], 0);
1548
1549	/* Clear any pending interrupts. */
1550	rd32(E1000_ICR);
1551	igb_irq_enable(adapter);
1552
1553	/* notify VFs that reset has been completed */
1554	if (adapter->vfs_allocated_count) {
1555		u32 reg_data = rd32(E1000_CTRL_EXT);
1556		reg_data |= E1000_CTRL_EXT_PFRSTD;
1557		wr32(E1000_CTRL_EXT, reg_data);
1558	}
1559
1560	netif_tx_start_all_queues(adapter->netdev);
1561
1562	/* start the watchdog. */
1563	hw->mac.get_link_status = 1;
1564	schedule_work(&adapter->watchdog_task);
1565
1566	return 0;
1567}
1568
1569void igb_down(struct igb_adapter *adapter)
1570{
1571	struct net_device *netdev = adapter->netdev;
1572	struct e1000_hw *hw = &adapter->hw;
1573	u32 tctl, rctl;
1574	int i;
1575
1576	/* signal that we're down so the interrupt handler does not
1577	 * reschedule our watchdog timer */
1578	set_bit(__IGB_DOWN, &adapter->state);
1579
1580	/* disable receives in the hardware */
1581	rctl = rd32(E1000_RCTL);
1582	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1583	/* flush and sleep below */
1584
1585	netif_tx_stop_all_queues(netdev);
1586
1587	/* disable transmits in the hardware */
1588	tctl = rd32(E1000_TCTL);
1589	tctl &= ~E1000_TCTL_EN;
1590	wr32(E1000_TCTL, tctl);
1591	/* flush both disables and wait for them to finish */
1592	wrfl();
1593	msleep(10);
1594
1595	for (i = 0; i < adapter->num_q_vectors; i++)
1596		napi_disable(&(adapter->q_vector[i]->napi));
1597
1598	igb_irq_disable(adapter);
1599
1600	del_timer_sync(&adapter->watchdog_timer);
1601	del_timer_sync(&adapter->phy_info_timer);
1602
1603	netif_carrier_off(netdev);
1604
1605	/* record the stats before reset */
1606	spin_lock(&adapter->stats64_lock);
1607	igb_update_stats(adapter, &adapter->stats64);
1608	spin_unlock(&adapter->stats64_lock);
1609
1610	adapter->link_speed = 0;
1611	adapter->link_duplex = 0;
1612
1613	if (!pci_channel_offline(adapter->pdev))
1614		igb_reset(adapter);
1615	igb_clean_all_tx_rings(adapter);
1616	igb_clean_all_rx_rings(adapter);
1617#ifdef CONFIG_IGB_DCA
1618
1619	/* since we reset the hardware DCA settings were cleared */
1620	igb_setup_dca(adapter);
1621#endif
1622}
1623
1624void igb_reinit_locked(struct igb_adapter *adapter)
1625{
1626	WARN_ON(in_interrupt());
1627	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1628		msleep(1);
1629	igb_down(adapter);
1630	igb_up(adapter);
1631	clear_bit(__IGB_RESETTING, &adapter->state);
1632}
1633
1634void igb_reset(struct igb_adapter *adapter)
1635{
1636	struct pci_dev *pdev = adapter->pdev;
1637	struct e1000_hw *hw = &adapter->hw;
1638	struct e1000_mac_info *mac = &hw->mac;
1639	struct e1000_fc_info *fc = &hw->fc;
1640	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1641	u16 hwm;
1642
1643	/* Repartition the PBA for MTUs greater than 9k.
1644	 * CTRL.RST is required for this to take effect.
1645	 */
1646	switch (mac->type) {
1647	case e1000_i350:
1648	case e1000_82580:
1649		pba = rd32(E1000_RXPBS);
1650		pba = igb_rxpbs_adjust_82580(pba);
1651		break;
1652	case e1000_82576:
1653		pba = rd32(E1000_RXPBS);
1654		pba &= E1000_RXPBS_SIZE_MASK_82576;
1655		break;
1656	case e1000_82575:
1657	default:
1658		pba = E1000_PBA_34K;
1659		break;
1660	}
1661
1662	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1663	    (mac->type < e1000_82576)) {
1664		/* adjust PBA for jumbo frames */
1665		wr32(E1000_PBA, pba);
1666
1667		/* To maintain wire speed transmits, the Tx FIFO should be
1668		 * large enough to accommodate two full transmit packets,
1669		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1670		 * the Rx FIFO should be large enough to accommodate at least
1671		 * one full receive packet and is similarly rounded up and
1672		 * expressed in KB. */
1673		pba = rd32(E1000_PBA);
1674		/* upper 16 bits has Tx packet buffer allocation size in KB */
1675		tx_space = pba >> 16;
1676		/* lower 16 bits has Rx packet buffer allocation size in KB */
1677		pba &= 0xffff;
1678		/* the Tx FIFO also stores 16 bytes of information per Tx packet,
1679		 * but don't include the Ethernet FCS because hardware appends it */
1680		min_tx_space = (adapter->max_frame_size +
1681				sizeof(union e1000_adv_tx_desc) -
1682				ETH_FCS_LEN) * 2;
1683		min_tx_space = ALIGN(min_tx_space, 1024);
1684		min_tx_space >>= 10;
1685		/* software strips receive CRC, so leave room for it */
1686		min_rx_space = adapter->max_frame_size;
1687		min_rx_space = ALIGN(min_rx_space, 1024);
1688		min_rx_space >>= 10;
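		/* e.g. a 9018-byte max frame works out to min_tx_space = 18 KB
		 * and min_rx_space = 9 KB (assuming 16-byte Tx descriptors).
		 */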
1689
1690		/* If current Tx allocation is less than the min Tx FIFO size,
1691		 * and the min Tx FIFO size is less than the current Rx FIFO
1692		 * allocation, take space away from current Rx allocation */
1693		if (tx_space < min_tx_space &&
1694		    ((min_tx_space - tx_space) < pba)) {
1695			pba = pba - (min_tx_space - tx_space);
1696
1697			/* if short on rx space, rx wins and must trump tx
1698			 * adjustment */
1699			if (pba < min_rx_space)
1700				pba = min_rx_space;
1701		}
1702		wr32(E1000_PBA, pba);
1703	}
1704
1705	/* flow control settings */
1706	/* The high water mark must be low enough to fit one full frame
1707	 * (or the size used for early receive) above it in the Rx FIFO.
1708	 * Set it to the lower of:
1709	 * - 90% of the Rx FIFO size, or
1710	 * - the full Rx FIFO size minus one full frame */
1711	hwm = min(((pba << 10) * 9 / 10),
1712			((pba << 10) - 2 * adapter->max_frame_size));
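	/* e.g. for a 34 KB Rx packet buffer and a 1522-byte max frame the
	 * 90% term (31334 bytes) is the smaller of the two and wins; it is
	 * then rounded down to a 16-byte boundary just below.
	 */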
1713
1714	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1715	fc->low_water = fc->high_water - 16;
1716	fc->pause_time = 0xFFFF;
1717	fc->send_xon = 1;
1718	fc->current_mode = fc->requested_mode;
1719
1720	/* disable receive for all VFs and wait one second */
1721	if (adapter->vfs_allocated_count) {
1722		int i;
1723		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1724			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1725
1726		/* ping all the active vfs to let them know we are going down */
1727		igb_ping_all_vfs(adapter);
1728
1729		/* disable transmits and receives */
1730		wr32(E1000_VFRE, 0);
1731		wr32(E1000_VFTE, 0);
1732	}
1733
1734	/* Allow time for pending master requests to run */
1735	hw->mac.ops.reset_hw(hw);
1736	wr32(E1000_WUC, 0);
1737
1738	if (hw->mac.ops.init_hw(hw))
1739		dev_err(&pdev->dev, "Hardware Error\n");
1740
1741	igb_init_dmac(adapter, pba);
1742	if (!netif_running(adapter->netdev))
1743		igb_power_down_link(adapter);
1744
1745	igb_update_mng_vlan(adapter);
1746
1747	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1749
1750	igb_get_phy_info(hw);
1751}
1752
1753static netdev_features_t igb_fix_features(struct net_device *netdev,
1754	netdev_features_t features)
1755{
1756	/*
1757	 * Since there is no support for separate Rx/Tx VLAN accel
1758	 * enable/disable, make sure the Tx flag is always in the same state as Rx.
1759	 */
1760	if (features & NETIF_F_HW_VLAN_RX)
1761		features |= NETIF_F_HW_VLAN_TX;
1762	else
1763		features &= ~NETIF_F_HW_VLAN_TX;
1764
1765	return features;
1766}
1767
1768static int igb_set_features(struct net_device *netdev,
1769	netdev_features_t features)
1770{
1771	netdev_features_t changed = netdev->features ^ features;
1772
1773	if (changed & NETIF_F_HW_VLAN_RX)
1774		igb_vlan_mode(netdev, features);
1775
1776	return 0;
1777}
1778
1779static const struct net_device_ops igb_netdev_ops = {
1780	.ndo_open		= igb_open,
1781	.ndo_stop		= igb_close,
1782	.ndo_start_xmit		= igb_xmit_frame,
1783	.ndo_get_stats64	= igb_get_stats64,
1784	.ndo_set_rx_mode	= igb_set_rx_mode,
1785	.ndo_set_mac_address	= igb_set_mac,
1786	.ndo_change_mtu		= igb_change_mtu,
1787	.ndo_do_ioctl		= igb_ioctl,
1788	.ndo_tx_timeout		= igb_tx_timeout,
1789	.ndo_validate_addr	= eth_validate_addr,
1790	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1791	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1792	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1793	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1794	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1795	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1796#ifdef CONFIG_NET_POLL_CONTROLLER
1797	.ndo_poll_controller	= igb_netpoll,
1798#endif
1799	.ndo_fix_features	= igb_fix_features,
1800	.ndo_set_features	= igb_set_features,
1801};
1802
1803/**
1804 * igb_probe - Device Initialization Routine
1805 * @pdev: PCI device information struct
1806 * @ent: entry in igb_pci_tbl
1807 *
1808 * Returns 0 on success, negative on failure
1809 *
1810 * igb_probe initializes an adapter identified by a pci_dev structure.
1811 * The OS initialization, configuring of the adapter private structure,
1812 * and a hardware reset occur.
1813 **/
1814static int __devinit igb_probe(struct pci_dev *pdev,
1815			       const struct pci_device_id *ent)
1816{
1817	struct net_device *netdev;
1818	struct igb_adapter *adapter;
1819	struct e1000_hw *hw;
1820	u16 eeprom_data = 0;
1821	s32 ret_val;
1822	static int global_quad_port_a; /* global quad port a indication */
1823	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1824	unsigned long mmio_start, mmio_len;
1825	int err, pci_using_dac;
1826	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1827	u8 part_str[E1000_PBANUM_LENGTH];
1828
1829	/* Catch broken hardware that put the wrong VF device ID in
1830	 * the PCIe SR-IOV capability.
1831	 */
1832	if (pdev->is_virtfn) {
1833		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1834		     pci_name(pdev), pdev->vendor, pdev->device);
1835		return -EINVAL;
1836	}
1837
1838	err = pci_enable_device_mem(pdev);
1839	if (err)
1840		return err;
1841
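	/* try a 64-bit DMA mask first; fall back to a 32-bit mask if that fails */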
1842	pci_using_dac = 0;
1843	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1844	if (!err) {
1845		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1846		if (!err)
1847			pci_using_dac = 1;
1848	} else {
1849		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1850		if (err) {
1851			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1852			if (err) {
1853				dev_err(&pdev->dev, "No usable DMA "
1854					"configuration, aborting\n");
1855				goto err_dma;
1856			}
1857		}
1858	}
1859
1860	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1861	                                   IORESOURCE_MEM),
1862	                                   igb_driver_name);
1863	if (err)
1864		goto err_pci_reg;
1865
1866	pci_enable_pcie_error_reporting(pdev);
1867
1868	pci_set_master(pdev);
1869	pci_save_state(pdev);
1870
1871	err = -ENOMEM;
1872	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1873				   IGB_MAX_TX_QUEUES);
1874	if (!netdev)
1875		goto err_alloc_etherdev;
1876
1877	SET_NETDEV_DEV(netdev, &pdev->dev);
1878
1879	pci_set_drvdata(pdev, netdev);
1880	adapter = netdev_priv(netdev);
1881	adapter->netdev = netdev;
1882	adapter->pdev = pdev;
1883	hw = &adapter->hw;
1884	hw->back = adapter;
1885	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1886
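	/* map the adapter register space (BAR 0) */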
1887	mmio_start = pci_resource_start(pdev, 0);
1888	mmio_len = pci_resource_len(pdev, 0);
1889
1890	err = -EIO;
1891	hw->hw_addr = ioremap(mmio_start, mmio_len);
1892	if (!hw->hw_addr)
1893		goto err_ioremap;
1894
1895	netdev->netdev_ops = &igb_netdev_ops;
1896	igb_set_ethtool_ops(netdev);
1897	netdev->watchdog_timeo = 5 * HZ;
1898
1899	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1900
1901	netdev->mem_start = mmio_start;
1902	netdev->mem_end = mmio_start + mmio_len;
1903
1904	/* PCI config space info */
1905	hw->vendor_id = pdev->vendor;
1906	hw->device_id = pdev->device;
1907	hw->revision_id = pdev->revision;
1908	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1909	hw->subsystem_device_id = pdev->subsystem_device;
1910
1911	/* Copy the default MAC, PHY and NVM function pointers */
1912	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1913	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1914	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1915	/* Initialize skew-specific constants */
1916	err = ei->get_invariants(hw);
1917	if (err)
1918		goto err_sw_init;
1919
1920	/* setup the private structure */
1921	err = igb_sw_init(adapter);
1922	if (err)
1923		goto err_sw_init;
1924
1925	igb_get_bus_info_pcie(hw);
1926
1927	hw->phy.autoneg_wait_to_complete = false;
1928
1929	/* Copper options */
1930	if (hw->phy.media_type == e1000_media_type_copper) {
1931		hw->phy.mdix = AUTO_ALL_MODES;
1932		hw->phy.disable_polarity_correction = false;
1933		hw->phy.ms_type = e1000_ms_hw_default;
1934	}
1935
1936	if (igb_check_reset_block(hw))
1937		dev_info(&pdev->dev,
1938			"PHY reset is blocked due to SOL/IDER session.\n");
1939
1940	/*
1941	 * features is initialized to 0 in allocation; it might have bits
1942	 * set by igb_sw_init, so we should use an OR instead of an
1943	 * assignment.
1944	 */
1945	netdev->features |= NETIF_F_SG |
1946			    NETIF_F_IP_CSUM |
1947			    NETIF_F_IPV6_CSUM |
1948			    NETIF_F_TSO |
1949			    NETIF_F_TSO6 |
1950			    NETIF_F_RXHASH |
1951			    NETIF_F_RXCSUM |
1952			    NETIF_F_HW_VLAN_RX |
1953			    NETIF_F_HW_VLAN_TX;
1954
1955	/* copy netdev features into list of user selectable features */
1956	netdev->hw_features |= netdev->features;
1957
1958	/* set this bit last since it cannot be part of hw_features */
1959	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1960
1961	netdev->vlan_features |= NETIF_F_TSO |
1962				 NETIF_F_TSO6 |
1963				 NETIF_F_IP_CSUM |
1964				 NETIF_F_IPV6_CSUM |
1965				 NETIF_F_SG;
1966
1967	if (pci_using_dac) {
1968		netdev->features |= NETIF_F_HIGHDMA;
1969		netdev->vlan_features |= NETIF_F_HIGHDMA;
1970	}
1971
1972	if (hw->mac.type >= e1000_82576) {
1973		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1974		netdev->features |= NETIF_F_SCTP_CSUM;
1975	}
1976
1977	netdev->priv_flags |= IFF_UNICAST_FLT;
1978
1979	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1980
1981	/* before reading the NVM, reset the controller to put the device in a
1982	 * known good starting state */
1983	hw->mac.ops.reset_hw(hw);
1984
1985	/* make sure the NVM is good */
1986	if (hw->nvm.ops.validate(hw) < 0) {
1987		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1988		err = -EIO;
1989		goto err_eeprom;
1990	}
1991
1992	/* copy the MAC address out of the NVM */
1993	if (hw->mac.ops.read_mac_addr(hw))
1994		dev_err(&pdev->dev, "NVM Read Error\n");
1995
1996	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1997	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1998
1999	if (!is_valid_ether_addr(netdev->perm_addr)) {
2000		dev_err(&pdev->dev, "Invalid MAC Address\n");
2001		err = -EIO;
2002		goto err_eeprom;
2003	}
2004
2005	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2006	            (unsigned long) adapter);
2007	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2008	            (unsigned long) adapter);
2009
2010	INIT_WORK(&adapter->reset_task, igb_reset_task);
2011	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2012
2013	/* Initialize link properties that are user-changeable */
2014	adapter->fc_autoneg = true;
2015	hw->mac.autoneg = true;
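	/* 0x2f advertises 10/100 half/full duplex and 1000 Mb/s full duplex */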
2016	hw->phy.autoneg_advertised = 0x2f;
2017
2018	hw->fc.requested_mode = e1000_fc_default;
2019	hw->fc.current_mode = e1000_fc_default;
2020
2021	igb_validate_mdi_setting(hw);
2022
2023	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2024	 * enable the ACPI Magic Packet filter.
2025	 */
2026
2027	if (hw->bus.func == 0)
2028		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2029	else if (hw->mac.type >= e1000_82580)
2030		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2031		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2032		                 &eeprom_data);
2033	else if (hw->bus.func == 1)
2034		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2035
2036	if (eeprom_data & eeprom_apme_mask)
2037		adapter->eeprom_wol |= E1000_WUFC_MAG;
2038
2039	/* now that we have the eeprom settings, apply the special cases where
2040	 * the eeprom may be wrong or the board simply won't support wake on
2041	 * lan on a particular port */
2042	switch (pdev->device) {
2043	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2044		adapter->eeprom_wol = 0;
2045		break;
2046	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2047	case E1000_DEV_ID_82576_FIBER:
2048	case E1000_DEV_ID_82576_SERDES:
2049		/* Wake events only supported on port A for dual fiber
2050		 * regardless of eeprom setting */
2051		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2052			adapter->eeprom_wol = 0;
2053		break;
2054	case E1000_DEV_ID_82576_QUAD_COPPER:
2055	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2056		/* if quad port adapter, disable WoL on all but port A */
2057		if (global_quad_port_a != 0)
2058			adapter->eeprom_wol = 0;
2059		else
2060			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2061		/* Reset for multiple quad port adapters */
2062		if (++global_quad_port_a == 4)
2063			global_quad_port_a = 0;
2064		break;
2065	}
2066
2067	/* initialize the wol settings based on the eeprom settings */
2068	adapter->wol = adapter->eeprom_wol;
2069	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2070
2071	/* reset the hardware with the new settings */
2072	igb_reset(adapter);
2073
2074	/* let the f/w know that the h/w is now under the control of the
2075	 * driver. */
2076	igb_get_hw_control(adapter);
2077
2078	strcpy(netdev->name, "eth%d");
2079	err = register_netdev(netdev);
2080	if (err)
2081		goto err_register;
2082
2083	/* carrier off reporting is important to ethtool even BEFORE open */
2084	netif_carrier_off(netdev);
2085
2086#ifdef CONFIG_IGB_DCA
2087	if (dca_add_requester(&pdev->dev) == 0) {
2088		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2089		dev_info(&pdev->dev, "DCA enabled\n");
2090		igb_setup_dca(adapter);
2091	}
2092
2093#endif
2094	/* do hw tstamp init after resetting */
2095	igb_init_hw_timer(adapter);
2096
2097	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2098	/* print bus type/speed/width info */
2099	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2100		 netdev->name,
2101		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2102		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2103		                                            "unknown"),
2104		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2105		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2106		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2107		   "unknown"),
2108		 netdev->dev_addr);
2109
2110	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2111	if (ret_val)
2112		strcpy(part_str, "Unknown");
2113	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2114	dev_info(&pdev->dev,
2115		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2116		adapter->msix_entries ? "MSI-X" :
2117		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2118		adapter->num_rx_queues, adapter->num_tx_queues);
2119	switch (hw->mac.type) {
2120	case e1000_i350:
2121		igb_set_eee_i350(hw);
2122		break;
2123	default:
2124		break;
2125	}
2126
2127	pm_runtime_put_noidle(&pdev->dev);
2128	return 0;
2129
2130err_register:
2131	igb_release_hw_control(adapter);
2132err_eeprom:
2133	if (!igb_check_reset_block(hw))
2134		igb_reset_phy(hw);
2135
2136	if (hw->flash_address)
2137		iounmap(hw->flash_address);
2138err_sw_init:
2139	igb_clear_interrupt_scheme(adapter);
2140	iounmap(hw->hw_addr);
2141err_ioremap:
2142	free_netdev(netdev);
2143err_alloc_etherdev:
2144	pci_release_selected_regions(pdev,
2145	                             pci_select_bars(pdev, IORESOURCE_MEM));
2146err_pci_reg:
2147err_dma:
2148	pci_disable_device(pdev);
2149	return err;
2150}
2151
2152/**
2153 * igb_remove - Device Removal Routine
2154 * @pdev: PCI device information struct
2155 *
2156 * igb_remove is called by the PCI subsystem to alert the driver
2157 * that it should release a PCI device.  This could be caused by a
2158 * Hot-Plug event, or because the driver is going to be removed from
2159 * memory.
2160 **/
2161static void __devexit igb_remove(struct pci_dev *pdev)
2162{
2163	struct net_device *netdev = pci_get_drvdata(pdev);
2164	struct igb_adapter *adapter = netdev_priv(netdev);
2165	struct e1000_hw *hw = &adapter->hw;
2166
2167	pm_runtime_get_noresume(&pdev->dev);
2168
2169	/*
2170	 * The watchdog timer may be rescheduled, so explicitly
2171	 * prevent it from being rescheduled.
2172	 */
2173	set_bit(__IGB_DOWN, &adapter->state);
2174	del_timer_sync(&adapter->watchdog_timer);
2175	del_timer_sync(&adapter->phy_info_timer);
2176
2177	cancel_work_sync(&adapter->reset_task);
2178	cancel_work_sync(&adapter->watchdog_task);
2179
2180#ifdef CONFIG_IGB_DCA
2181	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2182		dev_info(&pdev->dev, "DCA disabled\n");
2183		dca_remove_requester(&pdev->dev);
2184		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2185		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2186	}
2187#endif
2188
2189	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2190	 * would have already happened in close and is redundant. */
2191	igb_release_hw_control(adapter);
2192
2193	unregister_netdev(netdev);
2194
2195	igb_clear_interrupt_scheme(adapter);
2196
2197#ifdef CONFIG_PCI_IOV
2198	/* reclaim resources allocated to VFs */
2199	if (adapter->vf_data) {
2200		/* disable iov and allow time for transactions to clear */
2201		if (!igb_check_vf_assignment(adapter)) {
2202			pci_disable_sriov(pdev);
2203			msleep(500);
2204		} else {
2205			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2206		}
2207
2208		kfree(adapter->vf_data);
2209		adapter->vf_data = NULL;
2210		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2211		wrfl();
2212		msleep(100);
2213		dev_info(&pdev->dev, "IOV Disabled\n");
2214	}
2215#endif
2216
2217	iounmap(hw->hw_addr);
2218	if (hw->flash_address)
2219		iounmap(hw->flash_address);
2220	pci_release_selected_regions(pdev,
2221	                             pci_select_bars(pdev, IORESOURCE_MEM));
2222
2223	kfree(adapter->shadow_vfta);
2224	free_netdev(netdev);
2225
2226	pci_disable_pcie_error_reporting(pdev);
2227
2228	pci_disable_device(pdev);
2229}
2230
2231/**
2232 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2233 * @adapter: board private structure to initialize
2234 *
2235 * This function initializes the vf specific data storage and then attempts to
2236 * allocate the VFs.  The reason for ordering it this way is that it is much
2237 * more expensive, time-wise, to disable SR-IOV than it is to allocate and free
2238 * the memory for the VFs.
2239 **/
2240static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2241{
2242#ifdef CONFIG_PCI_IOV
2243	struct pci_dev *pdev = adapter->pdev;
2244	int old_vfs = igb_find_enabled_vfs(adapter);
2245	int i;
2246
2247	if (old_vfs) {
2248		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2249			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2250		adapter->vfs_allocated_count = old_vfs;
2251	}
2252
2253	if (!adapter->vfs_allocated_count)
2254		return;
2255
2256	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2257				sizeof(struct vf_data_storage), GFP_KERNEL);
2258	/* if allocation failed then we do not support SR-IOV */
2259	if (!adapter->vf_data) {
2260		adapter->vfs_allocated_count = 0;
2261		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2262			"Data Storage\n");
2263		goto out;
2264	}
2265
2266	if (!old_vfs) {
2267		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2268			goto err_out;
2269	}
2270	dev_info(&pdev->dev, "%d VFs allocated\n",
2271		 adapter->vfs_allocated_count);
2272	for (i = 0; i < adapter->vfs_allocated_count; i++)
2273		igb_vf_configure(adapter, i);
2274
2275	/* DMA Coalescing is not supported in IOV mode. */
2276	adapter->flags &= ~IGB_FLAG_DMAC;
2277	goto out;
2278err_out:
2279	kfree(adapter->vf_data);
2280	adapter->vf_data = NULL;
2281	adapter->vfs_allocated_count = 0;
2282out:
2283	return;
2284#endif /* CONFIG_PCI_IOV */
2285}
2286
2287/**
2288 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2289 * @adapter: board private structure to initialize
2290 *
2291 * igb_init_hw_timer initializes the function pointer and values for the hw
2292 * timer found in hardware.
2293 **/
2294static void igb_init_hw_timer(struct igb_adapter *adapter)
2295{
2296	struct e1000_hw *hw = &adapter->hw;
2297
2298	switch (hw->mac.type) {
2299	case e1000_i350:
2300	case e1000_82580:
2301		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2302		adapter->cycles.read = igb_read_clock;
2303		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2304		adapter->cycles.mult = 1;
2305		/*
2306		 * The 82580 timesync updates the system timer in 8 ns increments
2307		 * and the value cannot be shifted.  Instead we need to shift
2308		 * the registers to generate a 64bit timer value.  As a result
2309		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2310		 * 24 in order to generate a larger value for synchronization.
2311		 */
2312		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2313		/* disable system timer temporarily by setting bit 31 */
2314		wr32(E1000_TSAUXC, 0x80000000);
2315		wrfl();
2316
2317		/* Set registers so that rollover occurs soon to test this. */
2318		wr32(E1000_SYSTIMR, 0x00000000);
2319		wr32(E1000_SYSTIML, 0x80000000);
2320		wr32(E1000_SYSTIMH, 0x000000FF);
2321		wrfl();
2322
2323		/* enable system timer by clearing bit 31 */
2324		wr32(E1000_TSAUXC, 0x0);
2325		wrfl();
2326
2327		timecounter_init(&adapter->clock,
2328				 &adapter->cycles,
2329				 ktime_to_ns(ktime_get_real()));
2330		/*
2331		 * Synchronize our NIC clock against system wall clock. NIC
2332		 * time stamp reading requires ~3us per sample, each sample
2333		 * was pretty stable even under load => only require 10
2334		 * samples for each offset comparison.
2335		 */
2336		memset(&adapter->compare, 0, sizeof(adapter->compare));
2337		adapter->compare.source = &adapter->clock;
2338		adapter->compare.target = ktime_get_real;
2339		adapter->compare.num_samples = 10;
2340		timecompare_update(&adapter->compare, 0);
2341		break;
2342	case e1000_82576:
2343		/*
2344		 * Initialize hardware timer: we keep it running just in case
2345		 * that some program needs it later on.
2346		 */
2347		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2348		adapter->cycles.read = igb_read_clock;
2349		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2350		adapter->cycles.mult = 1;
2351		/*
2352		 * Scale the NIC clock cycle by a large factor so that
2353		 * relatively small clock corrections can be added or
2354		 * subtracted at each clock tick. The drawbacks of a large
2355		 * factor are a) that the clock register overflows more quickly
2356		 * (not such a big deal) and b) that the increment per tick has
2357		 * to fit into 24 bits.  As a result we need to use a shift of
2358		 * 19 so we can fit a value of 16 into the TIMINCA register.
2359		 */
2360		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2361		wr32(E1000_TIMINCA,
2362		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2363		                (16 << IGB_82576_TSYNC_SHIFT));
2364
2365		/* Set registers so that rollover occurs soon to test this. */
2366		wr32(E1000_SYSTIML, 0x00000000);
2367		wr32(E1000_SYSTIMH, 0xFF800000);
2368		wrfl();
2369
2370		timecounter_init(&adapter->clock,
2371				 &adapter->cycles,
2372				 ktime_to_ns(ktime_get_real()));
2373		/*
2374		 * Synchronize our NIC clock against system wall clock. NIC
2375		 * time stamp reading requires ~3us per sample, each sample
2376		 * was pretty stable even under load => only require 10
2377		 * samples for each offset comparison.
2378		 */
2379		memset(&adapter->compare, 0, sizeof(adapter->compare));
2380		adapter->compare.source = &adapter->clock;
2381		adapter->compare.target = ktime_get_real;
2382		adapter->compare.num_samples = 10;
2383		timecompare_update(&adapter->compare, 0);
2384		break;
2385	case e1000_82575:
2386		/* 82575 does not support timesync */
2387	default:
2388		break;
2389	}
2390
2391}
2392
2393/**
2394 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2395 * @adapter: board private structure to initialize
2396 *
2397 * igb_sw_init initializes the Adapter private data structure.
2398 * Fields are initialized based on PCI device information and
2399 * OS network device settings (MTU size).
2400 **/
2401static int __devinit igb_sw_init(struct igb_adapter *adapter)
2402{
2403	struct e1000_hw *hw = &adapter->hw;
2404	struct net_device *netdev = adapter->netdev;
2405	struct pci_dev *pdev = adapter->pdev;
2406
2407	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2408
2409	/* set default ring sizes */
2410	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2411	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2412
2413	/* set default ITR values */
2414	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2415	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2416
2417	/* set default work limits */
2418	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2419
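	/* max frame on the wire is the MTU plus Ethernet header, FCS and one VLAN tag */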
2420	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2421				  VLAN_HLEN;
2422	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2423
2424	adapter->node = -1;
2425
2426	spin_lock_init(&adapter->stats64_lock);
2427#ifdef CONFIG_PCI_IOV
2428	switch (hw->mac.type) {
2429	case e1000_82576:
2430	case e1000_i350:
2431		if (max_vfs > 7) {
2432			dev_warn(&pdev->dev,
2433				 "Maximum of 7 VFs per PF, using max\n");
2434			adapter->vfs_allocated_count = 7;
2435		} else
2436			adapter->vfs_allocated_count = max_vfs;
2437		break;
2438	default:
2439		break;
2440	}
2441#endif /* CONFIG_PCI_IOV */
2442	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2443	/* i350 cannot do RSS and SR-IOV at the same time */
2444	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2445		adapter->rss_queues = 1;
2446
2447	/*
2448	 * if rss_queues > 4, or if more than 6 VFs are allocated while more
2449	 * than one rss queue is in use, combine the queues into queue pairs
2450	 * in order to conserve interrupts due to the limited supply
2451	 */
2452	if ((adapter->rss_queues > 4) ||
2453	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2454		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2455
2456	/* Setup and initialize a copy of the hw vlan table array */
2457	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2458				E1000_VLAN_FILTER_TBL_SIZE,
2459				GFP_ATOMIC);
2460
2461	/* This call may decrease the number of queues */
2462	if (igb_init_interrupt_scheme(adapter)) {
2463		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2464		return -ENOMEM;
2465	}
2466
2467	igb_probe_vfs(adapter);
2468
2469	/* Explicitly disable IRQ since the NIC can be in any state. */
2470	igb_irq_disable(adapter);
2471
2472	if (hw->mac.type == e1000_i350)
2473		adapter->flags &= ~IGB_FLAG_DMAC;
2474
2475	set_bit(__IGB_DOWN, &adapter->state);
2476	return 0;
2477}
2478
2479/**
2480 * igb_open - Called when a network interface is made active
2481 * @netdev: network interface device structure
2482 *
2483 * Returns 0 on success, negative value on failure
2484 *
2485 * The open entry point is called when a network interface is made
2486 * active by the system (IFF_UP).  At this point all resources needed
2487 * for transmit and receive operations are allocated, the interrupt
2488 * handler is registered with the OS, the watchdog timer is started,
2489 * and the stack is notified that the interface is ready.
2490 **/
2491static int __igb_open(struct net_device *netdev, bool resuming)
2492{
2493	struct igb_adapter *adapter = netdev_priv(netdev);
2494	struct e1000_hw *hw = &adapter->hw;
2495	struct pci_dev *pdev = adapter->pdev;
2496	int err;
2497	int i;
2498
2499	/* disallow open during test */
2500	if (test_bit(__IGB_TESTING, &adapter->state)) {
2501		WARN_ON(resuming);
2502		return -EBUSY;
2503	}
2504
2505	if (!resuming)
2506		pm_runtime_get_sync(&pdev->dev);
2507
2508	netif_carrier_off(netdev);
2509
2510	/* allocate transmit descriptors */
2511	err = igb_setup_all_tx_resources(adapter);
2512	if (err)
2513		goto err_setup_tx;
2514
2515	/* allocate receive descriptors */
2516	err = igb_setup_all_rx_resources(adapter);
2517	if (err)
2518		goto err_setup_rx;
2519
2520	igb_power_up_link(adapter);
2521
2522	/* before we allocate an interrupt, we must be ready to handle it.
2523	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2524	 * as soon as we call pci_request_irq, so we have to set up our
2525	 * clean_rx handler before we do so.  */
2526	igb_configure(adapter);
2527
2528	err = igb_request_irq(adapter);
2529	if (err)
2530		goto err_req_irq;
2531
2532	/* From here on the code is the same as igb_up() */
2533	clear_bit(__IGB_DOWN, &adapter->state);
2534
2535	for (i = 0; i < adapter->num_q_vectors; i++)
2536		napi_enable(&(adapter->q_vector[i]->napi));
2537
2538	/* Clear any pending interrupts. */
2539	rd32(E1000_ICR);
2540
2541	igb_irq_enable(adapter);
2542
2543	/* notify VFs that reset has been completed */
2544	if (adapter->vfs_allocated_count) {
2545		u32 reg_data = rd32(E1000_CTRL_EXT);
2546		reg_data |= E1000_CTRL_EXT_PFRSTD;
2547		wr32(E1000_CTRL_EXT, reg_data);
2548	}
2549
2550	netif_tx_start_all_queues(netdev);
2551
2552	if (!resuming)
2553		pm_runtime_put(&pdev->dev);
2554
2555	/* start the watchdog. */
2556	hw->mac.get_link_status = 1;
2557	schedule_work(&adapter->watchdog_task);
2558
2559	return 0;
2560
2561err_req_irq:
2562	igb_release_hw_control(adapter);
2563	igb_power_down_link(adapter);
2564	igb_free_all_rx_resources(adapter);
2565err_setup_rx:
2566	igb_free_all_tx_resources(adapter);
2567err_setup_tx:
2568	igb_reset(adapter);
2569	if (!resuming)
2570		pm_runtime_put(&pdev->dev);
2571
2572	return err;
2573}
2574
2575static int igb_open(struct net_device *netdev)
2576{
2577	return __igb_open(netdev, false);
2578}
2579
2580/**
2581 * igb_close - Disables a network interface
2582 * @netdev: network interface device structure
2583 *
2584 * Returns 0, this is not allowed to fail
2585 *
2586 * The close entry point is called when an interface is de-activated
2587 * by the OS.  The hardware is still under the driver's control, but
2588 * needs to be disabled.  A global MAC reset is issued to stop the
2589 * hardware, and all transmit and receive resources are freed.
2590 **/
2591static int __igb_close(struct net_device *netdev, bool suspending)
2592{
2593	struct igb_adapter *adapter = netdev_priv(netdev);
2594	struct pci_dev *pdev = adapter->pdev;
2595
2596	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2597
2598	if (!suspending)
2599		pm_runtime_get_sync(&pdev->dev);
2600
2601	igb_down(adapter);
2602	igb_free_irq(adapter);
2603
2604	igb_free_all_tx_resources(adapter);
2605	igb_free_all_rx_resources(adapter);
2606
2607	if (!suspending)
2608		pm_runtime_put_sync(&pdev->dev);
2609	return 0;
2610}
2611
2612static int igb_close(struct net_device *netdev)
2613{
2614	return __igb_close(netdev, false);
2615}
2616
2617/**
2618 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2619 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2620 *
2621 * Return 0 on success, negative on failure
2622 **/
2623int igb_setup_tx_resources(struct igb_ring *tx_ring)
2624{
2625	struct device *dev = tx_ring->dev;
2626	int orig_node = dev_to_node(dev);
2627	int size;
2628
2629	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
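	/* prefer an allocation on the ring's NUMA node, but fall back to any node */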
2630	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2631	if (!tx_ring->tx_buffer_info)
2632		tx_ring->tx_buffer_info = vzalloc(size);
2633	if (!tx_ring->tx_buffer_info)
2634		goto err;
2635
2636	/* round up to nearest 4K */
2637	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2638	tx_ring->size = ALIGN(tx_ring->size, 4096);
2639
2640	set_dev_node(dev, tx_ring->numa_node);
2641	tx_ring->desc = dma_alloc_coherent(dev,
2642					   tx_ring->size,
2643					   &tx_ring->dma,
2644					   GFP_KERNEL);
2645	set_dev_node(dev, orig_node);
2646	if (!tx_ring->desc)
2647		tx_ring->desc = dma_alloc_coherent(dev,
2648						   tx_ring->size,
2649						   &tx_ring->dma,
2650						   GFP_KERNEL);
2651
2652	if (!tx_ring->desc)
2653		goto err;
2654
2655	tx_ring->next_to_use = 0;
2656	tx_ring->next_to_clean = 0;
2657
2658	return 0;
2659
2660err:
2661	vfree(tx_ring->tx_buffer_info);
2662	dev_err(dev,
2663		"Unable to allocate memory for the transmit descriptor ring\n");
2664	return -ENOMEM;
2665}
2666
2667/**
2668 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2669 *				  (Descriptors) for all queues
2670 * @adapter: board private structure
2671 *
2672 * Return 0 on success, negative on failure
2673 **/
2674static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2675{
2676	struct pci_dev *pdev = adapter->pdev;
2677	int i, err = 0;
2678
2679	for (i = 0; i < adapter->num_tx_queues; i++) {
2680		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2681		if (err) {
2682			dev_err(&pdev->dev,
2683				"Allocation for Tx Queue %u failed\n", i);
2684			for (i--; i >= 0; i--)
2685				igb_free_tx_resources(adapter->tx_ring[i]);
2686			break;
2687		}
2688	}
2689
2690	return err;
2691}
2692
2693/**
2694 * igb_setup_tctl - configure the transmit control registers
2695 * @adapter: Board private structure
2696 **/
2697void igb_setup_tctl(struct igb_adapter *adapter)
2698{
2699	struct e1000_hw *hw = &adapter->hw;
2700	u32 tctl;
2701
2702	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2703	wr32(E1000_TXDCTL(0), 0);
2704
2705	/* Program the Transmit Control Register */
2706	tctl = rd32(E1000_TCTL);
2707	tctl &= ~E1000_TCTL_CT;
2708	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2709		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2710
2711	igb_config_collision_dist(hw);
2712
2713	/* Enable transmits */
2714	tctl |= E1000_TCTL_EN;
2715
2716	wr32(E1000_TCTL, tctl);
2717}
2718
2719/**
2720 * igb_configure_tx_ring - Configure transmit ring after Reset
2721 * @adapter: board private structure
2722 * @ring: tx ring to configure
2723 *
2724 * Configure a transmit ring after a reset.
2725 **/
2726void igb_configure_tx_ring(struct igb_adapter *adapter,
2727                           struct igb_ring *ring)
2728{
2729	struct e1000_hw *hw = &adapter->hw;
2730	u32 txdctl = 0;
2731	u64 tdba = ring->dma;
2732	int reg_idx = ring->reg_idx;
2733
2734	/* disable the queue */
2735	wr32(E1000_TXDCTL(reg_idx), 0);
2736	wrfl();
2737	mdelay(10);
2738
2739	wr32(E1000_TDLEN(reg_idx),
2740	                ring->count * sizeof(union e1000_adv_tx_desc));
2741	wr32(E1000_TDBAL(reg_idx),
2742	                tdba & 0x00000000ffffffffULL);
2743	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2744
2745	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2746	wr32(E1000_TDH(reg_idx), 0);
2747	writel(0, ring->tail);
2748
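	/* program the Tx descriptor prefetch, host and write-back thresholds */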
2749	txdctl |= IGB_TX_PTHRESH;
2750	txdctl |= IGB_TX_HTHRESH << 8;
2751	txdctl |= IGB_TX_WTHRESH << 16;
2752
2753	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2754	wr32(E1000_TXDCTL(reg_idx), txdctl);
2755}
2756
2757/**
2758 * igb_configure_tx - Configure transmit Unit after Reset
2759 * @adapter: board private structure
2760 *
2761 * Configure the Tx unit of the MAC after a reset.
2762 **/
2763static void igb_configure_tx(struct igb_adapter *adapter)
2764{
2765	int i;
2766
2767	for (i = 0; i < adapter->num_tx_queues; i++)
2768		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2769}
2770
2771/**
2772 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2773 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2774 *
2775 * Returns 0 on success, negative on failure
2776 **/
2777int igb_setup_rx_resources(struct igb_ring *rx_ring)
2778{
2779	struct device *dev = rx_ring->dev;
2780	int orig_node = dev_to_node(dev);
2781	int size, desc_len;
2782
2783	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
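	/* prefer an allocation on the ring's NUMA node, but fall back to any node */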
2784	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2785	if (!rx_ring->rx_buffer_info)
2786		rx_ring->rx_buffer_info = vzalloc(size);
2787	if (!rx_ring->rx_buffer_info)
2788		goto err;
2789
2790	desc_len = sizeof(union e1000_adv_rx_desc);
2791
2792	/* Round up to nearest 4K */
2793	rx_ring->size = rx_ring->count * desc_len;
2794	rx_ring->size = ALIGN(rx_ring->size, 4096);
2795
2796	set_dev_node(dev, rx_ring->numa_node);
2797	rx_ring->desc = dma_alloc_coherent(dev,
2798					   rx_ring->size,
2799					   &rx_ring->dma,
2800					   GFP_KERNEL);
2801	set_dev_node(dev, orig_node);
2802	if (!rx_ring->desc)
2803		rx_ring->desc = dma_alloc_coherent(dev,
2804						   rx_ring->size,
2805						   &rx_ring->dma,
2806						   GFP_KERNEL);
2807
2808	if (!rx_ring->desc)
2809		goto err;
2810
2811	rx_ring->next_to_clean = 0;
2812	rx_ring->next_to_use = 0;
2813
2814	return 0;
2815
2816err:
2817	vfree(rx_ring->rx_buffer_info);
2818	rx_ring->rx_buffer_info = NULL;
2819	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2820		" ring\n");
2821	return -ENOMEM;
2822}
2823
2824/**
2825 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2826 *				  (Descriptors) for all queues
2827 * @adapter: board private structure
2828 *
2829 * Return 0 on success, negative on failure
2830 **/
2831static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2832{
2833	struct pci_dev *pdev = adapter->pdev;
2834	int i, err = 0;
2835
2836	for (i = 0; i < adapter->num_rx_queues; i++) {
2837		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2838		if (err) {
2839			dev_err(&pdev->dev,
2840				"Allocation for Rx Queue %u failed\n", i);
2841			for (i--; i >= 0; i--)
2842				igb_free_rx_resources(adapter->rx_ring[i]);
2843			break;
2844		}
2845	}
2846
2847	return err;
2848}
2849
2850/**
2851 * igb_setup_mrqc - configure the multiple receive queue control registers
2852 * @adapter: Board private structure
2853 **/
2854static void igb_setup_mrqc(struct igb_adapter *adapter)
2855{
2856	struct e1000_hw *hw = &adapter->hw;
2857	u32 mrqc, rxcsum;
2858	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2859	union e1000_reta {
2860		u32 dword;
2861		u8  bytes[4];
2862	} reta;
2863	static const u8 rsshash[40] = {
2864		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2865		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2866		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2867		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2868
2869	/* Fill out hash function seeds */
2870	for (j = 0; j < 10; j++) {
2871		u32 rsskey = rsshash[(j * 4)];
2872		rsskey |= rsshash[(j * 4) + 1] << 8;
2873		rsskey |= rsshash[(j * 4) + 2] << 16;
2874		rsskey |= rsshash[(j * 4) + 3] << 24;
2875		array_wr32(E1000_RSSRK(0), j, rsskey);
2876	}
2877
2878	num_rx_queues = adapter->rss_queues;
2879
2880	if (adapter->vfs_allocated_count) {
2881		/* 82575 and 82576 support 2 RSS queues for VMDq */
2882		switch (hw->mac.type) {
2883		case e1000_i350:
2884		case e1000_82580:
2885			num_rx_queues = 1;
2886			shift = 0;
2887			break;
2888		case e1000_82576:
2889			shift = 3;
2890			num_rx_queues = 2;
2891			break;
2892		case e1000_82575:
2893			shift = 2;
2894			shift2 = 6;
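			/* fall through */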
2895		default:
2896			break;
2897		}
2898	} else {
2899		if (hw->mac.type == e1000_82575)
2900			shift = 6;
2901	}
2902
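	/* program the 128-entry RSS redirection table (RETA), one byte per entry */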
2903	for (j = 0; j < (32 * 4); j++) {
2904		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2905		if (shift2)
2906			reta.bytes[j & 3] |= num_rx_queues << shift2;
2907		if ((j & 3) == 3)
2908			wr32(E1000_RETA(j >> 2), reta.dword);
2909	}
2910
2911	/*
2912	 * Disable raw packet checksumming so that RSS hash is placed in
2913	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2914	 * offloads as they are enabled by default
2915	 */
2916	rxcsum = rd32(E1000_RXCSUM);
2917	rxcsum |= E1000_RXCSUM_PCSD;
2918
2919	if (adapter->hw.mac.type >= e1000_82576)
2920		/* Enable Receive Checksum Offload for SCTP */
2921		rxcsum |= E1000_RXCSUM_CRCOFL;
2922
2923	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2924	wr32(E1000_RXCSUM, rxcsum);
2925
2926	/* If VMDq is enabled then we set the appropriate mode for that, else
2927	 * we default to RSS so that an RSS hash is calculated per packet even
2928	 * if we are only using one queue */
2929	if (adapter->vfs_allocated_count) {
2930		if (hw->mac.type > e1000_82575) {
2931			/* Set the default pool for the PF's first queue */
2932			u32 vtctl = rd32(E1000_VT_CTL);
2933			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2934				   E1000_VT_CTL_DISABLE_DEF_POOL);
2935			vtctl |= adapter->vfs_allocated_count <<
2936				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2937			wr32(E1000_VT_CTL, vtctl);
2938		}
2939		if (adapter->rss_queues > 1)
2940			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2941		else
2942			mrqc = E1000_MRQC_ENABLE_VMDQ;
2943	} else {
2944		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2945	}
2946	igb_vmm_control(adapter);
2947
2948	/*
2949	 * Generate RSS hash based on TCP port numbers and/or
2950	 * IPv4/v6 src and dst addresses since UDP cannot be
2951	 * hashed reliably due to IP fragmentation
2952	 */
2953	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2954		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2955		E1000_MRQC_RSS_FIELD_IPV6 |
2956		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2957		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2958
2959	wr32(E1000_MRQC, mrqc);
2960}
2961
2962/**
2963 * igb_setup_rctl - configure the receive control registers
2964 * @adapter: Board private structure
2965 **/
2966void igb_setup_rctl(struct igb_adapter *adapter)
2967{
2968	struct e1000_hw *hw = &adapter->hw;
2969	u32 rctl;
2970
2971	rctl = rd32(E1000_RCTL);
2972
2973	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2974	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2975
2976	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2977		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2978
2979	/*
2980	 * enable stripping of CRC. It's unlikely this will break BMC
2981	 * redirection as it did with e1000. Newer features require
2982	 * that the HW strips the CRC.
2983	 */
2984	rctl |= E1000_RCTL_SECRC;
2985
2986	/* disable store bad packets and clear size bits. */
2987	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2988
2989	/* enable LPE to prevent packets larger than max_frame_size */
2990	rctl |= E1000_RCTL_LPE;
2991
2992	/* disable queue 0 to prevent tail write w/o re-config */
2993	wr32(E1000_RXDCTL(0), 0);
2994
2995	/* Attention!!!  For SR-IOV PF driver operations you must enable
2996	 * queue drop for all VF and PF queues to prevent head of line blocking
2997	 * if an un-trusted VF does not provide descriptors to hardware.
2998	 */
2999	if (adapter->vfs_allocated_count) {
3000		/* set all queue drop enable bits */
3001		wr32(E1000_QDE, ALL_QUEUES);
3002	}
3003
3004	wr32(E1000_RCTL, rctl);
3005}
3006
3007static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3008                                   int vfn)
3009{
3010	struct e1000_hw *hw = &adapter->hw;
3011	u32 vmolr;
3012
3013	/* if this is a VF rather than the PF, and the VF has VLANs enabled,
3014	 * increase the size to leave room for the VLAN tag */
3015	if (vfn < adapter->vfs_allocated_count &&
3016	    adapter->vf_data[vfn].vlans_enabled)
3017		size += VLAN_TAG_SIZE;
3018
3019	vmolr = rd32(E1000_VMOLR(vfn));
3020	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3021	vmolr |= size | E1000_VMOLR_LPE;
3022	wr32(E1000_VMOLR(vfn), vmolr);
3023
3024	return 0;
3025}
3026
3027/**
3028 * igb_rlpml_set - set maximum receive packet size
3029 * @adapter: board private structure
3030 *
3031 * Configure maximum receivable packet size.
3032 **/
3033static void igb_rlpml_set(struct igb_adapter *adapter)
3034{
3035	u32 max_frame_size = adapter->max_frame_size;
3036	struct e1000_hw *hw = &adapter->hw;
3037	u16 pf_id = adapter->vfs_allocated_count;
3038
3039	if (pf_id) {
3040		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3041		/*
3042		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3043		 * to our max jumbo frame size, in case we need to enable
3044		 * jumbo frames on one of the rings later.
3045		 * This will not pass over-length frames into the default
3046		 * queue because it's gated by the VMOLR.RLPML.
3047		 */
3048		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3049	}
3050
3051	wr32(E1000_RLPML, max_frame_size);
3052}
3053
3054static inline void igb_set_vmolr(struct igb_adapter *adapter,
3055				 int vfn, bool aupe)
3056{
3057	struct e1000_hw *hw = &adapter->hw;
3058	u32 vmolr;
3059
3060	/*
3061	 * This register exists only on 82576 and newer, so on older MACs
3062	 * we should exit and do nothing
3063	 */
3064	if (hw->mac.type < e1000_82576)
3065		return;
3066
3067	vmolr = rd32(E1000_VMOLR(vfn));
3068	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3069	if (aupe)
3070		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3071	else
3072		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3073
3074	/* clear the bits that are conditionally set below */
3075	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3076
3077	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3078		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3079	/*
3080	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3081	 * multicast packets
3082	 */
3083	if (vfn <= adapter->vfs_allocated_count)
3084		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3085
3086	wr32(E1000_VMOLR(vfn), vmolr);
3087}
3088
3089/**
3090 * igb_configure_rx_ring - Configure a receive ring after Reset
3091 * @adapter: board private structure
3092 * @ring: receive ring to be configured
3093 *
3094 * Configure the Rx unit of the MAC after a reset.
3095 **/
3096void igb_configure_rx_ring(struct igb_adapter *adapter,
3097                           struct igb_ring *ring)
3098{
3099	struct e1000_hw *hw = &adapter->hw;
3100	u64 rdba = ring->dma;
3101	int reg_idx = ring->reg_idx;
3102	u32 srrctl = 0, rxdctl = 0;
3103
3104	/* disable the queue */
3105	wr32(E1000_RXDCTL(reg_idx), 0);
3106
3107	/* Set DMA base address registers */
3108	wr32(E1000_RDBAL(reg_idx),
3109	     rdba & 0x00000000ffffffffULL);
3110	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3111	wr32(E1000_RDLEN(reg_idx),
3112	               ring->count * sizeof(union e1000_adv_rx_desc));
3113
3114	/* initialize head and tail */
3115	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3116	wr32(E1000_RDH(reg_idx), 0);
3117	writel(0, ring->tail);
3118
3119	/* set descriptor configuration */
3120	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3121#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3122	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3123#else
3124	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3125#endif
3126	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3127	if (hw->mac.type >= e1000_82580)
3128		srrctl |= E1000_SRRCTL_TIMESTAMP;
3129	/* Only set Drop Enable if we are supporting multiple queues */
3130	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3131		srrctl |= E1000_SRRCTL_DROP_EN;
3132
3133	wr32(E1000_SRRCTL(reg_idx), srrctl);
3134
3135	/* set filtering for VMDQ pools */
3136	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3137
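	/* program the Rx descriptor prefetch, host and write-back thresholds */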
3138	rxdctl |= IGB_RX_PTHRESH;
3139	rxdctl |= IGB_RX_HTHRESH << 8;
3140	rxdctl |= IGB_RX_WTHRESH << 16;
3141
3142	/* enable receive descriptor fetching */
3143	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3144	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3145}
3146
3147/**
3148 * igb_configure_rx - Configure receive Unit after Reset
3149 * @adapter: board private structure
3150 *
3151 * Configure the Rx unit of the MAC after a reset.
3152 **/
3153static void igb_configure_rx(struct igb_adapter *adapter)
3154{
3155	int i;
3156
3157	/* set UTA to appropriate mode */
3158	igb_set_uta(adapter);
3159
3160	/* set the correct pool for the PF default MAC address in entry 0 */
3161	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3162	                 adapter->vfs_allocated_count);
3163
3164	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3165	 * the Base and Length of the Rx Descriptor Ring */
3166	for (i = 0; i < adapter->num_rx_queues; i++)
3167		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3168}
3169
3170/**
3171 * igb_free_tx_resources - Free Tx Resources per Queue
3172 * @tx_ring: Tx descriptor ring for a specific queue
3173 *
3174 * Free all transmit software resources
3175 **/
3176void igb_free_tx_resources(struct igb_ring *tx_ring)
3177{
3178	igb_clean_tx_ring(tx_ring);
3179
3180	vfree(tx_ring->tx_buffer_info);
3181	tx_ring->tx_buffer_info = NULL;
3182
3183	/* if not set, then don't free */
3184	if (!tx_ring->desc)
3185		return;
3186
3187	dma_free_coherent(tx_ring->dev, tx_ring->size,
3188			  tx_ring->desc, tx_ring->dma);
3189
3190	tx_ring->desc = NULL;
3191}
3192
3193/**
3194 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3195 * @adapter: board private structure
3196 *
3197 * Free all transmit software resources
3198 **/
3199static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3200{
3201	int i;
3202
3203	for (i = 0; i < adapter->num_tx_queues; i++)
3204		igb_free_tx_resources(adapter->tx_ring[i]);
3205}
3206
3207void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3208				    struct igb_tx_buffer *tx_buffer)
3209{
3210	if (tx_buffer->skb) {
3211		dev_kfree_skb_any(tx_buffer->skb);
3212		if (tx_buffer->dma)
3213			dma_unmap_single(ring->dev,
3214					 tx_buffer->dma,
3215					 tx_buffer->length,
3216					 DMA_TO_DEVICE);
3217	} else if (tx_buffer->dma) {
3218		dma_unmap_page(ring->dev,
3219			       tx_buffer->dma,
3220			       tx_buffer->length,
3221			       DMA_TO_DEVICE);
3222	}
3223	tx_buffer->next_to_watch = NULL;
3224	tx_buffer->skb = NULL;
3225	tx_buffer->dma = 0;
3226	/* buffer_info must be completely set up in the transmit path */
3227}
3228
3229/**
3230 * igb_clean_tx_ring - Free Tx Buffers
3231 * @tx_ring: ring to be cleaned
3232 **/
3233static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3234{
3235	struct igb_tx_buffer *buffer_info;
3236	unsigned long size;
3237	u16 i;
3238
3239	if (!tx_ring->tx_buffer_info)
3240		return;
3241	/* Free all the Tx ring sk_buffs */
3242
3243	for (i = 0; i < tx_ring->count; i++) {
3244		buffer_info = &tx_ring->tx_buffer_info[i];
3245		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3246	}
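	/* reset the byte queue limits (BQL) state for this Tx queue */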
3247	netdev_tx_reset_queue(txring_txq(tx_ring));
3248
3249	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3250	memset(tx_ring->tx_buffer_info, 0, size);
3251
3252	/* Zero out the descriptor ring */
3253	memset(tx_ring->desc, 0, tx_ring->size);
3254
3255	tx_ring->next_to_use = 0;
3256	tx_ring->next_to_clean = 0;
3257}
3258
3259/**
3260 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3261 * @adapter: board private structure
3262 **/
3263static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3264{
3265	int i;
3266
3267	for (i = 0; i < adapter->num_tx_queues; i++)
3268		igb_clean_tx_ring(adapter->tx_ring[i]);
3269}
3270
3271/**
3272 * igb_free_rx_resources - Free Rx Resources
3273 * @rx_ring: ring to clean the resources from
3274 *
3275 * Free all receive software resources
3276 **/
3277void igb_free_rx_resources(struct igb_ring *rx_ring)
3278{
3279	igb_clean_rx_ring(rx_ring);
3280
3281	vfree(rx_ring->rx_buffer_info);
3282	rx_ring->rx_buffer_info = NULL;
3283
3284	/* if not set, then don't free */
3285	if (!rx_ring->desc)
3286		return;
3287
3288	dma_free_coherent(rx_ring->dev, rx_ring->size,
3289			  rx_ring->desc, rx_ring->dma);
3290
3291	rx_ring->desc = NULL;
3292}
3293
3294/**
3295 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3296 * @adapter: board private structure
3297 *
3298 * Free all receive software resources
3299 **/
3300static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3301{
3302	int i;
3303
3304	for (i = 0; i < adapter->num_rx_queues; i++)
3305		igb_free_rx_resources(adapter->rx_ring[i]);
3306}
3307
3308/**
3309 * igb_clean_rx_ring - Free Rx Buffers per Queue
3310 * @rx_ring: ring to free buffers from
3311 **/
3312static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3313{
3314	unsigned long size;
3315	u16 i;
3316
3317	if (!rx_ring->rx_buffer_info)
3318		return;
3319
3320	/* Free all the Rx ring sk_buffs */
3321	for (i = 0; i < rx_ring->count; i++) {
3322		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3323		if (buffer_info->dma) {
3324			dma_unmap_single(rx_ring->dev,
3325			                 buffer_info->dma,
3326					 IGB_RX_HDR_LEN,
3327					 DMA_FROM_DEVICE);
3328			buffer_info->dma = 0;
3329		}
3330
3331		if (buffer_info->skb) {
3332			dev_kfree_skb(buffer_info->skb);
3333			buffer_info->skb = NULL;
3334		}
3335		if (buffer_info->page_dma) {
3336			dma_unmap_page(rx_ring->dev,
3337			               buffer_info->page_dma,
3338				       PAGE_SIZE / 2,
3339				       DMA_FROM_DEVICE);
3340			buffer_info->page_dma = 0;
3341		}
3342		if (buffer_info->page) {
3343			put_page(buffer_info->page);
3344			buffer_info->page = NULL;
3345			buffer_info->page_offset = 0;
3346		}
3347	}
3348
3349	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3350	memset(rx_ring->rx_buffer_info, 0, size);
3351
3352	/* Zero out the descriptor ring */
3353	memset(rx_ring->desc, 0, rx_ring->size);
3354
3355	rx_ring->next_to_clean = 0;
3356	rx_ring->next_to_use = 0;
3357}
3358
3359/**
3360 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3361 * @adapter: board private structure
3362 **/
3363static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3364{
3365	int i;
3366
3367	for (i = 0; i < adapter->num_rx_queues; i++)
3368		igb_clean_rx_ring(adapter->rx_ring[i]);
3369}
3370
3371/**
3372 * igb_set_mac - Change the Ethernet Address of the NIC
3373 * @netdev: network interface device structure
3374 * @p: pointer to an address structure
3375 *
3376 * Returns 0 on success, negative on failure
3377 **/
3378static int igb_set_mac(struct net_device *netdev, void *p)
3379{
3380	struct igb_adapter *adapter = netdev_priv(netdev);
3381	struct e1000_hw *hw = &adapter->hw;
3382	struct sockaddr *addr = p;
3383
3384	if (!is_valid_ether_addr(addr->sa_data))
3385		return -EADDRNOTAVAIL;
3386
3387	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3388	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3389
3390	/* set the correct pool for the new PF MAC address in entry 0 */
3391	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3392	                 adapter->vfs_allocated_count);
3393
3394	return 0;
3395}
3396
3397/**
3398 * igb_write_mc_addr_list - write multicast addresses to MTA
3399 * @netdev: network interface device structure
3400 *
3401 * Writes multicast address list to the MTA hash table.
3402 * Returns: -ENOMEM on failure
3403 *                0 on no addresses written
3404 *                X on writing X addresses to MTA
3405 **/
3406static int igb_write_mc_addr_list(struct net_device *netdev)
3407{
3408	struct igb_adapter *adapter = netdev_priv(netdev);
3409	struct e1000_hw *hw = &adapter->hw;
3410	struct netdev_hw_addr *ha;
3411	u8  *mta_list;
3412	int i;
3413
3414	if (netdev_mc_empty(netdev)) {
3415		/* nothing to program, so clear mc list */
3416		igb_update_mc_addr_list(hw, NULL, 0);
3417		igb_restore_vf_multicasts(adapter);
3418		return 0;
3419	}
3420
3421	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3422	if (!mta_list)
3423		return -ENOMEM;
3424
3425	/* The shared function expects a packed array of only addresses. */
3426	i = 0;
3427	netdev_for_each_mc_addr(ha, netdev)
3428		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3429
3430	igb_update_mc_addr_list(hw, mta_list, i);
3431	kfree(mta_list);
3432
3433	return netdev_mc_count(netdev);
3434}
3435
3436/**
3437 * igb_write_uc_addr_list - write unicast addresses to RAR table
3438 * @netdev: network interface device structure
3439 *
3440 * Writes unicast address list to the RAR table.
3441 * Returns: -ENOMEM on failure/insufficient address space
3442 *                0 on no addresses written
3443 *                X on writing X addresses to the RAR table
3444 **/
3445static int igb_write_uc_addr_list(struct net_device *netdev)
3446{
3447	struct igb_adapter *adapter = netdev_priv(netdev);
3448	struct e1000_hw *hw = &adapter->hw;
3449	unsigned int vfn = adapter->vfs_allocated_count;
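	/* one RAR entry holds the PF default MAC and one is reserved per VF */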
3450	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3451	int count = 0;
3452
3453	/* return ENOMEM indicating insufficient memory for addresses */
3454	if (netdev_uc_count(netdev) > rar_entries)
3455		return -ENOMEM;
3456
3457	if (!netdev_uc_empty(netdev) && rar_entries) {
3458		struct netdev_hw_addr *ha;
3459
3460		netdev_for_each_uc_addr(ha, netdev) {
3461			if (!rar_entries)
3462				break;
3463			igb_rar_set_qsel(adapter, ha->addr,
3464			                 rar_entries--,
3465			                 vfn);
3466			count++;
3467		}
3468	}
3469	/* clear the remaining unused RAR entries, in reverse order to avoid write combining */
3470	for (; rar_entries > 0 ; rar_entries--) {
3471		wr32(E1000_RAH(rar_entries), 0);
3472		wr32(E1000_RAL(rar_entries), 0);
3473	}
3474	wrfl();
3475
3476	return count;
3477}
3478
3479/**
3480 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3481 * @netdev: network interface device structure
3482 *
3483 * The set_rx_mode entry point is called whenever the unicast or multicast
3484 * address lists or the network interface flags are updated.  This routine is
3485 * responsible for configuring the hardware for proper unicast, multicast,
3486 * promiscuous mode, and all-multi behavior.
3487 **/
3488static void igb_set_rx_mode(struct net_device *netdev)
3489{
3490	struct igb_adapter *adapter = netdev_priv(netdev);
3491	struct e1000_hw *hw = &adapter->hw;
3492	unsigned int vfn = adapter->vfs_allocated_count;
3493	u32 rctl, vmolr = 0;
3494	int count;
3495
3496	/* Check for Promiscuous and All Multicast modes */
3497	rctl = rd32(E1000_RCTL);
3498
3499	/* clear the affected bits */
3500	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3501
3502	if (netdev->flags & IFF_PROMISC) {
3503		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3504		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3505	} else {
3506		if (netdev->flags & IFF_ALLMULTI) {
3507			rctl |= E1000_RCTL_MPE;
3508			vmolr |= E1000_VMOLR_MPME;
3509		} else {
3510			/*
3511			 * Write addresses to the MTA.  If the attempt fails,
3512			 * just turn on multicast promiscuous mode so that we
3513			 * can at least receive multicast traffic
3514			 */
3515			count = igb_write_mc_addr_list(netdev);
3516			if (count < 0) {
3517				rctl |= E1000_RCTL_MPE;
3518				vmolr |= E1000_VMOLR_MPME;
3519			} else if (count) {
3520				vmolr |= E1000_VMOLR_ROMPE;
3521			}
3522		}
3523		/*
3524		 * Write addresses to available RAR registers.  If there is not
3525		 * sufficient space to store all the addresses, then enable
3526		 * unicast promiscuous mode
3527		 */
3528		count = igb_write_uc_addr_list(netdev);
3529		if (count < 0) {
3530			rctl |= E1000_RCTL_UPE;
3531			vmolr |= E1000_VMOLR_ROPE;
3532		}
3533		rctl |= E1000_RCTL_VFE;
3534	}
3535	wr32(E1000_RCTL, rctl);
3536
3537	/*
3538	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3539	 * the VMOLR to enable the appropriate modes.  Without this workaround
3540	 * we will have issues with VLAN tag stripping not being done for frames
3541	 * that are only arriving because we are the default pool
3542	 */
3543	if (hw->mac.type < e1000_82576)
3544		return;
3545
3546	vmolr |= rd32(E1000_VMOLR(vfn)) &
3547	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3548	wr32(E1000_VMOLR(vfn), vmolr);
3549	igb_restore_vf_multicasts(adapter);
3550}
3551
3552static void igb_check_wvbr(struct igb_adapter *adapter)
3553{
3554	struct e1000_hw *hw = &adapter->hw;
3555	u32 wvbr = 0;
3556
3557	switch (hw->mac.type) {
3558	case e1000_82576:
3559	case e1000_i350:
3560		if (!(wvbr = rd32(E1000_WVBR)))
3561			return;
3562		break;
3563	default:
3564		break;
3565	}
3566
3567	adapter->wvbr |= wvbr;
3568}
3569
3570#define IGB_STAGGERED_QUEUE_OFFSET 8
3571
3572static void igb_spoof_check(struct igb_adapter *adapter)
3573{
3574	int j;
3575
3576	if (!adapter->wvbr)
3577		return;
3578
3579	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3580		if (adapter->wvbr & (1 << j) ||
3581		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3582			dev_warn(&adapter->pdev->dev,
3583				"Spoof event(s) detected on VF %d\n", j);
3584			adapter->wvbr &=
3585				~((1 << j) |
3586				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3587		}
3588	}
3589}
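
/*
 * Illustrative note (derived from the loop above, not from hardware
 * documentation): WVBR appears to carry one spoof-event bit per VF plus a
 * second bit staggered IGB_STAGGERED_QUEUE_OFFSET positions higher, so for
 * VF 2 the mask tested and cleared above is
 *
 *	(1 << 2) | (1 << (2 + IGB_STAGGERED_QUEUE_OFFSET)) == 0x00000404
 */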
3590
3591/* Need to wait a few seconds after link up to get diagnostic information from
3592 * the phy */
3593static void igb_update_phy_info(unsigned long data)
3594{
3595	struct igb_adapter *adapter = (struct igb_adapter *) data;
3596	igb_get_phy_info(&adapter->hw);
3597}
3598
3599/**
3600 * igb_has_link - check shared code for link and determine up/down
3601 * @adapter: pointer to driver private info
3602 **/
3603bool igb_has_link(struct igb_adapter *adapter)
3604{
3605	struct e1000_hw *hw = &adapter->hw;
3606	bool link_active = false;
3607	s32 ret_val = 0;
3608
3609	/* get_link_status is set on an LSC (link status change) interrupt
3610	 * or an rx sequence error interrupt.  It is cleared again only
3611	 * once e1000_check_for_link establishes link, and only for
3612	 * copper adapters.
3613	 */
3614	switch (hw->phy.media_type) {
3615	case e1000_media_type_copper:
3616		if (hw->mac.get_link_status) {
3617			ret_val = hw->mac.ops.check_for_link(hw);
3618			link_active = !hw->mac.get_link_status;
3619		} else {
3620			link_active = true;
3621		}
3622		break;
3623	case e1000_media_type_internal_serdes:
3624		ret_val = hw->mac.ops.check_for_link(hw);
3625		link_active = hw->mac.serdes_has_link;
3626		break;
3627	default:
3628	case e1000_media_type_unknown:
3629		break;
3630	}
3631
3632	return link_active;
3633}
3634
3635static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3636{
3637	bool ret = false;
3638	u32 ctrl_ext, thstat;
3639
3640	/* check for thermal sensor event on i350, copper only */
3641	if (hw->mac.type == e1000_i350) {
3642		thstat = rd32(E1000_THSTAT);
3643		ctrl_ext = rd32(E1000_CTRL_EXT);
3644
3645		if ((hw->phy.media_type == e1000_media_type_copper) &&
3646		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3647			ret = !!(thstat & event);
3648		}
3649	}
3650
3651	return ret;
3652}
3653
3654/**
3655 * igb_watchdog - Timer Call-back
3656 * @data: pointer to adapter cast into an unsigned long
3657 **/
3658static void igb_watchdog(unsigned long data)
3659{
3660	struct igb_adapter *adapter = (struct igb_adapter *)data;
3661	/* Do the rest outside of interrupt context */
3662	schedule_work(&adapter->watchdog_task);
3663}
3664
3665static void igb_watchdog_task(struct work_struct *work)
3666{
3667	struct igb_adapter *adapter = container_of(work,
3668	                                           struct igb_adapter,
3669                                                   watchdog_task);
3670	struct e1000_hw *hw = &adapter->hw;
3671	struct net_device *netdev = adapter->netdev;
3672	u32 link;
3673	int i;
3674
3675	link = igb_has_link(adapter);
3676	if (link) {
3677		/* Cancel scheduled suspend requests. */
3678		pm_runtime_resume(netdev->dev.parent);
3679
3680		if (!netif_carrier_ok(netdev)) {
3681			u32 ctrl;
3682			hw->mac.ops.get_speed_and_duplex(hw,
3683			                                 &adapter->link_speed,
3684			                                 &adapter->link_duplex);
3685
3686			ctrl = rd32(E1000_CTRL);
3687			/* Links status message must follow this format */
3688			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3689			       "Duplex, Flow Control: %s\n",
3690			       netdev->name,
3691			       adapter->link_speed,
3692			       adapter->link_duplex == FULL_DUPLEX ?
3693			       "Full" : "Half",
3694			       (ctrl & E1000_CTRL_TFCE) &&
3695			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3696			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3697			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3698
3699			/* check for thermal sensor event */
3700			if (igb_thermal_sensor_event(hw,
3701			    E1000_THSTAT_LINK_THROTTLE)) {
3702				netdev_info(netdev, "The network adapter link "
3703					    "speed was downshifted because it "
3704					    "overheated\n");
3705			}
3706
3707			/* adjust timeout factor according to speed/duplex */
3708			adapter->tx_timeout_factor = 1;
3709			switch (adapter->link_speed) {
3710			case SPEED_10:
3711				adapter->tx_timeout_factor = 14;
3712				break;
3713			case SPEED_100:
3714				/* maybe add some timeout factor ? */
3715				break;
3716			}
3717
3718			netif_carrier_on(netdev);
3719
3720			igb_ping_all_vfs(adapter);
3721			igb_check_vf_rate_limit(adapter);
3722
3723			/* link state has changed, schedule phy info update */
3724			if (!test_bit(__IGB_DOWN, &adapter->state))
3725				mod_timer(&adapter->phy_info_timer,
3726					  round_jiffies(jiffies + 2 * HZ));
3727		}
3728	} else {
3729		if (netif_carrier_ok(netdev)) {
3730			adapter->link_speed = 0;
3731			adapter->link_duplex = 0;
3732
3733			/* check for thermal sensor event */
3734			if (igb_thermal_sensor_event(hw,
3735			    E1000_THSTAT_PWR_DOWN)) {
3736				netdev_err(netdev, "The network adapter was "
3737					   "stopped because it overheated\n");
3738			}
3739
3740			/* Link status message must follow this format */
3741			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3742			       netdev->name);
3743			netif_carrier_off(netdev);
3744
3745			igb_ping_all_vfs(adapter);
3746
3747			/* link state has changed, schedule phy info update */
3748			if (!test_bit(__IGB_DOWN, &adapter->state))
3749				mod_timer(&adapter->phy_info_timer,
3750					  round_jiffies(jiffies + 2 * HZ));
3751
3752			pm_schedule_suspend(netdev->dev.parent,
3753					    MSEC_PER_SEC * 5);
3754		}
3755	}
3756
3757	spin_lock(&adapter->stats64_lock);
3758	igb_update_stats(adapter, &adapter->stats64);
3759	spin_unlock(&adapter->stats64_lock);
3760
3761	for (i = 0; i < adapter->num_tx_queues; i++) {
3762		struct igb_ring *tx_ring = adapter->tx_ring[i];
3763		if (!netif_carrier_ok(netdev)) {
3764			/* We've lost link, so the controller stops DMA,
3765			 * but we've got queued Tx work that's never going
3766			 * to get done, so reset controller to flush Tx.
3767			 * (Do the reset outside of interrupt context). */
3768			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3769				adapter->tx_timeout_count++;
3770				schedule_work(&adapter->reset_task);
3771				/* return immediately since reset is imminent */
3772				return;
3773			}
3774		}
3775
3776		/* Force detection of hung controller every watchdog period */
3777		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3778	}
3779
3780	/* Cause software interrupt to ensure rx ring is cleaned */
3781	if (adapter->msix_entries) {
3782		u32 eics = 0;
3783		for (i = 0; i < adapter->num_q_vectors; i++)
3784			eics |= adapter->q_vector[i]->eims_value;
3785		wr32(E1000_EICS, eics);
3786	} else {
3787		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3788	}
3789
3790	igb_spoof_check(adapter);
3791
3792	/* Reset the timer */
3793	if (!test_bit(__IGB_DOWN, &adapter->state))
3794		mod_timer(&adapter->watchdog_timer,
3795			  round_jiffies(jiffies + 2 * HZ));
3796}
3797
3798enum latency_range {
3799	lowest_latency = 0,
3800	low_latency = 1,
3801	bulk_latency = 2,
3802	latency_invalid = 255
3803};
3804
3805/**
3806 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3807 *
3808 *      Stores a new ITR value based strictly on packet size.  This
3809 *      algorithm is less sophisticated than that used in igb_update_itr,
3810 *      due to the difficulty of synchronizing statistics across multiple
3811 *      receive rings.  The divisors and thresholds used by this function
3812 *      were determined based on theoretical maximum wire speed and testing
3813 *      data, in order to minimize response time while increasing bulk
3814 *      throughput.
3815 *      This functionality is controlled by the InterruptThrottleRate module
3816 *      parameter (see igb_param.c)
3817 *      NOTE:  This function is called only when operating in a multiqueue
3818 *             receive environment.
3819 * @q_vector: pointer to q_vector
3820 **/
3821static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3822{
3823	int new_val = q_vector->itr_val;
3824	int avg_wire_size = 0;
3825	struct igb_adapter *adapter = q_vector->adapter;
3826	unsigned int packets;
3827
3828	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3829	 * ints/sec - ITR timer value of 120 ticks.
3830	 */
3831	if (adapter->link_speed != SPEED_1000) {
3832		new_val = IGB_4K_ITR;
3833		goto set_itr_val;
3834	}
3835
3836	packets = q_vector->rx.total_packets;
3837	if (packets)
3838		avg_wire_size = q_vector->rx.total_bytes / packets;
3839
3840	packets = q_vector->tx.total_packets;
3841	if (packets)
3842		avg_wire_size = max_t(u32, avg_wire_size,
3843				      q_vector->tx.total_bytes / packets);
3844
3845	/* if avg_wire_size isn't set no work was done */
3846	if (!avg_wire_size)
3847		goto clear_counts;
3848
3849	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3850	avg_wire_size += 24;
3851
3852	/* Don't starve jumbo frames */
3853	avg_wire_size = min(avg_wire_size, 3000);
3854
3855	/* Give a little boost to mid-size frames */
3856	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3857		new_val = avg_wire_size / 3;
3858	else
3859		new_val = avg_wire_size / 2;
3860
3861	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3862	if (new_val < IGB_20K_ITR &&
3863	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3864	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3865		new_val = IGB_20K_ITR;
3866
3867set_itr_val:
3868	if (new_val != q_vector->itr_val) {
3869		q_vector->itr_val = new_val;
3870		q_vector->set_itr = 1;
3871	}
3872clear_counts:
3873	q_vector->rx.total_bytes = 0;
3874	q_vector->rx.total_packets = 0;
3875	q_vector->tx.total_bytes = 0;
3876	q_vector->tx.total_packets = 0;
3877}
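
/*
 * Worked example of the heuristic above (a sketch; the +24 is assumed to
 * cover 4 bytes CRC, 8 bytes preamble and 12 bytes inter-frame gap): a
 * q_vector that saw 1000 rx packets totalling 600000 bytes in the last
 * interval gets
 *
 *	avg_wire_size = 600000 / 1000 + 24 = 624
 *
 * which falls inside the 300..1200 "mid-size" window, so
 *
 *	new_val = 624 / 3 = 208
 *
 * in EITR register units; conservative mode (itr setting 3) would only
 * raise this value if it were below IGB_20K_ITR.
 */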
3878
3879/**
3880 * igb_update_itr - update the dynamic ITR value based on statistics
3881 *      Stores a new ITR value based on packet and byte
3882 *      counts during the last interrupt.  The advantage of per interrupt
3883 *      computation is faster updates and more accurate ITR for the current
3884 *      traffic pattern.  Constants in this function were computed
3885 *      based on theoretical maximum wire speed and thresholds were set based
3886 *      on testing data as well as attempting to minimize response time
3887 *      while increasing bulk throughput.
3888 *      This functionality is controlled by the InterruptThrottleRate module
3889 *      parameter (see igb_param.c)
3890 *      NOTE:  These calculations are only valid when operating in a single-
3891 *             queue environment.
3892 * @q_vector: pointer to q_vector
3893 * @ring_container: ring info to update the itr for
3894 **/
3895static void igb_update_itr(struct igb_q_vector *q_vector,
3896			   struct igb_ring_container *ring_container)
3897{
3898	unsigned int packets = ring_container->total_packets;
3899	unsigned int bytes = ring_container->total_bytes;
3900	u8 itrval = ring_container->itr;
3901
3902	/* no packets, exit with status unchanged */
3903	if (packets == 0)
3904		return;
3905
3906	switch (itrval) {
3907	case lowest_latency:
3908		/* handle TSO and jumbo frames */
3909		if (bytes/packets > 8000)
3910			itrval = bulk_latency;
3911		else if ((packets < 5) && (bytes > 512))
3912			itrval = low_latency;
3913		break;
3914	case low_latency:  /* 50 usec aka 20000 ints/s */
3915		if (bytes > 10000) {
3916			/* this if handles the TSO accounting */
3917			if (bytes/packets > 8000) {
3918				itrval = bulk_latency;
3919			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3920				itrval = bulk_latency;
3921			} else if ((packets > 35)) {
3922				itrval = lowest_latency;
3923			}
3924		} else if (bytes/packets > 2000) {
3925			itrval = bulk_latency;
3926		} else if (packets <= 2 && bytes < 512) {
3927			itrval = lowest_latency;
3928		}
3929		break;
3930	case bulk_latency: /* 250 usec aka 4000 ints/s */
3931		if (bytes > 25000) {
3932			if (packets > 35)
3933				itrval = low_latency;
3934		} else if (bytes < 1500) {
3935			itrval = low_latency;
3936		}
3937		break;
3938	}
3939
3940	/* clear work counters since we have the values we need */
3941	ring_container->total_bytes = 0;
3942	ring_container->total_packets = 0;
3943
3944	/* write updated itr to ring container */
3945	ring_container->itr = itrval;
3946}
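
/*
 * Example trace through the state machine above (illustrative only): a
 * ring container currently at low_latency that handled 40 packets and
 * 30000 bytes in the last interrupt takes the "bytes > 10000" branch;
 * 30000/40 = 750 is neither > 8000 nor > 1200 and 40 is not < 10, but
 * 40 > 35, so itrval becomes lowest_latency.  igb_set_itr() below then
 * maps lowest_latency to IGB_70K_ITR unless conservative mode is set.
 */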
3947
3948static void igb_set_itr(struct igb_q_vector *q_vector)
3949{
3950	struct igb_adapter *adapter = q_vector->adapter;
3951	u32 new_itr = q_vector->itr_val;
3952	u8 current_itr = 0;
3953
3954	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3955	if (adapter->link_speed != SPEED_1000) {
3956		current_itr = 0;
3957		new_itr = IGB_4K_ITR;
3958		goto set_itr_now;
3959	}
3960
3961	igb_update_itr(q_vector, &q_vector->tx);
3962	igb_update_itr(q_vector, &q_vector->rx);
3963
3964	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3965
3966	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3967	if (current_itr == lowest_latency &&
3968	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3969	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3970		current_itr = low_latency;
3971
3972	switch (current_itr) {
3973	/* counts and packets in update_itr are dependent on these numbers */
3974	case lowest_latency:
3975		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3976		break;
3977	case low_latency:
3978		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3979		break;
3980	case bulk_latency:
3981		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3982		break;
3983	default:
3984		break;
3985	}
3986
3987set_itr_now:
3988	if (new_itr != q_vector->itr_val) {
3989		/* this attempts to bias the interrupt rate towards Bulk
3990		 * by adding intermediate steps when interrupt rate is
3991		 * increasing */
3992		new_itr = new_itr > q_vector->itr_val ?
3993		             max((new_itr * q_vector->itr_val) /
3994		                 (new_itr + (q_vector->itr_val >> 2)),
3995				 new_itr) :
3996			     new_itr;
3997		/* Don't write the value here; it resets the adapter's
3998		 * internal timer, and causes us to delay far longer than
3999		 * we should between interrupts.  Instead, we write the ITR
4000		 * value at the beginning of the next interrupt so the timing
4001		 * ends up being correct.
4002		 */
4003		q_vector->itr_val = new_itr;
4004		q_vector->set_itr = 1;
4005	}
4006}
4007
4008static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4009			    u32 type_tucmd, u32 mss_l4len_idx)
4010{
4011	struct e1000_adv_tx_context_desc *context_desc;
4012	u16 i = tx_ring->next_to_use;
4013
4014	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4015
4016	i++;
4017	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4018
4019	/* set bits to identify this as an advanced context descriptor */
4020	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4021
4022	/* For 82575, context index must be unique per ring. */
4023	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4024		mss_l4len_idx |= tx_ring->reg_idx << 4;
4025
4026	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4027	context_desc->seqnum_seed	= 0;
4028	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4029	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4030}
4031
4032static int igb_tso(struct igb_ring *tx_ring,
4033		   struct igb_tx_buffer *first,
4034		   u8 *hdr_len)
4035{
4036	struct sk_buff *skb = first->skb;
4037	u32 vlan_macip_lens, type_tucmd;
4038	u32 mss_l4len_idx, l4len;
4039
4040	if (!skb_is_gso(skb))
4041		return 0;
4042
4043	if (skb_header_cloned(skb)) {
4044		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4045		if (err)
4046			return err;
4047	}
4048
4049	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4050	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4051
4052	if (first->protocol == __constant_htons(ETH_P_IP)) {
4053		struct iphdr *iph = ip_hdr(skb);
4054		iph->tot_len = 0;
4055		iph->check = 0;
4056		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4057							 iph->daddr, 0,
4058							 IPPROTO_TCP,
4059							 0);
4060		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4061		first->tx_flags |= IGB_TX_FLAGS_TSO |
4062				   IGB_TX_FLAGS_CSUM |
4063				   IGB_TX_FLAGS_IPV4;
4064	} else if (skb_is_gso_v6(skb)) {
4065		ipv6_hdr(skb)->payload_len = 0;
4066		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4067						       &ipv6_hdr(skb)->daddr,
4068						       0, IPPROTO_TCP, 0);
4069		first->tx_flags |= IGB_TX_FLAGS_TSO |
4070				   IGB_TX_FLAGS_CSUM;
4071	}
4072
4073	/* compute header lengths */
4074	l4len = tcp_hdrlen(skb);
4075	*hdr_len = skb_transport_offset(skb) + l4len;
4076
4077	/* update gso size and bytecount with header size */
4078	first->gso_segs = skb_shinfo(skb)->gso_segs;
4079	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4080
4081	/* MSS L4LEN IDX */
4082	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4083	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4084
4085	/* VLAN MACLEN IPLEN */
4086	vlan_macip_lens = skb_network_header_len(skb);
4087	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4088	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4089
4090	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4091
4092	return 1;
4093}
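
/*
 * Worked example of the bytecount adjustment above (a sketch assuming a
 * 14 byte Ethernet header, 20 byte IPv4 header and 32 byte TCP header,
 * i.e. hdr_len = 66): a TSO skb with skb->len = 7306 (66 bytes of headers
 * plus 7240 bytes of payload) and gso_size = 1448 is cut into
 * gso_segs = 5 frames, each carrying its own copy of the headers, so
 *
 *	first->bytecount = 7306 + (5 - 1) * 66 = 7570 = 5 * 66 + 7240
 *
 * which is the on-wire byte total later reported to byte queue limits via
 * netdev_tx_sent_queue() in igb_tx_map().
 */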
4094
4095static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4096{
4097	struct sk_buff *skb = first->skb;
4098	u32 vlan_macip_lens = 0;
4099	u32 mss_l4len_idx = 0;
4100	u32 type_tucmd = 0;
4101
4102	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4103		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4104			return;
4105	} else {
4106		u8 l4_hdr = 0;
4107		switch (first->protocol) {
4108		case __constant_htons(ETH_P_IP):
4109			vlan_macip_lens |= skb_network_header_len(skb);
4110			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4111			l4_hdr = ip_hdr(skb)->protocol;
4112			break;
4113		case __constant_htons(ETH_P_IPV6):
4114			vlan_macip_lens |= skb_network_header_len(skb);
4115			l4_hdr = ipv6_hdr(skb)->nexthdr;
4116			break;
4117		default:
4118			if (unlikely(net_ratelimit())) {
4119				dev_warn(tx_ring->dev,
4120				 "partial checksum but proto=%x!\n",
4121				 first->protocol);
4122			}
4123			break;
4124		}
4125
4126		switch (l4_hdr) {
4127		case IPPROTO_TCP:
4128			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4129			mss_l4len_idx = tcp_hdrlen(skb) <<
4130					E1000_ADVTXD_L4LEN_SHIFT;
4131			break;
4132		case IPPROTO_SCTP:
4133			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4134			mss_l4len_idx = sizeof(struct sctphdr) <<
4135					E1000_ADVTXD_L4LEN_SHIFT;
4136			break;
4137		case IPPROTO_UDP:
4138			mss_l4len_idx = sizeof(struct udphdr) <<
4139					E1000_ADVTXD_L4LEN_SHIFT;
4140			break;
4141		default:
4142			if (unlikely(net_ratelimit())) {
4143				dev_warn(tx_ring->dev,
4144				 "partial checksum but l4 proto=%x!\n",
4145				 l4_hdr);
4146			}
4147			break;
4148		}
4149
4150		/* update TX checksum flag */
4151		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4152	}
4153
4154	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4155	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4156
4157	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4158}
4159
4160static __le32 igb_tx_cmd_type(u32 tx_flags)
4161{
4162	/* set type for advanced descriptor with frame checksum insertion */
4163	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4164				      E1000_ADVTXD_DCMD_IFCS |
4165				      E1000_ADVTXD_DCMD_DEXT);
4166
4167	/* set HW vlan bit if vlan is present */
4168	if (tx_flags & IGB_TX_FLAGS_VLAN)
4169		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4170
4171	/* set timestamp bit if present */
4172	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4173		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4174
4175	/* set segmentation bits for TSO */
4176	if (tx_flags & IGB_TX_FLAGS_TSO)
4177		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4178
4179	return cmd_type;
4180}
4181
4182static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4183				 union e1000_adv_tx_desc *tx_desc,
4184				 u32 tx_flags, unsigned int paylen)
4185{
4186	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4187
4188	/* 82575 requires a unique index per ring if any offload is enabled */
4189	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4190	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4191		olinfo_status |= tx_ring->reg_idx << 4;
4192
4193	/* insert L4 checksum */
4194	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4195		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4196
4197		/* insert IPv4 checksum */
4198		if (tx_flags & IGB_TX_FLAGS_IPV4)
4199			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4200	}
4201
4202	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4203}
4204
4205/*
4206 * The largest size we can write to the descriptor is 65535.  In order to
4207 * maintain a power of two alignment we have to limit ourselves to 32K.
4208 */
4209#define IGB_MAX_TXD_PWR	15
4210#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
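
/*
 * Worked example of the limit above: a 49152 byte (48K) linear buffer is
 * emitted by igb_tx_map() below as two data descriptors, 32768 bytes and
 * then the remaining 16384 bytes, since a single descriptor may carry at
 * most IGB_MAX_DATA_PER_TXD (1 << 15 = 32768) bytes.
 */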
4211
4212static void igb_tx_map(struct igb_ring *tx_ring,
4213		       struct igb_tx_buffer *first,
4214		       const u8 hdr_len)
4215{
4216	struct sk_buff *skb = first->skb;
4217	struct igb_tx_buffer *tx_buffer_info;
4218	union e1000_adv_tx_desc *tx_desc;
4219	dma_addr_t dma;
4220	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4221	unsigned int data_len = skb->data_len;
4222	unsigned int size = skb_headlen(skb);
4223	unsigned int paylen = skb->len - hdr_len;
4224	__le32 cmd_type;
4225	u32 tx_flags = first->tx_flags;
4226	u16 i = tx_ring->next_to_use;
4227
4228	tx_desc = IGB_TX_DESC(tx_ring, i);
4229
4230	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4231	cmd_type = igb_tx_cmd_type(tx_flags);
4232
4233	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4234	if (dma_mapping_error(tx_ring->dev, dma))
4235		goto dma_error;
4236
4237	/* record length, and DMA address */
4238	first->length = size;
4239	first->dma = dma;
4240	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4241
4242	for (;;) {
4243		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4244			tx_desc->read.cmd_type_len =
4245				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4246
4247			i++;
4248			tx_desc++;
4249			if (i == tx_ring->count) {
4250				tx_desc = IGB_TX_DESC(tx_ring, 0);
4251				i = 0;
4252			}
4253
4254			dma += IGB_MAX_DATA_PER_TXD;
4255			size -= IGB_MAX_DATA_PER_TXD;
4256
4257			tx_desc->read.olinfo_status = 0;
4258			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4259		}
4260
4261		if (likely(!data_len))
4262			break;
4263
4264		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4265
4266		i++;
4267		tx_desc++;
4268		if (i == tx_ring->count) {
4269			tx_desc = IGB_TX_DESC(tx_ring, 0);
4270			i = 0;
4271		}
4272
4273		size = skb_frag_size(frag);
4274		data_len -= size;
4275
4276		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4277				   size, DMA_TO_DEVICE);
4278		if (dma_mapping_error(tx_ring->dev, dma))
4279			goto dma_error;
4280
4281		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4282		tx_buffer_info->length = size;
4283		tx_buffer_info->dma = dma;
4284
4285		tx_desc->read.olinfo_status = 0;
4286		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4287
4288		frag++;
4289	}
4290
4291	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4292
4293	/* write last descriptor with RS and EOP bits */
4294	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4295	tx_desc->read.cmd_type_len = cmd_type;
4296
4297	/* set the timestamp */
4298	first->time_stamp = jiffies;
4299
4300	/*
4301	 * Force memory writes to complete before letting h/w know there
4302	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4303	 * memory model archs, such as IA-64).
4304	 *
4305	 * We also need this memory barrier to make certain all of the
4306	 * status bits have been updated before next_to_watch is written.
4307	 */
4308	wmb();
4309
4310	/* set next_to_watch value indicating a packet is present */
4311	first->next_to_watch = tx_desc;
4312
4313	i++;
4314	if (i == tx_ring->count)
4315		i = 0;
4316
4317	tx_ring->next_to_use = i;
4318
4319	writel(i, tx_ring->tail);
4320
4321	/* we need this if more than one processor can write to our tail
4322	 * at a time, it synchronizes IO on IA64/Altix systems */
4323	mmiowb();
4324
4325	return;
4326
4327dma_error:
4328	dev_err(tx_ring->dev, "TX DMA map failed\n");
4329
4330	/* clear dma mappings for failed tx_buffer_info map */
4331	for (;;) {
4332		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4333		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4334		if (tx_buffer_info == first)
4335			break;
4336		if (i == 0)
4337			i = tx_ring->count;
4338		i--;
4339	}
4340
4341	tx_ring->next_to_use = i;
4342}
4343
4344static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4345{
4346	struct net_device *netdev = tx_ring->netdev;
4347
4348	netif_stop_subqueue(netdev, tx_ring->queue_index);
4349
4350	/* Herbert's original patch had:
4351	 *  smp_mb__after_netif_stop_queue();
4352	 * but since that doesn't exist yet, just open code it. */
4353	smp_mb();
4354
4355	/* We need to check again in a case another CPU has just
4356	 * made room available. */
4357	if (igb_desc_unused(tx_ring) < size)
4358		return -EBUSY;
4359
4360	/* A reprieve! */
4361	netif_wake_subqueue(netdev, tx_ring->queue_index);
4362
4363	u64_stats_update_begin(&tx_ring->tx_syncp2);
4364	tx_ring->tx_stats.restart_queue2++;
4365	u64_stats_update_end(&tx_ring->tx_syncp2);
4366
4367	return 0;
4368}
4369
4370static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4371{
4372	if (igb_desc_unused(tx_ring) >= size)
4373		return 0;
4374	return __igb_maybe_stop_tx(tx_ring, size);
4375}
4376
4377netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4378				struct igb_ring *tx_ring)
4379{
4380	struct igb_tx_buffer *first;
4381	int tso;
4382	u32 tx_flags = 0;
4383	__be16 protocol = vlan_get_protocol(skb);
4384	u8 hdr_len = 0;
4385
4386	/* need: 1 descriptor per page,
4387	 *       + 2 desc gap to keep tail from touching head,
4388	 *       + 1 desc for skb->data,
4389	 *       + 1 desc for context descriptor,
4390	 * otherwise try next time */
4391	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4392		/* this is a hard error */
4393		return NETDEV_TX_BUSY;
4394	}
4395
4396	/* record the location of the first descriptor for this packet */
4397	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4398	first->skb = skb;
4399	first->bytecount = skb->len;
4400	first->gso_segs = 1;
4401
4402	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4403		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4404		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4405	}
4406
4407	if (vlan_tx_tag_present(skb)) {
4408		tx_flags |= IGB_TX_FLAGS_VLAN;
4409		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4410	}
4411
4412	/* record initial flags and protocol */
4413	first->tx_flags = tx_flags;
4414	first->protocol = protocol;
4415
4416	tso = igb_tso(tx_ring, first, &hdr_len);
4417	if (tso < 0)
4418		goto out_drop;
4419	else if (!tso)
4420		igb_tx_csum(tx_ring, first);
4421
4422	igb_tx_map(tx_ring, first, hdr_len);
4423
4424	/* Make sure there is space in the ring for the next send. */
4425	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4426
4427	return NETDEV_TX_OK;
4428
4429out_drop:
4430	igb_unmap_and_free_tx_resource(tx_ring, first);
4431
4432	return NETDEV_TX_OK;
4433}
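
/*
 * Descriptor budget example (illustrative): for an skb with three page
 * fragments the "nr_frags + 4" check above requires 7 free descriptors:
 * one for skb->data, one for the context descriptor, one per fragment and
 * a two descriptor gap to keep the tail from touching the head.  After
 * queueing, the ring is stopped early if fewer than MAX_SKB_FRAGS + 4
 * descriptors remain for the next worst-case send.
 */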
4434
4435static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4436						    struct sk_buff *skb)
4437{
4438	unsigned int r_idx = skb->queue_mapping;
4439
4440	if (r_idx >= adapter->num_tx_queues)
4441		r_idx = r_idx % adapter->num_tx_queues;
4442
4443	return adapter->tx_ring[r_idx];
4444}
4445
4446static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4447				  struct net_device *netdev)
4448{
4449	struct igb_adapter *adapter = netdev_priv(netdev);
4450
4451	if (test_bit(__IGB_DOWN, &adapter->state)) {
4452		dev_kfree_skb_any(skb);
4453		return NETDEV_TX_OK;
4454	}
4455
4456	if (skb->len <= 0) {
4457		dev_kfree_skb_any(skb);
4458		return NETDEV_TX_OK;
4459	}
4460
4461	/*
4462	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4463	 * in order to meet this minimum size requirement.
4464	 */
4465	if (skb->len < 17) {
4466		if (skb_padto(skb, 17))
4467			return NETDEV_TX_OK;
4468		skb->len = 17;
4469	}
4470
4471	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4472}
4473
4474/**
4475 * igb_tx_timeout - Respond to a Tx Hang
4476 * @netdev: network interface device structure
4477 **/
4478static void igb_tx_timeout(struct net_device *netdev)
4479{
4480	struct igb_adapter *adapter = netdev_priv(netdev);
4481	struct e1000_hw *hw = &adapter->hw;
4482
4483	/* Do the reset outside of interrupt context */
4484	adapter->tx_timeout_count++;
4485
4486	if (hw->mac.type >= e1000_82580)
4487		hw->dev_spec._82575.global_device_reset = true;
4488
4489	schedule_work(&adapter->reset_task);
4490	wr32(E1000_EICS,
4491	     (adapter->eims_enable_mask & ~adapter->eims_other));
4492}
4493
4494static void igb_reset_task(struct work_struct *work)
4495{
4496	struct igb_adapter *adapter;
4497	adapter = container_of(work, struct igb_adapter, reset_task);
4498
4499	igb_dump(adapter);
4500	netdev_err(adapter->netdev, "Reset adapter\n");
4501	igb_reinit_locked(adapter);
4502}
4503
4504/**
4505 * igb_get_stats64 - Get System Network Statistics
4506 * @netdev: network interface device structure
4507 * @stats: rtnl_link_stats64 pointer
4508 *
4509 **/
4510static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4511						 struct rtnl_link_stats64 *stats)
4512{
4513	struct igb_adapter *adapter = netdev_priv(netdev);
4514
4515	spin_lock(&adapter->stats64_lock);
4516	igb_update_stats(adapter, &adapter->stats64);
4517	memcpy(stats, &adapter->stats64, sizeof(*stats));
4518	spin_unlock(&adapter->stats64_lock);
4519
4520	return stats;
4521}
4522
4523/**
4524 * igb_change_mtu - Change the Maximum Transfer Unit
4525 * @netdev: network interface device structure
4526 * @new_mtu: new value for maximum frame size
4527 *
4528 * Returns 0 on success, negative on failure
4529 **/
4530static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4531{
4532	struct igb_adapter *adapter = netdev_priv(netdev);
4533	struct pci_dev *pdev = adapter->pdev;
4534	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4535
4536	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4537		dev_err(&pdev->dev, "Invalid MTU setting\n");
4538		return -EINVAL;
4539	}
4540
4541#define MAX_STD_JUMBO_FRAME_SIZE 9238
4542	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4543		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4544		return -EINVAL;
4545	}
4546
4547	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4548		msleep(1);
4549
4550	/* igb_down has a dependency on max_frame_size */
4551	adapter->max_frame_size = max_frame;
4552
4553	if (netif_running(netdev))
4554		igb_down(adapter);
4555
4556	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4557		 netdev->mtu, new_mtu);
4558	netdev->mtu = new_mtu;
4559
4560	if (netif_running(netdev))
4561		igb_up(adapter);
4562	else
4563		igb_reset(adapter);
4564
4565	clear_bit(__IGB_RESETTING, &adapter->state);
4566
4567	return 0;
4568}
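
/*
 * Frame size arithmetic used above (for reference): an MTU of 9000 gives
 *
 *	max_frame = 9000 + ETH_HLEN(14) + ETH_FCS_LEN(4) + VLAN_HLEN(4) = 9022
 *
 * which is accepted, while MAX_STD_JUMBO_FRAME_SIZE (9238) corresponds to
 * the 9216 byte MTU quoted in the error message (9216 + 22 = 9238).
 */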
4569
4570/**
4571 * igb_update_stats - Update the board statistics counters
4572 * @adapter: board private structure
4573 **/
4574
4575void igb_update_stats(struct igb_adapter *adapter,
4576		      struct rtnl_link_stats64 *net_stats)
4577{
4578	struct e1000_hw *hw = &adapter->hw;
4579	struct pci_dev *pdev = adapter->pdev;
4580	u32 reg, mpc;
4581	u16 phy_tmp;
4582	int i;
4583	u64 bytes, packets;
4584	unsigned int start;
4585	u64 _bytes, _packets;
4586
4587#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4588
4589	/*
4590	 * Prevent stats update while adapter is being reset, or if the pci
4591	 * connection is down.
4592	 */
4593	if (adapter->link_speed == 0)
4594		return;
4595	if (pci_channel_offline(pdev))
4596		return;
4597
4598	bytes = 0;
4599	packets = 0;
4600	for (i = 0; i < adapter->num_rx_queues; i++) {
4601		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4602		struct igb_ring *ring = adapter->rx_ring[i];
4603
4604		ring->rx_stats.drops += rqdpc_tmp;
4605		net_stats->rx_fifo_errors += rqdpc_tmp;
4606
4607		do {
4608			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4609			_bytes = ring->rx_stats.bytes;
4610			_packets = ring->rx_stats.packets;
4611		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4612		bytes += _bytes;
4613		packets += _packets;
4614	}
4615
4616	net_stats->rx_bytes = bytes;
4617	net_stats->rx_packets = packets;
4618
4619	bytes = 0;
4620	packets = 0;
4621	for (i = 0; i < adapter->num_tx_queues; i++) {
4622		struct igb_ring *ring = adapter->tx_ring[i];
4623		do {
4624			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4625			_bytes = ring->tx_stats.bytes;
4626			_packets = ring->tx_stats.packets;
4627		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4628		bytes += _bytes;
4629		packets += _packets;
4630	}
4631	net_stats->tx_bytes = bytes;
4632	net_stats->tx_packets = packets;
4633
4634	/* read stats registers */
4635	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4636	adapter->stats.gprc += rd32(E1000_GPRC);
4637	adapter->stats.gorc += rd32(E1000_GORCL);
4638	rd32(E1000_GORCH); /* clear GORCL */
4639	adapter->stats.bprc += rd32(E1000_BPRC);
4640	adapter->stats.mprc += rd32(E1000_MPRC);
4641	adapter->stats.roc += rd32(E1000_ROC);
4642
4643	adapter->stats.prc64 += rd32(E1000_PRC64);
4644	adapter->stats.prc127 += rd32(E1000_PRC127);
4645	adapter->stats.prc255 += rd32(E1000_PRC255);
4646	adapter->stats.prc511 += rd32(E1000_PRC511);
4647	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4648	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4649	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4650	adapter->stats.sec += rd32(E1000_SEC);
4651
4652	mpc = rd32(E1000_MPC);
4653	adapter->stats.mpc += mpc;
4654	net_stats->rx_fifo_errors += mpc;
4655	adapter->stats.scc += rd32(E1000_SCC);
4656	adapter->stats.ecol += rd32(E1000_ECOL);
4657	adapter->stats.mcc += rd32(E1000_MCC);
4658	adapter->stats.latecol += rd32(E1000_LATECOL);
4659	adapter->stats.dc += rd32(E1000_DC);
4660	adapter->stats.rlec += rd32(E1000_RLEC);
4661	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4662	adapter->stats.xontxc += rd32(E1000_XONTXC);
4663	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4664	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4665	adapter->stats.fcruc += rd32(E1000_FCRUC);
4666	adapter->stats.gptc += rd32(E1000_GPTC);
4667	adapter->stats.gotc += rd32(E1000_GOTCL);
4668	rd32(E1000_GOTCH); /* clear GOTCL */
4669	adapter->stats.rnbc += rd32(E1000_RNBC);
4670	adapter->stats.ruc += rd32(E1000_RUC);
4671	adapter->stats.rfc += rd32(E1000_RFC);
4672	adapter->stats.rjc += rd32(E1000_RJC);
4673	adapter->stats.tor += rd32(E1000_TORH);
4674	adapter->stats.tot += rd32(E1000_TOTH);
4675	adapter->stats.tpr += rd32(E1000_TPR);
4676
4677	adapter->stats.ptc64 += rd32(E1000_PTC64);
4678	adapter->stats.ptc127 += rd32(E1000_PTC127);
4679	adapter->stats.ptc255 += rd32(E1000_PTC255);
4680	adapter->stats.ptc511 += rd32(E1000_PTC511);
4681	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4682	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4683
4684	adapter->stats.mptc += rd32(E1000_MPTC);
4685	adapter->stats.bptc += rd32(E1000_BPTC);
4686
4687	adapter->stats.tpt += rd32(E1000_TPT);
4688	adapter->stats.colc += rd32(E1000_COLC);
4689
4690	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4691	/* read internal phy specific stats */
4692	reg = rd32(E1000_CTRL_EXT);
4693	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4694		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4695		adapter->stats.tncrs += rd32(E1000_TNCRS);
4696	}
4697
4698	adapter->stats.tsctc += rd32(E1000_TSCTC);
4699	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4700
4701	adapter->stats.iac += rd32(E1000_IAC);
4702	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4703	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4704	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4705	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4706	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4707	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4708	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4709	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4710
4711	/* Fill out the OS statistics structure */
4712	net_stats->multicast = adapter->stats.mprc;
4713	net_stats->collisions = adapter->stats.colc;
4714
4715	/* Rx Errors */
4716
4717	/* RLEC on some newer hardware can be incorrect so build
4718	 * our own version based on RUC and ROC */
4719	net_stats->rx_errors = adapter->stats.rxerrc +
4720		adapter->stats.crcerrs + adapter->stats.algnerrc +
4721		adapter->stats.ruc + adapter->stats.roc +
4722		adapter->stats.cexterr;
4723	net_stats->rx_length_errors = adapter->stats.ruc +
4724				      adapter->stats.roc;
4725	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4726	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4727	net_stats->rx_missed_errors = adapter->stats.mpc;
4728
4729	/* Tx Errors */
4730	net_stats->tx_errors = adapter->stats.ecol +
4731			       adapter->stats.latecol;
4732	net_stats->tx_aborted_errors = adapter->stats.ecol;
4733	net_stats->tx_window_errors = adapter->stats.latecol;
4734	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4735
4736	/* Tx Dropped needs to be maintained elsewhere */
4737
4738	/* Phy Stats */
4739	if (hw->phy.media_type == e1000_media_type_copper) {
4740		if ((adapter->link_speed == SPEED_1000) &&
4741		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4742			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4743			adapter->phy_stats.idle_errors += phy_tmp;
4744		}
4745	}
4746
4747	/* Management Stats */
4748	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4749	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4750	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4751
4752	/* OS2BMC Stats */
4753	reg = rd32(E1000_MANC);
4754	if (reg & E1000_MANC_EN_BMC2OS) {
4755		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4756		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4757		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4758		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4759	}
4760}
4761
4762static irqreturn_t igb_msix_other(int irq, void *data)
4763{
4764	struct igb_adapter *adapter = data;
4765	struct e1000_hw *hw = &adapter->hw;
4766	u32 icr = rd32(E1000_ICR);
4767	/* reading ICR causes bit 31 of EICR to be cleared */
4768
4769	if (icr & E1000_ICR_DRSTA)
4770		schedule_work(&adapter->reset_task);
4771
4772	if (icr & E1000_ICR_DOUTSYNC) {
4773		/* HW is reporting DMA is out of sync */
4774		adapter->stats.doosync++;
4775		/* The DMA Out of Sync is also an indication of a spoof event
4776		 * in IOV mode. Check the Wrong VM Behavior register to
4777		 * see if it is really a spoof event. */
4778		igb_check_wvbr(adapter);
4779	}
4780
4781	/* Check for a mailbox event */
4782	if (icr & E1000_ICR_VMMB)
4783		igb_msg_task(adapter);
4784
4785	if (icr & E1000_ICR_LSC) {
4786		hw->mac.get_link_status = 1;
4787		/* guard against interrupt when we're going down */
4788		if (!test_bit(__IGB_DOWN, &adapter->state))
4789			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4790	}
4791
4792	wr32(E1000_EIMS, adapter->eims_other);
4793
4794	return IRQ_HANDLED;
4795}
4796
4797static void igb_write_itr(struct igb_q_vector *q_vector)
4798{
4799	struct igb_adapter *adapter = q_vector->adapter;
4800	u32 itr_val = q_vector->itr_val & 0x7FFC;
4801
4802	if (!q_vector->set_itr)
4803		return;
4804
4805	if (!itr_val)
4806		itr_val = 0x4;
4807
4808	if (adapter->hw.mac.type == e1000_82575)
4809		itr_val |= itr_val << 16;
4810	else
4811		itr_val |= E1000_EITR_CNT_IGNR;
4812
4813	writel(itr_val, q_vector->itr_register);
4814	q_vector->set_itr = 0;
4815}
4816
4817static irqreturn_t igb_msix_ring(int irq, void *data)
4818{
4819	struct igb_q_vector *q_vector = data;
4820
4821	/* Write the ITR value calculated from the previous interrupt. */
4822	igb_write_itr(q_vector);
4823
4824	napi_schedule(&q_vector->napi);
4825
4826	return IRQ_HANDLED;
4827}
4828
4829#ifdef CONFIG_IGB_DCA
4830static void igb_update_dca(struct igb_q_vector *q_vector)
4831{
4832	struct igb_adapter *adapter = q_vector->adapter;
4833	struct e1000_hw *hw = &adapter->hw;
4834	int cpu = get_cpu();
4835
4836	if (q_vector->cpu == cpu)
4837		goto out_no_update;
4838
4839	if (q_vector->tx.ring) {
4840		int q = q_vector->tx.ring->reg_idx;
4841		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4842		if (hw->mac.type == e1000_82575) {
4843			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4844			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4845		} else {
4846			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4847			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4848			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4849		}
4850		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4851		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4852	}
4853	if (q_vector->rx.ring) {
4854		int q = q_vector->rx.ring->reg_idx;
4855		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4856		if (hw->mac.type == e1000_82575) {
4857			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4858			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4859		} else {
4860			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4861			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4862			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4863		}
4864		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4865		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4866		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4867		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4868	}
4869	q_vector->cpu = cpu;
4870out_no_update:
4871	put_cpu();
4872}
4873
4874static void igb_setup_dca(struct igb_adapter *adapter)
4875{
4876	struct e1000_hw *hw = &adapter->hw;
4877	int i;
4878
4879	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4880		return;
4881
4882	/* Always use CB2 mode, difference is masked in the CB driver. */
4883	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4884
4885	for (i = 0; i < adapter->num_q_vectors; i++) {
4886		adapter->q_vector[i]->cpu = -1;
4887		igb_update_dca(adapter->q_vector[i]);
4888	}
4889}
4890
4891static int __igb_notify_dca(struct device *dev, void *data)
4892{
4893	struct net_device *netdev = dev_get_drvdata(dev);
4894	struct igb_adapter *adapter = netdev_priv(netdev);
4895	struct pci_dev *pdev = adapter->pdev;
4896	struct e1000_hw *hw = &adapter->hw;
4897	unsigned long event = *(unsigned long *)data;
4898
4899	switch (event) {
4900	case DCA_PROVIDER_ADD:
4901		/* if already enabled, don't do it again */
4902		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4903			break;
4904		if (dca_add_requester(dev) == 0) {
4905			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4906			dev_info(&pdev->dev, "DCA enabled\n");
4907			igb_setup_dca(adapter);
4908			break;
4909		}
4910		/* Fall Through since DCA is disabled. */
4911	case DCA_PROVIDER_REMOVE:
4912		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4913			/* without this a class_device is left
4914			 * hanging around in the sysfs model */
4915			dca_remove_requester(dev);
4916			dev_info(&pdev->dev, "DCA disabled\n");
4917			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4918			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4919		}
4920		break;
4921	}
4922
4923	return 0;
4924}
4925
4926static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4927                          void *p)
4928{
4929	int ret_val;
4930
4931	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4932	                                 __igb_notify_dca);
4933
4934	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4935}
4936#endif /* CONFIG_IGB_DCA */
4937
4938#ifdef CONFIG_PCI_IOV
4939static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4940{
4941	unsigned char mac_addr[ETH_ALEN];
4942	struct pci_dev *pdev = adapter->pdev;
4943	struct e1000_hw *hw = &adapter->hw;
4944	struct pci_dev *pvfdev;
4945	unsigned int device_id;
4946	u16 thisvf_devfn;
4947
4948	random_ether_addr(mac_addr);
4949	igb_set_vf_mac(adapter, vf, mac_addr);
4950
4951	switch (adapter->hw.mac.type) {
4952	case e1000_82576:
4953		device_id = IGB_82576_VF_DEV_ID;
4954		/* VF Stride for 82576 is 2 */
4955		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4956			(pdev->devfn & 1);
4957		break;
4958	case e1000_i350:
4959		device_id = IGB_I350_VF_DEV_ID;
4960		/* VF Stride for I350 is 4 */
4961		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4962				(pdev->devfn & 3);
4963		break;
4964	default:
4965		device_id = 0;
4966		thisvf_devfn = 0;
4967		break;
4968	}
4969
4970	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4971	while (pvfdev) {
4972		if (pvfdev->devfn == thisvf_devfn)
4973			break;
4974		pvfdev = pci_get_device(hw->vendor_id,
4975					device_id, pvfdev);
4976	}
4977
4978	if (pvfdev)
4979		adapter->vf_data[vf].vfdev = pvfdev;
4980	else
4981		dev_err(&pdev->dev,
4982			"Couldn't find pci dev ptr for VF %4.4x\n",
4983			thisvf_devfn);
4984	return pvfdev != NULL;
4985}
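
/*
 * Devfn arithmetic example (derived from the expressions above): with the
 * PF at devfn 0x00, VF 3 of an 82576 is expected at
 *
 *	(0x00 + 0x80 + (3 << 1)) | (0x00 & 1) = 0x86
 *
 * while the same VF of an i350 (stride 4) is expected at 0x80 + 12 = 0x8c.
 */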
4986
4987static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4988{
4989	struct e1000_hw *hw = &adapter->hw;
4990	struct pci_dev *pdev = adapter->pdev;
4991	struct pci_dev *pvfdev;
4992	u16 vf_devfn = 0;
4993	u16 vf_stride;
4994	unsigned int device_id;
4995	int vfs_found = 0;
4996
4997	switch (adapter->hw.mac.type) {
4998	case e1000_82576:
4999		device_id = IGB_82576_VF_DEV_ID;
5000		/* VF Stride for 82576 is 2 */
5001		vf_stride = 2;
5002		break;
5003	case e1000_i350:
5004		device_id = IGB_I350_VF_DEV_ID;
5005		/* VF Stride for I350 is 4 */
5006		vf_stride = 4;
5007		break;
5008	default:
5009		device_id = 0;
5010		vf_stride = 0;
5011		break;
5012	}
5013
5014	vf_devfn = pdev->devfn + 0x80;
5015	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5016	while (pvfdev) {
5017		if (pvfdev->devfn == vf_devfn)
5018			vfs_found++;
5019		vf_devfn += vf_stride;
5020		pvfdev = pci_get_device(hw->vendor_id,
5021					device_id, pvfdev);
5022	}
5023
5024	return vfs_found;
5025}
5026
5027static int igb_check_vf_assignment(struct igb_adapter *adapter)
5028{
5029	int i;
5030	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5031		if (adapter->vf_data[i].vfdev) {
5032			if (adapter->vf_data[i].vfdev->dev_flags &
5033			    PCI_DEV_FLAGS_ASSIGNED)
5034				return true;
5035		}
5036	}
5037	return false;
5038}
5039
5040#endif
5041static void igb_ping_all_vfs(struct igb_adapter *adapter)
5042{
5043	struct e1000_hw *hw = &adapter->hw;
5044	u32 ping;
5045	int i;
5046
5047	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5048		ping = E1000_PF_CONTROL_MSG;
5049		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5050			ping |= E1000_VT_MSGTYPE_CTS;
5051		igb_write_mbx(hw, &ping, 1, i);
5052	}
5053}
5054
5055static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5056{
5057	struct e1000_hw *hw = &adapter->hw;
5058	u32 vmolr = rd32(E1000_VMOLR(vf));
5059	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5060
5061	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5062	                    IGB_VF_FLAG_MULTI_PROMISC);
5063	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5064
5065	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5066		vmolr |= E1000_VMOLR_MPME;
5067		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5068		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5069	} else {
5070		/*
5071		 * if we have hashes and we are clearing a multicast promisc
5072		 * flag we need to write the hashes to the MTA as this step
5073		 * was previously skipped
5074		 */
5075		if (vf_data->num_vf_mc_hashes > 30) {
5076			vmolr |= E1000_VMOLR_MPME;
5077		} else if (vf_data->num_vf_mc_hashes) {
5078			int j;
5079			vmolr |= E1000_VMOLR_ROMPE;
5080			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5081				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5082		}
5083	}
5084
5085	wr32(E1000_VMOLR(vf), vmolr);
5086
5087	/* there are flags left unprocessed, likely not supported */
5088	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5089		return -EINVAL;
5090
5091	return 0;
5092
5093}
5094
5095static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5096				  u32 *msgbuf, u32 vf)
5097{
5098	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5099	u16 *hash_list = (u16 *)&msgbuf[1];
5100	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5101	int i;
5102
5103	/* salt away the number of multicast addresses assigned
5104	 * to this VF for later use to restore when the PF multicast
5105	 * list changes
5106	 */
5107	vf_data->num_vf_mc_hashes = n;
5108
5109	/* only up to 30 hash values supported */
5110	if (n > 30)
5111		n = 30;
5112
5113	/* store the hashes for later use */
5114	for (i = 0; i < n; i++)
5115		vf_data->vf_mc_hashes[i] = hash_list[i];
5116
5117	/* Flush and reset the mta with the new values */
5118	igb_set_rx_mode(adapter->netdev);
5119
5120	return 0;
5121}
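
/*
 * Mailbox layout example (illustrative, little-endian host assumed): a VF
 * requesting three multicast hashes encodes 3 in the E1000_VT_MSGINFO
 * field of msgbuf[0]; the 16-bit hash values are then read from msgbuf[1]
 * (entries 0 and 1) and the low half of msgbuf[2] (entry 2) via the
 * hash_list pointer above.
 */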
5122
5123static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5124{
5125	struct e1000_hw *hw = &adapter->hw;
5126	struct vf_data_storage *vf_data;
5127	int i, j;
5128
5129	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5130		u32 vmolr = rd32(E1000_VMOLR(i));
5131		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5132
5133		vf_data = &adapter->vf_data[i];
5134
5135		if ((vf_data->num_vf_mc_hashes > 30) ||
5136		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5137			vmolr |= E1000_VMOLR_MPME;
5138		} else if (vf_data->num_vf_mc_hashes) {
5139			vmolr |= E1000_VMOLR_ROMPE;
5140			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5141				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5142		}
5143		wr32(E1000_VMOLR(i), vmolr);
5144	}
5145}
5146
5147static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5148{
5149	struct e1000_hw *hw = &adapter->hw;
5150	u32 pool_mask, reg, vid;
5151	int i;
5152
5153	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5154
5155	/* Find the vlan filter for this id */
5156	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5157		reg = rd32(E1000_VLVF(i));
5158
5159		/* remove the vf from the pool */
5160		reg &= ~pool_mask;
5161
5162		/* if pool is empty then remove entry from vfta */
5163		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5164		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5165			vid = reg & E1000_VLVF_VLANID_MASK;
5166			reg = 0;
5167			igb_vfta_set(hw, vid, false);
5168		}
5169
5170		wr32(E1000_VLVF(i), reg);
5171	}
5172
5173	adapter->vf_data[vf].vlans_enabled = 0;
5174}
5175
5176static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5177{
5178	struct e1000_hw *hw = &adapter->hw;
5179	u32 reg, i;
5180
5181	/* The vlvf table only exists on 82576 hardware and newer */
5182	if (hw->mac.type < e1000_82576)
5183		return -1;
5184
5185	/* we only need to do this if VMDq is enabled */
5186	if (!adapter->vfs_allocated_count)
5187		return -1;
5188
5189	/* Find the vlan filter for this id */
5190	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5191		reg = rd32(E1000_VLVF(i));
5192		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5193		    vid == (reg & E1000_VLVF_VLANID_MASK))
5194			break;
5195	}
5196
5197	if (add) {
5198		if (i == E1000_VLVF_ARRAY_SIZE) {
5199			/* Did not find a matching VLAN ID entry that was
5200			 * enabled.  Search for a free filter entry, i.e.
5201			 * one without the enable bit set
5202			 */
5203			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5204				reg = rd32(E1000_VLVF(i));
5205				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5206					break;
5207			}
5208		}
5209		if (i < E1000_VLVF_ARRAY_SIZE) {
5210			/* Found an enabled/available entry */
5211			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5212
5213			/* if !enabled we need to set this up in vfta */
5214			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5215				/* add VID to filter table */
5216				igb_vfta_set(hw, vid, true);
5217				reg |= E1000_VLVF_VLANID_ENABLE;
5218			}
5219			reg &= ~E1000_VLVF_VLANID_MASK;
5220			reg |= vid;
5221			wr32(E1000_VLVF(i), reg);
5222
5223			/* do not modify RLPML for PF devices */
5224			if (vf >= adapter->vfs_allocated_count)
5225				return 0;
5226
5227			if (!adapter->vf_data[vf].vlans_enabled) {
5228				u32 size;
5229				reg = rd32(E1000_VMOLR(vf));
5230				size = reg & E1000_VMOLR_RLPML_MASK;
5231				size += 4;
5232				reg &= ~E1000_VMOLR_RLPML_MASK;
5233				reg |= size;
5234				wr32(E1000_VMOLR(vf), reg);
5235			}
5236
5237			adapter->vf_data[vf].vlans_enabled++;
5238		}
5239	} else {
5240		if (i < E1000_VLVF_ARRAY_SIZE) {
5241			/* remove vf from the pool */
5242			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5243			/* if pool is empty then remove entry from vfta */
5244			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5245				reg = 0;
5246				igb_vfta_set(hw, vid, false);
5247			}
5248			wr32(E1000_VLVF(i), reg);
5249
5250			/* do not modify RLPML for PF devices */
5251			if (vf >= adapter->vfs_allocated_count)
5252				return 0;
5253
5254			adapter->vf_data[vf].vlans_enabled--;
5255			if (!adapter->vf_data[vf].vlans_enabled) {
5256				u32 size;
5257				reg = rd32(E1000_VMOLR(vf));
5258				size = reg & E1000_VMOLR_RLPML_MASK;
5259				size -= 4;
5260				reg &= ~E1000_VMOLR_RLPML_MASK;
5261				reg |= size;
5262				wr32(E1000_VMOLR(vf), reg);
5263			}
5264		}
5265	}
5266	return 0;
5267}
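
/*
 * RLPML example (illustrative): when the first VLAN is enabled for a VF,
 * the code above grows the VMOLR.RLPML field by 4, e.g. from 1522 to
 * 1526, presumably to leave room for the 4 byte VLAN tag; the adjustment
 * is undone when the VF's last VLAN is removed.
 */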
5268
5269static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5270{
5271	struct e1000_hw *hw = &adapter->hw;
5272
5273	if (vid)
5274		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5275	else
5276		wr32(E1000_VMVIR(vf), 0);
5277}
5278
5279static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5280			       int vf, u16 vlan, u8 qos)
5281{
5282	int err = 0;
5283	struct igb_adapter *adapter = netdev_priv(netdev);
5284
5285	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5286		return -EINVAL;
5287	if (vlan || qos) {
5288		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5289		if (err)
5290			goto out;
5291		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5292		igb_set_vmolr(adapter, vf, !vlan);
5293		adapter->vf_data[vf].pf_vlan = vlan;
5294		adapter->vf_data[vf].pf_qos = qos;
5295		dev_info(&adapter->pdev->dev,
5296			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5297		if (test_bit(__IGB_DOWN, &adapter->state)) {
5298			dev_warn(&adapter->pdev->dev,
5299				 "The VF VLAN has been set,"
5300				 " but the PF device is not up.\n");
5301			dev_warn(&adapter->pdev->dev,
5302				 "Bring the PF device up before"
5303				 " attempting to use the VF device.\n");
5304		}
5305	} else {
5306		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5307				   false, vf);
5308		igb_set_vmvir(adapter, vlan, vf);
5309		igb_set_vmolr(adapter, vf, true);
5310		adapter->vf_data[vf].pf_vlan = 0;
5311		adapter->vf_data[vf].pf_qos = 0;
5312	}
5313out:
5314	return err;
5315}
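
/*
 * Usage note (an assumption about typical administration, not taken from
 * this file): this ndo is normally reached through the rtnetlink
 * IFLA_VF_VLAN attribute, e.g. something like
 *
 *	ip link set dev <pf-netdev> vf 0 vlan 100 qos 3
 *
 * which would program VLAN 100 with priority 3 for VF 0 via the VLVF and
 * VMVIR handling above.
 */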
5316
5317static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5318{
5319	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5320	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5321
5322	return igb_vlvf_set(adapter, vid, add, vf);
5323}
5324
5325static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5326{
5327	/* clear flags - except flag that indicates PF has set the MAC */
5328	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5329	adapter->vf_data[vf].last_nack = jiffies;
5330
5331	/* reset offloads to defaults */
5332	igb_set_vmolr(adapter, vf, true);
5333
5334	/* reset vlans for device */
5335	igb_clear_vf_vfta(adapter, vf);
5336	if (adapter->vf_data[vf].pf_vlan)
5337		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5338				    adapter->vf_data[vf].pf_vlan,
5339				    adapter->vf_data[vf].pf_qos);
5340	else
5341		igb_clear_vf_vfta(adapter, vf);
5342
5343	/* reset multicast table array for vf */
5344	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5345
5346	/* Flush and reset the mta with the new values */
5347	igb_set_rx_mode(adapter->netdev);
5348}
5349
5350static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5351{
5352	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5353
5354	/* generate a new mac address as we were hotplug removed/added */
5355	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5356		random_ether_addr(vf_mac);
5357
5358	/* process remaining reset events */
5359	igb_vf_reset(adapter, vf);
5360}
5361
5362static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5363{
5364	struct e1000_hw *hw = &adapter->hw;
5365	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
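	/* VF RAR entries are allocated from the end of the RAR table, one per VF */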
5366	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5367	u32 reg, msgbuf[3];
5368	u8 *addr = (u8 *)(&msgbuf[1]);
5369
5370	/* process all the same items cleared in a function level reset */
5371	igb_vf_reset(adapter, vf);
5372
5373	/* set vf mac address */
5374	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5375
5376	/* enable transmit and receive for vf */
5377	reg = rd32(E1000_VFTE);
5378	wr32(E1000_VFTE, reg | (1 << vf));
5379	reg = rd32(E1000_VFRE);
5380	wr32(E1000_VFRE, reg | (1 << vf));
5381
5382	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5383
5384	/* reply to reset with ack and vf mac address */
5385	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5386	memcpy(addr, vf_mac, 6);
5387	igb_write_mbx(hw, msgbuf, 3, vf);
5388}
5389
5390static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5391{
5392	/*
5393	 * The VF MAC Address is stored in a packed array of bytes
5394	 * starting at the second 32 bit word of the msg array
5395	 */
5396	unsigned char *addr = (unsigned char *)&msg[1];
5397	int err = -1;
5398
5399	if (is_valid_ether_addr(addr))
5400		err = igb_set_vf_mac(adapter, vf, addr);
5401
5402	return err;
5403}
5404
5405static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5406{
5407	struct e1000_hw *hw = &adapter->hw;
5408	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5409	u32 msg = E1000_VT_MSGTYPE_NACK;
5410
5411	/* if device isn't clear to send it shouldn't be reading either */
5412	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5413	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5414		igb_write_mbx(hw, &msg, 1, vf);
5415		vf_data->last_nack = jiffies;
5416	}
5417}
5418
5419static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5420{
5421	struct pci_dev *pdev = adapter->pdev;
5422	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5423	struct e1000_hw *hw = &adapter->hw;
5424	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5425	s32 retval;
5426
5427	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5428
5429	if (retval) {
5430		/* if receive failed revoke VF CTS status and restart init */
5431		dev_err(&pdev->dev, "Error receiving message from VF\n");
5432		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5433		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5434			return;
5435		goto out;
5436	}
5437
5438	/* this is a message we already processed, do nothing */
5439	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5440		return;
5441
5442	/*
5443	 * until the vf completes a reset it should not be
5444	 * allowed to start any configuration.
5445	 */
5446
5447	if (msgbuf[0] == E1000_VF_RESET) {
5448		igb_vf_reset_msg(adapter, vf);
5449		return;
5450	}
5451
5452	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5453		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5454			return;
5455		retval = -1;
5456		goto out;
5457	}
5458
5459	switch ((msgbuf[0] & 0xFFFF)) {
5460	case E1000_VF_SET_MAC_ADDR:
5461		retval = -EINVAL;
5462		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5463			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5464		else
5465			dev_warn(&pdev->dev,
5466				 "VF %d attempted to override administratively "
5467				 "set MAC address\nReload the VF driver to "
5468				 "resume operations\n", vf);
5469		break;
5470	case E1000_VF_SET_PROMISC:
5471		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5472		break;
5473	case E1000_VF_SET_MULTICAST:
5474		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5475		break;
5476	case E1000_VF_SET_LPE:
5477		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5478		break;
5479	case E1000_VF_SET_VLAN:
5480		retval = -1;
5481		if (vf_data->pf_vlan)
5482			dev_warn(&pdev->dev,
5483				 "VF %d attempted to override administratively "
5484				 "set VLAN tag\nReload the VF driver to "
5485				 "resume operations\n", vf);
5486		else
5487			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5488		break;
5489	default:
5490		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5491		retval = -1;
5492		break;
5493	}
5494
5495	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5496out:
5497	/* notify the VF of the results of what it sent us */
5498	if (retval)
5499		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5500	else
5501		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5502
5503	igb_write_mbx(hw, msgbuf, 1, vf);
5504}
5505
5506static void igb_msg_task(struct igb_adapter *adapter)
5507{
5508	struct e1000_hw *hw = &adapter->hw;
5509	u32 vf;
5510
5511	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5512		/* process any reset requests */
5513		if (!igb_check_for_rst(hw, vf))
5514			igb_vf_reset_event(adapter, vf);
5515
5516		/* process any messages pending */
5517		if (!igb_check_for_msg(hw, vf))
5518			igb_rcv_msg_from_vf(adapter, vf);
5519
5520		/* process any acks */
5521		if (!igb_check_for_ack(hw, vf))
5522			igb_rcv_ack_from_vf(adapter, vf);
5523	}
5524}
5525
5526/**
5527 *  igb_set_uta - Set unicast filter table address
5528 *  @adapter: board private structure
5529 *
5530 *  The unicast table address is a register array of 32-bit registers.
5531 *  The table is meant to be used in a way similar to how the MTA is used
5532 *  however due to certain limitations in the hardware it is necessary to
5533 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5534 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5535 **/
5536static void igb_set_uta(struct igb_adapter *adapter)
5537{
5538	struct e1000_hw *hw = &adapter->hw;
5539	int i;
5540
5541	/* The UTA table only exists on 82576 hardware and newer */
5542	if (hw->mac.type < e1000_82576)
5543		return;
5544
5545	/* we only need to do this if VMDq is enabled */
5546	if (!adapter->vfs_allocated_count)
5547		return;
5548
5549	for (i = 0; i < hw->mac.uta_reg_count; i++)
5550		array_wr32(E1000_UTA, i, ~0);
5551}
5552
5553/**
5554 * igb_intr_msi - Interrupt Handler
5555 * @irq: interrupt number
5556 * @data: pointer to a network interface device structure
5557 **/
5558static irqreturn_t igb_intr_msi(int irq, void *data)
5559{
5560	struct igb_adapter *adapter = data;
5561	struct igb_q_vector *q_vector = adapter->q_vector[0];
5562	struct e1000_hw *hw = &adapter->hw;
5563	/* read ICR disables interrupts using IAM */
5564	u32 icr = rd32(E1000_ICR);
5565
5566	igb_write_itr(q_vector);
5567
5568	if (icr & E1000_ICR_DRSTA)
5569		schedule_work(&adapter->reset_task);
5570
5571	if (icr & E1000_ICR_DOUTSYNC) {
5572		/* HW is reporting DMA is out of sync */
5573		adapter->stats.doosync++;
5574	}
5575
5576	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5577		hw->mac.get_link_status = 1;
5578		if (!test_bit(__IGB_DOWN, &adapter->state))
5579			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5580	}
5581
5582	napi_schedule(&q_vector->napi);
5583
5584	return IRQ_HANDLED;
5585}
5586
5587/**
5588 * igb_intr - Legacy Interrupt Handler
5589 * @irq: interrupt number
5590 * @data: pointer to a network interface device structure
5591 **/
5592static irqreturn_t igb_intr(int irq, void *data)
5593{
5594	struct igb_adapter *adapter = data;
5595	struct igb_q_vector *q_vector = adapter->q_vector[0];
5596	struct e1000_hw *hw = &adapter->hw;
5597	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5598	 * need for the IMC write */
5599	u32 icr = rd32(E1000_ICR);
5600
5601	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5602	 * not set, then the adapter didn't send an interrupt */
5603	if (!(icr & E1000_ICR_INT_ASSERTED))
5604		return IRQ_NONE;
5605
5606	igb_write_itr(q_vector);
5607
5608	if (icr & E1000_ICR_DRSTA)
5609		schedule_work(&adapter->reset_task);
5610
5611	if (icr & E1000_ICR_DOUTSYNC) {
5612		/* HW is reporting DMA is out of sync */
5613		adapter->stats.doosync++;
5614	}
5615
5616	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5617		hw->mac.get_link_status = 1;
5618		/* guard against interrupt when we're going down */
5619		if (!test_bit(__IGB_DOWN, &adapter->state))
5620			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5621	}
5622
5623	napi_schedule(&q_vector->napi);
5624
5625	return IRQ_HANDLED;
5626}
5627
5628static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5629{
5630	struct igb_adapter *adapter = q_vector->adapter;
5631	struct e1000_hw *hw = &adapter->hw;
5632
5633	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5634	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5635		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5636			igb_set_itr(q_vector);
5637		else
5638			igb_update_ring_itr(q_vector);
5639	}
5640
5641	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5642		if (adapter->msix_entries)
5643			wr32(E1000_EIMS, q_vector->eims_value);
5644		else
5645			igb_irq_enable(adapter);
5646	}
5647}
5648
5649/**
5650 * igb_poll - NAPI Rx polling callback
5651 * @napi: napi polling structure
5652 * @budget: count of how many packets we should handle
5653 **/
5654static int igb_poll(struct napi_struct *napi, int budget)
5655{
5656	struct igb_q_vector *q_vector = container_of(napi,
5657	                                             struct igb_q_vector,
5658	                                             napi);
5659	bool clean_complete = true;
5660
5661#ifdef CONFIG_IGB_DCA
5662	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5663		igb_update_dca(q_vector);
5664#endif
5665	if (q_vector->tx.ring)
5666		clean_complete = igb_clean_tx_irq(q_vector);
5667
5668	if (q_vector->rx.ring)
5669		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5670
5671	/* If all work not completed, return budget and keep polling */
5672	if (!clean_complete)
5673		return budget;
5674
5675	/* If not enough Rx work done, exit the polling mode */
5676	napi_complete(napi);
5677	igb_ring_irq_enable(q_vector);
5678
5679	return 0;
5680}
5681
5682/**
5683 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5684 * @adapter: board private structure
5685 * @shhwtstamps: timestamp structure to update
5686 * @regval: unsigned 64bit system time value.
5687 *
5688 * We need to convert the system time value stored in the RX/TXSTMP registers
5689 * into a hwtstamp which can be used by the upper level timestamping functions
5690 */
5691static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5692                                   struct skb_shared_hwtstamps *shhwtstamps,
5693                                   u64 regval)
5694{
5695	u64 ns;
5696
5697	/*
5698	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up by
5699	 * 24 bits to match the clock shift we set up earlier.
5700	 */
5701	if (adapter->hw.mac.type >= e1000_82580)
5702		regval <<= IGB_82580_TSYNC_SHIFT;
5703
5704	ns = timecounter_cyc2time(&adapter->clock, regval);
5705	timecompare_update(&adapter->compare, ns);
5706	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5707	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5708	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5709}
5710
5711/**
5712 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5713 * @q_vector: pointer to q_vector containing needed info
5714 * @buffer: pointer to igb_tx_buffer structure
5715 *
5716 * If we were asked to do hardware stamping and such a time stamp is
5717 * available, then it must have been for this skb here because we
5718 * allow only one such packet into the queue.
5719 */
5720static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5721			    struct igb_tx_buffer *buffer_info)
5722{
5723	struct igb_adapter *adapter = q_vector->adapter;
5724	struct e1000_hw *hw = &adapter->hw;
5725	struct skb_shared_hwtstamps shhwtstamps;
5726	u64 regval;
5727
5728	/* if skb does not support hw timestamp or TX stamp not valid exit */
5729	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5730	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5731		return;
5732
5733	regval = rd32(E1000_TXSTMPL);
5734	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5735
5736	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5737	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5738}
5739
5740/**
5741 * igb_clean_tx_irq - Reclaim resources after transmit completes
5742 * @q_vector: pointer to q_vector containing needed info
5743 * returns true if ring is completely cleaned
5744 **/
5745static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5746{
5747	struct igb_adapter *adapter = q_vector->adapter;
5748	struct igb_ring *tx_ring = q_vector->tx.ring;
5749	struct igb_tx_buffer *tx_buffer;
5750	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5751	unsigned int total_bytes = 0, total_packets = 0;
5752	unsigned int budget = q_vector->tx.work_limit;
5753	unsigned int i = tx_ring->next_to_clean;
5754
5755	if (test_bit(__IGB_DOWN, &adapter->state))
5756		return true;
5757
5758	tx_buffer = &tx_ring->tx_buffer_info[i];
5759	tx_desc = IGB_TX_DESC(tx_ring, i);
5760	i -= tx_ring->count;
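	/* i is kept offset by -count so that wrapping back to the start of the
	 * ring is detected when i reaches zero in the loops below
	 */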
5761
5762	for (; budget; budget--) {
5763		eop_desc = tx_buffer->next_to_watch;
5764
5765		/* prevent any other reads prior to eop_desc */
5766		rmb();
5767
5768		/* if next_to_watch is not set then there is no work pending */
5769		if (!eop_desc)
5770			break;
5771
5772		/* if DD is not set pending work has not been completed */
5773		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5774			break;
5775
5776		/* clear next_to_watch to prevent false hangs */
5777		tx_buffer->next_to_watch = NULL;
5778
5779		/* update the statistics for this packet */
5780		total_bytes += tx_buffer->bytecount;
5781		total_packets += tx_buffer->gso_segs;
5782
5783		/* retrieve hardware timestamp */
5784		igb_tx_hwtstamp(q_vector, tx_buffer);
5785
5786		/* free the skb */
5787		dev_kfree_skb_any(tx_buffer->skb);
5788		tx_buffer->skb = NULL;
5789
5790		/* unmap skb header data */
5791		dma_unmap_single(tx_ring->dev,
5792				 tx_buffer->dma,
5793				 tx_buffer->length,
5794				 DMA_TO_DEVICE);
5795
5796		/* clear last DMA location and unmap remaining buffers */
5797		while (tx_desc != eop_desc) {
5798			tx_buffer->dma = 0;
5799
5800			tx_buffer++;
5801			tx_desc++;
5802			i++;
5803			if (unlikely(!i)) {
5804				i -= tx_ring->count;
5805				tx_buffer = tx_ring->tx_buffer_info;
5806				tx_desc = IGB_TX_DESC(tx_ring, 0);
5807			}
5808
5809			/* unmap any remaining paged data */
5810			if (tx_buffer->dma) {
5811				dma_unmap_page(tx_ring->dev,
5812					       tx_buffer->dma,
5813					       tx_buffer->length,
5814					       DMA_TO_DEVICE);
5815			}
5816		}
5817
5818		/* clear last DMA location */
5819		tx_buffer->dma = 0;
5820
5821		/* move us one more past the eop_desc for start of next pkt */
5822		tx_buffer++;
5823		tx_desc++;
5824		i++;
5825		if (unlikely(!i)) {
5826			i -= tx_ring->count;
5827			tx_buffer = tx_ring->tx_buffer_info;
5828			tx_desc = IGB_TX_DESC(tx_ring, 0);
5829		}
5830	}
5831
5832	netdev_tx_completed_queue(txring_txq(tx_ring),
5833				  total_packets, total_bytes);
5834	i += tx_ring->count;
5835	tx_ring->next_to_clean = i;
5836	u64_stats_update_begin(&tx_ring->tx_syncp);
5837	tx_ring->tx_stats.bytes += total_bytes;
5838	tx_ring->tx_stats.packets += total_packets;
5839	u64_stats_update_end(&tx_ring->tx_syncp);
5840	q_vector->tx.total_bytes += total_bytes;
5841	q_vector->tx.total_packets += total_packets;
5842
5843	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5844		struct e1000_hw *hw = &adapter->hw;
5845
5846		eop_desc = tx_buffer->next_to_watch;
5847
5848		/* Detect a transmit hang in hardware; this serializes the
5849		 * check with the clearing of time_stamp and movement of i */
5850		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5851		if (eop_desc &&
5852		    time_after(jiffies, tx_buffer->time_stamp +
5853			       (adapter->tx_timeout_factor * HZ)) &&
5854		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5855
5856			/* detected Tx unit hang */
5857			dev_err(tx_ring->dev,
5858				"Detected Tx Unit Hang\n"
5859				"  Tx Queue             <%d>\n"
5860				"  TDH                  <%x>\n"
5861				"  TDT                  <%x>\n"
5862				"  next_to_use          <%x>\n"
5863				"  next_to_clean        <%x>\n"
5864				"buffer_info[next_to_clean]\n"
5865				"  time_stamp           <%lx>\n"
5866				"  next_to_watch        <%p>\n"
5867				"  jiffies              <%lx>\n"
5868				"  desc.status          <%x>\n",
5869				tx_ring->queue_index,
5870				rd32(E1000_TDH(tx_ring->reg_idx)),
5871				readl(tx_ring->tail),
5872				tx_ring->next_to_use,
5873				tx_ring->next_to_clean,
5874				tx_buffer->time_stamp,
5875				eop_desc,
5876				jiffies,
5877				eop_desc->wb.status);
5878			netif_stop_subqueue(tx_ring->netdev,
5879					    tx_ring->queue_index);
5880
5881			/* we are about to reset, no point in enabling stuff */
5882			return true;
5883		}
5884	}
5885
5886	if (unlikely(total_packets &&
5887		     netif_carrier_ok(tx_ring->netdev) &&
5888		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5889		/* Make sure that anybody stopping the queue after this
5890		 * sees the new next_to_clean.
5891		 */
5892		smp_mb();
5893		if (__netif_subqueue_stopped(tx_ring->netdev,
5894					     tx_ring->queue_index) &&
5895		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5896			netif_wake_subqueue(tx_ring->netdev,
5897					    tx_ring->queue_index);
5898
5899			u64_stats_update_begin(&tx_ring->tx_syncp);
5900			tx_ring->tx_stats.restart_queue++;
5901			u64_stats_update_end(&tx_ring->tx_syncp);
5902		}
5903	}
5904
5905	return !!budget;
5906}
5907
5908static inline void igb_rx_checksum(struct igb_ring *ring,
5909				   union e1000_adv_rx_desc *rx_desc,
5910				   struct sk_buff *skb)
5911{
5912	skb_checksum_none_assert(skb);
5913
5914	/* Ignore Checksum bit is set */
5915	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5916		return;
5917
5918	/* Rx checksum disabled via ethtool */
5919	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5920		return;
5921
5922	/* TCP/UDP checksum error bit is set */
5923	if (igb_test_staterr(rx_desc,
5924			     E1000_RXDEXT_STATERR_TCPE |
5925			     E1000_RXDEXT_STATERR_IPE)) {
5926		/*
5927		 * work around errata with sctp packets where the TCPE aka
5928		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5929		 * packets (i.e. let the stack verify the crc32c instead)
5930		 */
5931		if (!((skb->len == 60) &&
5932		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5933			u64_stats_update_begin(&ring->rx_syncp);
5934			ring->rx_stats.csum_err++;
5935			u64_stats_update_end(&ring->rx_syncp);
5936		}
5937		/* let the stack verify checksum errors */
5938		return;
5939	}
5940	/* It must be a TCP or UDP packet with a valid checksum */
5941	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5942				      E1000_RXD_STAT_UDPCS))
5943		skb->ip_summed = CHECKSUM_UNNECESSARY;
5944
5945	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5946		le32_to_cpu(rx_desc->wb.upper.status_error));
5947}
5948
5949static inline void igb_rx_hash(struct igb_ring *ring,
5950			       union e1000_adv_rx_desc *rx_desc,
5951			       struct sk_buff *skb)
5952{
5953	if (ring->netdev->features & NETIF_F_RXHASH)
5954		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5955}
5956
5957static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5958			    union e1000_adv_rx_desc *rx_desc,
5959			    struct sk_buff *skb)
5960{
5961	struct igb_adapter *adapter = q_vector->adapter;
5962	struct e1000_hw *hw = &adapter->hw;
5963	u64 regval;
5964
5965	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5966				       E1000_RXDADV_STAT_TS))
5967		return;
5968
5969	/*
5970	 * If this bit is set, then the RX registers contain the time stamp. No
5971	 * other packet will be time stamped until we read these registers, so
5972	 * read the registers to make them available again. Because only one
5973	 * packet can be time stamped at a time, we know that the register
5974	 * values must belong to this one here and therefore we don't need to
5975	 * compare any of the additional attributes stored for it.
5976	 *
5977	 * If nothing went wrong, then it should have a shared tx_flags that we
5978	 * can turn into a skb_shared_hwtstamps.
5979	 */
5980	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5981		u32 *stamp = (u32 *)skb->data;
5982		regval = le32_to_cpu(*(stamp + 2));
5983		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5984		skb_pull(skb, IGB_TS_HDR_LEN);
5985	} else {
5986		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5987			return;
5988
5989		regval = rd32(E1000_RXSTMPL);
5990		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5991	}
5992
5993	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5994}
5995
5996static void igb_rx_vlan(struct igb_ring *ring,
5997			union e1000_adv_rx_desc *rx_desc,
5998			struct sk_buff *skb)
5999{
6000	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6001		u16 vid;
6002		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6003		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6004			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6005		else
6006			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6007
6008		__vlan_hwaccel_put_tag(skb, vid);
6009	}
6010}
6011
6012static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6013{
6014	/* HW will not DMA in data larger than the given buffer, even if it
6015	 * parses the (NFS, of course) header to be larger.  In that case, it
6016	 * fills the header buffer and spills the rest into the page.
6017	 */
6018	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6019	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6020	if (hlen > IGB_RX_HDR_LEN)
6021		hlen = IGB_RX_HDR_LEN;
6022	return hlen;
6023}
6024
6025static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6026{
6027	struct igb_ring *rx_ring = q_vector->rx.ring;
6028	union e1000_adv_rx_desc *rx_desc;
6029	const int current_node = numa_node_id();
6030	unsigned int total_bytes = 0, total_packets = 0;
6031	u16 cleaned_count = igb_desc_unused(rx_ring);
6032	u16 i = rx_ring->next_to_clean;
6033
6034	rx_desc = IGB_RX_DESC(rx_ring, i);
6035
6036	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6037		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6038		struct sk_buff *skb = buffer_info->skb;
6039		union e1000_adv_rx_desc *next_rxd;
6040
6041		buffer_info->skb = NULL;
6042		prefetch(skb->data);
6043
6044		i++;
6045		if (i == rx_ring->count)
6046			i = 0;
6047
6048		next_rxd = IGB_RX_DESC(rx_ring, i);
6049		prefetch(next_rxd);
6050
6051		/*
6052		 * This memory barrier is needed to keep us from reading
6053		 * any other fields out of the rx_desc until we know the
6054		 * RXD_STAT_DD bit is set
6055		 */
6056		rmb();
6057
6058		if (!skb_is_nonlinear(skb)) {
6059			__skb_put(skb, igb_get_hlen(rx_desc));
6060			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6061					 IGB_RX_HDR_LEN,
6062					 DMA_FROM_DEVICE);
6063			buffer_info->dma = 0;
6064		}
6065
6066		if (rx_desc->wb.upper.length) {
6067			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6068
6069			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6070						buffer_info->page,
6071						buffer_info->page_offset,
6072						length);
6073
6074			skb->len += length;
6075			skb->data_len += length;
6076			skb->truesize += PAGE_SIZE / 2;
6077
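			/* reuse the half-page only if we hold the sole reference
			 * and the page is local to this NUMA node; otherwise give
			 * it up so a new page is allocated on refill
			 */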
6078			if ((page_count(buffer_info->page) != 1) ||
6079			    (page_to_nid(buffer_info->page) != current_node))
6080				buffer_info->page = NULL;
6081			else
6082				get_page(buffer_info->page);
6083
6084			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6085				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6086			buffer_info->page_dma = 0;
6087		}
6088
6089		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6090			struct igb_rx_buffer *next_buffer;
6091			next_buffer = &rx_ring->rx_buffer_info[i];
6092			buffer_info->skb = next_buffer->skb;
6093			buffer_info->dma = next_buffer->dma;
6094			next_buffer->skb = skb;
6095			next_buffer->dma = 0;
6096			goto next_desc;
6097		}
6098
6099		if (igb_test_staterr(rx_desc,
6100				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6101			dev_kfree_skb_any(skb);
6102			goto next_desc;
6103		}
6104
6105		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6106		igb_rx_hash(rx_ring, rx_desc, skb);
6107		igb_rx_checksum(rx_ring, rx_desc, skb);
6108		igb_rx_vlan(rx_ring, rx_desc, skb);
6109
6110		total_bytes += skb->len;
6111		total_packets++;
6112
6113		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6114
6115		napi_gro_receive(&q_vector->napi, skb);
6116
6117		budget--;
6118next_desc:
6119		if (!budget)
6120			break;
6121
6122		cleaned_count++;
6123		/* return some buffers to hardware, one at a time is too slow */
6124		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6125			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6126			cleaned_count = 0;
6127		}
6128
6129		/* use prefetched values */
6130		rx_desc = next_rxd;
6131	}
6132
6133	rx_ring->next_to_clean = i;
6134	u64_stats_update_begin(&rx_ring->rx_syncp);
6135	rx_ring->rx_stats.packets += total_packets;
6136	rx_ring->rx_stats.bytes += total_bytes;
6137	u64_stats_update_end(&rx_ring->rx_syncp);
6138	q_vector->rx.total_packets += total_packets;
6139	q_vector->rx.total_bytes += total_bytes;
6140
6141	if (cleaned_count)
6142		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6143
6144	return !!budget;
6145}
6146
6147static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6148				 struct igb_rx_buffer *bi)
6149{
6150	struct sk_buff *skb = bi->skb;
6151	dma_addr_t dma = bi->dma;
6152
6153	if (dma)
6154		return true;
6155
6156	if (likely(!skb)) {
6157		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6158						IGB_RX_HDR_LEN);
6159		bi->skb = skb;
6160		if (!skb) {
6161			rx_ring->rx_stats.alloc_failed++;
6162			return false;
6163		}
6164
6165		/* initialize skb for ring */
6166		skb_record_rx_queue(skb, rx_ring->queue_index);
6167	}
6168
6169	dma = dma_map_single(rx_ring->dev, skb->data,
6170			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6171
6172	if (dma_mapping_error(rx_ring->dev, dma)) {
6173		rx_ring->rx_stats.alloc_failed++;
6174		return false;
6175	}
6176
6177	bi->dma = dma;
6178	return true;
6179}
6180
6181static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6182				  struct igb_rx_buffer *bi)
6183{
6184	struct page *page = bi->page;
6185	dma_addr_t page_dma = bi->page_dma;
6186	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
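	/* XOR with half the page size alternates between the two halves of the page */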
6187
6188	if (page_dma)
6189		return true;
6190
6191	if (!page) {
6192		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6193		bi->page = page;
6194		if (unlikely(!page)) {
6195			rx_ring->rx_stats.alloc_failed++;
6196			return false;
6197		}
6198	}
6199
6200	page_dma = dma_map_page(rx_ring->dev, page,
6201				page_offset, PAGE_SIZE / 2,
6202				DMA_FROM_DEVICE);
6203
6204	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6205		rx_ring->rx_stats.alloc_failed++;
6206		return false;
6207	}
6208
6209	bi->page_dma = page_dma;
6210	bi->page_offset = page_offset;
6211	return true;
6212}
6213
6214/**
6215 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6216 * @rx_ring: rx descriptor ring to allocate new receive buffers for
 * @cleaned_count: number of buffers to replace
6217 **/
6218void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6219{
6220	union e1000_adv_rx_desc *rx_desc;
6221	struct igb_rx_buffer *bi;
6222	u16 i = rx_ring->next_to_use;
6223
6224	rx_desc = IGB_RX_DESC(rx_ring, i);
6225	bi = &rx_ring->rx_buffer_info[i];
6226	i -= rx_ring->count;
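	/* as in the clean routines, bias i by -count so !i flags a ring wrap */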
6227
6228	while (cleaned_count--) {
6229		if (!igb_alloc_mapped_skb(rx_ring, bi))
6230			break;
6231
6232		/* Refresh the desc even if buffer_addrs didn't change
6233		 * because each write-back erases this info. */
6234		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6235
6236		if (!igb_alloc_mapped_page(rx_ring, bi))
6237			break;
6238
6239		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6240
6241		rx_desc++;
6242		bi++;
6243		i++;
6244		if (unlikely(!i)) {
6245			rx_desc = IGB_RX_DESC(rx_ring, 0);
6246			bi = rx_ring->rx_buffer_info;
6247			i -= rx_ring->count;
6248		}
6249
6250		/* clear the hdr_addr for the next_to_use descriptor */
6251		rx_desc->read.hdr_addr = 0;
6252	}
6253
6254	i += rx_ring->count;
6255
6256	if (rx_ring->next_to_use != i) {
6257		rx_ring->next_to_use = i;
6258
6259		/* Force memory writes to complete before letting h/w
6260		 * know there are new descriptors to fetch.  (Only
6261		 * applicable for weak-ordered memory model archs,
6262		 * such as IA-64). */
6263		wmb();
6264		writel(i, rx_ring->tail);
6265	}
6266}
6267
6268/**
6269 * igb_mii_ioctl - handle MII ioctl requests
6270 * @netdev: network interface device structure
6271 * @ifr: pointer to the interface request structure
6272 * @cmd: ioctl command
6273 **/
6274static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6275{
6276	struct igb_adapter *adapter = netdev_priv(netdev);
6277	struct mii_ioctl_data *data = if_mii(ifr);
6278
6279	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6280		return -EOPNOTSUPP;
6281
6282	switch (cmd) {
6283	case SIOCGMIIPHY:
6284		data->phy_id = adapter->hw.phy.addr;
6285		break;
6286	case SIOCGMIIREG:
6287		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6288		                     &data->val_out))
6289			return -EIO;
6290		break;
6291	case SIOCSMIIREG:
6292	default:
6293		return -EOPNOTSUPP;
6294	}
6295	return 0;
6296}
6297
6298/**
6299 * igb_hwtstamp_ioctl - control hardware time stamping
6300 * @netdev: network interface device structure
6301 * @ifr: pointer to the interface request structure
6302 * @cmd: ioctl command (SIOCSHWTSTAMP)
6303 *
6304 * Outgoing time stamping can be enabled and disabled. Play nice and
6305 * disable it when requested, although it shouldn't cause any overhead
6306 * when no packet needs it. At most one packet in the queue may be
6307 * marked for time stamping, otherwise it would be impossible to tell
6308 * for sure to which packet the hardware time stamp belongs.
6309 *
6310 * Incoming time stamping has to be configured via the hardware
6311 * filters. Not all combinations are supported, in particular event
6312 * type has to be specified. Matching the kind of event packet is
6313 * not supported, with the exception of "all V2 events regardless of
6314 * level 2 or 4".
6315 *
6316 **/
6317static int igb_hwtstamp_ioctl(struct net_device *netdev,
6318			      struct ifreq *ifr, int cmd)
6319{
6320	struct igb_adapter *adapter = netdev_priv(netdev);
6321	struct e1000_hw *hw = &adapter->hw;
6322	struct hwtstamp_config config;
6323	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6324	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6325	u32 tsync_rx_cfg = 0;
6326	bool is_l4 = false;
6327	bool is_l2 = false;
6328	u32 regval;
6329
6330	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6331		return -EFAULT;
6332
6333	/* reserved for future extensions */
6334	if (config.flags)
6335		return -EINVAL;
6336
6337	switch (config.tx_type) {
6338	case HWTSTAMP_TX_OFF:
6339		tsync_tx_ctl = 0;
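		/* fall through */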
6340	case HWTSTAMP_TX_ON:
6341		break;
6342	default:
6343		return -ERANGE;
6344	}
6345
6346	switch (config.rx_filter) {
6347	case HWTSTAMP_FILTER_NONE:
6348		tsync_rx_ctl = 0;
6349		break;
6350	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6351	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6352	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6353	case HWTSTAMP_FILTER_ALL:
6354		/*
6355		 * register TSYNCRXCFG must be set, therefore it is not
6356		 * possible to time stamp both Sync and Delay_Req messages
6357		 * => fall back to time stamping all packets
6358		 */
6359		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6360		config.rx_filter = HWTSTAMP_FILTER_ALL;
6361		break;
6362	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6363		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6364		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6365		is_l4 = true;
6366		break;
6367	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6368		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6369		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6370		is_l4 = true;
6371		break;
6372	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6373	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6374		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6375		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6376		is_l2 = true;
6377		is_l4 = true;
6378		config.rx_filter = HWTSTAMP_FILTER_SOME;
6379		break;
6380	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6381	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6382		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6383		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6384		is_l2 = true;
6385		is_l4 = true;
6386		config.rx_filter = HWTSTAMP_FILTER_SOME;
6387		break;
6388	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6389	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6390	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6391		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6392		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6393		is_l2 = true;
6394		is_l4 = true;
6395		break;
6396	default:
6397		return -ERANGE;
6398	}
6399
6400	if (hw->mac.type == e1000_82575) {
6401		if (tsync_rx_ctl | tsync_tx_ctl)
6402			return -EINVAL;
6403		return 0;
6404	}
6405
6406	/*
6407	 * Per-packet timestamping only works if all packets are
6408	 * timestamped, so enable timestamping in all packets as
6409	 * long as one rx filter was configured.
6410	 */
6411	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6412		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6413		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6414	}
6415
6416	/* enable/disable TX */
6417	regval = rd32(E1000_TSYNCTXCTL);
6418	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6419	regval |= tsync_tx_ctl;
6420	wr32(E1000_TSYNCTXCTL, regval);
6421
6422	/* enable/disable RX */
6423	regval = rd32(E1000_TSYNCRXCTL);
6424	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6425	regval |= tsync_rx_ctl;
6426	wr32(E1000_TSYNCRXCTL, regval);
6427
6428	/* define which PTP packets are time stamped */
6429	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6430
6431	/* define ethertype filter for timestamped packets */
6432	if (is_l2)
6433		wr32(E1000_ETQF(3),
6434		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6435		                 E1000_ETQF_1588 | /* enable timestamping */
6436		                 ETH_P_1588));     /* 1588 eth protocol type */
6437	else
6438		wr32(E1000_ETQF(3), 0);
6439
6440#define PTP_PORT 319
6441	/* L4 Queue Filter[3]: filter by destination port and protocol */
6442	if (is_l4) {
6443		u32 ftqf = (IPPROTO_UDP /* UDP */
6444			| E1000_FTQF_VF_BP /* VF not compared */
6445			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6446			| E1000_FTQF_MASK); /* mask all inputs */
6447		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6448
6449		wr32(E1000_IMIR(3), htons(PTP_PORT));
6450		wr32(E1000_IMIREXT(3),
6451		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6452		if (hw->mac.type == e1000_82576) {
6453			/* enable source port check */
6454			wr32(E1000_SPQF(3), htons(PTP_PORT));
6455			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6456		}
6457		wr32(E1000_FTQF(3), ftqf);
6458	} else {
6459		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6460	}
6461	wrfl();
6462
6463	adapter->hwtstamp_config = config;
6464
6465	/* clear TX/RX time stamp registers, just to be sure */
6466	regval = rd32(E1000_TXSTMPH);
6467	regval = rd32(E1000_RXSTMPH);
6468
6469	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6470		-EFAULT : 0;
6471}
6472
6473/**
6474 * igb_ioctl - dispatch device-specific ioctl requests
6475 * @netdev: network interface device structure
6476 * @ifr: pointer to the interface request structure
6477 * @cmd: ioctl command
6478 **/
6479static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6480{
6481	switch (cmd) {
6482	case SIOCGMIIPHY:
6483	case SIOCGMIIREG:
6484	case SIOCSMIIREG:
6485		return igb_mii_ioctl(netdev, ifr, cmd);
6486	case SIOCSHWTSTAMP:
6487		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6488	default:
6489		return -EOPNOTSUPP;
6490	}
6491}
6492
6493s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6494{
6495	struct igb_adapter *adapter = hw->back;
6496	u16 cap_offset;
6497
6498	cap_offset = adapter->pdev->pcie_cap;
6499	if (!cap_offset)
6500		return -E1000_ERR_CONFIG;
6501
6502	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6503
6504	return 0;
6505}
6506
6507s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6508{
6509	struct igb_adapter *adapter = hw->back;
6510	u16 cap_offset;
6511
6512	cap_offset = adapter->pdev->pcie_cap;
6513	if (!cap_offset)
6514		return -E1000_ERR_CONFIG;
6515
6516	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6517
6518	return 0;
6519}
6520
6521static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6522{
6523	struct igb_adapter *adapter = netdev_priv(netdev);
6524	struct e1000_hw *hw = &adapter->hw;
6525	u32 ctrl, rctl;
6526	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6527
6528	if (enable) {
6529		/* enable VLAN tag insert/strip */
6530		ctrl = rd32(E1000_CTRL);
6531		ctrl |= E1000_CTRL_VME;
6532		wr32(E1000_CTRL, ctrl);
6533
6534		/* Disable CFI check */
6535		rctl = rd32(E1000_RCTL);
6536		rctl &= ~E1000_RCTL_CFIEN;
6537		wr32(E1000_RCTL, rctl);
6538	} else {
6539		/* disable VLAN tag insert/strip */
6540		ctrl = rd32(E1000_CTRL);
6541		ctrl &= ~E1000_CTRL_VME;
6542		wr32(E1000_CTRL, ctrl);
6543	}
6544
6545	igb_rlpml_set(adapter);
6546}
6547
6548static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6549{
6550	struct igb_adapter *adapter = netdev_priv(netdev);
6551	struct e1000_hw *hw = &adapter->hw;
6552	int pf_id = adapter->vfs_allocated_count;
6553
6554	/* attempt to add filter to vlvf array */
6555	igb_vlvf_set(adapter, vid, true, pf_id);
6556
6557	/* add the filter since PF can receive vlans w/o entry in vlvf */
6558	igb_vfta_set(hw, vid, true);
6559
6560	set_bit(vid, adapter->active_vlans);
6561
6562	return 0;
6563}
6564
6565static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6566{
6567	struct igb_adapter *adapter = netdev_priv(netdev);
6568	struct e1000_hw *hw = &adapter->hw;
6569	int pf_id = adapter->vfs_allocated_count;
6570	s32 err;
6571
6572	/* remove vlan from VLVF table array */
6573	err = igb_vlvf_set(adapter, vid, false, pf_id);
6574
6575	/* if vid was not present in VLVF just remove it from table */
6576	if (err)
6577		igb_vfta_set(hw, vid, false);
6578
6579	clear_bit(vid, adapter->active_vlans);
6580
6581	return 0;
6582}
6583
6584static void igb_restore_vlan(struct igb_adapter *adapter)
6585{
6586	u16 vid;
6587
6588	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6589
6590	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6591		igb_vlan_rx_add_vid(adapter->netdev, vid);
6592}
6593
6594int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6595{
6596	struct pci_dev *pdev = adapter->pdev;
6597	struct e1000_mac_info *mac = &adapter->hw.mac;
6598
6599	mac->autoneg = 0;
6600
6601	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6602	 * for the switch() below to work */
6603	if ((spd & 1) || (dplx & ~1))
6604		goto err_inval;
6605
6606	/* Fiber NICs only allow 1000 Mbps full duplex */
6607	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6608	    (spd != SPEED_1000 ||
6609	     dplx != DUPLEX_FULL))
6610		goto err_inval;
6611
6612	switch (spd + dplx) {
6613	case SPEED_10 + DUPLEX_HALF:
6614		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6615		break;
6616	case SPEED_10 + DUPLEX_FULL:
6617		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6618		break;
6619	case SPEED_100 + DUPLEX_HALF:
6620		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6621		break;
6622	case SPEED_100 + DUPLEX_FULL:
6623		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6624		break;
6625	case SPEED_1000 + DUPLEX_FULL:
6626		mac->autoneg = 1;
6627		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6628		break;
6629	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6630	default:
6631		goto err_inval;
6632	}
6633	return 0;
6634
6635err_inval:
6636	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6637	return -EINVAL;
6638}
6639
6640static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6641			  bool runtime)
6642{
6643	struct net_device *netdev = pci_get_drvdata(pdev);
6644	struct igb_adapter *adapter = netdev_priv(netdev);
6645	struct e1000_hw *hw = &adapter->hw;
6646	u32 ctrl, rctl, status;
6647	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6648#ifdef CONFIG_PM
6649	int retval = 0;
6650#endif
6651
6652	netif_device_detach(netdev);
6653
6654	if (netif_running(netdev))
6655		__igb_close(netdev, true);
6656
6657	igb_clear_interrupt_scheme(adapter);
6658
6659#ifdef CONFIG_PM
6660	retval = pci_save_state(pdev);
6661	if (retval)
6662		return retval;
6663#endif
6664
6665	status = rd32(E1000_STATUS);
6666	if (status & E1000_STATUS_LU)
6667		wufc &= ~E1000_WUFC_LNKC;
6668
6669	if (wufc) {
6670		igb_setup_rctl(adapter);
6671		igb_set_rx_mode(netdev);
6672
6673		/* turn on all-multi mode if wake on multicast is enabled */
6674		if (wufc & E1000_WUFC_MC) {
6675			rctl = rd32(E1000_RCTL);
6676			rctl |= E1000_RCTL_MPE;
6677			wr32(E1000_RCTL, rctl);
6678		}
6679
6680		ctrl = rd32(E1000_CTRL);
6681		/* advertise wake from D3Cold */
6682		#define E1000_CTRL_ADVD3WUC 0x00100000
6683		/* phy power management enable */
6684		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6685		ctrl |= E1000_CTRL_ADVD3WUC;
6686		wr32(E1000_CTRL, ctrl);
6687
6688		/* Allow time for pending master requests to run */
6689		igb_disable_pcie_master(hw);
6690
6691		wr32(E1000_WUC, E1000_WUC_PME_EN);
6692		wr32(E1000_WUFC, wufc);
6693	} else {
6694		wr32(E1000_WUC, 0);
6695		wr32(E1000_WUFC, 0);
6696	}
6697
6698	*enable_wake = wufc || adapter->en_mng_pt;
6699	if (!*enable_wake)
6700		igb_power_down_link(adapter);
6701	else
6702		igb_power_up_link(adapter);
6703
6704	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6705	 * would have already happened in close and is redundant. */
6706	igb_release_hw_control(adapter);
6707
6708	pci_disable_device(pdev);
6709
6710	return 0;
6711}
6712
6713#ifdef CONFIG_PM
6714#ifdef CONFIG_PM_SLEEP
6715static int igb_suspend(struct device *dev)
6716{
6717	int retval;
6718	bool wake;
6719	struct pci_dev *pdev = to_pci_dev(dev);
6720
6721	retval = __igb_shutdown(pdev, &wake, 0);
6722	if (retval)
6723		return retval;
6724
6725	if (wake) {
6726		pci_prepare_to_sleep(pdev);
6727	} else {
6728		pci_wake_from_d3(pdev, false);
6729		pci_set_power_state(pdev, PCI_D3hot);
6730	}
6731
6732	return 0;
6733}
6734#endif /* CONFIG_PM_SLEEP */
6735
6736static int igb_resume(struct device *dev)
6737{
6738	struct pci_dev *pdev = to_pci_dev(dev);
6739	struct net_device *netdev = pci_get_drvdata(pdev);
6740	struct igb_adapter *adapter = netdev_priv(netdev);
6741	struct e1000_hw *hw = &adapter->hw;
6742	u32 err;
6743
6744	pci_set_power_state(pdev, PCI_D0);
6745	pci_restore_state(pdev);
6746	pci_save_state(pdev);
6747
6748	err = pci_enable_device_mem(pdev);
6749	if (err) {
6750		dev_err(&pdev->dev,
6751			"igb: Cannot enable PCI device from suspend\n");
6752		return err;
6753	}
6754	pci_set_master(pdev);
6755
6756	pci_enable_wake(pdev, PCI_D3hot, 0);
6757	pci_enable_wake(pdev, PCI_D3cold, 0);
6758
6759	if (!rtnl_is_locked()) {
6760		/*
6761		 * shut up ASSERT_RTNL() warning in
6762		 * netif_set_real_num_tx/rx_queues.
6763		 */
6764		rtnl_lock();
6765		err = igb_init_interrupt_scheme(adapter);
6766		rtnl_unlock();
6767	} else {
6768		err = igb_init_interrupt_scheme(adapter);
6769	}
6770	if (err) {
6771		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6772		return -ENOMEM;
6773	}
6774
6775	igb_reset(adapter);
6776
6777	/* let the f/w know that the h/w is now under the control of the
6778	 * driver. */
6779	igb_get_hw_control(adapter);
6780
6781	wr32(E1000_WUS, ~0);
6782
6783	if (netdev->flags & IFF_UP) {
6784		err = __igb_open(netdev, true);
6785		if (err)
6786			return err;
6787	}
6788
6789	netif_device_attach(netdev);
6790	return 0;
6791}
6792
6793#ifdef CONFIG_PM_RUNTIME
6794static int igb_runtime_idle(struct device *dev)
6795{
6796	struct pci_dev *pdev = to_pci_dev(dev);
6797	struct net_device *netdev = pci_get_drvdata(pdev);
6798	struct igb_adapter *adapter = netdev_priv(netdev);
6799
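	/* never suspend directly from the idle callback; if there is no link,
	 * schedule a delayed runtime suspend instead
	 */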
6800	if (!igb_has_link(adapter))
6801		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6802
6803	return -EBUSY;
6804}
6805
6806static int igb_runtime_suspend(struct device *dev)
6807{
6808	struct pci_dev *pdev = to_pci_dev(dev);
6809	int retval;
6810	bool wake;
6811
6812	retval = __igb_shutdown(pdev, &wake, 1);
6813	if (retval)
6814		return retval;
6815
6816	if (wake) {
6817		pci_prepare_to_sleep(pdev);
6818	} else {
6819		pci_wake_from_d3(pdev, false);
6820		pci_set_power_state(pdev, PCI_D3hot);
6821	}
6822
6823	return 0;
6824}
6825
6826static int igb_runtime_resume(struct device *dev)
6827{
6828	return igb_resume(dev);
6829}
6830#endif /* CONFIG_PM_RUNTIME */
6831#endif
6832
6833static void igb_shutdown(struct pci_dev *pdev)
6834{
6835	bool wake;
6836
6837	__igb_shutdown(pdev, &wake, 0);
6838
6839	if (system_state == SYSTEM_POWER_OFF) {
6840		pci_wake_from_d3(pdev, wake);
6841		pci_set_power_state(pdev, PCI_D3hot);
6842	}
6843}
6844
6845#ifdef CONFIG_NET_POLL_CONTROLLER
6846/*
6847 * Polling 'interrupt' - used by things like netconsole to send skbs
6848 * without having to re-enable interrupts. It's not called while
6849 * the interrupt routine is executing.
6850 */
6851static void igb_netpoll(struct net_device *netdev)
6852{
6853	struct igb_adapter *adapter = netdev_priv(netdev);
6854	struct e1000_hw *hw = &adapter->hw;
6855	struct igb_q_vector *q_vector;
6856	int i;
6857
6858	for (i = 0; i < adapter->num_q_vectors; i++) {
6859		q_vector = adapter->q_vector[i];
6860		if (adapter->msix_entries)
6861			wr32(E1000_EIMC, q_vector->eims_value);
6862		else
6863			igb_irq_disable(adapter);
6864		napi_schedule(&q_vector->napi);
6865	}
6866}
6867#endif /* CONFIG_NET_POLL_CONTROLLER */
6868
6869/**
6870 * igb_io_error_detected - called when PCI error is detected
6871 * @pdev: Pointer to PCI device
6872 * @state: The current pci connection state
6873 *
6874 * This function is called after a PCI bus error affecting
6875 * this device has been detected.
6876 */
6877static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6878					      pci_channel_state_t state)
6879{
6880	struct net_device *netdev = pci_get_drvdata(pdev);
6881	struct igb_adapter *adapter = netdev_priv(netdev);
6882
6883	netif_device_detach(netdev);
6884
6885	if (state == pci_channel_io_perm_failure)
6886		return PCI_ERS_RESULT_DISCONNECT;
6887
6888	if (netif_running(netdev))
6889		igb_down(adapter);
6890	pci_disable_device(pdev);
6891
6892	/* Request a slot reset. */
6893	return PCI_ERS_RESULT_NEED_RESET;
6894}
6895
6896/**
6897 * igb_io_slot_reset - called after the pci bus has been reset.
6898 * @pdev: Pointer to PCI device
6899 *
6900 * Restart the card from scratch, as if from a cold-boot. Implementation
6901 * resembles the first-half of the igb_resume routine.
6902 */
6903static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6904{
6905	struct net_device *netdev = pci_get_drvdata(pdev);
6906	struct igb_adapter *adapter = netdev_priv(netdev);
6907	struct e1000_hw *hw = &adapter->hw;
6908	pci_ers_result_t result;
6909	int err;
6910
6911	if (pci_enable_device_mem(pdev)) {
6912		dev_err(&pdev->dev,
6913			"Cannot re-enable PCI device after reset.\n");
6914		result = PCI_ERS_RESULT_DISCONNECT;
6915	} else {
6916		pci_set_master(pdev);
6917		pci_restore_state(pdev);
6918		pci_save_state(pdev);
6919
6920		pci_enable_wake(pdev, PCI_D3hot, 0);
6921		pci_enable_wake(pdev, PCI_D3cold, 0);
6922
6923		igb_reset(adapter);
6924		wr32(E1000_WUS, ~0);
6925		result = PCI_ERS_RESULT_RECOVERED;
6926	}
6927
6928	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6929	if (err) {
6930		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6931		        "failed 0x%0x\n", err);
6932		/* non-fatal, continue */
6933	}
6934
6935	return result;
6936}
6937
6938/**
6939 * igb_io_resume - called when traffic can start flowing again.
6940 * @pdev: Pointer to PCI device
6941 *
6942 * This callback is called when the error recovery driver tells us that
6943 * it's OK to resume normal operation. Implementation resembles the
6944 * second-half of the igb_resume routine.
6945 */
6946static void igb_io_resume(struct pci_dev *pdev)
6947{
6948	struct net_device *netdev = pci_get_drvdata(pdev);
6949	struct igb_adapter *adapter = netdev_priv(netdev);
6950
6951	if (netif_running(netdev)) {
6952		if (igb_up(adapter)) {
6953			dev_err(&pdev->dev, "igb_up failed after reset\n");
6954			return;
6955		}
6956	}
6957
6958	netif_device_attach(netdev);
6959
6960	/* let the f/w know that the h/w is now under the control of the
6961	 * driver. */
6962	igb_get_hw_control(adapter);
6963}
6964
6965static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6966                             u8 qsel)
6967{
6968	u32 rar_low, rar_high;
6969	struct e1000_hw *hw = &adapter->hw;
6970
6971	/* HW expects these in little endian so we reverse the byte order
6972	 * from network order (big endian) to little endian
6973	 */
6974	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6975	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6976	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6977
6978	/* Indicate to hardware the Address is Valid. */
6979	rar_high |= E1000_RAH_AV;
6980
6981	if (hw->mac.type == e1000_82575)
6982		rar_high |= E1000_RAH_POOL_1 * qsel;
6983	else
6984		rar_high |= E1000_RAH_POOL_1 << qsel;
6985
6986	wr32(E1000_RAL(index), rar_low);
6987	wrfl();
6988	wr32(E1000_RAH(index), rar_high);
6989	wrfl();
6990}
6991
6992static int igb_set_vf_mac(struct igb_adapter *adapter,
6993                          int vf, unsigned char *mac_addr)
6994{
6995	struct e1000_hw *hw = &adapter->hw;
6996	/* VF MAC addresses start at the end of the receive addresses and move
6997	 * towards the first; as a result a collision should not be possible */
6998	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6999
7000	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7001
7002	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7003
7004	return 0;
7005}
7006
7007static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7008{
7009	struct igb_adapter *adapter = netdev_priv(netdev);
7010	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7011		return -EINVAL;
7012	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7013	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7014	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7015				      " change effective.");
7016	if (test_bit(__IGB_DOWN, &adapter->state)) {
7017		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7018			 " but the PF device is not up.\n");
7019		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7020			 " attempting to use the VF device.\n");
7021	}
7022	return igb_set_vf_mac(adapter, vf, mac);
7023}
7024
7025static int igb_link_mbps(int internal_link_speed)
7026{
7027	switch (internal_link_speed) {
7028	case SPEED_100:
7029		return 100;
7030	case SPEED_1000:
7031		return 1000;
7032	default:
7033		return 0;
7034	}
7035}
7036
7037static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7038				  int link_speed)
7039{
7040	int rf_dec, rf_int;
7041	u32 bcnrc_val;
7042
7043	if (tx_rate != 0) {
7044		/* Calculate the rate factor values to set */
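		/* rf_int is the integer part of link_speed/tx_rate; rf_dec is the
		 * remainder scaled into the RF_DEC field
		 */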
7045		rf_int = link_speed / tx_rate;
7046		rf_dec = (link_speed - (rf_int * tx_rate));
7047		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7048
7049		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7050		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7051		               E1000_RTTBCNRC_RF_INT_MASK);
7052		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7053	} else {
7054		bcnrc_val = 0;
7055	}
7056
7057	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7058	wr32(E1000_RTTBCNRC, bcnrc_val);
7059}
7060
7061static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7062{
7063	int actual_link_speed, i;
7064	bool reset_rate = false;
7065
7066	/* VF TX rate limit was not set or not supported */
7067	if ((adapter->vf_rate_link_speed == 0) ||
7068	    (adapter->hw.mac.type != e1000_82576))
7069		return;
7070
7071	actual_link_speed = igb_link_mbps(adapter->link_speed);
7072	if (actual_link_speed != adapter->vf_rate_link_speed) {
7073		reset_rate = true;
7074		adapter->vf_rate_link_speed = 0;
7075		dev_info(&adapter->pdev->dev,
7076		         "Link speed has been changed. VF Transmit "
7077		         "rate is disabled\n");
7078	}
7079
7080	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7081		if (reset_rate)
7082			adapter->vf_data[i].tx_rate = 0;
7083
7084		igb_set_vf_rate_limit(&adapter->hw, i,
7085		                      adapter->vf_data[i].tx_rate,
7086		                      actual_link_speed);
7087	}
7088}
7089
7090static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7091{
7092	struct igb_adapter *adapter = netdev_priv(netdev);
7093	struct e1000_hw *hw = &adapter->hw;
7094	int actual_link_speed;
7095
7096	if (hw->mac.type != e1000_82576)
7097		return -EOPNOTSUPP;
7098
7099	actual_link_speed = igb_link_mbps(adapter->link_speed);
7100	if ((vf >= adapter->vfs_allocated_count) ||
7101	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7102	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7103		return -EINVAL;
7104
7105	adapter->vf_rate_link_speed = actual_link_speed;
7106	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7107	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7108
7109	return 0;
7110}
7111
7112static int igb_ndo_get_vf_config(struct net_device *netdev,
7113				 int vf, struct ifla_vf_info *ivi)
7114{
7115	struct igb_adapter *adapter = netdev_priv(netdev);
7116	if (vf >= adapter->vfs_allocated_count)
7117		return -EINVAL;
7118	ivi->vf = vf;
7119	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7120	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7121	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7122	ivi->qos = adapter->vf_data[vf].pf_qos;
7123	return 0;
7124}
7125
7126static void igb_vmm_control(struct igb_adapter *adapter)
7127{
7128	struct e1000_hw *hw = &adapter->hw;
7129	u32 reg;
7130
7131	switch (hw->mac.type) {
7132	case e1000_82575:
7133	default:
7134		/* replication is not supported for 82575 */
7135		return;
7136	case e1000_82576:
7137		/* notify HW that the MAC is adding vlan tags */
7138		reg = rd32(E1000_DTXCTL);
7139		reg |= E1000_DTXCTL_VLAN_ADDED;
7140		wr32(E1000_DTXCTL, reg);
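		/* fall through */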
7141	case e1000_82580:
7142		/* enable replication vlan tag stripping */
7143		reg = rd32(E1000_RPLOLR);
7144		reg |= E1000_RPLOLR_STRVLAN;
7145		wr32(E1000_RPLOLR, reg);
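		/* fall through */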
7146	case e1000_i350:
7147		/* none of the above registers are supported by i350 */
7148		break;
7149	}
7150
7151	if (adapter->vfs_allocated_count) {
7152		igb_vmdq_set_loopback_pf(hw, true);
7153		igb_vmdq_set_replication_pf(hw, true);
7154		igb_vmdq_set_anti_spoofing_pf(hw, true,
7155						adapter->vfs_allocated_count);
7156	} else {
7157		igb_vmdq_set_loopback_pf(hw, false);
7158		igb_vmdq_set_replication_pf(hw, false);
7159	}
7160}
7161
7162static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7163{
7164	struct e1000_hw *hw = &adapter->hw;
7165	u32 dmac_thr;
7166	u16 hwm;
7167
7168	if (hw->mac.type > e1000_82580) {
7169		if (adapter->flags & IGB_FLAG_DMAC) {
7170			u32 reg;
7171
7172			/* force threshold to 0. */
7173			wr32(E1000_DMCTXTH, 0);
7174
7175			/*
7176			 * DMA Coalescing high water mark needs to be greater
7177			 * than the Rx threshold. Set hwm to PBA - max frame
7178			 * size in 16B units, capping it at PBA - 6KB.
7179			 */
7180			hwm = 64 * pba - adapter->max_frame_size / 16;
7181			if (hwm < 64 * (pba - 6))
7182				hwm = 64 * (pba - 6);
7183			reg = rd32(E1000_FCRTC);
7184			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7185			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7186				& E1000_FCRTC_RTH_COAL_MASK);
7187			wr32(E1000_FCRTC, reg);
7188
7189			/*
7190			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7191			 * frame size, capping it at PBA - 10KB.
7192			 */
7193			dmac_thr = pba - adapter->max_frame_size / 512;
7194			if (dmac_thr < pba - 10)
7195				dmac_thr = pba - 10;
7196			reg = rd32(E1000_DMACR);
7197			reg &= ~E1000_DMACR_DMACTHR_MASK;
7198			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7199				& E1000_DMACR_DMACTHR_MASK);
7200
7201			/* transition to L0s or L1 if available */
7202			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7203
7204			/* watchdog timer= +-1000 usec in 32usec intervals */
7205			reg |= (1000 >> 5);
7206			wr32(E1000_DMACR, reg);
7207
7208			/*
7209			 * no lower threshold to disable
7210			 * coalescing (smart FIFO) - UTRESH=0
7211			 */
7212			wr32(E1000_DMCRTRH, 0);
7213
7214			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7215
7216			wr32(E1000_DMCTLX, reg);
7217
7218			/*
7219			 * free space in tx packet buffer to wake from
7220			 * DMA coal
7221			 */
7222			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7223			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7224
7225			/*
7226			 * make low power state decision controlled
7227			 * by DMA coal
7228			 */
7229			reg = rd32(E1000_PCIEMISC);
7230			reg &= ~E1000_PCIEMISC_LX_DECISION;
7231			wr32(E1000_PCIEMISC, reg);
7232		} /* endif adapter->dmac is not disabled */
7233	} else if (hw->mac.type == e1000_82580) {
7234		u32 reg = rd32(E1000_PCIEMISC);
7235		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7236		wr32(E1000_DMACR, 0);
7237	}
7238}
7239
7240/* igb_main.c */
7241