igb_main.c revision 479d02dfadfbe850ced61c5c83ca16b8b9d4ac5f
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2013 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include <linux/i2c.h>
61#include "igb.h"
62
63#define MAJ 5
64#define MIN 0
65#define BUILD 5
66#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
67__stringify(BUILD) "-k"
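/* With the MAJ/MIN/BUILD values above, DRV_VERSION expands to "5.0.5-k". */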
68char igb_driver_name[] = "igb";
69char igb_driver_version[] = DRV_VERSION;
70static const char igb_driver_string[] =
71				"Intel(R) Gigabit Ethernet Network Driver";
72static const char igb_copyright[] =
73				"Copyright (c) 2007-2013 Intel Corporation.";
74
75static const struct e1000_info *igb_info_tbl[] = {
76	[board_82575] = &e1000_82575_info,
77};
78
79static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
103	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
104	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
105	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
106	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
107	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
108	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
109	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
110	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
111	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
112	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
113	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
114	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
115	/* required last entry */
116	{0, }
117};
118
119MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
120
121void igb_reset(struct igb_adapter *);
122static int igb_setup_all_tx_resources(struct igb_adapter *);
123static int igb_setup_all_rx_resources(struct igb_adapter *);
124static void igb_free_all_tx_resources(struct igb_adapter *);
125static void igb_free_all_rx_resources(struct igb_adapter *);
126static void igb_setup_mrqc(struct igb_adapter *);
127static int igb_probe(struct pci_dev *, const struct pci_device_id *);
128static void igb_remove(struct pci_dev *pdev);
129static int igb_sw_init(struct igb_adapter *);
130static int igb_open(struct net_device *);
131static int igb_close(struct net_device *);
132static void igb_configure(struct igb_adapter *);
133static void igb_configure_tx(struct igb_adapter *);
134static void igb_configure_rx(struct igb_adapter *);
135static void igb_clean_all_tx_rings(struct igb_adapter *);
136static void igb_clean_all_rx_rings(struct igb_adapter *);
137static void igb_clean_tx_ring(struct igb_ring *);
138static void igb_clean_rx_ring(struct igb_ring *);
139static void igb_set_rx_mode(struct net_device *);
140static void igb_update_phy_info(unsigned long);
141static void igb_watchdog(unsigned long);
142static void igb_watchdog_task(struct work_struct *);
143static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
144static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
145						 struct rtnl_link_stats64 *stats);
146static int igb_change_mtu(struct net_device *, int);
147static int igb_set_mac(struct net_device *, void *);
148static void igb_set_uta(struct igb_adapter *adapter);
149static irqreturn_t igb_intr(int irq, void *);
150static irqreturn_t igb_intr_msi(int irq, void *);
151static irqreturn_t igb_msix_other(int irq, void *);
152static irqreturn_t igb_msix_ring(int irq, void *);
153#ifdef CONFIG_IGB_DCA
154static void igb_update_dca(struct igb_q_vector *);
155static void igb_setup_dca(struct igb_adapter *);
156#endif /* CONFIG_IGB_DCA */
157static int igb_poll(struct napi_struct *, int);
158static bool igb_clean_tx_irq(struct igb_q_vector *);
159static bool igb_clean_rx_irq(struct igb_q_vector *, int);
160static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
161static void igb_tx_timeout(struct net_device *);
162static void igb_reset_task(struct work_struct *);
163static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
164static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16);
165static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16);
166static void igb_restore_vlan(struct igb_adapter *);
167static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
168static void igb_ping_all_vfs(struct igb_adapter *);
169static void igb_msg_task(struct igb_adapter *);
170static void igb_vmm_control(struct igb_adapter *);
171static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
172static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
173static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
174static int igb_ndo_set_vf_vlan(struct net_device *netdev,
175			       int vf, u16 vlan, u8 qos);
176static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
177static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
178				   bool setting);
179static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
180				 struct ifla_vf_info *ivi);
181static void igb_check_vf_rate_limit(struct igb_adapter *);
182
183#ifdef CONFIG_PCI_IOV
184static int igb_vf_configure(struct igb_adapter *adapter, int vf);
185static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs);
186#endif
187
188#ifdef CONFIG_PM
189#ifdef CONFIG_PM_SLEEP
190static int igb_suspend(struct device *);
191#endif
192static int igb_resume(struct device *);
193#ifdef CONFIG_PM_RUNTIME
194static int igb_runtime_suspend(struct device *dev);
195static int igb_runtime_resume(struct device *dev);
196static int igb_runtime_idle(struct device *dev);
197#endif
198static const struct dev_pm_ops igb_pm_ops = {
199	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
200	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
201			igb_runtime_idle)
202};
203#endif
204static void igb_shutdown(struct pci_dev *);
205static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
206#ifdef CONFIG_IGB_DCA
207static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
208static struct notifier_block dca_notifier = {
209	.notifier_call	= igb_notify_dca,
210	.next		= NULL,
211	.priority	= 0
212};
213#endif
214#ifdef CONFIG_NET_POLL_CONTROLLER
215/* for netdump / net console */
216static void igb_netpoll(struct net_device *);
217#endif
218#ifdef CONFIG_PCI_IOV
219static unsigned int max_vfs = 0;
220module_param(max_vfs, uint, 0);
221MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
222                 "per physical function");
223#endif /* CONFIG_PCI_IOV */
224
225static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
226		     pci_channel_state_t);
227static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
228static void igb_io_resume(struct pci_dev *);
229
230static const struct pci_error_handlers igb_err_handler = {
231	.error_detected = igb_io_error_detected,
232	.slot_reset = igb_io_slot_reset,
233	.resume = igb_io_resume,
234};
235
236static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
237
238static struct pci_driver igb_driver = {
239	.name     = igb_driver_name,
240	.id_table = igb_pci_tbl,
241	.probe    = igb_probe,
242	.remove   = igb_remove,
243#ifdef CONFIG_PM
244	.driver.pm = &igb_pm_ops,
245#endif
246	.shutdown = igb_shutdown,
247	.sriov_configure = igb_pci_sriov_configure,
248	.err_handler = &igb_err_handler
249};
250
251MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
252MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
253MODULE_LICENSE("GPL");
254MODULE_VERSION(DRV_VERSION);
255
256#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
257static int debug = -1;
258module_param(debug, int, 0);
259MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
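/* Note (illustration only): a debug value of -1 is conventionally treated as
 * "use DEFAULT_MSG_ENABLE"; the probe path typically resolves it with
 * netif_msg_init(debug, DEFAULT_MSG_ENABLE).
 */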
260
261struct igb_reg_info {
262	u32 ofs;
263	char *name;
264};
265
266static const struct igb_reg_info igb_reg_info_tbl[] = {
267
268	/* General Registers */
269	{E1000_CTRL, "CTRL"},
270	{E1000_STATUS, "STATUS"},
271	{E1000_CTRL_EXT, "CTRL_EXT"},
272
273	/* Interrupt Registers */
274	{E1000_ICR, "ICR"},
275
276	/* RX Registers */
277	{E1000_RCTL, "RCTL"},
278	{E1000_RDLEN(0), "RDLEN"},
279	{E1000_RDH(0), "RDH"},
280	{E1000_RDT(0), "RDT"},
281	{E1000_RXDCTL(0), "RXDCTL"},
282	{E1000_RDBAL(0), "RDBAL"},
283	{E1000_RDBAH(0), "RDBAH"},
284
285	/* TX Registers */
286	{E1000_TCTL, "TCTL"},
287	{E1000_TDBAL(0), "TDBAL"},
288	{E1000_TDBAH(0), "TDBAH"},
289	{E1000_TDLEN(0), "TDLEN"},
290	{E1000_TDH(0), "TDH"},
291	{E1000_TDT(0), "TDT"},
292	{E1000_TXDCTL(0), "TXDCTL"},
293	{E1000_TDFH, "TDFH"},
294	{E1000_TDFT, "TDFT"},
295	{E1000_TDFHS, "TDFHS"},
296	{E1000_TDFPC, "TDFPC"},
297
298	/* List Terminator */
299	{}
300};
301
302/* igb_regdump - register printout routine */
303static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
304{
305	int n = 0;
306	char rname[16];
307	u32 regs[8];
308
309	switch (reginfo->ofs) {
310	case E1000_RDLEN(0):
311		for (n = 0; n < 4; n++)
312			regs[n] = rd32(E1000_RDLEN(n));
313		break;
314	case E1000_RDH(0):
315		for (n = 0; n < 4; n++)
316			regs[n] = rd32(E1000_RDH(n));
317		break;
318	case E1000_RDT(0):
319		for (n = 0; n < 4; n++)
320			regs[n] = rd32(E1000_RDT(n));
321		break;
322	case E1000_RXDCTL(0):
323		for (n = 0; n < 4; n++)
324			regs[n] = rd32(E1000_RXDCTL(n));
325		break;
326	case E1000_RDBAL(0):
327		for (n = 0; n < 4; n++)
328			regs[n] = rd32(E1000_RDBAL(n));
329		break;
330	case E1000_RDBAH(0):
331		for (n = 0; n < 4; n++)
332			regs[n] = rd32(E1000_RDBAH(n));
333		break;
334	case E1000_TDBAL(0):
335		for (n = 0; n < 4; n++)
336			regs[n] = rd32(E1000_TDBAL(n));
337		break;
338	case E1000_TDBAH(0):
339		for (n = 0; n < 4; n++)
340			regs[n] = rd32(E1000_TDBAH(n));
341		break;
342	case E1000_TDLEN(0):
343		for (n = 0; n < 4; n++)
344			regs[n] = rd32(E1000_TDLEN(n));
345		break;
346	case E1000_TDH(0):
347		for (n = 0; n < 4; n++)
348			regs[n] = rd32(E1000_TDH(n));
349		break;
350	case E1000_TDT(0):
351		for (n = 0; n < 4; n++)
352			regs[n] = rd32(E1000_TDT(n));
353		break;
354	case E1000_TXDCTL(0):
355		for (n = 0; n < 4; n++)
356			regs[n] = rd32(E1000_TXDCTL(n));
357		break;
358	default:
359		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
360		return;
361	}
362
363	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
364	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
365		regs[2], regs[3]);
366}
367
368/* igb_dump - Print registers, Tx-rings and Rx-rings */
369static void igb_dump(struct igb_adapter *adapter)
370{
371	struct net_device *netdev = adapter->netdev;
372	struct e1000_hw *hw = &adapter->hw;
373	struct igb_reg_info *reginfo;
374	struct igb_ring *tx_ring;
375	union e1000_adv_tx_desc *tx_desc;
376	struct my_u0 { u64 a; u64 b; } *u0;
377	struct igb_ring *rx_ring;
378	union e1000_adv_rx_desc *rx_desc;
379	u32 staterr;
380	u16 i, n;
381
382	if (!netif_msg_hw(adapter))
383		return;
384
385	/* Print netdevice Info */
386	if (netdev) {
387		dev_info(&adapter->pdev->dev, "Net device Info\n");
388		pr_info("Device Name     state            trans_start      "
389			"last_rx\n");
390		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
391			netdev->state, netdev->trans_start, netdev->last_rx);
392	}
393
394	/* Print Registers */
395	dev_info(&adapter->pdev->dev, "Register Dump\n");
396	pr_info(" Register Name   Value\n");
397	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
398	     reginfo->name; reginfo++) {
399		igb_regdump(hw, reginfo);
400	}
401
402	/* Print TX Ring Summary */
403	if (!netdev || !netif_running(netdev))
404		goto exit;
405
406	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
407	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
408	for (n = 0; n < adapter->num_tx_queues; n++) {
409		struct igb_tx_buffer *buffer_info;
410		tx_ring = adapter->tx_ring[n];
411		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
412		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
413			n, tx_ring->next_to_use, tx_ring->next_to_clean,
414			(u64)dma_unmap_addr(buffer_info, dma),
415			dma_unmap_len(buffer_info, len),
416			buffer_info->next_to_watch,
417			(u64)buffer_info->time_stamp);
418	}
419
420	/* Print TX Rings */
421	if (!netif_msg_tx_done(adapter))
422		goto rx_ring_summary;
423
424	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
425
426	/* Transmit Descriptor Formats
427	 *
428	 * Advanced Transmit Descriptor
429	 *   +--------------------------------------------------------------+
430	 * 0 |         Buffer Address [63:0]                                |
431	 *   +--------------------------------------------------------------+
432	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
433	 *   +--------------------------------------------------------------+
434	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
435	 */
436
437	for (n = 0; n < adapter->num_tx_queues; n++) {
438		tx_ring = adapter->tx_ring[n];
439		pr_info("------------------------------------\n");
440		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
441		pr_info("------------------------------------\n");
442		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
443			"[bi->dma       ] leng  ntw timestamp        "
444			"bi->skb\n");
445
446		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
447			const char *next_desc;
448			struct igb_tx_buffer *buffer_info;
449			tx_desc = IGB_TX_DESC(tx_ring, i);
450			buffer_info = &tx_ring->tx_buffer_info[i];
451			u0 = (struct my_u0 *)tx_desc;
452			if (i == tx_ring->next_to_use &&
453			    i == tx_ring->next_to_clean)
454				next_desc = " NTC/U";
455			else if (i == tx_ring->next_to_use)
456				next_desc = " NTU";
457			else if (i == tx_ring->next_to_clean)
458				next_desc = " NTC";
459			else
460				next_desc = "";
461
462			pr_info("T [0x%03X]    %016llX %016llX %016llX"
463				" %04X  %p %016llX %p%s\n", i,
464				le64_to_cpu(u0->a),
465				le64_to_cpu(u0->b),
466				(u64)dma_unmap_addr(buffer_info, dma),
467				dma_unmap_len(buffer_info, len),
468				buffer_info->next_to_watch,
469				(u64)buffer_info->time_stamp,
470				buffer_info->skb, next_desc);
471
472			if (netif_msg_pktdata(adapter) && buffer_info->skb)
473				print_hex_dump(KERN_INFO, "",
474					DUMP_PREFIX_ADDRESS,
475					16, 1, buffer_info->skb->data,
476					dma_unmap_len(buffer_info, len),
477					true);
478		}
479	}
480
481	/* Print RX Rings Summary */
482rx_ring_summary:
483	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
484	pr_info("Queue [NTU] [NTC]\n");
485	for (n = 0; n < adapter->num_rx_queues; n++) {
486		rx_ring = adapter->rx_ring[n];
487		pr_info(" %5d %5X %5X\n",
488			n, rx_ring->next_to_use, rx_ring->next_to_clean);
489	}
490
491	/* Print RX Rings */
492	if (!netif_msg_rx_status(adapter))
493		goto exit;
494
495	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
496
497	/* Advanced Receive Descriptor (Read) Format
498	 *    63                                           1        0
499	 *    +-----------------------------------------------------+
500	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
501	 *    +----------------------------------------------+------+
502	 *  8 |       Header Buffer Address [63:1]           |  DD  |
503	 *    +-----------------------------------------------------+
504	 *
505	 *
506	 * Advanced Receive Descriptor (Write-Back) Format
507	 *
508	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
509	 *   +------------------------------------------------------+
510	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
511	 *   | Checksum   Ident  |   |           |    | Type | Type |
512	 *   +------------------------------------------------------+
513	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
514	 *   +------------------------------------------------------+
515	 *   63       48 47    32 31            20 19               0
516	 */
517
518	for (n = 0; n < adapter->num_rx_queues; n++) {
519		rx_ring = adapter->rx_ring[n];
520		pr_info("------------------------------------\n");
521		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
522		pr_info("------------------------------------\n");
523		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
524			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
525		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
526			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
527
528		for (i = 0; i < rx_ring->count; i++) {
529			const char *next_desc;
530			struct igb_rx_buffer *buffer_info;
531			buffer_info = &rx_ring->rx_buffer_info[i];
532			rx_desc = IGB_RX_DESC(rx_ring, i);
533			u0 = (struct my_u0 *)rx_desc;
534			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
535
536			if (i == rx_ring->next_to_use)
537				next_desc = " NTU";
538			else if (i == rx_ring->next_to_clean)
539				next_desc = " NTC";
540			else
541				next_desc = "";
542
543			if (staterr & E1000_RXD_STAT_DD) {
544				/* Descriptor Done */
545				pr_info("%s[0x%03X]     %016llX %016llX ---------------- %s\n",
546					"RWB", i,
547					le64_to_cpu(u0->a),
548					le64_to_cpu(u0->b),
549					next_desc);
550			} else {
551				pr_info("%s[0x%03X]     %016llX %016llX %016llX %s\n",
552					"R  ", i,
553					le64_to_cpu(u0->a),
554					le64_to_cpu(u0->b),
555					(u64)buffer_info->dma,
556					next_desc);
557
558				if (netif_msg_pktdata(adapter) &&
559				    buffer_info->dma && buffer_info->page) {
560					print_hex_dump(KERN_INFO, "",
561					  DUMP_PREFIX_ADDRESS,
562					  16, 1,
563					  page_address(buffer_info->page) +
564						      buffer_info->page_offset,
565					  IGB_RX_BUFSZ, true);
566				}
567			}
568		}
569	}
570
571exit:
572	return;
573}
574
575/**
576 *  igb_get_i2c_data - Reads the I2C SDA data bit
577 *  @data: pointer to the board private structure; the SDA state is
578 *         read from the I2CPARAMS register
579 *
580 *  Returns the I2C data bit value
581 **/
582static int igb_get_i2c_data(void *data)
583{
584	struct igb_adapter *adapter = (struct igb_adapter *)data;
585	struct e1000_hw *hw = &adapter->hw;
586	s32 i2cctl = rd32(E1000_I2CPARAMS);
587
588	return ((i2cctl & E1000_I2C_DATA_IN) != 0);
589}
590
591/**
592 *  igb_set_i2c_data - Sets the I2C data bit
593 *  @data: pointer to hardware structure
594 *  @state: I2C data value (0 or 1) to set
595 *
596 *  Sets the I2C data bit
597 **/
598static void igb_set_i2c_data(void *data, int state)
599{
600	struct igb_adapter *adapter = (struct igb_adapter *)data;
601	struct e1000_hw *hw = &adapter->hw;
602	s32 i2cctl = rd32(E1000_I2CPARAMS);
603
604	if (state)
605		i2cctl |= E1000_I2C_DATA_OUT;
606	else
607		i2cctl &= ~E1000_I2C_DATA_OUT;
608
609	i2cctl &= ~E1000_I2C_DATA_OE_N;
610	i2cctl |= E1000_I2C_CLK_OE_N;
611	wr32(E1000_I2CPARAMS, i2cctl);
612	wrfl();
613
614}
615
616/**
617 *  igb_set_i2c_clk - Sets the I2C SCL clock
618 *  @data: pointer to hardware structure
619 *  @state: state to set clock
620 *
621 *  Sets the I2C clock line to state
622 **/
623static void igb_set_i2c_clk(void *data, int state)
624{
625	struct igb_adapter *adapter = (struct igb_adapter *)data;
626	struct e1000_hw *hw = &adapter->hw;
627	s32 i2cctl = rd32(E1000_I2CPARAMS);
628
629	if (state) {
630		i2cctl |= E1000_I2C_CLK_OUT;
631		i2cctl &= ~E1000_I2C_CLK_OE_N;
632	} else {
633		i2cctl &= ~E1000_I2C_CLK_OUT;
634		i2cctl &= ~E1000_I2C_CLK_OE_N;
635	}
636	wr32(E1000_I2CPARAMS, i2cctl);
637	wrfl();
638}
639
640/**
641 *  igb_get_i2c_clk - Gets the I2C SCL clock state
642 *  @data: pointer to hardware structure
643 *
644 *  Gets the I2C clock state
645 **/
646static int igb_get_i2c_clk(void *data)
647{
648	struct igb_adapter *adapter = (struct igb_adapter *)data;
649	struct e1000_hw *hw = &adapter->hw;
650	s32 i2cctl = rd32(E1000_I2CPARAMS);
651
652	return ((i2cctl & E1000_I2C_CLK_IN) != 0);
653}
654
655static const struct i2c_algo_bit_data igb_i2c_algo = {
656	.setsda		= igb_set_i2c_data,
657	.setscl		= igb_set_i2c_clk,
658	.getsda		= igb_get_i2c_data,
659	.getscl		= igb_get_i2c_clk,
660	.udelay		= 5,
661	.timeout	= 20,
662};
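/* Illustrative sketch (not part of the driver): a bit-banged algo template
 * like igb_i2c_algo above is normally registered through the generic
 * i2c-algo-bit layer.  The sketch below is hedged: it assumes struct
 * igb_adapter carries an i2c_adapter (i2c_adap) and a writable copy of the
 * algo data (i2c_algo), as declared in igb.h, and that
 * <linux/i2c-algo-bit.h> is pulled in for i2c_bit_add_bus().
 */
#if 0	/* example only */
static int example_register_i2c_bus(struct igb_adapter *adapter)
{
	struct i2c_adapter *adap = &adapter->i2c_adap;

	adapter->i2c_algo = igb_i2c_algo;	/* copy the const template */
	adapter->i2c_algo.data = adapter;	/* handed back as 'data' above */

	adap->owner = THIS_MODULE;
	adap->algo_data = &adapter->i2c_algo;
	adap->dev.parent = &adapter->pdev->dev;
	strlcpy(adap->name, "igb i2c", sizeof(adap->name));

	/* i2c_bit_add_bus() drives SDA/SCL through the callbacks above */
	return i2c_bit_add_bus(adap);
}
#endif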
663
664/**
665 *  igb_get_hw_dev - return device
666 *  @hw: pointer to hardware structure
667 *
668 *  used by hardware layer to print debugging information
669 **/
670struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
671{
672	struct igb_adapter *adapter = hw->back;
673	return adapter->netdev;
674}
675
676/**
677 *  igb_init_module - Driver Registration Routine
678 *
679 *  igb_init_module is the first routine called when the driver is
680 *  loaded. All it does is register with the PCI subsystem.
681 **/
682static int __init igb_init_module(void)
683{
684	int ret;
685	pr_info("%s - version %s\n",
686	       igb_driver_string, igb_driver_version);
687
688	pr_info("%s\n", igb_copyright);
689
690#ifdef CONFIG_IGB_DCA
691	dca_register_notify(&dca_notifier);
692#endif
693	ret = pci_register_driver(&igb_driver);
694	return ret;
695}
696
697module_init(igb_init_module);
698
699/**
700 *  igb_exit_module - Driver Exit Cleanup Routine
701 *
702 *  igb_exit_module is called just before the driver is removed
703 *  from memory.
704 **/
705static void __exit igb_exit_module(void)
706{
707#ifdef CONFIG_IGB_DCA
708	dca_unregister_notify(&dca_notifier);
709#endif
710	pci_unregister_driver(&igb_driver);
711}
712
713module_exit(igb_exit_module);
714
715#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
716/**
717 *  igb_cache_ring_register - Descriptor ring to register mapping
718 *  @adapter: board private structure to initialize
719 *
720 *  Once we know the feature-set enabled for the device, we'll cache
721 *  the register offset the descriptor ring is assigned to.
722 **/
723static void igb_cache_ring_register(struct igb_adapter *adapter)
724{
725	int i = 0, j = 0;
726	u32 rbase_offset = adapter->vfs_allocated_count;
727
728	switch (adapter->hw.mac.type) {
729	case e1000_82576:
730		/* The queues are allocated for virtualization such that VF 0
731		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
732		 * In order to avoid collision we start at the first free queue
733		 * and continue consuming queues in the same sequence
734		 */
735		if (adapter->vfs_allocated_count) {
736			for (; i < adapter->rss_queues; i++)
737				adapter->rx_ring[i]->reg_idx = rbase_offset +
738							       Q_IDX_82576(i);
739		}
740	case e1000_82575:
741	case e1000_82580:
742	case e1000_i350:
743	case e1000_i354:
744	case e1000_i210:
745	case e1000_i211:
746	default:
747		for (; i < adapter->num_rx_queues; i++)
748			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
749		for (; j < adapter->num_tx_queues; j++)
750			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
751		break;
752	}
753}
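/* Illustrative sketch (not part of the driver): Q_IDX_82576() interleaves
 * the PF queues between the per-VF queue pairs described above, so
 * i = 0, 1, 2, 3, ... maps to register indices 0, 8, 1, 9, ...
 */
#if 0	/* example only */
static void example_dump_82576_queue_map(void)
{
	int i;

	for (i = 0; i < 8; i++)
		pr_info("rx queue %d -> reg_idx %d\n", i, Q_IDX_82576(i));
	/* prints 0->0, 1->8, 2->1, 3->9, 4->2, 5->10, 6->3, 7->11 */
}
#endif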
754
755/**
756 *  igb_write_ivar - configure ivar for given MSI-X vector
757 *  @hw: pointer to the HW structure
758 *  @msix_vector: vector number we are allocating to a given ring
759 *  @index: row index of IVAR register to write within IVAR table
760 *  @offset: column offset within IVAR, should be a multiple of 8
761 *
762 *  This function is intended to handle the writing of the IVAR register
763 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
764 *  each containing a cause allocation for an Rx and Tx ring, and a
765 *  variable number of rows depending on the number of queues supported.
766 **/
767static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
768			   int index, int offset)
769{
770	u32 ivar = array_rd32(E1000_IVAR0, index);
771
772	/* clear any bits that are currently set */
773	ivar &= ~((u32)0xFF << offset);
774
775	/* write vector and valid bit */
776	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
777
778	array_wr32(E1000_IVAR0, index, ivar);
779}
780
781#define IGB_N0_QUEUE -1
782static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
783{
784	struct igb_adapter *adapter = q_vector->adapter;
785	struct e1000_hw *hw = &adapter->hw;
786	int rx_queue = IGB_N0_QUEUE;
787	int tx_queue = IGB_N0_QUEUE;
788	u32 msixbm = 0;
789
790	if (q_vector->rx.ring)
791		rx_queue = q_vector->rx.ring->reg_idx;
792	if (q_vector->tx.ring)
793		tx_queue = q_vector->tx.ring->reg_idx;
794
795	switch (hw->mac.type) {
796	case e1000_82575:
797		/* The 82575 assigns vectors using a bitmask, which matches the
798		 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
799		 * or more queues to a vector, we write the appropriate bits
800		 * into the MSIXBM register for that vector.
801		 */
802		if (rx_queue > IGB_N0_QUEUE)
803			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
804		if (tx_queue > IGB_N0_QUEUE)
805			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
806		if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0)
807			msixbm |= E1000_EIMS_OTHER;
808		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
809		q_vector->eims_value = msixbm;
810		break;
811	case e1000_82576:
812		/* 82576 uses a table that essentially consists of 2 columns
813		 * with 8 rows.  The ordering is column-major so we use the
814		 * lower 3 bits as the row index, and the 4th bit as the
815		 * column offset.
816		 */
817		if (rx_queue > IGB_N0_QUEUE)
818			igb_write_ivar(hw, msix_vector,
819				       rx_queue & 0x7,
820				       (rx_queue & 0x8) << 1);
821		if (tx_queue > IGB_N0_QUEUE)
822			igb_write_ivar(hw, msix_vector,
823				       tx_queue & 0x7,
824				       ((tx_queue & 0x8) << 1) + 8);
825		q_vector->eims_value = 1 << msix_vector;
826		break;
827	case e1000_82580:
828	case e1000_i350:
829	case e1000_i354:
830	case e1000_i210:
831	case e1000_i211:
832		/* On 82580 and newer adapters the scheme is similar to 82576
833		 * however instead of ordering column-major we have things
834		 * ordered row-major.  So we traverse the table by using
835		 * bit 0 as the column offset, and the remaining bits as the
836		 * row index.
837		 */
838		if (rx_queue > IGB_N0_QUEUE)
839			igb_write_ivar(hw, msix_vector,
840				       rx_queue >> 1,
841				       (rx_queue & 0x1) << 4);
842		if (tx_queue > IGB_N0_QUEUE)
843			igb_write_ivar(hw, msix_vector,
844				       tx_queue >> 1,
845				       ((tx_queue & 0x1) << 4) + 8);
846		q_vector->eims_value = 1 << msix_vector;
847		break;
848	default:
849		BUG();
850		break;
851	}
852
853	/* add q_vector eims value to global eims_enable_mask */
854	adapter->eims_enable_mask |= q_vector->eims_value;
855
856	/* configure q_vector to set itr on first interrupt */
857	q_vector->set_itr = 1;
858}
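/* Worked example of the IVAR addressing above (illustration only):
 * - 82576 (column-major): rx_queue 10 -> row 10 & 0x7 = 2,
 *   Rx column offset (10 & 0x8) << 1 = 16 bits.
 * - 82580+ (row-major):   rx_queue 5  -> row 5 >> 1 = 2,
 *   Rx column offset (5 & 0x1) << 4 = 16 bits; the matching Tx queue
 *   uses the same row with the offset shifted by a further 8 bits.
 */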
859
860/**
861 *  igb_configure_msix - Configure MSI-X hardware
862 *  @adapter: board private structure to initialize
863 *
864 *  igb_configure_msix sets up the hardware to properly
865 *  generate MSI-X interrupts.
866 **/
867static void igb_configure_msix(struct igb_adapter *adapter)
868{
869	u32 tmp;
870	int i, vector = 0;
871	struct e1000_hw *hw = &adapter->hw;
872
873	adapter->eims_enable_mask = 0;
874
875	/* set vector for other causes, i.e. link changes */
876	switch (hw->mac.type) {
877	case e1000_82575:
878		tmp = rd32(E1000_CTRL_EXT);
879		/* enable MSI-X PBA support */
880		tmp |= E1000_CTRL_EXT_PBA_CLR;
881
882		/* Auto-Mask interrupts upon ICR read. */
883		tmp |= E1000_CTRL_EXT_EIAME;
884		tmp |= E1000_CTRL_EXT_IRCA;
885
886		wr32(E1000_CTRL_EXT, tmp);
887
888		/* enable msix_other interrupt */
889		array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER);
890		adapter->eims_other = E1000_EIMS_OTHER;
891
892		break;
893
894	case e1000_82576:
895	case e1000_82580:
896	case e1000_i350:
897	case e1000_i354:
898	case e1000_i210:
899	case e1000_i211:
900		/* Turn on MSI-X capability first, or our settings
901		 * won't stick.  And it will take days to debug.
902		 */
903		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
904		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
905		     E1000_GPIE_NSICR);
906
907		/* enable msix_other interrupt */
908		adapter->eims_other = 1 << vector;
909		tmp = (vector++ | E1000_IVAR_VALID) << 8;
910
911		wr32(E1000_IVAR_MISC, tmp);
912		break;
913	default:
914		/* do nothing, since nothing else supports MSI-X */
915		break;
916	} /* switch (hw->mac.type) */
917
918	adapter->eims_enable_mask |= adapter->eims_other;
919
920	for (i = 0; i < adapter->num_q_vectors; i++)
921		igb_assign_vector(adapter->q_vector[i], vector++);
922
923	wrfl();
924}
925
926/**
927 *  igb_request_msix - Initialize MSI-X interrupts
928 *  @adapter: board private structure to initialize
929 *
930 *  igb_request_msix allocates MSI-X vectors and requests interrupts from the
931 *  kernel.
932 **/
933static int igb_request_msix(struct igb_adapter *adapter)
934{
935	struct net_device *netdev = adapter->netdev;
936	struct e1000_hw *hw = &adapter->hw;
937	int i, err = 0, vector = 0, free_vector = 0;
938
939	err = request_irq(adapter->msix_entries[vector].vector,
940			  igb_msix_other, 0, netdev->name, adapter);
941	if (err)
942		goto err_out;
943
944	for (i = 0; i < adapter->num_q_vectors; i++) {
945		struct igb_q_vector *q_vector = adapter->q_vector[i];
946
947		vector++;
948
949		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
950
951		if (q_vector->rx.ring && q_vector->tx.ring)
952			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
953				q_vector->rx.ring->queue_index);
954		else if (q_vector->tx.ring)
955			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
956				q_vector->tx.ring->queue_index);
957		else if (q_vector->rx.ring)
958			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
959				q_vector->rx.ring->queue_index);
960		else
961			sprintf(q_vector->name, "%s-unused", netdev->name);
962
963		err = request_irq(adapter->msix_entries[vector].vector,
964				  igb_msix_ring, 0, q_vector->name,
965				  q_vector);
966		if (err)
967			goto err_free;
968	}
969
970	igb_configure_msix(adapter);
971	return 0;
972
973err_free:
974	/* free already assigned IRQs */
975	free_irq(adapter->msix_entries[free_vector++].vector, adapter);
976
977	vector--;
978	for (i = 0; i < vector; i++) {
979		free_irq(adapter->msix_entries[free_vector++].vector,
980			 adapter->q_vector[i]);
981	}
982err_out:
983	return err;
984}
985
986/**
987 *  igb_free_q_vector - Free memory allocated for specific interrupt vector
988 *  @adapter: board private structure to initialize
989 *  @v_idx: Index of vector to be freed
990 *
991 *  This function frees the memory allocated to the q_vector.
992 **/
993static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx)
994{
995	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
996
997	adapter->q_vector[v_idx] = NULL;
998
999	/* igb_get_stats64() might access the rings on this vector,
1000	 * we must wait a grace period before freeing it.
1001	 */
1002	kfree_rcu(q_vector, rcu);
1003}
1004
1005/**
1006 *  igb_reset_q_vector - Reset config for interrupt vector
1007 *  @adapter: board private structure to initialize
1008 *  @v_idx: Index of vector to be reset
1009 *
1010 *  If NAPI is enabled it will delete any references to the
1011 *  NAPI struct. This is preparation for igb_free_q_vector.
1012 **/
1013static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx)
1014{
1015	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1016
1017	if (q_vector->tx.ring)
1018		adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
1019
1020	if (q_vector->rx.ring)
1021		adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
1022
1023	netif_napi_del(&q_vector->napi);
1024
1025}
1026
1027static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
1028{
1029	int v_idx = adapter->num_q_vectors;
1030
1031	if (adapter->flags & IGB_FLAG_HAS_MSIX)
1032		pci_disable_msix(adapter->pdev);
1033	else if (adapter->flags & IGB_FLAG_HAS_MSI)
1034		pci_disable_msi(adapter->pdev);
1035
1036	while (v_idx--)
1037		igb_reset_q_vector(adapter, v_idx);
1038}
1039
1040/**
1041 *  igb_free_q_vectors - Free memory allocated for interrupt vectors
1042 *  @adapter: board private structure to initialize
1043 *
1044 *  This function frees the memory allocated to the q_vectors.  In addition if
1045 *  NAPI is enabled it will delete any references to the NAPI struct prior
1046 *  to freeing the q_vector.
1047 **/
1048static void igb_free_q_vectors(struct igb_adapter *adapter)
1049{
1050	int v_idx = adapter->num_q_vectors;
1051
1052	adapter->num_tx_queues = 0;
1053	adapter->num_rx_queues = 0;
1054	adapter->num_q_vectors = 0;
1055
1056	while (v_idx--) {
1057		igb_reset_q_vector(adapter, v_idx);
1058		igb_free_q_vector(adapter, v_idx);
1059	}
1060}
1061
1062/**
1063 *  igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1064 *  @adapter: board private structure to initialize
1065 *
1066 *  This function resets the device so that it has 0 Rx queues, Tx queues, and
1067 *  MSI-X interrupts allocated.
1068 */
1069static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1070{
1071	igb_free_q_vectors(adapter);
1072	igb_reset_interrupt_capability(adapter);
1073}
1074
1075/**
1076 *  igb_set_interrupt_capability - set MSI or MSI-X if supported
1077 *  @adapter: board private structure to initialize
1078 *  @msix: boolean value of MSIX capability
1079 *
1080 *  Attempt to configure interrupts using the best available
1081 *  capabilities of the hardware and kernel.
1082 **/
1083static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
1084{
1085	int err;
1086	int numvecs, i;
1087
1088	if (!msix)
1089		goto msi_only;
1090	adapter->flags |= IGB_FLAG_HAS_MSIX;
1091
1092	/* Number of supported queues. */
1093	adapter->num_rx_queues = adapter->rss_queues;
1094	if (adapter->vfs_allocated_count)
1095		adapter->num_tx_queues = 1;
1096	else
1097		adapter->num_tx_queues = adapter->rss_queues;
1098
1099	/* start with one vector for every Rx queue */
1100	numvecs = adapter->num_rx_queues;
1101
1102	/* if Tx handler is separate add 1 for every Tx queue */
1103	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1104		numvecs += adapter->num_tx_queues;
1105
1106	/* store the number of vectors reserved for queues */
1107	adapter->num_q_vectors = numvecs;
1108
1109	/* add 1 vector for link status interrupts */
1110	numvecs++;
1111	for (i = 0; i < numvecs; i++)
1112		adapter->msix_entries[i].entry = i;
1113
1114	err = pci_enable_msix_range(adapter->pdev,
1115				    adapter->msix_entries,
1116				    numvecs,
1117				    numvecs);
1118	if (err > 0)
1119		return;
1120
1121	igb_reset_interrupt_capability(adapter);
1122
1123	/* If we can't do MSI-X, try MSI */
1124msi_only:
1125#ifdef CONFIG_PCI_IOV
1126	/* disable SR-IOV for non MSI-X configurations */
1127	if (adapter->vf_data) {
1128		struct e1000_hw *hw = &adapter->hw;
1129		/* disable iov and allow time for transactions to clear */
1130		pci_disable_sriov(adapter->pdev);
1131		msleep(500);
1132
1133		kfree(adapter->vf_data);
1134		adapter->vf_data = NULL;
1135		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1136		wrfl();
1137		msleep(100);
1138		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1139	}
1140#endif
1141	adapter->vfs_allocated_count = 0;
1142	adapter->rss_queues = 1;
1143	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1144	adapter->num_rx_queues = 1;
1145	adapter->num_tx_queues = 1;
1146	adapter->num_q_vectors = 1;
1147	if (!pci_enable_msi(adapter->pdev))
1148		adapter->flags |= IGB_FLAG_HAS_MSI;
1149}
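/* Worked example of the vector accounting above (illustration only):
 * with rss_queues = 4, no VFs and IGB_FLAG_QUEUE_PAIRS set, the driver
 * asks for 4 queue vectors (Tx/Rx paired) + 1 link/other vector = 5
 * MSI-X entries; with pairing off it would be 4 Rx + 4 Tx + 1 = 9.
 */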
1150
1151static void igb_add_ring(struct igb_ring *ring,
1152			 struct igb_ring_container *head)
1153{
1154	head->ring = ring;
1155	head->count++;
1156}
1157
1158/**
1159 *  igb_alloc_q_vector - Allocate memory for a single interrupt vector
1160 *  @adapter: board private structure to initialize
1161 *  @v_count: q_vectors allocated on adapter, used for ring interleaving
1162 *  @v_idx: index of vector in adapter struct
1163 *  @txr_count: total number of Tx rings to allocate
1164 *  @txr_idx: index of first Tx ring to allocate
1165 *  @rxr_count: total number of Rx rings to allocate
1166 *  @rxr_idx: index of first Rx ring to allocate
1167 *
1168 *  We allocate one q_vector.  If allocation fails we return -ENOMEM.
1169 **/
1170static int igb_alloc_q_vector(struct igb_adapter *adapter,
1171			      int v_count, int v_idx,
1172			      int txr_count, int txr_idx,
1173			      int rxr_count, int rxr_idx)
1174{
1175	struct igb_q_vector *q_vector;
1176	struct igb_ring *ring;
1177	int ring_count, size;
1178
1179	/* igb only supports 1 Tx and/or 1 Rx queue per vector */
1180	if (txr_count > 1 || rxr_count > 1)
1181		return -ENOMEM;
1182
1183	ring_count = txr_count + rxr_count;
1184	size = sizeof(struct igb_q_vector) +
1185	       (sizeof(struct igb_ring) * ring_count);
1186
1187	/* allocate q_vector and rings */
1188	q_vector = adapter->q_vector[v_idx];
1189	if (!q_vector)
1190		q_vector = kzalloc(size, GFP_KERNEL);
1191	if (!q_vector)
1192		return -ENOMEM;
1193
1194	/* initialize NAPI */
1195	netif_napi_add(adapter->netdev, &q_vector->napi,
1196		       igb_poll, 64);
1197
1198	/* tie q_vector and adapter together */
1199	adapter->q_vector[v_idx] = q_vector;
1200	q_vector->adapter = adapter;
1201
1202	/* initialize work limits */
1203	q_vector->tx.work_limit = adapter->tx_work_limit;
1204
1205	/* initialize ITR configuration */
1206	q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0);
1207	q_vector->itr_val = IGB_START_ITR;
1208
1209	/* initialize pointer to rings */
1210	ring = q_vector->ring;
1211
1212	/* initialize ITR */
1213	if (rxr_count) {
1214		/* rx or rx/tx vector */
1215		if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
1216			q_vector->itr_val = adapter->rx_itr_setting;
1217	} else {
1218		/* tx only vector */
1219		if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
1220			q_vector->itr_val = adapter->tx_itr_setting;
1221	}
1222
1223	if (txr_count) {
1224		/* assign generic ring traits */
1225		ring->dev = &adapter->pdev->dev;
1226		ring->netdev = adapter->netdev;
1227
1228		/* configure backlink on ring */
1229		ring->q_vector = q_vector;
1230
1231		/* update q_vector Tx values */
1232		igb_add_ring(ring, &q_vector->tx);
1233
1234		/* For 82575, context index must be unique per ring. */
1235		if (adapter->hw.mac.type == e1000_82575)
1236			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
1237
1238		/* apply Tx specific ring traits */
1239		ring->count = adapter->tx_ring_count;
1240		ring->queue_index = txr_idx;
1241
1242		u64_stats_init(&ring->tx_syncp);
1243		u64_stats_init(&ring->tx_syncp2);
1244
1245		/* assign ring to adapter */
1246		adapter->tx_ring[txr_idx] = ring;
1247
1248		/* push pointer to next ring */
1249		ring++;
1250	}
1251
1252	if (rxr_count) {
1253		/* assign generic ring traits */
1254		ring->dev = &adapter->pdev->dev;
1255		ring->netdev = adapter->netdev;
1256
1257		/* configure backlink on ring */
1258		ring->q_vector = q_vector;
1259
1260		/* update q_vector Rx values */
1261		igb_add_ring(ring, &q_vector->rx);
1262
1263		/* set flag indicating ring supports SCTP checksum offload */
1264		if (adapter->hw.mac.type >= e1000_82576)
1265			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
1266
1267		/*
1268		 * On i350, i354, i210, and i211, loopback VLAN packets
1269		 * have the tag byte-swapped.
1270		 */
1271		if (adapter->hw.mac.type >= e1000_i350)
1272			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
1273
1274		/* apply Rx specific ring traits */
1275		ring->count = adapter->rx_ring_count;
1276		ring->queue_index = rxr_idx;
1277
1278		u64_stats_init(&ring->rx_syncp);
1279
1280		/* assign ring to adapter */
1281		adapter->rx_ring[rxr_idx] = ring;
1282	}
1283
1284	return 0;
1285}
1286
1287
1288/**
1289 *  igb_alloc_q_vectors - Allocate memory for interrupt vectors
1290 *  @adapter: board private structure to initialize
1291 *
1292 *  We allocate one q_vector per queue interrupt.  If allocation fails we
1293 *  return -ENOMEM.
1294 **/
1295static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1296{
1297	int q_vectors = adapter->num_q_vectors;
1298	int rxr_remaining = adapter->num_rx_queues;
1299	int txr_remaining = adapter->num_tx_queues;
1300	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
1301	int err;
1302
1303	if (q_vectors >= (rxr_remaining + txr_remaining)) {
1304		for (; rxr_remaining; v_idx++) {
1305			err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
1306						 0, 0, 1, rxr_idx);
1307
1308			if (err)
1309				goto err_out;
1310
1311			/* update counts and index */
1312			rxr_remaining--;
1313			rxr_idx++;
1314		}
1315	}
1316
1317	for (; v_idx < q_vectors; v_idx++) {
1318		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
1319		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
1320		err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
1321					 tqpv, txr_idx, rqpv, rxr_idx);
1322
1323		if (err)
1324			goto err_out;
1325
1326		/* update counts and index */
1327		rxr_remaining -= rqpv;
1328		txr_remaining -= tqpv;
1329		rxr_idx++;
1330		txr_idx++;
1331	}
1332
1333	return 0;
1334
1335err_out:
1336	adapter->num_tx_queues = 0;
1337	adapter->num_rx_queues = 0;
1338	adapter->num_q_vectors = 0;
1339
1340	while (v_idx--)
1341		igb_free_q_vector(adapter, v_idx);
1342
1343	return -ENOMEM;
1344}
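/* Worked example of the distribution above (illustration only): with
 * IGB_FLAG_QUEUE_PAIRS set, 4 Rx + 4 Tx rings share 4 q_vectors, so the
 * first loop is skipped and each vector ends up with one Rx and one Tx
 * ring; with pairing off and 2 Rx + 2 Tx rings on 4 vectors, the first
 * loop builds two Rx-only vectors and the second builds two Tx-only ones.
 */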
1345
1346/**
1347 *  igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1348 *  @adapter: board private structure to initialize
1349 *  @msix: boolean value of MSIX capability
1350 *
1351 *  This function initializes the interrupts and allocates all of the queues.
1352 **/
1353static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix)
1354{
1355	struct pci_dev *pdev = adapter->pdev;
1356	int err;
1357
1358	igb_set_interrupt_capability(adapter, msix);
1359
1360	err = igb_alloc_q_vectors(adapter);
1361	if (err) {
1362		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1363		goto err_alloc_q_vectors;
1364	}
1365
1366	igb_cache_ring_register(adapter);
1367
1368	return 0;
1369
1370err_alloc_q_vectors:
1371	igb_reset_interrupt_capability(adapter);
1372	return err;
1373}
1374
1375/**
1376 *  igb_request_irq - initialize interrupts
1377 *  @adapter: board private structure to initialize
1378 *
1379 *  Attempts to configure interrupts using the best available
1380 *  capabilities of the hardware and kernel.
1381 **/
1382static int igb_request_irq(struct igb_adapter *adapter)
1383{
1384	struct net_device *netdev = adapter->netdev;
1385	struct pci_dev *pdev = adapter->pdev;
1386	int err = 0;
1387
1388	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1389		err = igb_request_msix(adapter);
1390		if (!err)
1391			goto request_done;
1392		/* fall back to MSI */
1393		igb_free_all_tx_resources(adapter);
1394		igb_free_all_rx_resources(adapter);
1395
1396		igb_clear_interrupt_scheme(adapter);
1397		err = igb_init_interrupt_scheme(adapter, false);
1398		if (err)
1399			goto request_done;
1400
1401		igb_setup_all_tx_resources(adapter);
1402		igb_setup_all_rx_resources(adapter);
1403		igb_configure(adapter);
1404	}
1405
1406	igb_assign_vector(adapter->q_vector[0], 0);
1407
1408	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1409		err = request_irq(pdev->irq, igb_intr_msi, 0,
1410				  netdev->name, adapter);
1411		if (!err)
1412			goto request_done;
1413
1414		/* fall back to legacy interrupts */
1415		igb_reset_interrupt_capability(adapter);
1416		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1417	}
1418
1419	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1420			  netdev->name, adapter);
1421
1422	if (err)
1423		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1424			err);
1425
1426request_done:
1427	return err;
1428}
1429
1430static void igb_free_irq(struct igb_adapter *adapter)
1431{
1432	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1433		int vector = 0, i;
1434
1435		free_irq(adapter->msix_entries[vector++].vector, adapter);
1436
1437		for (i = 0; i < adapter->num_q_vectors; i++)
1438			free_irq(adapter->msix_entries[vector++].vector,
1439				 adapter->q_vector[i]);
1440	} else {
1441		free_irq(adapter->pdev->irq, adapter);
1442	}
1443}
1444
1445/**
1446 *  igb_irq_disable - Mask off interrupt generation on the NIC
1447 *  @adapter: board private structure
1448 **/
1449static void igb_irq_disable(struct igb_adapter *adapter)
1450{
1451	struct e1000_hw *hw = &adapter->hw;
1452
1453	/* we need to be careful when disabling interrupts.  The VFs are also
1454	 * mapped into these registers and so clearing the bits can cause
1455	 * issues on the VF drivers so we only need to clear what we set
1456	 */
1457	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1458		u32 regval = rd32(E1000_EIAM);
1459		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1460		wr32(E1000_EIMC, adapter->eims_enable_mask);
1461		regval = rd32(E1000_EIAC);
1462		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1463	}
1464
1465	wr32(E1000_IAM, 0);
1466	wr32(E1000_IMC, ~0);
1467	wrfl();
1468	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1469		int i;
1470		for (i = 0; i < adapter->num_q_vectors; i++)
1471			synchronize_irq(adapter->msix_entries[i].vector);
1472	} else {
1473		synchronize_irq(adapter->pdev->irq);
1474	}
1475}
1476
1477/**
1478 *  igb_irq_enable - Enable default interrupt generation settings
1479 *  @adapter: board private structure
1480 **/
1481static void igb_irq_enable(struct igb_adapter *adapter)
1482{
1483	struct e1000_hw *hw = &adapter->hw;
1484
1485	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1486		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1487		u32 regval = rd32(E1000_EIAC);
1488		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1489		regval = rd32(E1000_EIAM);
1490		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1491		wr32(E1000_EIMS, adapter->eims_enable_mask);
1492		if (adapter->vfs_allocated_count) {
1493			wr32(E1000_MBVFIMR, 0xFF);
1494			ims |= E1000_IMS_VMMB;
1495		}
1496		wr32(E1000_IMS, ims);
1497	} else {
1498		wr32(E1000_IMS, IMS_ENABLE_MASK |
1499				E1000_IMS_DRSTA);
1500		wr32(E1000_IAM, IMS_ENABLE_MASK |
1501				E1000_IMS_DRSTA);
1502	}
1503}
1504
1505static void igb_update_mng_vlan(struct igb_adapter *adapter)
1506{
1507	struct e1000_hw *hw = &adapter->hw;
1508	u16 vid = adapter->hw.mng_cookie.vlan_id;
1509	u16 old_vid = adapter->mng_vlan_id;
1510
1511	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1512		/* add VID to filter table */
1513		igb_vfta_set(hw, vid, true);
1514		adapter->mng_vlan_id = vid;
1515	} else {
1516		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1517	}
1518
1519	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1520	    (vid != old_vid) &&
1521	    !test_bit(old_vid, adapter->active_vlans)) {
1522		/* remove VID from filter table */
1523		igb_vfta_set(hw, old_vid, false);
1524	}
1525}
1526
1527/**
1528 *  igb_release_hw_control - release control of the h/w to f/w
1529 *  @adapter: address of board private structure
1530 *
1531 *  igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1532 *  For ASF and Pass Through versions of f/w this means that the
1533 *  driver is no longer loaded.
1534 **/
1535static void igb_release_hw_control(struct igb_adapter *adapter)
1536{
1537	struct e1000_hw *hw = &adapter->hw;
1538	u32 ctrl_ext;
1539
1540	/* Let firmware take over control of h/w */
1541	ctrl_ext = rd32(E1000_CTRL_EXT);
1542	wr32(E1000_CTRL_EXT,
1543			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1544}
1545
1546/**
1547 *  igb_get_hw_control - get control of the h/w from f/w
1548 *  @adapter: address of board private structure
1549 *
1550 *  igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1551 *  For ASF and Pass Through versions of f/w this means that
1552 *  the driver is loaded.
1553 **/
1554static void igb_get_hw_control(struct igb_adapter *adapter)
1555{
1556	struct e1000_hw *hw = &adapter->hw;
1557	u32 ctrl_ext;
1558
1559	/* Let firmware know the driver has taken over */
1560	ctrl_ext = rd32(E1000_CTRL_EXT);
1561	wr32(E1000_CTRL_EXT,
1562			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1563}
1564
1565/**
1566 *  igb_configure - configure the hardware for RX and TX
1567 *  @adapter: private board structure
1568 **/
1569static void igb_configure(struct igb_adapter *adapter)
1570{
1571	struct net_device *netdev = adapter->netdev;
1572	int i;
1573
1574	igb_get_hw_control(adapter);
1575	igb_set_rx_mode(netdev);
1576
1577	igb_restore_vlan(adapter);
1578
1579	igb_setup_tctl(adapter);
1580	igb_setup_mrqc(adapter);
1581	igb_setup_rctl(adapter);
1582
1583	igb_configure_tx(adapter);
1584	igb_configure_rx(adapter);
1585
1586	igb_rx_fifo_flush_82575(&adapter->hw);
1587
1588	/* call igb_desc_unused which always leaves
1589	 * at least 1 descriptor unused to make sure
1590	 * next_to_use != next_to_clean
1591	 */
1592	for (i = 0; i < adapter->num_rx_queues; i++) {
1593		struct igb_ring *ring = adapter->rx_ring[i];
1594		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1595	}
1596}
1597
1598/**
1599 *  igb_power_up_link - Power up the phy/serdes link
1600 *  @adapter: address of board private structure
1601 **/
1602void igb_power_up_link(struct igb_adapter *adapter)
1603{
1604	igb_reset_phy(&adapter->hw);
1605
1606	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1607		igb_power_up_phy_copper(&adapter->hw);
1608	else
1609		igb_power_up_serdes_link_82575(&adapter->hw);
1610}
1611
1612/**
1613 *  igb_power_down_link - Power down the phy/serdes link
1614 *  @adapter: address of board private structure
1615 */
1616static void igb_power_down_link(struct igb_adapter *adapter)
1617{
1618	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1619		igb_power_down_phy_copper_82575(&adapter->hw);
1620	else
1621		igb_shutdown_serdes_link_82575(&adapter->hw);
1622}
1623
1624/**
1625 *  igb_check_swap_media - Detect and switch function for Media Auto Sense
1626 *  @adapter: address of the board private structure
1627 **/
1628static void igb_check_swap_media(struct igb_adapter *adapter)
1629{
1630	struct e1000_hw *hw = &adapter->hw;
1631	u32 ctrl_ext, connsw;
1632	bool swap_now = false;
1633
1634	ctrl_ext = rd32(E1000_CTRL_EXT);
1635	connsw = rd32(E1000_CONNSW);
1636
1637	/* need to live swap if current media is copper and we have fiber/serdes
1638	 * to go to.
1639	 */
1640
1641	if ((hw->phy.media_type == e1000_media_type_copper) &&
1642	    (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
1643		swap_now = true;
1644	} else if (!(connsw & E1000_CONNSW_SERDESD)) {
1645		/* copper signal takes time to appear */
1646		if (adapter->copper_tries < 4) {
1647			adapter->copper_tries++;
1648			connsw |= E1000_CONNSW_AUTOSENSE_CONF;
1649			wr32(E1000_CONNSW, connsw);
1650			return;
1651		} else {
1652			adapter->copper_tries = 0;
1653			if ((connsw & E1000_CONNSW_PHYSD) &&
1654			    (!(connsw & E1000_CONNSW_PHY_PDN))) {
1655				swap_now = true;
1656				connsw &= ~E1000_CONNSW_AUTOSENSE_CONF;
1657				wr32(E1000_CONNSW, connsw);
1658			}
1659		}
1660	}
1661
1662	if (!swap_now)
1663		return;
1664
1665	switch (hw->phy.media_type) {
1666	case e1000_media_type_copper:
1667		netdev_info(adapter->netdev,
1668			"MAS: changing media to fiber/serdes\n");
1669		ctrl_ext |=
1670			E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
1671		adapter->flags |= IGB_FLAG_MEDIA_RESET;
1672		adapter->copper_tries = 0;
1673		break;
1674	case e1000_media_type_internal_serdes:
1675	case e1000_media_type_fiber:
1676		netdev_info(adapter->netdev,
1677			"MAS: changing media to copper\n");
1678		ctrl_ext &=
1679			~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
1680		adapter->flags |= IGB_FLAG_MEDIA_RESET;
1681		break;
1682	default:
1683		/* shouldn't get here during regular operation */
1684		netdev_err(adapter->netdev,
1685			"AMS: Invalid media type found, returning\n");
1686		break;
1687	}
1688	wr32(E1000_CTRL_EXT, ctrl_ext);
1689}
1690
1691/**
1692 *  igb_up - Open the interface and prepare it to handle traffic
1693 *  @adapter: board private structure
1694 **/
1695int igb_up(struct igb_adapter *adapter)
1696{
1697	struct e1000_hw *hw = &adapter->hw;
1698	int i;
1699
1700	/* hardware has been reset, we need to reload some things */
1701	igb_configure(adapter);
1702
1703	clear_bit(__IGB_DOWN, &adapter->state);
1704
1705	for (i = 0; i < adapter->num_q_vectors; i++)
1706		napi_enable(&(adapter->q_vector[i]->napi));
1707
1708	if (adapter->flags & IGB_FLAG_HAS_MSIX)
1709		igb_configure_msix(adapter);
1710	else
1711		igb_assign_vector(adapter->q_vector[0], 0);
1712
1713	/* Clear any pending interrupts. */
1714	rd32(E1000_ICR);
1715	igb_irq_enable(adapter);
1716
1717	/* notify VFs that reset has been completed */
1718	if (adapter->vfs_allocated_count) {
1719		u32 reg_data = rd32(E1000_CTRL_EXT);
1720		reg_data |= E1000_CTRL_EXT_PFRSTD;
1721		wr32(E1000_CTRL_EXT, reg_data);
1722	}
1723
1724	netif_tx_start_all_queues(adapter->netdev);
1725
1726	/* start the watchdog. */
1727	hw->mac.get_link_status = 1;
1728	schedule_work(&adapter->watchdog_task);
1729
1730	return 0;
1731}
1732
1733void igb_down(struct igb_adapter *adapter)
1734{
1735	struct net_device *netdev = adapter->netdev;
1736	struct e1000_hw *hw = &adapter->hw;
1737	u32 tctl, rctl;
1738	int i;
1739
1740	/* signal that we're down so the interrupt handler does not
1741	 * reschedule our watchdog timer
1742	 */
1743	set_bit(__IGB_DOWN, &adapter->state);
1744
1745	/* disable receives in the hardware */
1746	rctl = rd32(E1000_RCTL);
1747	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1748	/* flush and sleep below */
1749
1750	netif_tx_stop_all_queues(netdev);
1751
1752	/* disable transmits in the hardware */
1753	tctl = rd32(E1000_TCTL);
1754	tctl &= ~E1000_TCTL_EN;
1755	wr32(E1000_TCTL, tctl);
1756	/* flush both disables and wait for them to finish */
1757	wrfl();
1758	msleep(10);
1759
1760	igb_irq_disable(adapter);
1761
1762	adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
1763
1764	for (i = 0; i < adapter->num_q_vectors; i++) {
1765		napi_synchronize(&(adapter->q_vector[i]->napi));
1766		napi_disable(&(adapter->q_vector[i]->napi));
1767	}
1768
1770	del_timer_sync(&adapter->watchdog_timer);
1771	del_timer_sync(&adapter->phy_info_timer);
1772
1773	netif_carrier_off(netdev);
1774
1775	/* record the stats before reset */
1776	spin_lock(&adapter->stats64_lock);
1777	igb_update_stats(adapter, &adapter->stats64);
1778	spin_unlock(&adapter->stats64_lock);
1779
1780	adapter->link_speed = 0;
1781	adapter->link_duplex = 0;
1782
1783	if (!pci_channel_offline(adapter->pdev))
1784		igb_reset(adapter);
1785	igb_clean_all_tx_rings(adapter);
1786	igb_clean_all_rx_rings(adapter);
1787#ifdef CONFIG_IGB_DCA
1788
1789	/* since we reset the hardware DCA settings were cleared */
1790	igb_setup_dca(adapter);
1791#endif
1792}
1793
1794void igb_reinit_locked(struct igb_adapter *adapter)
1795{
1796	WARN_ON(in_interrupt());
1797	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1798		msleep(1);
1799	igb_down(adapter);
1800	igb_up(adapter);
1801	clear_bit(__IGB_RESETTING, &adapter->state);
1802}
1803
1804/**
1805 *  igb_enable_mas - Media Autosense re-enable after swap
1806 *  @adapter: adapter struct
1807 **/
1808static s32 igb_enable_mas(struct igb_adapter *adapter)
1809{
1810	struct e1000_hw *hw = &adapter->hw;
1811	u32 connsw;
1812	s32 ret_val = 0;
1813
1814	connsw = rd32(E1000_CONNSW);
1815	if (hw->phy.media_type != e1000_media_type_copper)
1816		return ret_val;
1817
1818	/* configure for SerDes media detect */
1819	if (!(connsw & E1000_CONNSW_SERDESD)) {
1820		connsw |= E1000_CONNSW_ENRGSRC;
1821		connsw |= E1000_CONNSW_AUTOSENSE_EN;
1822		wr32(E1000_CONNSW, connsw);
1823		wrfl();
1824	} else if (connsw & E1000_CONNSW_SERDESD) {
1825		/* already SerDes, no need to enable anything */
1826		return ret_val;
1827	} else {
1828		netdev_info(adapter->netdev,
1829			"MAS: Unable to configure feature, disabling..\n");
1830		adapter->flags &= ~IGB_FLAG_MAS_ENABLE;
1831	}
1832	return ret_val;
1833}
1834
1835void igb_reset(struct igb_adapter *adapter)
1836{
1837	struct pci_dev *pdev = adapter->pdev;
1838	struct e1000_hw *hw = &adapter->hw;
1839	struct e1000_mac_info *mac = &hw->mac;
1840	struct e1000_fc_info *fc = &hw->fc;
1841	u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm;
1842
1843	/* Repartition PBA for greater than 9k MTU.
1844	 * To take effect, CTRL.RST is required.
1845	 */
1846	switch (mac->type) {
1847	case e1000_i350:
1848	case e1000_i354:
1849	case e1000_82580:
1850		pba = rd32(E1000_RXPBS);
1851		pba = igb_rxpbs_adjust_82580(pba);
1852		break;
1853	case e1000_82576:
1854		pba = rd32(E1000_RXPBS);
1855		pba &= E1000_RXPBS_SIZE_MASK_82576;
1856		break;
1857	case e1000_82575:
1858	case e1000_i210:
1859	case e1000_i211:
1860	default:
1861		pba = E1000_PBA_34K;
1862		break;
1863	}
1864
1865	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1866	    (mac->type < e1000_82576)) {
1867		/* adjust PBA for jumbo frames */
1868		wr32(E1000_PBA, pba);
1869
1870		/* To maintain wire speed transmits, the Tx FIFO should be
1871		 * large enough to accommodate two full transmit packets,
1872		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1873		 * the Rx FIFO should be large enough to accommodate at least
1874		 * one full receive packet and is similarly rounded up and
1875		 * expressed in KB.
1876		 */
1877		pba = rd32(E1000_PBA);
1878		/* upper 16 bits hold Tx packet buffer allocation size in KB */
1879		tx_space = pba >> 16;
1880		/* lower 16 bits hold Rx packet buffer allocation size in KB */
1881		pba &= 0xffff;
1882		/* each Tx packet in the FIFO also carries 16 bytes of metadata;
1883		 * don't count the Ethernet FCS because hardware appends it
1884		 */
1885		min_tx_space = (adapter->max_frame_size +
1886				sizeof(union e1000_adv_tx_desc) -
1887				ETH_FCS_LEN) * 2;
1888		min_tx_space = ALIGN(min_tx_space, 1024);
1889		min_tx_space >>= 10;
1890		/* software strips receive CRC, so leave room for it */
1891		min_rx_space = adapter->max_frame_size;
1892		min_rx_space = ALIGN(min_rx_space, 1024);
1893		min_rx_space >>= 10;
1894
1895		/* If current Tx allocation is less than the min Tx FIFO size,
1896		 * and the min Tx FIFO size is less than the current Rx FIFO
1897		 * allocation, take space away from current Rx allocation
1898		 */
1899		if (tx_space < min_tx_space &&
1900		    ((min_tx_space - tx_space) < pba)) {
1901			pba = pba - (min_tx_space - tx_space);
1902
1903			/* if short on Rx space, Rx wins and must trump Tx
1904			 * adjustment
1905			 */
1906			if (pba < min_rx_space)
1907				pba = min_rx_space;
1908		}
1909		wr32(E1000_PBA, pba);
1910	}
1911
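	/* Worked example (a sketch, assuming a hypothetical 9018-byte jumbo
	 * max_frame_size and the 16-byte union e1000_adv_tx_desc):
	 *   min_tx_space = (9018 + 16 - 4) * 2 = 18060
	 *                  ALIGN(18060, 1024) = 18432, >> 10 = 18 KB
	 *   min_rx_space = ALIGN(9018, 1024) = 9216, >> 10 = 9 KB
	 * So if the current Tx allocation were only 10 KB, 8 KB would be moved
	 * from the Rx allocation as long as at least 9 KB remains for Rx.
	 */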
1912	/* flow control settings */
1913	/* The high water mark must be low enough to fit one full frame
1914	 * (or the size used for early receive) above it in the Rx FIFO.
1915	 * Set it to the lower of:
1916	 * - 90% of the Rx FIFO size, or
1917	 * - the full Rx FIFO size minus two full frames
1918	 */
1919	hwm = min(((pba << 10) * 9 / 10),
1920			((pba << 10) - 2 * adapter->max_frame_size));
1921
1922	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
1923	fc->low_water = fc->high_water - 16;
1924	fc->pause_time = 0xFFFF;
1925	fc->send_xon = 1;
1926	fc->current_mode = fc->requested_mode;
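	/* Worked example (a sketch, assuming the default 34 KB PBA and a
	 * 1522-byte max_frame_size):
	 *   90% of FIFO:        (34 << 10) * 9 / 10   = 31334
	 *   FIFO - two frames:  (34 << 10) - 2 * 1522 = 31772
	 *   hwm = min(31334, 31772) = 31334
	 *   high_water = 31334 & 0xFFFFFFF0 = 31328, low_water = 31312
	 */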
1927
1928	/* quiesce all VFs before the reset */
1929	if (adapter->vfs_allocated_count) {
1930		int i;
1931		for (i = 0; i < adapter->vfs_allocated_count; i++)
1932			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1933
1934		/* ping all the active vfs to let them know we are going down */
1935		igb_ping_all_vfs(adapter);
1936
1937		/* disable transmits and receives */
1938		wr32(E1000_VFRE, 0);
1939		wr32(E1000_VFTE, 0);
1940	}
1941
1942	/* Allow time for pending master requests to run */
1943	hw->mac.ops.reset_hw(hw);
1944	wr32(E1000_WUC, 0);
1945
1946	if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
1947		/* need to resetup here after media swap */
1948		adapter->ei.get_invariants(hw);
1949		adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
1950	}
1951	if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
1952		if (igb_enable_mas(adapter))
1953			dev_err(&pdev->dev,
1954				"Error enabling Media Auto Sense\n");
1955	}
1956	if (hw->mac.ops.init_hw(hw))
1957		dev_err(&pdev->dev, "Hardware Error\n");
1958
1959	/* Flow control settings reset on hardware reset, so guarantee flow
1960	 * control is off when forcing speed.
1961	 */
1962	if (!hw->mac.autoneg)
1963		igb_force_mac_fc(hw);
1964
1965	igb_init_dmac(adapter, pba);
1966#ifdef CONFIG_IGB_HWMON
1967	/* Re-initialize the thermal sensor on i350 devices. */
1968	if (!test_bit(__IGB_DOWN, &adapter->state)) {
1969		if (mac->type == e1000_i350 && hw->bus.func == 0) {
1970			/* If present, re-initialize the external thermal sensor
1971			 * interface.
1972			 */
1973			if (adapter->ets)
1974				mac->ops.init_thermal_sensor_thresh(hw);
1975		}
1976	}
1977#endif
1978	if (!netif_running(adapter->netdev))
1979		igb_power_down_link(adapter);
1980
1981	igb_update_mng_vlan(adapter);
1982
1983	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1984	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1985
1986	/* Re-enable PTP, where applicable. */
1987	igb_ptp_reset(adapter);
1988
1989	igb_get_phy_info(hw);
1990}
1991
1992static netdev_features_t igb_fix_features(struct net_device *netdev,
1993	netdev_features_t features)
1994{
1995	/* Since there is no support for separate Rx/Tx vlan accel
1996	 * enable/disable make sure Tx flag is always in same state as Rx.
1997	 */
1998	if (features & NETIF_F_HW_VLAN_CTAG_RX)
1999		features |= NETIF_F_HW_VLAN_CTAG_TX;
2000	else
2001		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
2002
2003	return features;
2004}
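/* Usage sketch for the coupling above: since Rx and Tx VLAN acceleration
 * cannot be toggled independently, requesting either one from user space
 * changes both ("eth0" is only a placeholder interface name):
 *
 *	ethtool -K eth0 rxvlan off	(txvlan is forced off as well)
 *	ethtool -K eth0 rxvlan on	(txvlan comes back on with it)
 */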
2005
2006static int igb_set_features(struct net_device *netdev,
2007	netdev_features_t features)
2008{
2009	netdev_features_t changed = netdev->features ^ features;
2010	struct igb_adapter *adapter = netdev_priv(netdev);
2011
2012	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
2013		igb_vlan_mode(netdev, features);
2014
2015	if (!(changed & NETIF_F_RXALL))
2016		return 0;
2017
2018	netdev->features = features;
2019
2020	if (netif_running(netdev))
2021		igb_reinit_locked(adapter);
2022	else
2023		igb_reset(adapter);
2024
2025	return 0;
2026}
2027
2028static const struct net_device_ops igb_netdev_ops = {
2029	.ndo_open		= igb_open,
2030	.ndo_stop		= igb_close,
2031	.ndo_start_xmit		= igb_xmit_frame,
2032	.ndo_get_stats64	= igb_get_stats64,
2033	.ndo_set_rx_mode	= igb_set_rx_mode,
2034	.ndo_set_mac_address	= igb_set_mac,
2035	.ndo_change_mtu		= igb_change_mtu,
2036	.ndo_do_ioctl		= igb_ioctl,
2037	.ndo_tx_timeout		= igb_tx_timeout,
2038	.ndo_validate_addr	= eth_validate_addr,
2039	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
2040	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
2041	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
2042	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
2043	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
2044	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
2045	.ndo_get_vf_config	= igb_ndo_get_vf_config,
2046#ifdef CONFIG_NET_POLL_CONTROLLER
2047	.ndo_poll_controller	= igb_netpoll,
2048#endif
2049	.ndo_fix_features	= igb_fix_features,
2050	.ndo_set_features	= igb_set_features,
2051};
2052
2053/**
2054 * igb_set_fw_version - Configure version string for ethtool
2055 * @adapter: adapter struct
2056 **/
2057void igb_set_fw_version(struct igb_adapter *adapter)
2058{
2059	struct e1000_hw *hw = &adapter->hw;
2060	struct e1000_fw_version fw;
2061
2062	igb_get_fw_version(hw, &fw);
2063
2064	switch (hw->mac.type) {
2065	case e1000_i210:
2066	case e1000_i211:
2067		if (!(igb_get_flash_presence_i210(hw))) {
2068			snprintf(adapter->fw_version,
2069				 sizeof(adapter->fw_version),
2070				 "%2d.%2d-%d",
2071				 fw.invm_major, fw.invm_minor,
2072				 fw.invm_img_type);
2073			break;
2074		}
2075		/* fall through */
2076	default:
2077		/* if option is rom valid, display its version too */
2078		if (fw.or_valid) {
2079			snprintf(adapter->fw_version,
2080				 sizeof(adapter->fw_version),
2081				 "%d.%d, 0x%08x, %d.%d.%d",
2082				 fw.eep_major, fw.eep_minor, fw.etrack_id,
2083				 fw.or_major, fw.or_build, fw.or_patch);
2084		/* no option rom */
2085		} else if (fw.etrack_id != 0x0000) {
2086			snprintf(adapter->fw_version,
2087			    sizeof(adapter->fw_version),
2088			    "%d.%d, 0x%08x",
2089			    fw.eep_major, fw.eep_minor, fw.etrack_id);
2090		} else {
2091			snprintf(adapter->fw_version,
2092				 sizeof(adapter->fw_version),
2093				 "%d.%d.%d",
2094				 fw.eep_major, fw.eep_minor, fw.eep_build);
2095		}
2096		break;
2097	}
2098	return;
2099}
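/* Illustrative outputs of igb_set_fw_version as later reported by
 * "ethtool -i" (the version numbers below are made up, only the formats are
 * taken from the format strings above):
 *   flashless i210/i211 (iNVM):        " 1. 4-0"
 *   NVM with a valid option ROM:       "1.63, 0x800005cf, 1.1824.0"
 *   NVM with an eTrack ID, no OPROM:   "1.63, 0x800005cf"
 *   NVM with neither:                  "1.63.35"
 */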
2100
2101/**
2102 * igb_init_mas - init Media Autosense feature if enabled in the NVM
2103 *
2104 * @adapter: adapter struct
2105 **/
2106static void igb_init_mas(struct igb_adapter *adapter)
2107{
2108	struct e1000_hw *hw = &adapter->hw;
2109	u16 eeprom_data;
2110
2111	hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data);
2112	switch (hw->bus.func) {
2113	case E1000_FUNC_0:
2114		if (eeprom_data & IGB_MAS_ENABLE_0) {
2115			adapter->flags |= IGB_FLAG_MAS_ENABLE;
2116			netdev_info(adapter->netdev,
2117				"MAS: Enabling Media Autosense for port %d\n",
2118				hw->bus.func);
2119		}
2120		break;
2121	case E1000_FUNC_1:
2122		if (eeprom_data & IGB_MAS_ENABLE_1) {
2123			adapter->flags |= IGB_FLAG_MAS_ENABLE;
2124			netdev_info(adapter->netdev,
2125				"MAS: Enabling Media Autosense for port %d\n",
2126				hw->bus.func);
2127		}
2128		break;
2129	case E1000_FUNC_2:
2130		if (eeprom_data & IGB_MAS_ENABLE_2) {
2131			adapter->flags |= IGB_FLAG_MAS_ENABLE;
2132			netdev_info(adapter->netdev,
2133				"MAS: Enabling Media Autosense for port %d\n",
2134				hw->bus.func);
2135		}
2136		break;
2137	case E1000_FUNC_3:
2138		if (eeprom_data & IGB_MAS_ENABLE_3) {
2139			adapter->flags |= IGB_FLAG_MAS_ENABLE;
2140			netdev_info(adapter->netdev,
2141				"MAS: Enabling Media Autosense for port %d\n",
2142				hw->bus.func);
2143		}
2144		break;
2145	default:
2146		/* Shouldn't get here */
2147		netdev_err(adapter->netdev,
2148			"MAS: Invalid port configuration, returning\n");
2149		break;
2150	}
2151}
2152
2153/**
2154 *  igb_init_i2c - Init I2C interface
2155 *  @adapter: pointer to adapter structure
2156 **/
2157static s32 igb_init_i2c(struct igb_adapter *adapter)
2158{
2159	s32 status = E1000_SUCCESS;
2160
2161	/* I2C interface supported on i350 devices */
2162	if (adapter->hw.mac.type != e1000_i350)
2163		return E1000_SUCCESS;
2164
2165	/* Initialize the i2c bus which is controlled by the registers.
2166	 * This bus will use the i2c_algo_bit structue that implements
2167	 * This bus will use the i2c_algo_bit structure that implements
2168	 */
2169	adapter->i2c_adap.owner = THIS_MODULE;
2170	adapter->i2c_algo = igb_i2c_algo;
2171	adapter->i2c_algo.data = adapter;
2172	adapter->i2c_adap.algo_data = &adapter->i2c_algo;
2173	adapter->i2c_adap.dev.parent = &adapter->pdev->dev;
2174	strlcpy(adapter->i2c_adap.name, "igb BB",
2175		sizeof(adapter->i2c_adap.name));
2176	status = i2c_bit_add_bus(&adapter->i2c_adap);
2177	return status;
2178}
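/* A sketch of how the registered bit-banged bus is typically consumed: once
 * i2c_bit_add_bus() succeeds, ordinary kernel I2C/SMBus calls can be issued
 * against adapter->i2c_adap, for example (client setup omitted, the 0x01
 * register offset is hypothetical):
 *
 *	struct i2c_client *client;	// client bound to &adapter->i2c_adap
 *	s32 val = i2c_smbus_read_byte_data(client, 0x01);
 */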
2179
2180/**
2181 *  igb_probe - Device Initialization Routine
2182 *  @pdev: PCI device information struct
2183 *  @ent: entry in igb_pci_tbl
2184 *
2185 *  Returns 0 on success, negative on failure
2186 *
2187 *  igb_probe initializes an adapter identified by a pci_dev structure.
2188 *  The OS initialization, configuring of the adapter private structure,
2189 *  and a hardware reset occur.
2190 **/
2191static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2192{
2193	struct net_device *netdev;
2194	struct igb_adapter *adapter;
2195	struct e1000_hw *hw;
2196	u16 eeprom_data = 0;
2197	s32 ret_val;
2198	static int global_quad_port_a; /* global quad port a indication */
2199	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
2200	int err, pci_using_dac;
2201	u8 part_str[E1000_PBANUM_LENGTH];
2202
2203	/* Catch broken hardware that put the wrong VF device ID in
2204	 * the PCIe SR-IOV capability.
2205	 */
2206	if (pdev->is_virtfn) {
2207		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
2208			pci_name(pdev), pdev->vendor, pdev->device);
2209		return -EINVAL;
2210	}
2211
2212	err = pci_enable_device_mem(pdev);
2213	if (err)
2214		return err;
2215
2216	pci_using_dac = 0;
2217	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2218	if (!err) {
2219		pci_using_dac = 1;
2220	} else {
2221		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
2222		if (err) {
2223			dev_err(&pdev->dev,
2224				"No usable DMA configuration, aborting\n");
2225			goto err_dma;
2226		}
2227	}
2228
2229	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
2230					   IORESOURCE_MEM),
2231					   igb_driver_name);
2232	if (err)
2233		goto err_pci_reg;
2234
2235	pci_enable_pcie_error_reporting(pdev);
2236
2237	pci_set_master(pdev);
2238	pci_save_state(pdev);
2239
2240	err = -ENOMEM;
2241	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
2242				   IGB_MAX_TX_QUEUES);
2243	if (!netdev)
2244		goto err_alloc_etherdev;
2245
2246	SET_NETDEV_DEV(netdev, &pdev->dev);
2247
2248	pci_set_drvdata(pdev, netdev);
2249	adapter = netdev_priv(netdev);
2250	adapter->netdev = netdev;
2251	adapter->pdev = pdev;
2252	hw = &adapter->hw;
2253	hw->back = adapter;
2254	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
2255
2256	err = -EIO;
2257	hw->hw_addr = pci_iomap(pdev, 0, 0);
2258	if (!hw->hw_addr)
2259		goto err_ioremap;
2260
2261	netdev->netdev_ops = &igb_netdev_ops;
2262	igb_set_ethtool_ops(netdev);
2263	netdev->watchdog_timeo = 5 * HZ;
2264
2265	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
2266
2267	netdev->mem_start = pci_resource_start(pdev, 0);
2268	netdev->mem_end = pci_resource_end(pdev, 0);
2269
2270	/* PCI config space info */
2271	hw->vendor_id = pdev->vendor;
2272	hw->device_id = pdev->device;
2273	hw->revision_id = pdev->revision;
2274	hw->subsystem_vendor_id = pdev->subsystem_vendor;
2275	hw->subsystem_device_id = pdev->subsystem_device;
2276
2277	/* Copy the default MAC, PHY and NVM function pointers */
2278	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
2279	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
2280	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
2281	/* Initialize skew-specific constants */
2282	err = ei->get_invariants(hw);
2283	if (err)
2284		goto err_sw_init;
2285
2286	/* setup the private structure */
2287	err = igb_sw_init(adapter);
2288	if (err)
2289		goto err_sw_init;
2290
2291	igb_get_bus_info_pcie(hw);
2292
2293	hw->phy.autoneg_wait_to_complete = false;
2294
2295	/* Copper options */
2296	if (hw->phy.media_type == e1000_media_type_copper) {
2297		hw->phy.mdix = AUTO_ALL_MODES;
2298		hw->phy.disable_polarity_correction = false;
2299		hw->phy.ms_type = e1000_ms_hw_default;
2300	}
2301
2302	if (igb_check_reset_block(hw))
2303		dev_info(&pdev->dev,
2304			"PHY reset is blocked due to SOL/IDER session.\n");
2305
2306	/* features is initialized to 0 during allocation; it may already have
2307	 * bits set by igb_sw_init, so use an OR instead of an
2308	 * assignment.
2309	 */
2310	netdev->features |= NETIF_F_SG |
2311			    NETIF_F_IP_CSUM |
2312			    NETIF_F_IPV6_CSUM |
2313			    NETIF_F_TSO |
2314			    NETIF_F_TSO6 |
2315			    NETIF_F_RXHASH |
2316			    NETIF_F_RXCSUM |
2317			    NETIF_F_HW_VLAN_CTAG_RX |
2318			    NETIF_F_HW_VLAN_CTAG_TX;
2319
2320	/* copy netdev features into list of user selectable features */
2321	netdev->hw_features |= netdev->features;
2322	netdev->hw_features |= NETIF_F_RXALL;
2323
2324	/* set this bit last since it cannot be part of hw_features */
2325	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
2326
2327	netdev->vlan_features |= NETIF_F_TSO |
2328				 NETIF_F_TSO6 |
2329				 NETIF_F_IP_CSUM |
2330				 NETIF_F_IPV6_CSUM |
2331				 NETIF_F_SG;
2332
2333	netdev->priv_flags |= IFF_SUPP_NOFCS;
2334
2335	if (pci_using_dac) {
2336		netdev->features |= NETIF_F_HIGHDMA;
2337		netdev->vlan_features |= NETIF_F_HIGHDMA;
2338	}
2339
2340	if (hw->mac.type >= e1000_82576) {
2341		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2342		netdev->features |= NETIF_F_SCTP_CSUM;
2343	}
2344
2345	netdev->priv_flags |= IFF_UNICAST_FLT;
2346
2347	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2348
2349	/* before reading the NVM, reset the controller to put the device in a
2350	 * known good starting state
2351	 */
2352	hw->mac.ops.reset_hw(hw);
2353
2354	/* make sure the NVM is good, i211/i210 parts can have special NVM
2355	 * that doesn't contain a checksum
2356	 */
2357	switch (hw->mac.type) {
2358	case e1000_i210:
2359	case e1000_i211:
2360		if (igb_get_flash_presence_i210(hw)) {
2361			if (hw->nvm.ops.validate(hw) < 0) {
2362				dev_err(&pdev->dev,
2363					"The NVM Checksum Is Not Valid\n");
2364				err = -EIO;
2365				goto err_eeprom;
2366			}
2367		}
2368		break;
2369	default:
2370		if (hw->nvm.ops.validate(hw) < 0) {
2371			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2372			err = -EIO;
2373			goto err_eeprom;
2374		}
2375		break;
2376	}
2377
2378	/* copy the MAC address out of the NVM */
2379	if (hw->mac.ops.read_mac_addr(hw))
2380		dev_err(&pdev->dev, "NVM Read Error\n");
2381
2382	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2383
2384	if (!is_valid_ether_addr(netdev->dev_addr)) {
2385		dev_err(&pdev->dev, "Invalid MAC Address\n");
2386		err = -EIO;
2387		goto err_eeprom;
2388	}
2389
2390	/* get firmware version for ethtool -i */
2391	igb_set_fw_version(adapter);
2392
2393	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2394		    (unsigned long) adapter);
2395	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2396		    (unsigned long) adapter);
2397
2398	INIT_WORK(&adapter->reset_task, igb_reset_task);
2399	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2400
2401	/* Initialize link properties that are user-changeable */
2402	adapter->fc_autoneg = true;
2403	hw->mac.autoneg = true;
2404	hw->phy.autoneg_advertised = 0x2f;
2405
2406	hw->fc.requested_mode = e1000_fc_default;
2407	hw->fc.current_mode = e1000_fc_default;
2408
2409	igb_validate_mdi_setting(hw);
2410
2411	/* By default, support wake on port A */
2412	if (hw->bus.func == 0)
2413		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2414
2415	/* Check the NVM for wake support on non-port A ports */
2416	if (hw->mac.type >= e1000_82580)
2417		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2418				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2419				 &eeprom_data);
2420	else if (hw->bus.func == 1)
2421		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2422
2423	if (eeprom_data & IGB_EEPROM_APME)
2424		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2425
2426	/* now that we have the eeprom settings, apply the special cases where
2427	 * the eeprom may be wrong or the board simply won't support wake on
2428	 * lan on a particular port
2429	 */
2430	switch (pdev->device) {
2431	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2432		adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2433		break;
2434	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2435	case E1000_DEV_ID_82576_FIBER:
2436	case E1000_DEV_ID_82576_SERDES:
2437		/* Wake events only supported on port A for dual fiber
2438		 * regardless of eeprom setting
2439		 */
2440		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2441			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2442		break;
2443	case E1000_DEV_ID_82576_QUAD_COPPER:
2444	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2445		/* if quad port adapter, disable WoL on all but port A */
2446		if (global_quad_port_a != 0)
2447			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2448		else
2449			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2450		/* Reset for multiple quad port adapters */
2451		if (++global_quad_port_a == 4)
2452			global_quad_port_a = 0;
2453		break;
2454	default:
2455		/* If the device can't wake, don't set software support */
2456		if (!device_can_wakeup(&adapter->pdev->dev))
2457			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2458	}
2459
2460	/* initialize the wol settings based on the eeprom settings */
2461	if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
2462		adapter->wol |= E1000_WUFC_MAG;
2463
2464	/* Some vendors want WoL disabled by default, but still supported */
2465	if ((hw->mac.type == e1000_i350) &&
2466	    (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
2467		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2468		adapter->wol = 0;
2469	}
2470
2471	device_set_wakeup_enable(&adapter->pdev->dev,
2472				 adapter->flags & IGB_FLAG_WOL_SUPPORTED);
2473
2474	/* reset the hardware with the new settings */
2475	igb_reset(adapter);
2476
2477	/* Init the I2C interface */
2478	err = igb_init_i2c(adapter);
2479	if (err) {
2480		dev_err(&pdev->dev, "failed to init i2c interface\n");
2481		goto err_eeprom;
2482	}
2483
2484	/* let the f/w know that the h/w is now under the control of the
2485	 * driver. */
2486	igb_get_hw_control(adapter);
2487
2488	strcpy(netdev->name, "eth%d");
2489	err = register_netdev(netdev);
2490	if (err)
2491		goto err_register;
2492
2493	/* carrier off reporting is important to ethtool even BEFORE open */
2494	netif_carrier_off(netdev);
2495
2496#ifdef CONFIG_IGB_DCA
2497	if (dca_add_requester(&pdev->dev) == 0) {
2498		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2499		dev_info(&pdev->dev, "DCA enabled\n");
2500		igb_setup_dca(adapter);
2501	}
2502
2503#endif
2504#ifdef CONFIG_IGB_HWMON
2505	/* Initialize the thermal sensor on i350 devices. */
2506	if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
2507		u16 ets_word;
2508
2509		/* Read the NVM to determine if this i350 device supports an
2510		 * external thermal sensor.
2511		 */
2512		hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word);
2513		if (ets_word != 0x0000 && ets_word != 0xFFFF)
2514			adapter->ets = true;
2515		else
2516			adapter->ets = false;
2517		if (igb_sysfs_init(adapter))
2518			dev_err(&pdev->dev,
2519				"failed to allocate sysfs resources\n");
2520	} else {
2521		adapter->ets = false;
2522	}
2523#endif
2524	/* Check if Media Autosense is enabled */
2525	adapter->ei = *ei;
2526	if (hw->dev_spec._82575.mas_capable)
2527		igb_init_mas(adapter);
2528
2529	/* do hw tstamp init after resetting */
2530	igb_ptp_init(adapter);
2531
2532	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2533	/* print bus type/speed/width info, not applicable to i354 */
2534	if (hw->mac.type != e1000_i354) {
2535		dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2536			 netdev->name,
2537			 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2538			  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2539			   "unknown"),
2540			 ((hw->bus.width == e1000_bus_width_pcie_x4) ?
2541			  "Width x4" :
2542			  (hw->bus.width == e1000_bus_width_pcie_x2) ?
2543			  "Width x2" :
2544			  (hw->bus.width == e1000_bus_width_pcie_x1) ?
2545			  "Width x1" : "unknown"), netdev->dev_addr);
2546	}
2547
2548	if ((hw->mac.type >= e1000_i210 ||
2549	     igb_get_flash_presence_i210(hw))) {
2550		ret_val = igb_read_part_string(hw, part_str,
2551					       E1000_PBANUM_LENGTH);
2552	} else {
2553		ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND;
2554	}
2555
2556	if (ret_val)
2557		strcpy(part_str, "Unknown");
2558	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2559	dev_info(&pdev->dev,
2560		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2561		(adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" :
2562		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2563		adapter->num_rx_queues, adapter->num_tx_queues);
2564	switch (hw->mac.type) {
2565	case e1000_i350:
2566	case e1000_i210:
2567	case e1000_i211:
2568		igb_set_eee_i350(hw);
2569		break;
2570	case e1000_i354:
2571		if (hw->phy.media_type == e1000_media_type_copper) {
2572			if ((rd32(E1000_CTRL_EXT) &
2573			    E1000_CTRL_EXT_LINK_MODE_SGMII))
2574				igb_set_eee_i354(hw);
2575		}
2576		break;
2577	default:
2578		break;
2579	}
2580
2581	pm_runtime_put_noidle(&pdev->dev);
2582	return 0;
2583
2584err_register:
2585	igb_release_hw_control(adapter);
2586	memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
2587err_eeprom:
2588	if (!igb_check_reset_block(hw))
2589		igb_reset_phy(hw);
2590
2591	if (hw->flash_address)
2592		iounmap(hw->flash_address);
2593err_sw_init:
2594	igb_clear_interrupt_scheme(adapter);
2595	iounmap(hw->hw_addr);
2596err_ioremap:
2597	free_netdev(netdev);
2598err_alloc_etherdev:
2599	pci_release_selected_regions(pdev,
2600				     pci_select_bars(pdev, IORESOURCE_MEM));
2601err_pci_reg:
2602err_dma:
2603	pci_disable_device(pdev);
2604	return err;
2605}
2606
2607#ifdef CONFIG_PCI_IOV
2608static int igb_disable_sriov(struct pci_dev *pdev)
2609{
2610	struct net_device *netdev = pci_get_drvdata(pdev);
2611	struct igb_adapter *adapter = netdev_priv(netdev);
2612	struct e1000_hw *hw = &adapter->hw;
2613
2614	/* reclaim resources allocated to VFs */
2615	if (adapter->vf_data) {
2616		/* disable iov and allow time for transactions to clear */
2617		if (pci_vfs_assigned(pdev)) {
2618			dev_warn(&pdev->dev,
2619				 "Cannot deallocate SR-IOV virtual functions while they are assigned - VFs will not be deallocated\n");
2620			return -EPERM;
2621		} else {
2622			pci_disable_sriov(pdev);
2623			msleep(500);
2624		}
2625
2626		kfree(adapter->vf_data);
2627		adapter->vf_data = NULL;
2628		adapter->vfs_allocated_count = 0;
2629		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2630		wrfl();
2631		msleep(100);
2632		dev_info(&pdev->dev, "IOV Disabled\n");
2633
2634		/* Re-enable DMA Coalescing flag since IOV is turned off */
2635		adapter->flags |= IGB_FLAG_DMAC;
2636	}
2637
2638	return 0;
2639}
2640
2641static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs)
2642{
2643	struct net_device *netdev = pci_get_drvdata(pdev);
2644	struct igb_adapter *adapter = netdev_priv(netdev);
2645	int old_vfs = pci_num_vf(pdev);
2646	int err = 0;
2647	int i;
2648
2649	if (!(adapter->flags & IGB_FLAG_HAS_MSIX) || num_vfs > 7) {
2650		err = -EPERM;
2651		goto out;
2652	}
2653	if (!num_vfs)
2654		goto out;
2655
2656	if (old_vfs) {
2657		dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n",
2658			 old_vfs, max_vfs);
2659		adapter->vfs_allocated_count = old_vfs;
2660	} else
2661		adapter->vfs_allocated_count = num_vfs;
2662
2663	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2664				sizeof(struct vf_data_storage), GFP_KERNEL);
2665
2666	/* if allocation failed then we do not support SR-IOV */
2667	if (!adapter->vf_data) {
2668		adapter->vfs_allocated_count = 0;
2669		dev_err(&pdev->dev,
2670			"Unable to allocate memory for VF Data Storage\n");
2671		err = -ENOMEM;
2672		goto out;
2673	}
2674
2675	/* only call pci_enable_sriov() if no VFs are allocated already */
2676	if (!old_vfs) {
2677		err = pci_enable_sriov(pdev, adapter->vfs_allocated_count);
2678		if (err)
2679			goto err_out;
2680	}
2681	dev_info(&pdev->dev, "%d VFs allocated\n",
2682		 adapter->vfs_allocated_count);
2683	for (i = 0; i < adapter->vfs_allocated_count; i++)
2684		igb_vf_configure(adapter, i);
2685
2686	/* DMA Coalescing is not supported in IOV mode. */
2687	adapter->flags &= ~IGB_FLAG_DMAC;
2688	goto out;
2689
2690err_out:
2691	kfree(adapter->vf_data);
2692	adapter->vf_data = NULL;
2693	adapter->vfs_allocated_count = 0;
2694out:
2695	return err;
2696}
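/* Usage sketch for reaching this path from user space (the interface name is
 * a placeholder): writing a VF count to the standard sriov_numvfs attribute
 * asks the PCI core to (de)allocate VFs for this port, e.g.
 *
 *	echo 4 > /sys/class/net/eth0/device/sriov_numvfs
 *	echo 0 > /sys/class/net/eth0/device/sriov_numvfs
 */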
2697
2698#endif
2699/**
2700 *  igb_remove_i2c - Cleanup I2C interface
2701 *  @adapter: pointer to adapter structure
2702 **/
2703static void igb_remove_i2c(struct igb_adapter *adapter)
2704{
2705	/* free the adapter bus structure */
2706	i2c_del_adapter(&adapter->i2c_adap);
2707}
2708
2709/**
2710 *  igb_remove - Device Removal Routine
2711 *  @pdev: PCI device information struct
2712 *
2713 *  igb_remove is called by the PCI subsystem to alert the driver
2714 *  that it should release a PCI device.  This could be caused by a
2715 *  Hot-Plug event, or because the driver is going to be removed from
2716 *  memory.
2717 **/
2718static void igb_remove(struct pci_dev *pdev)
2719{
2720	struct net_device *netdev = pci_get_drvdata(pdev);
2721	struct igb_adapter *adapter = netdev_priv(netdev);
2722	struct e1000_hw *hw = &adapter->hw;
2723
2724	pm_runtime_get_noresume(&pdev->dev);
2725#ifdef CONFIG_IGB_HWMON
2726	igb_sysfs_exit(adapter);
2727#endif
2728	igb_remove_i2c(adapter);
2729	igb_ptp_stop(adapter);
2730	/* The watchdog timer may be rescheduled, so explicitly
2731	 * disable watchdog from being rescheduled.
2732	 */
2733	set_bit(__IGB_DOWN, &adapter->state);
2734	del_timer_sync(&adapter->watchdog_timer);
2735	del_timer_sync(&adapter->phy_info_timer);
2736
2737	cancel_work_sync(&adapter->reset_task);
2738	cancel_work_sync(&adapter->watchdog_task);
2739
2740#ifdef CONFIG_IGB_DCA
2741	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2742		dev_info(&pdev->dev, "DCA disabled\n");
2743		dca_remove_requester(&pdev->dev);
2744		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2745		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2746	}
2747#endif
2748
2749	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2750	 * would have already happened in close and is redundant.
2751	 */
2752	igb_release_hw_control(adapter);
2753
2754	unregister_netdev(netdev);
2755
2756	igb_clear_interrupt_scheme(adapter);
2757
2758#ifdef CONFIG_PCI_IOV
2759	igb_disable_sriov(pdev);
2760#endif
2761
2762	iounmap(hw->hw_addr);
2763	if (hw->flash_address)
2764		iounmap(hw->flash_address);
2765	pci_release_selected_regions(pdev,
2766				     pci_select_bars(pdev, IORESOURCE_MEM));
2767
2768	kfree(adapter->shadow_vfta);
2769	free_netdev(netdev);
2770
2771	pci_disable_pcie_error_reporting(pdev);
2772
2773	pci_disable_device(pdev);
2774}
2775
2776/**
2777 *  igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2778 *  @adapter: board private structure to initialize
2779 *
2780 *  This function initializes the vf specific data storage and then attempts to
2781 *  allocate the VFs.  The reason for ordering it this way is because it is much
2782 *  allocate the VFs.  It is ordered this way because it is much more
2783 *  expensive, time wise, to disable SR-IOV than it is to allocate and free
2784 **/
2785static void igb_probe_vfs(struct igb_adapter *adapter)
2786{
2787#ifdef CONFIG_PCI_IOV
2788	struct pci_dev *pdev = adapter->pdev;
2789	struct e1000_hw *hw = &adapter->hw;
2790
2791	/* Virtualization features not supported on i210 family. */
2792	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2793		return;
2794
2795	pci_sriov_set_totalvfs(pdev, 7);
2796	igb_pci_enable_sriov(pdev, max_vfs);
2797
2798#endif /* CONFIG_PCI_IOV */
2799}
2800
2801static void igb_init_queue_configuration(struct igb_adapter *adapter)
2802{
2803	struct e1000_hw *hw = &adapter->hw;
2804	u32 max_rss_queues;
2805
2806	/* Determine the maximum number of RSS queues supported. */
2807	switch (hw->mac.type) {
2808	case e1000_i211:
2809		max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2810		break;
2811	case e1000_82575:
2812	case e1000_i210:
2813		max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2814		break;
2815	case e1000_i350:
2816		/* I350 cannot do RSS and SR-IOV at the same time */
2817		if (adapter->vfs_allocated_count) {
2818			max_rss_queues = 1;
2819			break;
2820		}
2821		/* fall through */
2822	case e1000_82576:
2823		if (adapter->vfs_allocated_count) {
2824			max_rss_queues = 2;
2825			break;
2826		}
2827		/* fall through */
2828	case e1000_82580:
2829	case e1000_i354:
2830	default:
2831		max_rss_queues = IGB_MAX_RX_QUEUES;
2832		break;
2833	}
2834
2835	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
2836
2837	/* Determine if we need to pair queues. */
2838	switch (hw->mac.type) {
2839	case e1000_82575:
2840	case e1000_i211:
2841		/* Device supports enough interrupts without queue pairing. */
2842		break;
2843	case e1000_82576:
2844		/* If VFs are going to be allocated with RSS queues then we
2845		 * should pair the queues in order to conserve interrupts due
2846		 * to limited supply.
2847		 */
2848		if ((adapter->rss_queues > 1) &&
2849		    (adapter->vfs_allocated_count > 6))
2850			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2851		/* fall through */
2852	case e1000_82580:
2853	case e1000_i350:
2854	case e1000_i354:
2855	case e1000_i210:
2856	default:
2857		/* If rss_queues > half of max_rss_queues, pair the queues in
2858		 * order to conserve interrupts due to limited supply.
2859		 */
2860		if (adapter->rss_queues > (max_rss_queues / 2))
2861			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2862		break;
2863	}
2864}
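/* Worked example (a sketch; IGB_MAX_RX_QUEUES is assumed to be 8 here):
 * on an i350 with no VFs and 16 online CPUs, max_rss_queues = 8 and
 * rss_queues = min(8, 16) = 8; since 8 > 8 / 2, IGB_FLAG_QUEUE_PAIRS is
 * set and Tx/Rx rings share interrupt vectors.  With only 2 online CPUs,
 * rss_queues = 2 and the queues stay unpaired.
 */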
2865
2866/**
2867 *  igb_sw_init - Initialize general software structures (struct igb_adapter)
2868 *  @adapter: board private structure to initialize
2869 *
2870 *  igb_sw_init initializes the Adapter private data structure.
2871 *  Fields are initialized based on PCI device information and
2872 *  OS network device settings (MTU size).
2873 **/
2874static int igb_sw_init(struct igb_adapter *adapter)
2875{
2876	struct e1000_hw *hw = &adapter->hw;
2877	struct net_device *netdev = adapter->netdev;
2878	struct pci_dev *pdev = adapter->pdev;
2879
2880	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2881
2882	/* set default ring sizes */
2883	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2884	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2885
2886	/* set default ITR values */
2887	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2888	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2889
2890	/* set default work limits */
2891	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2892
2893	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2894				  VLAN_HLEN;
2895	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2896
2897	spin_lock_init(&adapter->stats64_lock);
2898#ifdef CONFIG_PCI_IOV
2899	switch (hw->mac.type) {
2900	case e1000_82576:
2901	case e1000_i350:
2902		if (max_vfs > 7) {
2903			dev_warn(&pdev->dev,
2904				 "Maximum of 7 VFs per PF, using max\n");
2905			max_vfs = adapter->vfs_allocated_count = 7;
2906		} else
2907			adapter->vfs_allocated_count = max_vfs;
2908		if (adapter->vfs_allocated_count)
2909			dev_warn(&pdev->dev,
2910				 "Enabling SR-IOV VFs using the module parameter is deprecated - please use the pci sysfs interface.\n");
2911		break;
2912	default:
2913		break;
2914	}
2915#endif /* CONFIG_PCI_IOV */
2916
2917	igb_init_queue_configuration(adapter);
2918
2919	/* Setup and initialize a copy of the hw vlan table array */
2920	adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
2921				       GFP_ATOMIC);
2922
2923	/* This call may decrease the number of queues */
2924	if (igb_init_interrupt_scheme(adapter, true)) {
2925		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2926		return -ENOMEM;
2927	}
2928
2929	igb_probe_vfs(adapter);
2930
2931	/* Explicitly disable IRQ since the NIC can be in any state. */
2932	igb_irq_disable(adapter);
2933
2934	if (hw->mac.type >= e1000_i350)
2935		adapter->flags &= ~IGB_FLAG_DMAC;
2936
2937	set_bit(__IGB_DOWN, &adapter->state);
2938	return 0;
2939}
2940
2941/**
2942 *  igb_open - Called when a network interface is made active
2943 *  @netdev: network interface device structure
2944 *
2945 *  Returns 0 on success, negative value on failure
2946 *
2947 *  The open entry point is called when a network interface is made
2948 *  active by the system (IFF_UP).  At this point all resources needed
2949 *  for transmit and receive operations are allocated, the interrupt
2950 *  handler is registered with the OS, the watchdog timer is started,
2951 *  and the stack is notified that the interface is ready.
2952 **/
2953static int __igb_open(struct net_device *netdev, bool resuming)
2954{
2955	struct igb_adapter *adapter = netdev_priv(netdev);
2956	struct e1000_hw *hw = &adapter->hw;
2957	struct pci_dev *pdev = adapter->pdev;
2958	int err;
2959	int i;
2960
2961	/* disallow open during test */
2962	if (test_bit(__IGB_TESTING, &adapter->state)) {
2963		WARN_ON(resuming);
2964		return -EBUSY;
2965	}
2966
2967	if (!resuming)
2968		pm_runtime_get_sync(&pdev->dev);
2969
2970	netif_carrier_off(netdev);
2971
2972	/* allocate transmit descriptors */
2973	err = igb_setup_all_tx_resources(adapter);
2974	if (err)
2975		goto err_setup_tx;
2976
2977	/* allocate receive descriptors */
2978	err = igb_setup_all_rx_resources(adapter);
2979	if (err)
2980		goto err_setup_rx;
2981
2982	igb_power_up_link(adapter);
2983
2984	/* before we allocate an interrupt, we must be ready to handle it.
2985	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2986	 * as soon as we call pci_request_irq, so we have to set up our
2987	 * clean_rx handler before we do so.
2988	 */
2989	igb_configure(adapter);
2990
2991	err = igb_request_irq(adapter);
2992	if (err)
2993		goto err_req_irq;
2994
2995	/* Notify the stack of the actual queue counts. */
2996	err = netif_set_real_num_tx_queues(adapter->netdev,
2997					   adapter->num_tx_queues);
2998	if (err)
2999		goto err_set_queues;
3000
3001	err = netif_set_real_num_rx_queues(adapter->netdev,
3002					   adapter->num_rx_queues);
3003	if (err)
3004		goto err_set_queues;
3005
3006	/* From here on the code is the same as igb_up() */
3007	clear_bit(__IGB_DOWN, &adapter->state);
3008
3009	for (i = 0; i < adapter->num_q_vectors; i++)
3010		napi_enable(&(adapter->q_vector[i]->napi));
3011
3012	/* Clear any pending interrupts. */
3013	rd32(E1000_ICR);
3014
3015	igb_irq_enable(adapter);
3016
3017	/* notify VFs that reset has been completed */
3018	if (adapter->vfs_allocated_count) {
3019		u32 reg_data = rd32(E1000_CTRL_EXT);
3020		reg_data |= E1000_CTRL_EXT_PFRSTD;
3021		wr32(E1000_CTRL_EXT, reg_data);
3022	}
3023
3024	netif_tx_start_all_queues(netdev);
3025
3026	if (!resuming)
3027		pm_runtime_put(&pdev->dev);
3028
3029	/* start the watchdog. */
3030	hw->mac.get_link_status = 1;
3031	schedule_work(&adapter->watchdog_task);
3032
3033	return 0;
3034
3035err_set_queues:
3036	igb_free_irq(adapter);
3037err_req_irq:
3038	igb_release_hw_control(adapter);
3039	igb_power_down_link(adapter);
3040	igb_free_all_rx_resources(adapter);
3041err_setup_rx:
3042	igb_free_all_tx_resources(adapter);
3043err_setup_tx:
3044	igb_reset(adapter);
3045	if (!resuming)
3046		pm_runtime_put(&pdev->dev);
3047
3048	return err;
3049}
3050
3051static int igb_open(struct net_device *netdev)
3052{
3053	return __igb_open(netdev, false);
3054}
3055
3056/**
3057 *  igb_close - Disables a network interface
3058 *  @netdev: network interface device structure
3059 *
3060 *  Returns 0, this is not allowed to fail
3061 *
3062 *  The close entry point is called when an interface is de-activated
3063 *  by the OS.  The hardware is still under the driver's control, but
3064 *  needs to be disabled.  A global MAC reset is issued to stop the
3065 *  hardware, and all transmit and receive resources are freed.
3066 **/
3067static int __igb_close(struct net_device *netdev, bool suspending)
3068{
3069	struct igb_adapter *adapter = netdev_priv(netdev);
3070	struct pci_dev *pdev = adapter->pdev;
3071
3072	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
3073
3074	if (!suspending)
3075		pm_runtime_get_sync(&pdev->dev);
3076
3077	igb_down(adapter);
3078	igb_free_irq(adapter);
3079
3080	igb_free_all_tx_resources(adapter);
3081	igb_free_all_rx_resources(adapter);
3082
3083	if (!suspending)
3084		pm_runtime_put_sync(&pdev->dev);
3085	return 0;
3086}
3087
3088static int igb_close(struct net_device *netdev)
3089{
3090	return __igb_close(netdev, false);
3091}
3092
3093/**
3094 *  igb_setup_tx_resources - allocate Tx resources (Descriptors)
3095 *  @tx_ring: tx descriptor ring (for a specific queue) to setup
3096 *
3097 *  Return 0 on success, negative on failure
3098 **/
3099int igb_setup_tx_resources(struct igb_ring *tx_ring)
3100{
3101	struct device *dev = tx_ring->dev;
3102	int size;
3103
3104	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3105
3106	tx_ring->tx_buffer_info = vzalloc(size);
3107	if (!tx_ring->tx_buffer_info)
3108		goto err;
3109
3110	/* round up to nearest 4K */
3111	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
3112	tx_ring->size = ALIGN(tx_ring->size, 4096);
3113
3114	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
3115					   &tx_ring->dma, GFP_KERNEL);
3116	if (!tx_ring->desc)
3117		goto err;
3118
3119	tx_ring->next_to_use = 0;
3120	tx_ring->next_to_clean = 0;
3121
3122	return 0;
3123
3124err:
3125	vfree(tx_ring->tx_buffer_info);
3126	tx_ring->tx_buffer_info = NULL;
3127	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
3128	return -ENOMEM;
3129}
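/* Sizing sketch for the allocation above, assuming the driver default of 256
 * Tx descriptors (IGB_DEFAULT_TXD) and the 16-byte advanced descriptor:
 *   tx_ring->size = 256 * 16 = 4096 bytes, already 4 KB aligned;
 *   a 320-entry ring would need 5120 bytes, rounded up to 8192 by ALIGN().
 */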
3130
3131/**
3132 *  igb_setup_all_tx_resources - wrapper to allocate Tx resources
3133 *				 (Descriptors) for all queues
3134 *  @adapter: board private structure
3135 *
3136 *  Return 0 on success, negative on failure
3137 **/
3138static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
3139{
3140	struct pci_dev *pdev = adapter->pdev;
3141	int i, err = 0;
3142
3143	for (i = 0; i < adapter->num_tx_queues; i++) {
3144		err = igb_setup_tx_resources(adapter->tx_ring[i]);
3145		if (err) {
3146			dev_err(&pdev->dev,
3147				"Allocation for Tx Queue %u failed\n", i);
3148			for (i--; i >= 0; i--)
3149				igb_free_tx_resources(adapter->tx_ring[i]);
3150			break;
3151		}
3152	}
3153
3154	return err;
3155}
3156
3157/**
3158 *  igb_setup_tctl - configure the transmit control registers
3159 *  @adapter: Board private structure
3160 **/
3161void igb_setup_tctl(struct igb_adapter *adapter)
3162{
3163	struct e1000_hw *hw = &adapter->hw;
3164	u32 tctl;
3165
3166	/* disable queue 0 which is enabled by default on 82575 and 82576 */
3167	wr32(E1000_TXDCTL(0), 0);
3168
3169	/* Program the Transmit Control Register */
3170	tctl = rd32(E1000_TCTL);
3171	tctl &= ~E1000_TCTL_CT;
3172	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
3173		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
3174
3175	igb_config_collision_dist(hw);
3176
3177	/* Enable transmits */
3178	tctl |= E1000_TCTL_EN;
3179
3180	wr32(E1000_TCTL, tctl);
3181}
3182
3183/**
3184 *  igb_configure_tx_ring - Configure transmit ring after Reset
3185 *  @adapter: board private structure
3186 *  @ring: tx ring to configure
3187 *
3188 *  Configure a transmit ring after a reset.
3189 **/
3190void igb_configure_tx_ring(struct igb_adapter *adapter,
3191                           struct igb_ring *ring)
3192{
3193	struct e1000_hw *hw = &adapter->hw;
3194	u32 txdctl = 0;
3195	u64 tdba = ring->dma;
3196	int reg_idx = ring->reg_idx;
3197
3198	/* disable the queue */
3199	wr32(E1000_TXDCTL(reg_idx), 0);
3200	wrfl();
3201	mdelay(10);
3202
3203	wr32(E1000_TDLEN(reg_idx),
3204	     ring->count * sizeof(union e1000_adv_tx_desc));
3205	wr32(E1000_TDBAL(reg_idx),
3206	     tdba & 0x00000000ffffffffULL);
3207	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
3208
3209	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
3210	wr32(E1000_TDH(reg_idx), 0);
3211	writel(0, ring->tail);
3212
3213	txdctl |= IGB_TX_PTHRESH;
3214	txdctl |= IGB_TX_HTHRESH << 8;
3215	txdctl |= IGB_TX_WTHRESH << 16;
3216
3217	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3218	wr32(E1000_TXDCTL(reg_idx), txdctl);
3219}
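/* Register programming sketch for the base address split above, using a
 * hypothetical descriptor ring DMA address of 0x0000000123456000:
 *   TDBAL = 0x23456000 (low 32 bits), TDBAH = 0x00000001 (high 32 bits)
 * and TDLEN for a 256-descriptor ring would be 256 * 16 = 4096 bytes.
 */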
3220
3221/**
3222 *  igb_configure_tx - Configure transmit Unit after Reset
3223 *  @adapter: board private structure
3224 *
3225 *  Configure the Tx unit of the MAC after a reset.
3226 **/
3227static void igb_configure_tx(struct igb_adapter *adapter)
3228{
3229	int i;
3230
3231	for (i = 0; i < adapter->num_tx_queues; i++)
3232		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
3233}
3234
3235/**
3236 *  igb_setup_rx_resources - allocate Rx resources (Descriptors)
3237 *  @rx_ring: Rx descriptor ring (for a specific queue) to setup
3238 *
3239 *  Returns 0 on success, negative on failure
3240 **/
3241int igb_setup_rx_resources(struct igb_ring *rx_ring)
3242{
3243	struct device *dev = rx_ring->dev;
3244	int size;
3245
3246	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3247
3248	rx_ring->rx_buffer_info = vzalloc(size);
3249	if (!rx_ring->rx_buffer_info)
3250		goto err;
3251
3252	/* Round up to nearest 4K */
3253	rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc);
3254	rx_ring->size = ALIGN(rx_ring->size, 4096);
3255
3256	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
3257					   &rx_ring->dma, GFP_KERNEL);
3258	if (!rx_ring->desc)
3259		goto err;
3260
3261	rx_ring->next_to_alloc = 0;
3262	rx_ring->next_to_clean = 0;
3263	rx_ring->next_to_use = 0;
3264
3265	return 0;
3266
3267err:
3268	vfree(rx_ring->rx_buffer_info);
3269	rx_ring->rx_buffer_info = NULL;
3270	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
3271	return -ENOMEM;
3272}
3273
3274/**
3275 *  igb_setup_all_rx_resources - wrapper to allocate Rx resources
3276 *				 (Descriptors) for all queues
3277 *  @adapter: board private structure
3278 *
3279 *  Return 0 on success, negative on failure
3280 **/
3281static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
3282{
3283	struct pci_dev *pdev = adapter->pdev;
3284	int i, err = 0;
3285
3286	for (i = 0; i < adapter->num_rx_queues; i++) {
3287		err = igb_setup_rx_resources(adapter->rx_ring[i]);
3288		if (err) {
3289			dev_err(&pdev->dev,
3290				"Allocation for Rx Queue %u failed\n", i);
3291			for (i--; i >= 0; i--)
3292				igb_free_rx_resources(adapter->rx_ring[i]);
3293			break;
3294		}
3295	}
3296
3297	return err;
3298}
3299
3300/**
3301 *  igb_setup_mrqc - configure the multiple receive queue control registers
3302 *  @adapter: Board private structure
3303 **/
3304static void igb_setup_mrqc(struct igb_adapter *adapter)
3305{
3306	struct e1000_hw *hw = &adapter->hw;
3307	u32 mrqc, rxcsum;
3308	u32 j, num_rx_queues;
3309	static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741,
3310					0xB08FA343, 0xCB2BCAD0, 0xB4307BAE,
3311					0xA32DCB77, 0x0CF23080, 0x3BB7426A,
3312					0xFA01ACBE };
3313
3314	/* Fill out hash function seeds */
3315	for (j = 0; j < 10; j++)
3316		wr32(E1000_RSSRK(j), rsskey[j]);
3317
3318	num_rx_queues = adapter->rss_queues;
3319
3320	switch (hw->mac.type) {
3321	case e1000_82576:
3322		/* 82576 supports 2 RSS queues for SR-IOV */
3323		if (adapter->vfs_allocated_count)
3324			num_rx_queues = 2;
3325		break;
3326	default:
3327		break;
3328	}
3329
3330	if (adapter->rss_indir_tbl_init != num_rx_queues) {
3331		for (j = 0; j < IGB_RETA_SIZE; j++)
3332			adapter->rss_indir_tbl[j] = (j * num_rx_queues) / IGB_RETA_SIZE;
3333		adapter->rss_indir_tbl_init = num_rx_queues;
3334	}
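	/* Worked example (a sketch, assuming the 128-entry redirection table
	 * this driver uses): with num_rx_queues = 4, entry j maps to queue
	 * (j * 4) / 128, so entries 0-31 -> queue 0, 32-63 -> queue 1,
	 * 64-95 -> queue 2 and 96-127 -> queue 3.
	 */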
3335	igb_write_rss_indir_tbl(adapter);
3336
3337	/* Disable raw packet checksumming so that RSS hash is placed in
3338	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
3339	 * offloads as they are enabled by default
3340	 */
3341	rxcsum = rd32(E1000_RXCSUM);
3342	rxcsum |= E1000_RXCSUM_PCSD;
3343
3344	if (adapter->hw.mac.type >= e1000_82576)
3345		/* Enable Receive Checksum Offload for SCTP */
3346		rxcsum |= E1000_RXCSUM_CRCOFL;
3347
3348	/* Don't need to set TUOFL or IPOFL, they default to 1 */
3349	wr32(E1000_RXCSUM, rxcsum);
3350
3351	/* Generate RSS hash based on packet types, TCP/UDP
3352	 * port numbers and/or IPv4/v6 src and dst addresses
3353	 */
3354	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
3355	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
3356	       E1000_MRQC_RSS_FIELD_IPV6 |
3357	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
3358	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
3359
3360	if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
3361		mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
3362	if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
3363		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
3364
3365	/* If VMDq is enabled then we set the appropriate mode for that, else
3366	 * we default to RSS so that an RSS hash is calculated per packet even
3367	 * if we are only using one queue
3368	 */
3369	if (adapter->vfs_allocated_count) {
3370		if (hw->mac.type > e1000_82575) {
3371			/* Set the default pool for the PF's first queue */
3372			u32 vtctl = rd32(E1000_VT_CTL);
3373			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
3374				   E1000_VT_CTL_DISABLE_DEF_POOL);
3375			vtctl |= adapter->vfs_allocated_count <<
3376				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
3377			wr32(E1000_VT_CTL, vtctl);
3378		}
3379		if (adapter->rss_queues > 1)
3380			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
3381		else
3382			mrqc |= E1000_MRQC_ENABLE_VMDQ;
3383	} else {
3384		if (hw->mac.type != e1000_i211)
3385			mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
3386	}
3387	igb_vmm_control(adapter);
3388
3389	wr32(E1000_MRQC, mrqc);
3390}
3391
3392/**
3393 *  igb_setup_rctl - configure the receive control registers
3394 *  @adapter: Board private structure
3395 **/
3396void igb_setup_rctl(struct igb_adapter *adapter)
3397{
3398	struct e1000_hw *hw = &adapter->hw;
3399	u32 rctl;
3400
3401	rctl = rd32(E1000_RCTL);
3402
3403	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3404	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3405
3406	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3407		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3408
3409	/* enable stripping of CRC. It's unlikely this will break BMC
3410	 * redirection as it did with e1000. Newer features require
3411	 * that the HW strips the CRC.
3412	 */
3413	rctl |= E1000_RCTL_SECRC;
3414
3415	/* disable store bad packets and clear size bits. */
3416	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3417
3418	/* enable LPE to prevent packets larger than max_frame_size */
3419	rctl |= E1000_RCTL_LPE;
3420
3421	/* disable queue 0 to prevent tail write w/o re-config */
3422	wr32(E1000_RXDCTL(0), 0);
3423
3424	/* Attention!!!  For SR-IOV PF driver operations you must enable
3425	 * queue drop for all VF and PF queues to prevent head of line blocking
3426	 * if an un-trusted VF does not provide descriptors to hardware.
3427	 */
3428	if (adapter->vfs_allocated_count) {
3429		/* set all queue drop enable bits */
3430		wr32(E1000_QDE, ALL_QUEUES);
3431	}
3432
3433	/* This is useful for sniffing bad packets. */
3434	if (adapter->netdev->features & NETIF_F_RXALL) {
3435		/* UPE and MPE will be handled by normal PROMISC logic
3436		 * in e1000e_set_rx_mode
3437		 * in igb_set_rx_mode
3438		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3439			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3440			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3441
3442		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3443			  E1000_RCTL_DPF | /* Allow filtered pause */
3444			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3445		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3446		 * and that breaks VLANs.
3447		 */
3448	}
3449
3450	wr32(E1000_RCTL, rctl);
3451}
3452
3453static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3454                                   int vfn)
3455{
3456	struct e1000_hw *hw = &adapter->hw;
3457	u32 vmolr;
3458
3459	/* if it isn't the PF, check to see if VFs are enabled and
3460	 * increase the size to support VLAN tags
3461	 */
3462	if (vfn < adapter->vfs_allocated_count &&
3463	    adapter->vf_data[vfn].vlans_enabled)
3464		size += VLAN_TAG_SIZE;
3465
3466	vmolr = rd32(E1000_VMOLR(vfn));
3467	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3468	vmolr |= size | E1000_VMOLR_LPE;
3469	wr32(E1000_VMOLR(vfn), vmolr);
3470
3471	return 0;
3472}
3473
3474/**
3475 *  igb_rlpml_set - set maximum receive packet size
3476 *  @adapter: board private structure
3477 *
3478 *  Configure maximum receivable packet size.
3479 **/
3480static void igb_rlpml_set(struct igb_adapter *adapter)
3481{
3482	u32 max_frame_size = adapter->max_frame_size;
3483	struct e1000_hw *hw = &adapter->hw;
3484	u16 pf_id = adapter->vfs_allocated_count;
3485
3486	if (pf_id) {
3487		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3488		/* If we're in VMDQ or SR-IOV mode, then set global RLPML
3489		 * to our max jumbo frame size, in case we need to enable
3490		 * jumbo frames on one of the rings later.
3491		 * This will not pass over-length frames into the default
3492		 * queue because it's gated by the VMOLR.RLPML.
3493		 */
3494		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3495	}
3496
3497	wr32(E1000_RLPML, max_frame_size);
3498}
3499
3500static inline void igb_set_vmolr(struct igb_adapter *adapter,
3501				 int vfn, bool aupe)
3502{
3503	struct e1000_hw *hw = &adapter->hw;
3504	u32 vmolr;
3505
3506	/* This register exists only on 82576 and newer, so if the part is
3507	 * older we should exit and do nothing
3508	 */
3509	if (hw->mac.type < e1000_82576)
3510		return;
3511
3512	vmolr = rd32(E1000_VMOLR(vfn));
3513	vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3514	if (aupe)
3515		vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3516	else
3517		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3518
3519	/* clear all bits that might not be set */
3520	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3521
3522	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3523		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3524	/* for VMDq only allow the VFs and pool 0 to accept broadcast and
3525	 * multicast packets
3526	 */
3527	if (vfn <= adapter->vfs_allocated_count)
3528		vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3529
3530	wr32(E1000_VMOLR(vfn), vmolr);
3531}
3532
3533/**
3534 *  igb_configure_rx_ring - Configure a receive ring after Reset
3535 *  @adapter: board private structure
3536 *  @ring: receive ring to be configured
3537 *
3538 *  Configure the Rx unit of the MAC after a reset.
3539 **/
3540void igb_configure_rx_ring(struct igb_adapter *adapter,
3541			   struct igb_ring *ring)
3542{
3543	struct e1000_hw *hw = &adapter->hw;
3544	u64 rdba = ring->dma;
3545	int reg_idx = ring->reg_idx;
3546	u32 srrctl = 0, rxdctl = 0;
3547
3548	/* disable the queue */
3549	wr32(E1000_RXDCTL(reg_idx), 0);
3550
3551	/* Set DMA base address registers */
3552	wr32(E1000_RDBAL(reg_idx),
3553	     rdba & 0x00000000ffffffffULL);
3554	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3555	wr32(E1000_RDLEN(reg_idx),
3556	     ring->count * sizeof(union e1000_adv_rx_desc));
3557
3558	/* initialize head and tail */
3559	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3560	wr32(E1000_RDH(reg_idx), 0);
3561	writel(0, ring->tail);
3562
3563	/* set descriptor configuration */
3564	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3565	srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3566	srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3567	if (hw->mac.type >= e1000_82580)
3568		srrctl |= E1000_SRRCTL_TIMESTAMP;
3569	/* Only set Drop Enable if we are supporting multiple queues */
3570	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3571		srrctl |= E1000_SRRCTL_DROP_EN;
3572
3573	wr32(E1000_SRRCTL(reg_idx), srrctl);
3574
3575	/* set filtering for VMDQ pools */
3576	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3577
3578	rxdctl |= IGB_RX_PTHRESH;
3579	rxdctl |= IGB_RX_HTHRESH << 8;
3580	rxdctl |= IGB_RX_WTHRESH << 16;
3581
3582	/* enable receive descriptor fetching */
3583	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3584	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3585}
3586
3587/**
3588 *  igb_configure_rx - Configure receive Unit after Reset
3589 *  @adapter: board private structure
3590 *
3591 *  Configure the Rx unit of the MAC after a reset.
3592 **/
3593static void igb_configure_rx(struct igb_adapter *adapter)
3594{
3595	int i;
3596
3597	/* set UTA to appropriate mode */
3598	igb_set_uta(adapter);
3599
3600	/* set the correct pool for the PF default MAC address in entry 0 */
3601	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3602			 adapter->vfs_allocated_count);
3603
3604	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3605	 * the Base and Length of the Rx Descriptor Ring
3606	 */
3607	for (i = 0; i < adapter->num_rx_queues; i++)
3608		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3609}
3610
3611/**
3612 *  igb_free_tx_resources - Free Tx Resources per Queue
3613 *  @tx_ring: Tx descriptor ring for a specific queue
3614 *
3615 *  Free all transmit software resources
3616 **/
3617void igb_free_tx_resources(struct igb_ring *tx_ring)
3618{
3619	igb_clean_tx_ring(tx_ring);
3620
3621	vfree(tx_ring->tx_buffer_info);
3622	tx_ring->tx_buffer_info = NULL;
3623
3624	/* if not set, then don't free */
3625	if (!tx_ring->desc)
3626		return;
3627
3628	dma_free_coherent(tx_ring->dev, tx_ring->size,
3629			  tx_ring->desc, tx_ring->dma);
3630
3631	tx_ring->desc = NULL;
3632}
3633
3634/**
3635 *  igb_free_all_tx_resources - Free Tx Resources for All Queues
3636 *  @adapter: board private structure
3637 *
3638 *  Free all transmit software resources
3639 **/
3640static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3641{
3642	int i;
3643
3644	for (i = 0; i < adapter->num_tx_queues; i++)
3645		igb_free_tx_resources(adapter->tx_ring[i]);
3646}
3647
3648void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3649				    struct igb_tx_buffer *tx_buffer)
3650{
3651	if (tx_buffer->skb) {
3652		dev_kfree_skb_any(tx_buffer->skb);
3653		if (dma_unmap_len(tx_buffer, len))
3654			dma_unmap_single(ring->dev,
3655					 dma_unmap_addr(tx_buffer, dma),
3656					 dma_unmap_len(tx_buffer, len),
3657					 DMA_TO_DEVICE);
3658	} else if (dma_unmap_len(tx_buffer, len)) {
3659		dma_unmap_page(ring->dev,
3660			       dma_unmap_addr(tx_buffer, dma),
3661			       dma_unmap_len(tx_buffer, len),
3662			       DMA_TO_DEVICE);
3663	}
3664	tx_buffer->next_to_watch = NULL;
3665	tx_buffer->skb = NULL;
3666	dma_unmap_len_set(tx_buffer, len, 0);
3667	/* buffer_info must be completely set up in the transmit path */
3668}
3669
3670/**
3671 *  igb_clean_tx_ring - Free Tx Buffers
3672 *  @tx_ring: ring to be cleaned
3673 **/
3674static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3675{
3676	struct igb_tx_buffer *buffer_info;
3677	unsigned long size;
3678	u16 i;
3679
3680	if (!tx_ring->tx_buffer_info)
3681		return;
3682
3683	/* Free all the Tx ring sk_buffs */
3684	for (i = 0; i < tx_ring->count; i++) {
3685		buffer_info = &tx_ring->tx_buffer_info[i];
3686		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3687	}
3688
3689	netdev_tx_reset_queue(txring_txq(tx_ring));
3690
3691	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3692	memset(tx_ring->tx_buffer_info, 0, size);
3693
3694	/* Zero out the descriptor ring */
3695	memset(tx_ring->desc, 0, tx_ring->size);
3696
3697	tx_ring->next_to_use = 0;
3698	tx_ring->next_to_clean = 0;
3699}
3700
3701/**
3702 *  igb_clean_all_tx_rings - Free Tx Buffers for all queues
3703 *  @adapter: board private structure
3704 **/
3705static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3706{
3707	int i;
3708
3709	for (i = 0; i < adapter->num_tx_queues; i++)
3710		igb_clean_tx_ring(adapter->tx_ring[i]);
3711}
3712
3713/**
3714 *  igb_free_rx_resources - Free Rx Resources
3715 *  @rx_ring: ring to clean the resources from
3716 *
3717 *  Free all receive software resources
3718 **/
3719void igb_free_rx_resources(struct igb_ring *rx_ring)
3720{
3721	igb_clean_rx_ring(rx_ring);
3722
3723	vfree(rx_ring->rx_buffer_info);
3724	rx_ring->rx_buffer_info = NULL;
3725
3726	/* if not set, then don't free */
3727	if (!rx_ring->desc)
3728		return;
3729
3730	dma_free_coherent(rx_ring->dev, rx_ring->size,
3731			  rx_ring->desc, rx_ring->dma);
3732
3733	rx_ring->desc = NULL;
3734}
3735
3736/**
3737 *  igb_free_all_rx_resources - Free Rx Resources for All Queues
3738 *  @adapter: board private structure
3739 *
3740 *  Free all receive software resources
3741 **/
3742static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3743{
3744	int i;
3745
3746	for (i = 0; i < adapter->num_rx_queues; i++)
3747		igb_free_rx_resources(adapter->rx_ring[i]);
3748}
3749
3750/**
3751 *  igb_clean_rx_ring - Free Rx Buffers per Queue
3752 *  @rx_ring: ring to free buffers from
3753 **/
3754static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3755{
3756	unsigned long size;
3757	u16 i;
3758
3759	if (rx_ring->skb)
3760		dev_kfree_skb(rx_ring->skb);
3761	rx_ring->skb = NULL;
3762
3763	if (!rx_ring->rx_buffer_info)
3764		return;
3765
3766	/* Free all the Rx ring sk_buffs */
3767	for (i = 0; i < rx_ring->count; i++) {
3768		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3769
3770		if (!buffer_info->page)
3771			continue;
3772
3773		dma_unmap_page(rx_ring->dev,
3774			       buffer_info->dma,
3775			       PAGE_SIZE,
3776			       DMA_FROM_DEVICE);
3777		__free_page(buffer_info->page);
3778
3779		buffer_info->page = NULL;
3780	}
3781
3782	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3783	memset(rx_ring->rx_buffer_info, 0, size);
3784
3785	/* Zero out the descriptor ring */
3786	memset(rx_ring->desc, 0, rx_ring->size);
3787
3788	rx_ring->next_to_alloc = 0;
3789	rx_ring->next_to_clean = 0;
3790	rx_ring->next_to_use = 0;
3791}
3792
3793/**
3794 *  igb_clean_all_rx_rings - Free Rx Buffers for all queues
3795 *  @adapter: board private structure
3796 **/
3797static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3798{
3799	int i;
3800
3801	for (i = 0; i < adapter->num_rx_queues; i++)
3802		igb_clean_rx_ring(adapter->rx_ring[i]);
3803}
3804
3805/**
3806 *  igb_set_mac - Change the Ethernet Address of the NIC
3807 *  @netdev: network interface device structure
3808 *  @p: pointer to an address structure
3809 *
3810 *  Returns 0 on success, negative on failure
3811 **/
3812static int igb_set_mac(struct net_device *netdev, void *p)
3813{
3814	struct igb_adapter *adapter = netdev_priv(netdev);
3815	struct e1000_hw *hw = &adapter->hw;
3816	struct sockaddr *addr = p;
3817
3818	if (!is_valid_ether_addr(addr->sa_data))
3819		return -EADDRNOTAVAIL;
3820
3821	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3822	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3823
3824	/* set the correct pool for the new PF MAC address in entry 0 */
3825	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3826			 adapter->vfs_allocated_count);
3827
3828	return 0;
3829}
3830
3831/**
3832 *  igb_write_mc_addr_list - write multicast addresses to MTA
3833 *  @netdev: network interface device structure
3834 *
3835 *  Writes multicast address list to the MTA hash table.
3836 *  Returns: -ENOMEM on failure
3837 *           0 on no addresses written
3838 *           X on writing X addresses to MTA
3839 **/
3840static int igb_write_mc_addr_list(struct net_device *netdev)
3841{
3842	struct igb_adapter *adapter = netdev_priv(netdev);
3843	struct e1000_hw *hw = &adapter->hw;
3844	struct netdev_hw_addr *ha;
3845	u8  *mta_list;
3846	int i;
3847
3848	if (netdev_mc_empty(netdev)) {
3849		/* nothing to program, so clear mc list */
3850		igb_update_mc_addr_list(hw, NULL, 0);
3851		igb_restore_vf_multicasts(adapter);
3852		return 0;
3853	}
3854
3855	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3856	if (!mta_list)
3857		return -ENOMEM;
3858
3859	/* The shared function expects a packed array of only addresses. */
3860	i = 0;
3861	netdev_for_each_mc_addr(ha, netdev)
3862		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3863
3864	igb_update_mc_addr_list(hw, mta_list, i);
3865	kfree(mta_list);
3866
3867	return netdev_mc_count(netdev);
3868}
3869
3870/**
3871 *  igb_write_uc_addr_list - write unicast addresses to RAR table
3872 *  @netdev: network interface device structure
3873 *
3874 *  Writes unicast address list to the RAR table.
3875 *  Returns: -ENOMEM on failure/insufficient address space
3876 *           0 on no addresses written
3877 *           X on writing X addresses to the RAR table
3878 **/
3879static int igb_write_uc_addr_list(struct net_device *netdev)
3880{
3881	struct igb_adapter *adapter = netdev_priv(netdev);
3882	struct e1000_hw *hw = &adapter->hw;
3883	unsigned int vfn = adapter->vfs_allocated_count;
3884	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3885	int count = 0;
3886
3887	/* return ENOMEM indicating insufficient memory for addresses */
3888	if (netdev_uc_count(netdev) > rar_entries)
3889		return -ENOMEM;
3890
3891	if (!netdev_uc_empty(netdev) && rar_entries) {
3892		struct netdev_hw_addr *ha;
3893
3894		netdev_for_each_uc_addr(ha, netdev) {
3895			if (!rar_entries)
3896				break;
3897			igb_rar_set_qsel(adapter, ha->addr,
3898					 rar_entries--,
3899					 vfn);
3900			count++;
3901		}
3902	}
3903	/* write the addresses in reverse order to avoid write combining */
3904	for (; rar_entries > 0 ; rar_entries--) {
3905		wr32(E1000_RAH(rar_entries), 0);
3906		wr32(E1000_RAL(rar_entries), 0);
3907	}
3908	wrfl();
3909
3910	return count;
3911}
3912
3913/**
3914 *  igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3915 *  @netdev: network interface device structure
3916 *
3917 *  The set_rx_mode entry point is called whenever the unicast or multicast
3918 *  address lists or the network interface flags are updated.  This routine is
3919 *  responsible for configuring the hardware for proper unicast, multicast,
3920 *  promiscuous mode, and all-multi behavior.
3921 **/
3922static void igb_set_rx_mode(struct net_device *netdev)
3923{
3924	struct igb_adapter *adapter = netdev_priv(netdev);
3925	struct e1000_hw *hw = &adapter->hw;
3926	unsigned int vfn = adapter->vfs_allocated_count;
3927	u32 rctl, vmolr = 0;
3928	int count;
3929
3930	/* Check for Promiscuous and All Multicast modes */
3931	rctl = rd32(E1000_RCTL);
3932
3933	/* clear the affected bits */
3934	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3935
3936	if (netdev->flags & IFF_PROMISC) {
3937		/* retain VLAN HW filtering if in VT mode */
3938		if (adapter->vfs_allocated_count)
3939			rctl |= E1000_RCTL_VFE;
3940		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3941		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3942	} else {
3943		if (netdev->flags & IFF_ALLMULTI) {
3944			rctl |= E1000_RCTL_MPE;
3945			vmolr |= E1000_VMOLR_MPME;
3946		} else {
3947			/* Write addresses to the MTA, if the attempt fails
3948			 * then we should just turn on promiscuous mode so
3949			 * that we can at least receive multicast traffic
3950			 */
3951			count = igb_write_mc_addr_list(netdev);
3952			if (count < 0) {
3953				rctl |= E1000_RCTL_MPE;
3954				vmolr |= E1000_VMOLR_MPME;
3955			} else if (count) {
3956				vmolr |= E1000_VMOLR_ROMPE;
3957			}
3958		}
3959		/* Write addresses to available RAR registers, if there is not
3960		 * sufficient space to store all the addresses then enable
3961		 * unicast promiscuous mode
3962		 */
3963		count = igb_write_uc_addr_list(netdev);
3964		if (count < 0) {
3965			rctl |= E1000_RCTL_UPE;
3966			vmolr |= E1000_VMOLR_ROPE;
3967		}
3968		rctl |= E1000_RCTL_VFE;
3969	}
3970	wr32(E1000_RCTL, rctl);
3971
3972	/* In order to support SR-IOV and eventually VMDq it is necessary to set
3973	 * the VMOLR to enable the appropriate modes.  Without this workaround
3974	 * VLAN tag stripping would not be done for frames that only arrive
3975	 * because we are the default pool.
3976	 */
3977	if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3978		return;
3979
3980	vmolr |= rd32(E1000_VMOLR(vfn)) &
3981		 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3982	wr32(E1000_VMOLR(vfn), vmolr);
3983	igb_restore_vf_multicasts(adapter);
3984}
3985
3986static void igb_check_wvbr(struct igb_adapter *adapter)
3987{
3988	struct e1000_hw *hw = &adapter->hw;
3989	u32 wvbr = 0;
3990
3991	switch (hw->mac.type) {
3992	case e1000_82576:
3993	case e1000_i350:
3994		if (!(wvbr = rd32(E1000_WVBR)))
3995			return;
3996		break;
3997	default:
3998		break;
3999	}
4000
4001	adapter->wvbr |= wvbr;
4002}
4003
4004#define IGB_STAGGERED_QUEUE_OFFSET 8
4005
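/* igb_spoof_check() below reads WVBR as holding two spoof-event bits per VF:
 * bit j for the VF's first Tx queue and bit (j + IGB_STAGGERED_QUEUE_OFFSET)
 * for its second.
 */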
4006static void igb_spoof_check(struct igb_adapter *adapter)
4007{
4008	int j;
4009
4010	if (!adapter->wvbr)
4011		return;
4012
4013	for (j = 0; j < adapter->vfs_allocated_count; j++) {
4014		if (adapter->wvbr & (1 << j) ||
4015		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
4016			dev_warn(&adapter->pdev->dev,
4017				"Spoof event(s) detected on VF %d\n", j);
4018			adapter->wvbr &=
4019				~((1 << j) |
4020				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
4021		}
4022	}
4023}
4024
4025/* Need to wait a few seconds after link up to get diagnostic information from
4026 * the phy
4027 */
4028static void igb_update_phy_info(unsigned long data)
4029{
4030	struct igb_adapter *adapter = (struct igb_adapter *) data;
4031	igb_get_phy_info(&adapter->hw);
4032}
4033
4034/**
4035 *  igb_has_link - check shared code for link and determine up/down
4036 *  @adapter: pointer to driver private info
4037 **/
4038bool igb_has_link(struct igb_adapter *adapter)
4039{
4040	struct e1000_hw *hw = &adapter->hw;
4041	bool link_active = false;
4042
4043	/* get_link_status is set on LSC (link status) interrupt or
4044	 * rx sequence error interrupt.  It stays set until
4045	 * e1000_check_for_link establishes link; this applies to
4046	 * copper adapters ONLY
4047	 */
4048	switch (hw->phy.media_type) {
4049	case e1000_media_type_copper:
4050		if (!hw->mac.get_link_status)
4051			return true;
4052	case e1000_media_type_internal_serdes:
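		/* fall through */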
4053		hw->mac.ops.check_for_link(hw);
4054		link_active = !hw->mac.get_link_status;
4055		break;
4056	default:
4057	case e1000_media_type_unknown:
4058		break;
4059	}
4060
4061	if (((hw->mac.type == e1000_i210) ||
4062	     (hw->mac.type == e1000_i211)) &&
4063	     (hw->phy.id == I210_I_PHY_ID)) {
4064		if (!netif_carrier_ok(adapter->netdev)) {
4065			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
4066		} else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) {
4067			adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE;
4068			adapter->link_check_timeout = jiffies;
4069		}
4070	}
4071
4072	return link_active;
4073}
4074
4075static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
4076{
4077	bool ret = false;
4078	u32 ctrl_ext, thstat;
4079
4080	/* check for thermal sensor event on i350 copper only */
4081	if (hw->mac.type == e1000_i350) {
4082		thstat = rd32(E1000_THSTAT);
4083		ctrl_ext = rd32(E1000_CTRL_EXT);
4084
4085		if ((hw->phy.media_type == e1000_media_type_copper) &&
4086		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII))
4087			ret = !!(thstat & event);
4088	}
4089
4090	return ret;
4091}
4092
4093/**
4094 *  igb_watchdog - Timer Call-back
4095 *  @data: pointer to adapter cast into an unsigned long
4096 **/
4097static void igb_watchdog(unsigned long data)
4098{
4099	struct igb_adapter *adapter = (struct igb_adapter *)data;
4100	/* Do the rest outside of interrupt context */
4101	schedule_work(&adapter->watchdog_task);
4102}
4103
4104static void igb_watchdog_task(struct work_struct *work)
4105{
4106	struct igb_adapter *adapter = container_of(work,
4107						   struct igb_adapter,
4108						   watchdog_task);
4109	struct e1000_hw *hw = &adapter->hw;
4110	struct e1000_phy_info *phy = &hw->phy;
4111	struct net_device *netdev = adapter->netdev;
4112	u32 link;
4113	int i;
4114	u32 connsw;
4115
4116	link = igb_has_link(adapter);
4117
4118	if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) {
4119		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
4120			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
4121		else
4122			link = false;
4123	}
4124
4125	/* Force link down if we have fiber to swap to */
4126	if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
4127		if (hw->phy.media_type == e1000_media_type_copper) {
4128			connsw = rd32(E1000_CONNSW);
4129			if (!(connsw & E1000_CONNSW_AUTOSENSE_EN))
4130				link = 0;
4131		}
4132	}
4133	if (link) {
4134		/* Perform a reset if the media type changed. */
4135		if (hw->dev_spec._82575.media_changed) {
4136			hw->dev_spec._82575.media_changed = false;
4137			adapter->flags |= IGB_FLAG_MEDIA_RESET;
4138			igb_reset(adapter);
4139		}
4140		/* Cancel scheduled suspend requests. */
4141		pm_runtime_resume(netdev->dev.parent);
4142
4143		if (!netif_carrier_ok(netdev)) {
4144			u32 ctrl;
4145			hw->mac.ops.get_speed_and_duplex(hw,
4146							 &adapter->link_speed,
4147							 &adapter->link_duplex);
4148
4149			ctrl = rd32(E1000_CTRL);
4150			/* Links status message must follow this format */
4151			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
4152			       "Duplex, Flow Control: %s\n",
4153			       netdev->name,
4154			       adapter->link_speed,
4155			       adapter->link_duplex == FULL_DUPLEX ?
4156			       "Full" : "Half",
4157			       (ctrl & E1000_CTRL_TFCE) &&
4158			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
4159			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
4160			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
4161
4162			/* check if SmartSpeed worked */
4163			igb_check_downshift(hw);
4164			if (phy->speed_downgraded)
4165				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
4166
4167			/* check for thermal sensor event */
4168			if (igb_thermal_sensor_event(hw,
4169			    E1000_THSTAT_LINK_THROTTLE)) {
4170				netdev_info(netdev, "The network adapter link "
4171					    "speed was downshifted because it "
4172					    "overheated\n");
4173			}
4174
4175			/* adjust timeout factor according to speed/duplex */
4176			adapter->tx_timeout_factor = 1;
4177			switch (adapter->link_speed) {
4178			case SPEED_10:
4179				adapter->tx_timeout_factor = 14;
4180				break;
4181			case SPEED_100:
4182				/* maybe add some timeout factor ? */
4183				break;
4184			}
4185
4186			netif_carrier_on(netdev);
4187
4188			igb_ping_all_vfs(adapter);
4189			igb_check_vf_rate_limit(adapter);
4190
4191			/* link state has changed, schedule phy info update */
4192			if (!test_bit(__IGB_DOWN, &adapter->state))
4193				mod_timer(&adapter->phy_info_timer,
4194					  round_jiffies(jiffies + 2 * HZ));
4195		}
4196	} else {
4197		if (netif_carrier_ok(netdev)) {
4198			adapter->link_speed = 0;
4199			adapter->link_duplex = 0;
4200
4201			/* check for thermal sensor event */
4202			if (igb_thermal_sensor_event(hw,
4203			    E1000_THSTAT_PWR_DOWN)) {
4204				netdev_err(netdev, "The network adapter was "
4205					   "stopped because it overheated\n");
4206			}
4207
4208			/* Links status message must follow this format */
4209			printk(KERN_INFO "igb: %s NIC Link is Down\n",
4210			       netdev->name);
4211			netif_carrier_off(netdev);
4212
4213			igb_ping_all_vfs(adapter);
4214
4215			/* link state has changed, schedule phy info update */
4216			if (!test_bit(__IGB_DOWN, &adapter->state))
4217				mod_timer(&adapter->phy_info_timer,
4218					  round_jiffies(jiffies + 2 * HZ));
4219
4220			/* link is down, time to check for alternate media */
4221			if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
4222				igb_check_swap_media(adapter);
4223				if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
4224					schedule_work(&adapter->reset_task);
4225					/* return immediately */
4226					return;
4227				}
4228			}
4229			pm_schedule_suspend(netdev->dev.parent,
4230					    MSEC_PER_SEC * 5);
4231
4232		/* also check for alternate media here */
4233		} else if (!netif_carrier_ok(netdev) &&
4234			   (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
4235			igb_check_swap_media(adapter);
4236			if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
4237				schedule_work(&adapter->reset_task);
4238				/* return immediately */
4239				return;
4240			}
4241		}
4242	}
4243
4244	spin_lock(&adapter->stats64_lock);
4245	igb_update_stats(adapter, &adapter->stats64);
4246	spin_unlock(&adapter->stats64_lock);
4247
4248	for (i = 0; i < adapter->num_tx_queues; i++) {
4249		struct igb_ring *tx_ring = adapter->tx_ring[i];
4250		if (!netif_carrier_ok(netdev)) {
4251			/* We've lost link, so the controller stops DMA,
4252			 * but we've got queued Tx work that's never going
4253			 * to get done, so reset controller to flush Tx.
4254			 * (Do the reset outside of interrupt context).
4255			 */
4256			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
4257				adapter->tx_timeout_count++;
4258				schedule_work(&adapter->reset_task);
4259				/* return immediately since reset is imminent */
4260				return;
4261			}
4262		}
4263
4264		/* Force detection of hung controller every watchdog period */
4265		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
4266	}
4267
4268	/* Cause software interrupt to ensure Rx ring is cleaned */
4269	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
4270		u32 eics = 0;
4271		for (i = 0; i < adapter->num_q_vectors; i++)
4272			eics |= adapter->q_vector[i]->eims_value;
4273		wr32(E1000_EICS, eics);
4274	} else {
4275		wr32(E1000_ICS, E1000_ICS_RXDMT0);
4276	}
4277
4278	igb_spoof_check(adapter);
4279	igb_ptp_rx_hang(adapter);
4280
4281	/* Reset the timer */
4282	if (!test_bit(__IGB_DOWN, &adapter->state)) {
4283		if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)
4284			mod_timer(&adapter->watchdog_timer,
4285				  round_jiffies(jiffies +  HZ));
4286		else
4287			mod_timer(&adapter->watchdog_timer,
4288				  round_jiffies(jiffies + 2 * HZ));
4289	}
4290}
4291
4292enum latency_range {
4293	lowest_latency = 0,
4294	low_latency = 1,
4295	bulk_latency = 2,
4296	latency_invalid = 255
4297};
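
/* igb_set_itr() maps these ranges onto interrupt rates: lowest_latency
 * selects IGB_70K_ITR, low_latency selects IGB_20K_ITR, and bulk_latency
 * selects IGB_4K_ITR.
 */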
4298
4299/**
4300 *  igb_update_ring_itr - update the dynamic ITR value based on packet size
4301 *  @q_vector: pointer to q_vector
4302 *
4303 *  Stores a new ITR value based strictly on packet size.  This
4304 *  algorithm is less sophisticated than that used in igb_update_itr,
4305 *  due to the difficulty of synchronizing statistics across multiple
4306 *  receive rings.  The divisors and thresholds used by this function
4307 *  were determined based on theoretical maximum wire speed and testing
4308 *  data, in order to minimize response time while increasing bulk
4309 *  throughput.
4310 *  This functionality is controlled by the InterruptThrottleRate module
4311 *  parameter (see igb_param.c)
4312 *  NOTE:  This function is called only when operating in a multiqueue
4313 *         receive environment.
4314 **/
4315static void igb_update_ring_itr(struct igb_q_vector *q_vector)
4316{
4317	int new_val = q_vector->itr_val;
4318	int avg_wire_size = 0;
4319	struct igb_adapter *adapter = q_vector->adapter;
4320	unsigned int packets;
4321
4322	/* For non-gigabit speeds, just fix the interrupt rate at 4000
4323	 * ints/sec - ITR timer value of 120 ticks.
4324	 */
4325	if (adapter->link_speed != SPEED_1000) {
4326		new_val = IGB_4K_ITR;
4327		goto set_itr_val;
4328	}
4329
4330	packets = q_vector->rx.total_packets;
4331	if (packets)
4332		avg_wire_size = q_vector->rx.total_bytes / packets;
4333
4334	packets = q_vector->tx.total_packets;
4335	if (packets)
4336		avg_wire_size = max_t(u32, avg_wire_size,
4337				      q_vector->tx.total_bytes / packets);
4338
4339	/* if avg_wire_size isn't set no work was done */
4340	if (!avg_wire_size)
4341		goto clear_counts;
4342
4343	/* Add 24 bytes to size to account for CRC, preamble, and gap */
4344	avg_wire_size += 24;
4345
4346	/* Don't starve jumbo frames */
4347	avg_wire_size = min(avg_wire_size, 3000);
4348
4349	/* Give a little boost to mid-size frames */
4350	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
4351		new_val = avg_wire_size / 3;
4352	else
4353		new_val = avg_wire_size / 2;
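	/* Rough example: an average wire size of ~600 bytes yields
	 * new_val = 600 / 3 = 200, close to IGB_20K_ITR, i.e. on the
	 * order of 20,000 interrupts per second.
	 */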
4354
4355	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4356	if (new_val < IGB_20K_ITR &&
4357	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4358	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4359		new_val = IGB_20K_ITR;
4360
4361set_itr_val:
4362	if (new_val != q_vector->itr_val) {
4363		q_vector->itr_val = new_val;
4364		q_vector->set_itr = 1;
4365	}
4366clear_counts:
4367	q_vector->rx.total_bytes = 0;
4368	q_vector->rx.total_packets = 0;
4369	q_vector->tx.total_bytes = 0;
4370	q_vector->tx.total_packets = 0;
4371}
4372
4373/**
4374 *  igb_update_itr - update the dynamic ITR value based on statistics
4375 *  @q_vector: pointer to q_vector
4376 *  @ring_container: ring info to update the itr for
4377 *
4378 *  Stores a new ITR value based on packets and byte
4379 *  counts during the last interrupt.  The advantage of per interrupt
4380 *  computation is faster updates and more accurate ITR for the current
4381 *  traffic pattern.  Constants in this function were computed
4382 *  based on theoretical maximum wire speed and thresholds were set based
4383 *  on testing data as well as attempting to minimize response time
4384 *  while increasing bulk throughput.
4385 *  This functionality is controlled by the InterruptThrottleRate module
4386 *  parameter (see igb_param.c)
4387 *  NOTE:  These calculations are only valid when operating in a single-
4388 *         queue environment.
4389 **/
4390static void igb_update_itr(struct igb_q_vector *q_vector,
4391			   struct igb_ring_container *ring_container)
4392{
4393	unsigned int packets = ring_container->total_packets;
4394	unsigned int bytes = ring_container->total_bytes;
4395	u8 itrval = ring_container->itr;
4396
4397	/* no packets, exit with status unchanged */
4398	if (packets == 0)
4399		return;
4400
4401	switch (itrval) {
4402	case lowest_latency:
4403		/* handle TSO and jumbo frames */
4404		if (bytes/packets > 8000)
4405			itrval = bulk_latency;
4406		else if ((packets < 5) && (bytes > 512))
4407			itrval = low_latency;
4408		break;
4409	case low_latency:  /* 50 usec aka 20000 ints/s */
4410		if (bytes > 10000) {
4411			/* this if handles the TSO accounting */
4412			if (bytes/packets > 8000) {
4413				itrval = bulk_latency;
4414			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
4415				itrval = bulk_latency;
4416			} else if (packets > 35) {
4417				itrval = lowest_latency;
4418			}
4419		} else if (bytes/packets > 2000) {
4420			itrval = bulk_latency;
4421		} else if (packets <= 2 && bytes < 512) {
4422			itrval = lowest_latency;
4423		}
4424		break;
4425	case bulk_latency: /* 250 usec aka 4000 ints/s */
4426		if (bytes > 25000) {
4427			if (packets > 35)
4428				itrval = low_latency;
4429		} else if (bytes < 1500) {
4430			itrval = low_latency;
4431		}
4432		break;
4433	}
4434
4435	/* clear work counters since we have the values we need */
4436	ring_container->total_bytes = 0;
4437	ring_container->total_packets = 0;
4438
4439	/* write updated itr to ring container */
4440	ring_container->itr = itrval;
4441}
4442
4443static void igb_set_itr(struct igb_q_vector *q_vector)
4444{
4445	struct igb_adapter *adapter = q_vector->adapter;
4446	u32 new_itr = q_vector->itr_val;
4447	u8 current_itr = 0;
4448
4449	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4450	if (adapter->link_speed != SPEED_1000) {
4451		current_itr = 0;
4452		new_itr = IGB_4K_ITR;
4453		goto set_itr_now;
4454	}
4455
4456	igb_update_itr(q_vector, &q_vector->tx);
4457	igb_update_itr(q_vector, &q_vector->rx);
4458
4459	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4460
4461	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4462	if (current_itr == lowest_latency &&
4463	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4464	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4465		current_itr = low_latency;
4466
4467	switch (current_itr) {
4468	/* counts and packets in update_itr are dependent on these numbers */
4469	case lowest_latency:
4470		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4471		break;
4472	case low_latency:
4473		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4474		break;
4475	case bulk_latency:
4476		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4477		break;
4478	default:
4479		break;
4480	}
4481
4482set_itr_now:
4483	if (new_itr != q_vector->itr_val) {
4484		/* this attempts to bias the interrupt rate towards Bulk
4485		 * by adding intermediate steps when interrupt rate is
4486		 * increasing
4487		 */
4488		new_itr = new_itr > q_vector->itr_val ?
4489			  max((new_itr * q_vector->itr_val) /
4490			  (new_itr + (q_vector->itr_val >> 2)),
4491			  new_itr) : new_itr;
4492		/* Don't write the value here; it resets the adapter's
4493		 * internal timer, and causes us to delay far longer than
4494		 * we should between interrupts.  Instead, we write the ITR
4495		 * value at the beginning of the next interrupt so the timing
4496		 * ends up being correct.
4497		 */
4498		q_vector->itr_val = new_itr;
4499		q_vector->set_itr = 1;
4500	}
4501}
4502
4503static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4504			    u32 type_tucmd, u32 mss_l4len_idx)
4505{
4506	struct e1000_adv_tx_context_desc *context_desc;
4507	u16 i = tx_ring->next_to_use;
4508
4509	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4510
4511	i++;
4512	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4513
4514	/* set bits to identify this as an advanced context descriptor */
4515	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4516
4517	/* For 82575, context index must be unique per ring. */
4518	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4519		mss_l4len_idx |= tx_ring->reg_idx << 4;
4520
4521	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4522	context_desc->seqnum_seed	= 0;
4523	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4524	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4525}
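
/* Note that a context descriptor consumes a regular slot in the Tx ring;
 * igb_xmit_frame_ring() accounts for it in its descriptor-count estimate
 * ("+ 1 desc for context descriptor").
 */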
4526
4527static int igb_tso(struct igb_ring *tx_ring,
4528		   struct igb_tx_buffer *first,
4529		   u8 *hdr_len)
4530{
4531	struct sk_buff *skb = first->skb;
4532	u32 vlan_macip_lens, type_tucmd;
4533	u32 mss_l4len_idx, l4len;
4534
4535	if (skb->ip_summed != CHECKSUM_PARTIAL)
4536		return 0;
4537
4538	if (!skb_is_gso(skb))
4539		return 0;
4540
4541	if (skb_header_cloned(skb)) {
4542		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4543		if (err)
4544			return err;
4545	}
4546
4547	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4548	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4549
4550	if (first->protocol == __constant_htons(ETH_P_IP)) {
4551		struct iphdr *iph = ip_hdr(skb);
4552		iph->tot_len = 0;
4553		iph->check = 0;
4554		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4555							 iph->daddr, 0,
4556							 IPPROTO_TCP,
4557							 0);
4558		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4559		first->tx_flags |= IGB_TX_FLAGS_TSO |
4560				   IGB_TX_FLAGS_CSUM |
4561				   IGB_TX_FLAGS_IPV4;
4562	} else if (skb_is_gso_v6(skb)) {
4563		ipv6_hdr(skb)->payload_len = 0;
4564		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4565						       &ipv6_hdr(skb)->daddr,
4566						       0, IPPROTO_TCP, 0);
4567		first->tx_flags |= IGB_TX_FLAGS_TSO |
4568				   IGB_TX_FLAGS_CSUM;
4569	}
4570
4571	/* compute header lengths */
4572	l4len = tcp_hdrlen(skb);
4573	*hdr_len = skb_transport_offset(skb) + l4len;
4574
4575	/* update gso size and bytecount with header size */
4576	first->gso_segs = skb_shinfo(skb)->gso_segs;
4577	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4578
4579	/* MSS L4LEN IDX */
4580	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4581	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4582
4583	/* VLAN MACLEN IPLEN */
4584	vlan_macip_lens = skb_network_header_len(skb);
4585	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4586	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4587
4588	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4589
4590	return 1;
4591}
4592
4593static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4594{
4595	struct sk_buff *skb = first->skb;
4596	u32 vlan_macip_lens = 0;
4597	u32 mss_l4len_idx = 0;
4598	u32 type_tucmd = 0;
4599
4600	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4601		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4602			return;
4603	} else {
4604		u8 l4_hdr = 0;
4605		switch (first->protocol) {
4606		case __constant_htons(ETH_P_IP):
4607			vlan_macip_lens |= skb_network_header_len(skb);
4608			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4609			l4_hdr = ip_hdr(skb)->protocol;
4610			break;
4611		case __constant_htons(ETH_P_IPV6):
4612			vlan_macip_lens |= skb_network_header_len(skb);
4613			l4_hdr = ipv6_hdr(skb)->nexthdr;
4614			break;
4615		default:
4616			if (unlikely(net_ratelimit())) {
4617				dev_warn(tx_ring->dev,
4618					 "partial checksum but proto=%x!\n",
4619					 first->protocol);
4620			}
4621			break;
4622		}
4623
4624		switch (l4_hdr) {
4625		case IPPROTO_TCP:
4626			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4627			mss_l4len_idx = tcp_hdrlen(skb) <<
4628					E1000_ADVTXD_L4LEN_SHIFT;
4629			break;
4630		case IPPROTO_SCTP:
4631			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4632			mss_l4len_idx = sizeof(struct sctphdr) <<
4633					E1000_ADVTXD_L4LEN_SHIFT;
4634			break;
4635		case IPPROTO_UDP:
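			/* a TUCMD.L4T value of 0 (left unset here) selects
			 * UDP, so only the L4 header length is provided
			 */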
4636			mss_l4len_idx = sizeof(struct udphdr) <<
4637					E1000_ADVTXD_L4LEN_SHIFT;
4638			break;
4639		default:
4640			if (unlikely(net_ratelimit())) {
4641				dev_warn(tx_ring->dev,
4642					 "partial checksum but l4 proto=%x!\n",
4643					 l4_hdr);
4644			}
4645			break;
4646		}
4647
4648		/* update TX checksum flag */
4649		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4650	}
4651
4652	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4653	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4654
4655	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4656}
4657
4658#define IGB_SET_FLAG(_input, _flag, _result) \
4659	((_flag <= _result) ? \
4660	 ((u32)(_input & _flag) * (_result / _flag)) : \
4661	 ((u32)(_input & _flag) / (_flag / _result)))
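
/* For the single-bit masks used in this driver, IGB_SET_FLAG() maps a flag
 * bit in _input onto the corresponding _result bit without branching: the
 * (_flag <= _result) test is a compile-time constant, so at run time this is
 * just a mask followed by a multiply or divide by their power-of-two ratio.
 * For example, IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN, E1000_ADVTXD_DCMD_VLE)
 * evaluates to E1000_ADVTXD_DCMD_VLE when the VLAN flag is set and to 0
 * otherwise.
 */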
4662
4663static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
4664{
4665	/* set type for advanced descriptor with frame checksum insertion */
4666	u32 cmd_type = E1000_ADVTXD_DTYP_DATA |
4667		       E1000_ADVTXD_DCMD_DEXT |
4668		       E1000_ADVTXD_DCMD_IFCS;
4669
4670	/* set HW vlan bit if vlan is present */
4671	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN,
4672				 (E1000_ADVTXD_DCMD_VLE));
4673
4674	/* set segmentation bits for TSO */
4675	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO,
4676				 (E1000_ADVTXD_DCMD_TSE));
4677
4678	/* set timestamp bit if present */
4679	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP,
4680				 (E1000_ADVTXD_MAC_TSTAMP));
4681
4682	/* clear IFCS (insert frame checksum) when skb->no_fcs is set */
4683	cmd_type ^= IGB_SET_FLAG(skb->no_fcs, 1, E1000_ADVTXD_DCMD_IFCS);
4684
4685	return cmd_type;
4686}
4687
4688static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4689				 union e1000_adv_tx_desc *tx_desc,
4690				 u32 tx_flags, unsigned int paylen)
4691{
4692	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4693
4694	/* 82575 requires a unique index per ring */
4695	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4696		olinfo_status |= tx_ring->reg_idx << 4;
4697
4698	/* insert L4 checksum */
4699	olinfo_status |= IGB_SET_FLAG(tx_flags,
4700				      IGB_TX_FLAGS_CSUM,
4701				      (E1000_TXD_POPTS_TXSM << 8));
4702
4703	/* insert IPv4 checksum */
4704	olinfo_status |= IGB_SET_FLAG(tx_flags,
4705				      IGB_TX_FLAGS_IPV4,
4706				      (E1000_TXD_POPTS_IXSM << 8));
4707
4708	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4709}
4710
4711static void igb_tx_map(struct igb_ring *tx_ring,
4712		       struct igb_tx_buffer *first,
4713		       const u8 hdr_len)
4714{
4715	struct sk_buff *skb = first->skb;
4716	struct igb_tx_buffer *tx_buffer;
4717	union e1000_adv_tx_desc *tx_desc;
4718	struct skb_frag_struct *frag;
4719	dma_addr_t dma;
4720	unsigned int data_len, size;
4721	u32 tx_flags = first->tx_flags;
4722	u32 cmd_type = igb_tx_cmd_type(skb, tx_flags);
4723	u16 i = tx_ring->next_to_use;
4724
4725	tx_desc = IGB_TX_DESC(tx_ring, i);
4726
4727	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
4728
4729	size = skb_headlen(skb);
4730	data_len = skb->data_len;
4731
4732	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4733
4734	tx_buffer = first;
4735
4736	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
4737		if (dma_mapping_error(tx_ring->dev, dma))
4738			goto dma_error;
4739
4740		/* record length, and DMA address */
4741		dma_unmap_len_set(tx_buffer, len, size);
4742		dma_unmap_addr_set(tx_buffer, dma, dma);
4743
4744		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4745
4746		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
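			/* the command/type bits in cmd_type all sit above the
			 * 16-bit DTALEN field, so XORing in the chunk size
			 * simply fills in the length
			 */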
4747			tx_desc->read.cmd_type_len =
4748				cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD);
4749
4750			i++;
4751			tx_desc++;
4752			if (i == tx_ring->count) {
4753				tx_desc = IGB_TX_DESC(tx_ring, 0);
4754				i = 0;
4755			}
4756			tx_desc->read.olinfo_status = 0;
4757
4758			dma += IGB_MAX_DATA_PER_TXD;
4759			size -= IGB_MAX_DATA_PER_TXD;
4760
4761			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4762		}
4763
4764		if (likely(!data_len))
4765			break;
4766
4767		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
4768
4769		i++;
4770		tx_desc++;
4771		if (i == tx_ring->count) {
4772			tx_desc = IGB_TX_DESC(tx_ring, 0);
4773			i = 0;
4774		}
4775		tx_desc->read.olinfo_status = 0;
4776
4777		size = skb_frag_size(frag);
4778		data_len -= size;
4779
4780		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4781				       size, DMA_TO_DEVICE);
4782
4783		tx_buffer = &tx_ring->tx_buffer_info[i];
4784	}
4785
4786	/* write last descriptor with RS and EOP bits */
4787	cmd_type |= size | IGB_TXD_DCMD;
4788	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
4789
4790	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4791
4792	/* set the timestamp */
4793	first->time_stamp = jiffies;
4794
4795	/* Force memory writes to complete before letting h/w know there
4796	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4797	 * memory model archs, such as IA-64).
4798	 *
4799	 * We also need this memory barrier to make certain all of the
4800	 * status bits have been updated before next_to_watch is written.
4801	 */
4802	wmb();
4803
4804	/* set next_to_watch value indicating a packet is present */
4805	first->next_to_watch = tx_desc;
4806
4807	i++;
4808	if (i == tx_ring->count)
4809		i = 0;
4810
4811	tx_ring->next_to_use = i;
4812
4813	writel(i, tx_ring->tail);
4814
4815	/* we need this if more than one processor can write to our tail
4816	 * at a time, it synchronizes IO on IA64/Altix systems
4817	 */
4818	mmiowb();
4819
4820	return;
4821
4822dma_error:
4823	dev_err(tx_ring->dev, "TX DMA map failed\n");
4824
4825	/* clear dma mappings for failed tx_buffer_info map */
4826	for (;;) {
4827		tx_buffer = &tx_ring->tx_buffer_info[i];
4828		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
4829		if (tx_buffer == first)
4830			break;
4831		if (i == 0)
4832			i = tx_ring->count;
4833		i--;
4834	}
4835
4836	tx_ring->next_to_use = i;
4837}
4838
4839static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4840{
4841	struct net_device *netdev = tx_ring->netdev;
4842
4843	netif_stop_subqueue(netdev, tx_ring->queue_index);
4844
4845	/* Herbert's original patch had:
4846	 *  smp_mb__after_netif_stop_queue();
4847	 * but since that doesn't exist yet, just open code it.
4848	 */
4849	smp_mb();
4850
4851	/* We need to check again in a case another CPU has just
4852	 * made room available.
4853	 */
4854	if (igb_desc_unused(tx_ring) < size)
4855		return -EBUSY;
4856
4857	/* A reprieve! */
4858	netif_wake_subqueue(netdev, tx_ring->queue_index);
4859
4860	u64_stats_update_begin(&tx_ring->tx_syncp2);
4861	tx_ring->tx_stats.restart_queue2++;
4862	u64_stats_update_end(&tx_ring->tx_syncp2);
4863
4864	return 0;
4865}
4866
4867static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4868{
4869	if (igb_desc_unused(tx_ring) >= size)
4870		return 0;
4871	return __igb_maybe_stop_tx(tx_ring, size);
4872}
4873
4874netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4875				struct igb_ring *tx_ring)
4876{
4877	struct igb_tx_buffer *first;
4878	int tso;
4879	u32 tx_flags = 0;
4880	u16 count = TXD_USE_COUNT(skb_headlen(skb));
4881	__be16 protocol = vlan_get_protocol(skb);
4882	u8 hdr_len = 0;
4883
4884	/* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD,
4885	 *       + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD,
4886	 *       + 2 desc gap to keep tail from touching head,
4887	 *       + 1 desc for context descriptor,
4888	 * otherwise try next time
4889	 */
4890	if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) {
4891		unsigned short f;
4892		for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
4893			count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
4894	} else {
4895		count += skb_shinfo(skb)->nr_frags;
4896	}
4897
4898	if (igb_maybe_stop_tx(tx_ring, count + 3)) {
4899		/* this is a hard error */
4900		return NETDEV_TX_BUSY;
4901	}
4902
4903	/* record the location of the first descriptor for this packet */
4904	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4905	first->skb = skb;
4906	first->bytecount = skb->len;
4907	first->gso_segs = 1;
4908
4909	skb_tx_timestamp(skb);
4910
4911	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4912		struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4913
4914		if (!(adapter->ptp_tx_skb)) {
4915			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4916			tx_flags |= IGB_TX_FLAGS_TSTAMP;
4917
4918			adapter->ptp_tx_skb = skb_get(skb);
4919			adapter->ptp_tx_start = jiffies;
4920			if (adapter->hw.mac.type == e1000_82576)
4921				schedule_work(&adapter->ptp_tx_work);
4922		}
4923	}
4924
4925	if (vlan_tx_tag_present(skb)) {
4926		tx_flags |= IGB_TX_FLAGS_VLAN;
4927		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4928	}
4929
4930	/* record initial flags and protocol */
4931	first->tx_flags = tx_flags;
4932	first->protocol = protocol;
4933
4934	tso = igb_tso(tx_ring, first, &hdr_len);
4935	if (tso < 0)
4936		goto out_drop;
4937	else if (!tso)
4938		igb_tx_csum(tx_ring, first);
4939
4940	igb_tx_map(tx_ring, first, hdr_len);
4941
4942	/* Make sure there is space in the ring for the next send. */
4943	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
4944
4945	return NETDEV_TX_OK;
4946
4947out_drop:
4948	igb_unmap_and_free_tx_resource(tx_ring, first);
4949
4950	return NETDEV_TX_OK;
4951}
4952
4953static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4954						    struct sk_buff *skb)
4955{
4956	unsigned int r_idx = skb->queue_mapping;
4957
4958	if (r_idx >= adapter->num_tx_queues)
4959		r_idx = r_idx % adapter->num_tx_queues;
4960
4961	return adapter->tx_ring[r_idx];
4962}
4963
4964static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4965				  struct net_device *netdev)
4966{
4967	struct igb_adapter *adapter = netdev_priv(netdev);
4968
4969	if (test_bit(__IGB_DOWN, &adapter->state)) {
4970		dev_kfree_skb_any(skb);
4971		return NETDEV_TX_OK;
4972	}
4973
4974	if (skb->len <= 0) {
4975		dev_kfree_skb_any(skb);
4976		return NETDEV_TX_OK;
4977	}
4978
4979	/* The minimum packet size with TCTL.PSP set is 17 so pad the skb
4980	 * in order to meet this minimum size requirement.
4981	 */
4982	if (unlikely(skb->len < 17)) {
4983		if (skb_pad(skb, 17 - skb->len))
4984			return NETDEV_TX_OK;
4985		skb->len = 17;
4986		skb_set_tail_pointer(skb, 17);
4987	}
4988
4989	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4990}
4991
4992/**
4993 *  igb_tx_timeout - Respond to a Tx Hang
4994 *  @netdev: network interface device structure
4995 **/
4996static void igb_tx_timeout(struct net_device *netdev)
4997{
4998	struct igb_adapter *adapter = netdev_priv(netdev);
4999	struct e1000_hw *hw = &adapter->hw;
5000
5001	/* Do the reset outside of interrupt context */
5002	adapter->tx_timeout_count++;
5003
5004	if (hw->mac.type >= e1000_82580)
5005		hw->dev_spec._82575.global_device_reset = true;
5006
5007	schedule_work(&adapter->reset_task);
5008	wr32(E1000_EICS,
5009	     (adapter->eims_enable_mask & ~adapter->eims_other));
5010}
5011
5012static void igb_reset_task(struct work_struct *work)
5013{
5014	struct igb_adapter *adapter;
5015	adapter = container_of(work, struct igb_adapter, reset_task);
5016
5017	igb_dump(adapter);
5018	netdev_err(adapter->netdev, "Reset adapter\n");
5019	igb_reinit_locked(adapter);
5020}
5021
5022/**
5023 *  igb_get_stats64 - Get System Network Statistics
5024 *  @netdev: network interface device structure
5025 *  @stats: rtnl_link_stats64 pointer
5026 **/
5027static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
5028						struct rtnl_link_stats64 *stats)
5029{
5030	struct igb_adapter *adapter = netdev_priv(netdev);
5031
5032	spin_lock(&adapter->stats64_lock);
5033	igb_update_stats(adapter, &adapter->stats64);
5034	memcpy(stats, &adapter->stats64, sizeof(*stats));
5035	spin_unlock(&adapter->stats64_lock);
5036
5037	return stats;
5038}
5039
5040/**
5041 *  igb_change_mtu - Change the Maximum Transfer Unit
5042 *  @netdev: network interface device structure
5043 *  @new_mtu: new value for maximum frame size
5044 *
5045 *  Returns 0 on success, negative on failure
5046 **/
5047static int igb_change_mtu(struct net_device *netdev, int new_mtu)
5048{
5049	struct igb_adapter *adapter = netdev_priv(netdev);
5050	struct pci_dev *pdev = adapter->pdev;
5051	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
5052
5053	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
5054		dev_err(&pdev->dev, "Invalid MTU setting\n");
5055		return -EINVAL;
5056	}
5057
5058#define MAX_STD_JUMBO_FRAME_SIZE 9238
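	/* 9238 bytes on the wire corresponds to a 9216 byte MTU plus
	 * ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) of overhead,
	 * matching the "MTU > 9216" message below.
	 */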
5059	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
5060		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
5061		return -EINVAL;
5062	}
5063
5064	/* adjust max frame to be at least the size of a standard frame */
5065	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
5066		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
5067
5068	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
5069		msleep(1);
5070
5071	/* igb_down has a dependency on max_frame_size */
5072	adapter->max_frame_size = max_frame;
5073
5074	if (netif_running(netdev))
5075		igb_down(adapter);
5076
5077	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
5078		 netdev->mtu, new_mtu);
5079	netdev->mtu = new_mtu;
5080
5081	if (netif_running(netdev))
5082		igb_up(adapter);
5083	else
5084		igb_reset(adapter);
5085
5086	clear_bit(__IGB_RESETTING, &adapter->state);
5087
5088	return 0;
5089}
5090
5091/**
5092 *  igb_update_stats - Update the board statistics counters
5093 *  @adapter: board private structure
5094 **/
5095void igb_update_stats(struct igb_adapter *adapter,
5096		      struct rtnl_link_stats64 *net_stats)
5097{
5098	struct e1000_hw *hw = &adapter->hw;
5099	struct pci_dev *pdev = adapter->pdev;
5100	u32 reg, mpc;
5101	u16 phy_tmp;
5102	int i;
5103	u64 bytes, packets;
5104	unsigned int start;
5105	u64 _bytes, _packets;
5106
5107#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
5108
5109	/* Prevent stats update while adapter is being reset, or if the pci
5110	 * connection is down.
5111	 */
5112	if (adapter->link_speed == 0)
5113		return;
5114	if (pci_channel_offline(pdev))
5115		return;
5116
5117	bytes = 0;
5118	packets = 0;
5119
5120	rcu_read_lock();
5121	for (i = 0; i < adapter->num_rx_queues; i++) {
5122		u32 rqdpc = rd32(E1000_RQDPC(i));
5123		struct igb_ring *ring = adapter->rx_ring[i];
5124
5125		if (rqdpc) {
5126			ring->rx_stats.drops += rqdpc;
5127			net_stats->rx_fifo_errors += rqdpc;
5128		}
5129
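		/* u64_stats_fetch_begin/retry re-read the counters if a
		 * concurrent writer updated them, keeping bytes and packets
		 * consistent without taking a lock on this path
		 */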
5130		do {
5131			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
5132			_bytes = ring->rx_stats.bytes;
5133			_packets = ring->rx_stats.packets;
5134		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
5135		bytes += _bytes;
5136		packets += _packets;
5137	}
5138
5139	net_stats->rx_bytes = bytes;
5140	net_stats->rx_packets = packets;
5141
5142	bytes = 0;
5143	packets = 0;
5144	for (i = 0; i < adapter->num_tx_queues; i++) {
5145		struct igb_ring *ring = adapter->tx_ring[i];
5146		do {
5147			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
5148			_bytes = ring->tx_stats.bytes;
5149			_packets = ring->tx_stats.packets;
5150		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
5151		bytes += _bytes;
5152		packets += _packets;
5153	}
5154	net_stats->tx_bytes = bytes;
5155	net_stats->tx_packets = packets;
5156	rcu_read_unlock();
5157
5158	/* read stats registers */
5159	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
5160	adapter->stats.gprc += rd32(E1000_GPRC);
5161	adapter->stats.gorc += rd32(E1000_GORCL);
5162	rd32(E1000_GORCH); /* clear GORCL */
5163	adapter->stats.bprc += rd32(E1000_BPRC);
5164	adapter->stats.mprc += rd32(E1000_MPRC);
5165	adapter->stats.roc += rd32(E1000_ROC);
5166
5167	adapter->stats.prc64 += rd32(E1000_PRC64);
5168	adapter->stats.prc127 += rd32(E1000_PRC127);
5169	adapter->stats.prc255 += rd32(E1000_PRC255);
5170	adapter->stats.prc511 += rd32(E1000_PRC511);
5171	adapter->stats.prc1023 += rd32(E1000_PRC1023);
5172	adapter->stats.prc1522 += rd32(E1000_PRC1522);
5173	adapter->stats.symerrs += rd32(E1000_SYMERRS);
5174	adapter->stats.sec += rd32(E1000_SEC);
5175
5176	mpc = rd32(E1000_MPC);
5177	adapter->stats.mpc += mpc;
5178	net_stats->rx_fifo_errors += mpc;
5179	adapter->stats.scc += rd32(E1000_SCC);
5180	adapter->stats.ecol += rd32(E1000_ECOL);
5181	adapter->stats.mcc += rd32(E1000_MCC);
5182	adapter->stats.latecol += rd32(E1000_LATECOL);
5183	adapter->stats.dc += rd32(E1000_DC);
5184	adapter->stats.rlec += rd32(E1000_RLEC);
5185	adapter->stats.xonrxc += rd32(E1000_XONRXC);
5186	adapter->stats.xontxc += rd32(E1000_XONTXC);
5187	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
5188	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
5189	adapter->stats.fcruc += rd32(E1000_FCRUC);
5190	adapter->stats.gptc += rd32(E1000_GPTC);
5191	adapter->stats.gotc += rd32(E1000_GOTCL);
5192	rd32(E1000_GOTCH); /* clear GOTCL */
5193	adapter->stats.rnbc += rd32(E1000_RNBC);
5194	adapter->stats.ruc += rd32(E1000_RUC);
5195	adapter->stats.rfc += rd32(E1000_RFC);
5196	adapter->stats.rjc += rd32(E1000_RJC);
5197	adapter->stats.tor += rd32(E1000_TORH);
5198	adapter->stats.tot += rd32(E1000_TOTH);
5199	adapter->stats.tpr += rd32(E1000_TPR);
5200
5201	adapter->stats.ptc64 += rd32(E1000_PTC64);
5202	adapter->stats.ptc127 += rd32(E1000_PTC127);
5203	adapter->stats.ptc255 += rd32(E1000_PTC255);
5204	adapter->stats.ptc511 += rd32(E1000_PTC511);
5205	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
5206	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
5207
5208	adapter->stats.mptc += rd32(E1000_MPTC);
5209	adapter->stats.bptc += rd32(E1000_BPTC);
5210
5211	adapter->stats.tpt += rd32(E1000_TPT);
5212	adapter->stats.colc += rd32(E1000_COLC);
5213
5214	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
5215	/* read internal phy specific stats */
5216	reg = rd32(E1000_CTRL_EXT);
5217	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
5218		adapter->stats.rxerrc += rd32(E1000_RXERRC);
5219
5220		/* this stat has invalid values on i210/i211 */
5221		if ((hw->mac.type != e1000_i210) &&
5222		    (hw->mac.type != e1000_i211))
5223			adapter->stats.tncrs += rd32(E1000_TNCRS);
5224	}
5225
5226	adapter->stats.tsctc += rd32(E1000_TSCTC);
5227	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
5228
5229	adapter->stats.iac += rd32(E1000_IAC);
5230	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
5231	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
5232	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
5233	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
5234	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
5235	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
5236	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
5237	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
5238
5239	/* Fill out the OS statistics structure */
5240	net_stats->multicast = adapter->stats.mprc;
5241	net_stats->collisions = adapter->stats.colc;
5242
5243	/* Rx Errors */
5244
5245	/* RLEC on some newer hardware can be incorrect so build
5246	 * our own version based on RUC and ROC
5247	 */
5248	net_stats->rx_errors = adapter->stats.rxerrc +
5249		adapter->stats.crcerrs + adapter->stats.algnerrc +
5250		adapter->stats.ruc + adapter->stats.roc +
5251		adapter->stats.cexterr;
5252	net_stats->rx_length_errors = adapter->stats.ruc +
5253				      adapter->stats.roc;
5254	net_stats->rx_crc_errors = adapter->stats.crcerrs;
5255	net_stats->rx_frame_errors = adapter->stats.algnerrc;
5256	net_stats->rx_missed_errors = adapter->stats.mpc;
5257
5258	/* Tx Errors */
5259	net_stats->tx_errors = adapter->stats.ecol +
5260			       adapter->stats.latecol;
5261	net_stats->tx_aborted_errors = adapter->stats.ecol;
5262	net_stats->tx_window_errors = adapter->stats.latecol;
5263	net_stats->tx_carrier_errors = adapter->stats.tncrs;
5264
5265	/* Tx Dropped needs to be maintained elsewhere */
5266
5267	/* Phy Stats */
5268	if (hw->phy.media_type == e1000_media_type_copper) {
5269		if ((adapter->link_speed == SPEED_1000) &&
5270		    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
5271			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
5272			adapter->phy_stats.idle_errors += phy_tmp;
5273		}
5274	}
5275
5276	/* Management Stats */
5277	adapter->stats.mgptc += rd32(E1000_MGTPTC);
5278	adapter->stats.mgprc += rd32(E1000_MGTPRC);
5279	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
5280
5281	/* OS2BMC Stats */
5282	reg = rd32(E1000_MANC);
5283	if (reg & E1000_MANC_EN_BMC2OS) {
5284		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
5285		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
5286		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
5287		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
5288	}
5289}
5290
5291static irqreturn_t igb_msix_other(int irq, void *data)
5292{
5293	struct igb_adapter *adapter = data;
5294	struct e1000_hw *hw = &adapter->hw;
5295	u32 icr = rd32(E1000_ICR);
5296	/* reading ICR causes bit 31 of EICR to be cleared */
5297
5298	if (icr & E1000_ICR_DRSTA)
5299		schedule_work(&adapter->reset_task);
5300
5301	if (icr & E1000_ICR_DOUTSYNC) {
5302		/* HW is reporting DMA is out of sync */
5303		adapter->stats.doosync++;
5304		/* The DMA Out of Sync is also indication of a spoof event
5305		 * in IOV mode. Check the Wrong VM Behavior register to
5306		 * see if it is really a spoof event.
5307		 */
5308		igb_check_wvbr(adapter);
5309	}
5310
5311	/* Check for a mailbox event */
5312	if (icr & E1000_ICR_VMMB)
5313		igb_msg_task(adapter);
5314
5315	if (icr & E1000_ICR_LSC) {
5316		hw->mac.get_link_status = 1;
5317		/* guard against interrupt when we're going down */
5318		if (!test_bit(__IGB_DOWN, &adapter->state))
5319			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5320	}
5321
5322	if (icr & E1000_ICR_TS) {
5323		u32 tsicr = rd32(E1000_TSICR);
5324
5325		if (tsicr & E1000_TSICR_TXTS) {
5326			/* acknowledge the interrupt */
5327			wr32(E1000_TSICR, E1000_TSICR_TXTS);
5328			/* retrieve hardware timestamp */
5329			schedule_work(&adapter->ptp_tx_work);
5330		}
5331	}
5332
5333	wr32(E1000_EIMS, adapter->eims_other);
5334
5335	return IRQ_HANDLED;
5336}
5337
5338static void igb_write_itr(struct igb_q_vector *q_vector)
5339{
5340	struct igb_adapter *adapter = q_vector->adapter;
5341	u32 itr_val = q_vector->itr_val & 0x7FFC;
5342
5343	if (!q_vector->set_itr)
5344		return;
5345
5346	if (!itr_val)
5347		itr_val = 0x4;
5348
5349	if (adapter->hw.mac.type == e1000_82575)
5350		itr_val |= itr_val << 16;
5351	else
5352		itr_val |= E1000_EITR_CNT_IGNR;
5353
5354	writel(itr_val, q_vector->itr_register);
5355	q_vector->set_itr = 0;
5356}
5357
5358static irqreturn_t igb_msix_ring(int irq, void *data)
5359{
5360	struct igb_q_vector *q_vector = data;
5361
5362	/* Write the ITR value calculated from the previous interrupt. */
5363	igb_write_itr(q_vector);
5364
5365	napi_schedule(&q_vector->napi);
5366
5367	return IRQ_HANDLED;
5368}
5369
5370#ifdef CONFIG_IGB_DCA
5371static void igb_update_tx_dca(struct igb_adapter *adapter,
5372			      struct igb_ring *tx_ring,
5373			      int cpu)
5374{
5375	struct e1000_hw *hw = &adapter->hw;
5376	u32 txctrl = dca3_get_tag(tx_ring->dev, cpu);
5377
5378	if (hw->mac.type != e1000_82575)
5379		txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT;
5380
5381	/* We can enable relaxed ordering for reads, but not writes when
5382	 * DCA is enabled.  This is due to a known issue in some chipsets
5383	 * which will cause the DCA tag to be cleared.
5384	 */
5385	txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN |
5386		  E1000_DCA_TXCTRL_DATA_RRO_EN |
5387		  E1000_DCA_TXCTRL_DESC_DCA_EN;
5388
5389	wr32(E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl);
5390}
5391
5392static void igb_update_rx_dca(struct igb_adapter *adapter,
5393			      struct igb_ring *rx_ring,
5394			      int cpu)
5395{
5396	struct e1000_hw *hw = &adapter->hw;
5397	u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu);
5398
5399	if (hw->mac.type != e1000_82575)
5400		rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT;
5401
5402	/* We can enable relaxed ordering for reads, but not writes when
5403	 * DCA is enabled.  This is due to a known issue in some chipsets
5404	 * which will cause the DCA tag to be cleared.
5405	 */
5406	rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN |
5407		  E1000_DCA_RXCTRL_DESC_DCA_EN;
5408
5409	wr32(E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl);
5410}
5411
5412static void igb_update_dca(struct igb_q_vector *q_vector)
5413{
5414	struct igb_adapter *adapter = q_vector->adapter;
5415	int cpu = get_cpu();
5416
5417	if (q_vector->cpu == cpu)
5418		goto out_no_update;
5419
5420	if (q_vector->tx.ring)
5421		igb_update_tx_dca(adapter, q_vector->tx.ring, cpu);
5422
5423	if (q_vector->rx.ring)
5424		igb_update_rx_dca(adapter, q_vector->rx.ring, cpu);
5425
5426	q_vector->cpu = cpu;
5427out_no_update:
5428	put_cpu();
5429}
5430
5431static void igb_setup_dca(struct igb_adapter *adapter)
5432{
5433	struct e1000_hw *hw = &adapter->hw;
5434	int i;
5435
5436	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
5437		return;
5438
5439	/* Always use CB2 mode, difference is masked in the CB driver. */
5440	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
5441
5442	for (i = 0; i < adapter->num_q_vectors; i++) {
5443		adapter->q_vector[i]->cpu = -1;
5444		igb_update_dca(adapter->q_vector[i]);
5445	}
5446}
5447
5448static int __igb_notify_dca(struct device *dev, void *data)
5449{
5450	struct net_device *netdev = dev_get_drvdata(dev);
5451	struct igb_adapter *adapter = netdev_priv(netdev);
5452	struct pci_dev *pdev = adapter->pdev;
5453	struct e1000_hw *hw = &adapter->hw;
5454	unsigned long event = *(unsigned long *)data;
5455
5456	switch (event) {
5457	case DCA_PROVIDER_ADD:
5458		/* if already enabled, don't do it again */
5459		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
5460			break;
5461		if (dca_add_requester(dev) == 0) {
5462			adapter->flags |= IGB_FLAG_DCA_ENABLED;
5463			dev_info(&pdev->dev, "DCA enabled\n");
5464			igb_setup_dca(adapter);
5465			break;
5466		}
5467		/* Fall Through since DCA is disabled. */
5468	case DCA_PROVIDER_REMOVE:
5469		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
5470			/* without this a class_device is left
5471			 * hanging around in the sysfs model
5472			 */
5473			dca_remove_requester(dev);
5474			dev_info(&pdev->dev, "DCA disabled\n");
5475			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
5476			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
5477		}
5478		break;
5479	}
5480
5481	return 0;
5482}
5483
5484static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5485			  void *p)
5486{
5487	int ret_val;
5488
5489	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5490					 __igb_notify_dca);
5491
5492	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
5493}
5494#endif /* CONFIG_IGB_DCA */
5495
5496#ifdef CONFIG_PCI_IOV
5497static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5498{
5499	unsigned char mac_addr[ETH_ALEN];
5500
5501	eth_zero_addr(mac_addr);
5502	igb_set_vf_mac(adapter, vf, mac_addr);
5503
5504	/* By default spoof check is enabled for all VFs */
5505	adapter->vf_data[vf].spoofchk_enabled = true;
5506
5507	return 0;
5508}
5509
5510#endif
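/**
 *  igb_ping_all_vfs - write a PF control message to every VF mailbox
 *  @adapter: board private structure
 *
 *  The clear-to-send bit is kept set in the message for VFs that have
 *  already completed negotiation with the PF.
 **/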
5511static void igb_ping_all_vfs(struct igb_adapter *adapter)
5512{
5513	struct e1000_hw *hw = &adapter->hw;
5514	u32 ping;
5515	int i;
5516
5517	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5518		ping = E1000_PF_CONTROL_MSG;
5519		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5520			ping |= E1000_VT_MSGTYPE_CTS;
5521		igb_write_mbx(hw, &ping, 1, i);
5522	}
5523}
5524
5525static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5526{
5527	struct e1000_hw *hw = &adapter->hw;
5528	u32 vmolr = rd32(E1000_VMOLR(vf));
5529	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5530
5531	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5532			    IGB_VF_FLAG_MULTI_PROMISC);
5533	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5534
5535	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5536		vmolr |= E1000_VMOLR_MPME;
5537		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5538		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5539	} else {
5540		/* if we have hashes and we are clearing a multicast promisc
5541		 * flag we need to write the hashes to the MTA as this step
5542		 * was previously skipped
5543		 */
5544		if (vf_data->num_vf_mc_hashes > 30) {
5545			vmolr |= E1000_VMOLR_MPME;
5546		} else if (vf_data->num_vf_mc_hashes) {
5547			int j;
5548			vmolr |= E1000_VMOLR_ROMPE;
5549			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5550				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5551		}
5552	}
5553
5554	wr32(E1000_VMOLR(vf), vmolr);
5555
5556	/* there are flags left unprocessed, likely not supported */
5557	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5558		return -EINVAL;
5559
5560	return 0;
5561}
5562
5563static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5564				  u32 *msgbuf, u32 vf)
5565{
5566	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5567	u16 *hash_list = (u16 *)&msgbuf[1];
5568	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5569	int i;
5570
5571	/* salt away the number of multicast addresses assigned
5572	 * to this VF for later use to restore when the PF multicast
5573	 * list changes
5574	 */
5575	vf_data->num_vf_mc_hashes = n;
5576
5577	/* only up to 30 hash values supported */
5578	if (n > 30)
5579		n = 30;
5580
5581	/* store the hashes for later use */
5582	for (i = 0; i < n; i++)
5583		vf_data->vf_mc_hashes[i] = hash_list[i];
5584
5585	/* Flush and reset the mta with the new values */
5586	igb_set_rx_mode(adapter->netdev);
5587
5588	return 0;
5589}
5590
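/**
 *  igb_restore_vf_multicasts - reprogram stored VF multicast filters
 *  @adapter: board private structure
 *
 *  Re-applies each VF's saved multicast hashes to the MTA, or falls back to
 *  multicast promiscuous mode when a VF requested more than 30 hashes or is
 *  already in multicast promiscuous mode.
 **/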
5591static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5592{
5593	struct e1000_hw *hw = &adapter->hw;
5594	struct vf_data_storage *vf_data;
5595	int i, j;
5596
5597	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5598		u32 vmolr = rd32(E1000_VMOLR(i));
5599		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5600
5601		vf_data = &adapter->vf_data[i];
5602
5603		if ((vf_data->num_vf_mc_hashes > 30) ||
5604		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5605			vmolr |= E1000_VMOLR_MPME;
5606		} else if (vf_data->num_vf_mc_hashes) {
5607			vmolr |= E1000_VMOLR_ROMPE;
5608			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5609				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5610		}
5611		wr32(E1000_VMOLR(i), vmolr);
5612	}
5613}
5614
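/**
 *  igb_clear_vf_vfta - remove a VF from all VLVF pool entries
 *  @adapter: board private structure
 *  @vf: VF identifier
 *
 *  Drops the VF from every VLAN filter pool and, when a pool becomes empty,
 *  clears the corresponding VLAN id from the VFTA as well.
 **/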
5615static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5616{
5617	struct e1000_hw *hw = &adapter->hw;
5618	u32 pool_mask, reg, vid;
5619	int i;
5620
5621	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5622
5623	/* Find the vlan filter for this id */
5624	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5625		reg = rd32(E1000_VLVF(i));
5626
5627		/* remove the vf from the pool */
5628		reg &= ~pool_mask;
5629
5630		/* if pool is empty then remove entry from vfta */
5631		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5632		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5633			vid = reg & E1000_VLVF_VLANID_MASK;
5634			reg = 0;
5635			igb_vfta_set(hw, vid, false);
5636		}
5637
5638		wr32(E1000_VLVF(i), reg);
5639	}
5640
5641	adapter->vf_data[vf].vlans_enabled = 0;
5642}
5643
5644static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5645{
5646	struct e1000_hw *hw = &adapter->hw;
5647	u32 reg, i;
5648
5649	/* The vlvf table only exists on 82576 hardware and newer */
5650	if (hw->mac.type < e1000_82576)
5651		return -1;
5652
5653	/* we only need to do this if VMDq is enabled */
5654	if (!adapter->vfs_allocated_count)
5655		return -1;
5656
5657	/* Find the vlan filter for this id */
5658	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5659		reg = rd32(E1000_VLVF(i));
5660		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5661		    vid == (reg & E1000_VLVF_VLANID_MASK))
5662			break;
5663	}
5664
5665	if (add) {
5666		if (i == E1000_VLVF_ARRAY_SIZE) {
5667			/* Did not find a matching VLAN ID entry that was
5668			 * enabled.  Search for a free filter entry, i.e.
5669			 * one without the enable bit set
5670			 */
5671			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5672				reg = rd32(E1000_VLVF(i));
5673				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5674					break;
5675			}
5676		}
5677		if (i < E1000_VLVF_ARRAY_SIZE) {
5678			/* Found an enabled/available entry */
5679			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5680
5681			/* if !enabled we need to set this up in vfta */
5682			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5683				/* add VID to filter table */
5684				igb_vfta_set(hw, vid, true);
5685				reg |= E1000_VLVF_VLANID_ENABLE;
5686			}
5687			reg &= ~E1000_VLVF_VLANID_MASK;
5688			reg |= vid;
5689			wr32(E1000_VLVF(i), reg);
5690
5691			/* do not modify RLPML for PF devices */
5692			if (vf >= adapter->vfs_allocated_count)
5693				return 0;
5694
5695			if (!adapter->vf_data[vf].vlans_enabled) {
5696				u32 size;
5697				reg = rd32(E1000_VMOLR(vf));
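				/* first active VLAN on this pool: grow the Rx
				 * long packet limit (RLPML) by the VLAN tag size
				 */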
5698				size = reg & E1000_VMOLR_RLPML_MASK;
5699				size += 4;
5700				reg &= ~E1000_VMOLR_RLPML_MASK;
5701				reg |= size;
5702				wr32(E1000_VMOLR(vf), reg);
5703			}
5704
5705			adapter->vf_data[vf].vlans_enabled++;
5706		}
5707	} else {
5708		if (i < E1000_VLVF_ARRAY_SIZE) {
5709			/* remove vf from the pool */
5710			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5711			/* if pool is empty then remove entry from vfta */
5712			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5713				reg = 0;
5714				igb_vfta_set(hw, vid, false);
5715			}
5716			wr32(E1000_VLVF(i), reg);
5717
5718			/* do not modify RLPML for PF devices */
5719			if (vf >= adapter->vfs_allocated_count)
5720				return 0;
5721
5722			adapter->vf_data[vf].vlans_enabled--;
5723			if (!adapter->vf_data[vf].vlans_enabled) {
5724				u32 size;
5725				reg = rd32(E1000_VMOLR(vf));
5726				size = reg & E1000_VMOLR_RLPML_MASK;
5727				size -= 4;
5728				reg &= ~E1000_VMOLR_RLPML_MASK;
5729				reg |= size;
5730				wr32(E1000_VMOLR(vf), reg);
5731			}
5732		}
5733	}
5734	return 0;
5735}
5736
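/**
 *  igb_set_vmvir - program default VLAN tag insertion for a VF
 *  @adapter: board private structure
 *  @vid: VLAN id to insert on transmit, or 0 to disable insertion
 *  @vf: VF identifier
 **/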
5737static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5738{
5739	struct e1000_hw *hw = &adapter->hw;
5740
5741	if (vid)
5742		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5743	else
5744		wr32(E1000_VMVIR(vf), 0);
5745}
5746
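/**
 *  igb_ndo_set_vf_vlan - set an administrative VLAN/QoS for a VF
 *  @netdev: network interface device structure
 *  @vf: VF identifier
 *  @vlan: VLAN id to assign, or 0 to remove the administrative VLAN
 *  @qos: priority (0-7) placed in the VLAN tag
 **/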
5747static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5748			       int vf, u16 vlan, u8 qos)
5749{
5750	int err = 0;
5751	struct igb_adapter *adapter = netdev_priv(netdev);
5752
5753	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5754		return -EINVAL;
5755	if (vlan || qos) {
5756		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5757		if (err)
5758			goto out;
5759		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5760		igb_set_vmolr(adapter, vf, !vlan);
5761		adapter->vf_data[vf].pf_vlan = vlan;
5762		adapter->vf_data[vf].pf_qos = qos;
5763		dev_info(&adapter->pdev->dev,
5764			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5765		if (test_bit(__IGB_DOWN, &adapter->state)) {
5766			dev_warn(&adapter->pdev->dev,
5767				 "The VF VLAN has been set, but the PF device is not up.\n");
5768			dev_warn(&adapter->pdev->dev,
5769				 "Bring the PF device up before attempting to use the VF device.\n");
5770		}
5771	} else {
5772		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5773			     false, vf);
5774		igb_set_vmvir(adapter, vlan, vf);
5775		igb_set_vmolr(adapter, vf, true);
5776		adapter->vf_data[vf].pf_vlan = 0;
5777		adapter->vf_data[vf].pf_qos = 0;
5778	}
5779out:
5780	return err;
5781}
5782
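/**
 *  igb_find_vlvf_entry - locate the VLVF entry for a VLAN id
 *  @adapter: board private structure
 *  @vid: VLAN id to look up
 *
 *  Returns the index of the enabled VLVF register matching @vid, or -1 if
 *  no such entry exists.
 **/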
5783static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid)
5784{
5785	struct e1000_hw *hw = &adapter->hw;
5786	int i;
5787	u32 reg;
5788
5789	/* Find the vlan filter for this id */
5790	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5791		reg = rd32(E1000_VLVF(i));
5792		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5793		    vid == (reg & E1000_VLVF_VLANID_MASK))
5794			break;
5795	}
5796
5797	if (i >= E1000_VLVF_ARRAY_SIZE)
5798		i = -1;
5799
5800	return i;
5801}
5802
5803static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5804{
5805	struct e1000_hw *hw = &adapter->hw;
5806	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5807	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5808	int err = 0;
5809
5810	/* If in promiscuous mode we need to make sure the PF also has
5811	 * the VLAN filter set.
5812	 */
5813	if (add && (adapter->netdev->flags & IFF_PROMISC))
5814		err = igb_vlvf_set(adapter, vid, add,
5815				   adapter->vfs_allocated_count);
5816	if (err)
5817		goto out;
5818
5819	err = igb_vlvf_set(adapter, vid, add, vf);
5820
5821	if (err)
5822		goto out;
5823
5824	/* Go through all the checks to see if the VLAN filter should
5825	 * be wiped completely.
5826	 */
5827	if (!add && (adapter->netdev->flags & IFF_PROMISC)) {
5828		u32 vlvf, bits;
5829
5830		int regndx = igb_find_vlvf_entry(adapter, vid);
5831		if (regndx < 0)
5832			goto out;
5833		/* See if any other pools are set for this VLAN filter
5834		 * entry other than the PF.
5835		 */
5836		vlvf = bits = rd32(E1000_VLVF(regndx));
5837		bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT +
5838			      adapter->vfs_allocated_count);
5839		/* If the filter was removed then ensure PF pool bit
5840		 * is cleared if the PF only added itself to the pool
5841		 * because the PF is in promiscuous mode.
5842		 */
5843		if ((vlvf & VLAN_VID_MASK) == vid &&
5844		    !test_bit(vid, adapter->active_vlans) &&
5845		    !bits)
5846			igb_vlvf_set(adapter, vid, add,
5847				     adapter->vfs_allocated_count);
5848	}
5849
5850out:
5851	return err;
5852}
5853
5854static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5855{
5856	/* clear flags - except flag that indicates PF has set the MAC */
5857	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5858	adapter->vf_data[vf].last_nack = jiffies;
5859
5860	/* reset offloads to defaults */
5861	igb_set_vmolr(adapter, vf, true);
5862
5863	/* reset vlans for device */
5864	igb_clear_vf_vfta(adapter, vf);
5865	if (adapter->vf_data[vf].pf_vlan)
5866		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5867				    adapter->vf_data[vf].pf_vlan,
5868				    adapter->vf_data[vf].pf_qos);
5869	else
5870		igb_clear_vf_vfta(adapter, vf);
5871
5872	/* reset multicast table array for vf */
5873	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5874
5875	/* Flush and reset the mta with the new values */
5876	igb_set_rx_mode(adapter->netdev);
5877}
5878
5879static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5880{
5881	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5882
5883	/* clear mac address as we were hotplug removed/added */
5884	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5885		eth_zero_addr(vf_mac);
5886
5887	/* process remaining reset events */
5888	igb_vf_reset(adapter, vf);
5889}
5890
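/**
 *  igb_vf_reset_msg - complete a VF-requested reset
 *  @adapter: board private structure
 *  @vf: VF identifier
 *
 *  Performs the common VF reset work, programs the VF MAC filter, re-enables
 *  transmit and receive for the VF, and replies with an ACK that carries the
 *  MAC address the VF should use.
 **/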
5891static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5892{
5893	struct e1000_hw *hw = &adapter->hw;
5894	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5895	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5896	u32 reg, msgbuf[3];
5897	u8 *addr = (u8 *)(&msgbuf[1]);
5898
5899	/* process all the same items cleared in a function level reset */
5900	igb_vf_reset(adapter, vf);
5901
5902	/* set vf mac address */
5903	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5904
5905	/* enable transmit and receive for vf */
5906	reg = rd32(E1000_VFTE);
5907	wr32(E1000_VFTE, reg | (1 << vf));
5908	reg = rd32(E1000_VFRE);
5909	wr32(E1000_VFRE, reg | (1 << vf));
5910
5911	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5912
5913	/* reply to reset with ack and vf mac address */
5914	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5915	memcpy(addr, vf_mac, ETH_ALEN);
5916	igb_write_mbx(hw, msgbuf, 3, vf);
5917}
5918
5919static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5920{
5921	/* The VF MAC Address is stored in a packed array of bytes
5922	 * starting at the second 32 bit word of the msg array
5923	 */
5924	unsigned char *addr = (char *)&msg[1];
5925	int err = -1;
5926
5927	if (is_valid_ether_addr(addr))
5928		err = igb_set_vf_mac(adapter, vf, addr);
5929
5930	return err;
5931}
5932
5933static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5934{
5935	struct e1000_hw *hw = &adapter->hw;
5936	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5937	u32 msg = E1000_VT_MSGTYPE_NACK;
5938
5939	/* if device isn't clear to send it shouldn't be reading either */
5940	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5941	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5942		igb_write_mbx(hw, &msg, 1, vf);
5943		vf_data->last_nack = jiffies;
5944	}
5945}
5946
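/**
 *  igb_rcv_msg_from_vf - read and service one mailbox message from a VF
 *  @adapter: board private structure
 *  @vf: VF identifier
 **/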
5947static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5948{
5949	struct pci_dev *pdev = adapter->pdev;
5950	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5951	struct e1000_hw *hw = &adapter->hw;
5952	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5953	s32 retval;
5954
5955	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5956
5957	if (retval) {
5958		/* if receive failed revoke VF CTS stats and restart init */
5959		dev_err(&pdev->dev, "Error receiving message from VF\n");
5960		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5961		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5962			return;
5963		goto out;
5964	}
5965
5966	/* this is a message we already processed, do nothing */
5967	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5968		return;
5969
5970	/* until the vf completes a reset it should not be
5971	 * allowed to start any configuration.
5972	 */
5973	if (msgbuf[0] == E1000_VF_RESET) {
5974		igb_vf_reset_msg(adapter, vf);
5975		return;
5976	}
5977
5978	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5979		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5980			return;
5981		retval = -1;
5982		goto out;
5983	}
5984
5985	switch ((msgbuf[0] & 0xFFFF)) {
5986	case E1000_VF_SET_MAC_ADDR:
5987		retval = -EINVAL;
5988		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5989			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5990		else
5991			dev_warn(&pdev->dev,
5992				 "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
5993				 vf);
5994		break;
5995	case E1000_VF_SET_PROMISC:
5996		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5997		break;
5998	case E1000_VF_SET_MULTICAST:
5999		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
6000		break;
6001	case E1000_VF_SET_LPE:
6002		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
6003		break;
6004	case E1000_VF_SET_VLAN:
6005		retval = -1;
6006		if (vf_data->pf_vlan)
6007			dev_warn(&pdev->dev,
6008				 "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n",
6009				 vf);
6010		else
6011			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
6012		break;
6013	default:
6014		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
6015		retval = -1;
6016		break;
6017	}
6018
6019	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
6020out:
6021	/* notify the VF of the results of what it sent us */
6022	if (retval)
6023		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
6024	else
6025		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
6026
6027	igb_write_mbx(hw, msgbuf, 1, vf);
6028}
6029
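/**
 *  igb_msg_task - service pending mailbox events for every VF
 *  @adapter: board private structure
 *
 *  Handles reset requests, messages, and acknowledgements from each VF.
 **/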
6030static void igb_msg_task(struct igb_adapter *adapter)
6031{
6032	struct e1000_hw *hw = &adapter->hw;
6033	u32 vf;
6034
6035	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
6036		/* process any reset requests */
6037		if (!igb_check_for_rst(hw, vf))
6038			igb_vf_reset_event(adapter, vf);
6039
6040		/* process any messages pending */
6041		if (!igb_check_for_msg(hw, vf))
6042			igb_rcv_msg_from_vf(adapter, vf);
6043
6044		/* process any acks */
6045		if (!igb_check_for_ack(hw, vf))
6046			igb_rcv_ack_from_vf(adapter, vf);
6047	}
6048}
6049
6050/**
6051 *  igb_set_uta - Set unicast filter table address
6052 *  @adapter: board private structure
6053 *
6054 *  The unicast table address is a register array of 32-bit registers.
6055 *  The table is meant to be used in a way similar to how the MTA is used
6056 *  however due to certain limitations in the hardware it is necessary to
6057 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
6058 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
6059 **/
6060static void igb_set_uta(struct igb_adapter *adapter)
6061{
6062	struct e1000_hw *hw = &adapter->hw;
6063	int i;
6064
6065	/* The UTA table only exists on 82576 hardware and newer */
6066	if (hw->mac.type < e1000_82576)
6067		return;
6068
6069	/* we only need to do this if VMDq is enabled */
6070	if (!adapter->vfs_allocated_count)
6071		return;
6072
6073	for (i = 0; i < hw->mac.uta_reg_count; i++)
6074		array_wr32(E1000_UTA, i, ~0);
6075}
6076
6077/**
6078 *  igb_intr_msi - Interrupt Handler
6079 *  @irq: interrupt number
6080 *  @data: pointer to a network interface device structure
6081 **/
6082static irqreturn_t igb_intr_msi(int irq, void *data)
6083{
6084	struct igb_adapter *adapter = data;
6085	struct igb_q_vector *q_vector = adapter->q_vector[0];
6086	struct e1000_hw *hw = &adapter->hw;
6087	/* read ICR disables interrupts using IAM */
6088	u32 icr = rd32(E1000_ICR);
6089
6090	igb_write_itr(q_vector);
6091
6092	if (icr & E1000_ICR_DRSTA)
6093		schedule_work(&adapter->reset_task);
6094
6095	if (icr & E1000_ICR_DOUTSYNC) {
6096		/* HW is reporting DMA is out of sync */
6097		adapter->stats.doosync++;
6098	}
6099
6100	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6101		hw->mac.get_link_status = 1;
6102		if (!test_bit(__IGB_DOWN, &adapter->state))
6103			mod_timer(&adapter->watchdog_timer, jiffies + 1);
6104	}
6105
6106	if (icr & E1000_ICR_TS) {
6107		u32 tsicr = rd32(E1000_TSICR);
6108
6109		if (tsicr & E1000_TSICR_TXTS) {
6110			/* acknowledge the interrupt */
6111			wr32(E1000_TSICR, E1000_TSICR_TXTS);
6112			/* retrieve hardware timestamp */
6113			schedule_work(&adapter->ptp_tx_work);
6114		}
6115	}
6116
6117	napi_schedule(&q_vector->napi);
6118
6119	return IRQ_HANDLED;
6120}
6121
6122/**
6123 *  igb_intr - Legacy Interrupt Handler
6124 *  @irq: interrupt number
6125 *  @data: pointer to a network interface device structure
6126 **/
6127static irqreturn_t igb_intr(int irq, void *data)
6128{
6129	struct igb_adapter *adapter = data;
6130	struct igb_q_vector *q_vector = adapter->q_vector[0];
6131	struct e1000_hw *hw = &adapter->hw;
6132	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
6133	 * need for the IMC write
6134	 */
6135	u32 icr = rd32(E1000_ICR);
6136
6137	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
6138	 * not set, then the adapter didn't send an interrupt
6139	 */
6140	if (!(icr & E1000_ICR_INT_ASSERTED))
6141		return IRQ_NONE;
6142
6143	igb_write_itr(q_vector);
6144
6145	if (icr & E1000_ICR_DRSTA)
6146		schedule_work(&adapter->reset_task);
6147
6148	if (icr & E1000_ICR_DOUTSYNC) {
6149		/* HW is reporting DMA is out of sync */
6150		adapter->stats.doosync++;
6151	}
6152
6153	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6154		hw->mac.get_link_status = 1;
6155		/* guard against interrupt when we're going down */
6156		if (!test_bit(__IGB_DOWN, &adapter->state))
6157			mod_timer(&adapter->watchdog_timer, jiffies + 1);
6158	}
6159
6160	if (icr & E1000_ICR_TS) {
6161		u32 tsicr = rd32(E1000_TSICR);
6162
6163		if (tsicr & E1000_TSICR_TXTS) {
6164			/* acknowledge the interrupt */
6165			wr32(E1000_TSICR, E1000_TSICR_TXTS);
6166			/* retrieve hardware timestamp */
6167			schedule_work(&adapter->ptp_tx_work);
6168		}
6169	}
6170
6171	napi_schedule(&q_vector->napi);
6172
6173	return IRQ_HANDLED;
6174}
6175
6176static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
6177{
6178	struct igb_adapter *adapter = q_vector->adapter;
6179	struct e1000_hw *hw = &adapter->hw;
6180
6181	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
6182	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
6183		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
6184			igb_set_itr(q_vector);
6185		else
6186			igb_update_ring_itr(q_vector);
6187	}
6188
6189	if (!test_bit(__IGB_DOWN, &adapter->state)) {
6190		if (adapter->flags & IGB_FLAG_HAS_MSIX)
6191			wr32(E1000_EIMS, q_vector->eims_value);
6192		else
6193			igb_irq_enable(adapter);
6194	}
6195}
6196
6197/**
6198 *  igb_poll - NAPI Rx polling callback
6199 *  @napi: napi polling structure
6200 *  @budget: count of how many packets we should handle
6201 **/
6202static int igb_poll(struct napi_struct *napi, int budget)
6203{
6204	struct igb_q_vector *q_vector = container_of(napi,
6205						     struct igb_q_vector,
6206						     napi);
6207	bool clean_complete = true;
6208
6209#ifdef CONFIG_IGB_DCA
6210	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
6211		igb_update_dca(q_vector);
6212#endif
6213	if (q_vector->tx.ring)
6214		clean_complete = igb_clean_tx_irq(q_vector);
6215
6216	if (q_vector->rx.ring)
6217		clean_complete &= igb_clean_rx_irq(q_vector, budget);
6218
6219	/* If all work not completed, return budget and keep polling */
6220	if (!clean_complete)
6221		return budget;
6222
6223	/* If not enough Rx work done, exit the polling mode */
6224	napi_complete(napi);
6225	igb_ring_irq_enable(q_vector);
6226
6227	return 0;
6228}
6229
6230/**
6231 *  igb_clean_tx_irq - Reclaim resources after transmit completes
6232 *  @q_vector: pointer to q_vector containing needed info
6233 *
6234 *  returns true if ring is completely cleaned
6235 **/
6236static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
6237{
6238	struct igb_adapter *adapter = q_vector->adapter;
6239	struct igb_ring *tx_ring = q_vector->tx.ring;
6240	struct igb_tx_buffer *tx_buffer;
6241	union e1000_adv_tx_desc *tx_desc;
6242	unsigned int total_bytes = 0, total_packets = 0;
6243	unsigned int budget = q_vector->tx.work_limit;
6244	unsigned int i = tx_ring->next_to_clean;
6245
6246	if (test_bit(__IGB_DOWN, &adapter->state))
6247		return true;
6248
6249	tx_buffer = &tx_ring->tx_buffer_info[i];
6250	tx_desc = IGB_TX_DESC(tx_ring, i);
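	/* bias the index negative so the ring-wrap check inside the loop
	 * reduces to a simple test for i reaching zero
	 */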
6251	i -= tx_ring->count;
6252
6253	do {
6254		union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
6255
6256		/* if next_to_watch is not set then there is no work pending */
6257		if (!eop_desc)
6258			break;
6259
6260		/* prevent any other reads prior to eop_desc */
6261		read_barrier_depends();
6262
6263		/* if DD is not set pending work has not been completed */
6264		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
6265			break;
6266
6267		/* clear next_to_watch to prevent false hangs */
6268		tx_buffer->next_to_watch = NULL;
6269
6270		/* update the statistics for this packet */
6271		total_bytes += tx_buffer->bytecount;
6272		total_packets += tx_buffer->gso_segs;
6273
6274		/* free the skb */
6275		dev_kfree_skb_any(tx_buffer->skb);
6276
6277		/* unmap skb header data */
6278		dma_unmap_single(tx_ring->dev,
6279				 dma_unmap_addr(tx_buffer, dma),
6280				 dma_unmap_len(tx_buffer, len),
6281				 DMA_TO_DEVICE);
6282
6283		/* clear tx_buffer data */
6284		tx_buffer->skb = NULL;
6285		dma_unmap_len_set(tx_buffer, len, 0);
6286
6287		/* clear last DMA location and unmap remaining buffers */
6288		while (tx_desc != eop_desc) {
6289			tx_buffer++;
6290			tx_desc++;
6291			i++;
6292			if (unlikely(!i)) {
6293				i -= tx_ring->count;
6294				tx_buffer = tx_ring->tx_buffer_info;
6295				tx_desc = IGB_TX_DESC(tx_ring, 0);
6296			}
6297
6298			/* unmap any remaining paged data */
6299			if (dma_unmap_len(tx_buffer, len)) {
6300				dma_unmap_page(tx_ring->dev,
6301					       dma_unmap_addr(tx_buffer, dma),
6302					       dma_unmap_len(tx_buffer, len),
6303					       DMA_TO_DEVICE);
6304				dma_unmap_len_set(tx_buffer, len, 0);
6305			}
6306		}
6307
6308		/* move us one more past the eop_desc for start of next pkt */
6309		tx_buffer++;
6310		tx_desc++;
6311		i++;
6312		if (unlikely(!i)) {
6313			i -= tx_ring->count;
6314			tx_buffer = tx_ring->tx_buffer_info;
6315			tx_desc = IGB_TX_DESC(tx_ring, 0);
6316		}
6317
6318		/* issue prefetch for next Tx descriptor */
6319		prefetch(tx_desc);
6320
6321		/* update budget accounting */
6322		budget--;
6323	} while (likely(budget));
6324
6325	netdev_tx_completed_queue(txring_txq(tx_ring),
6326				  total_packets, total_bytes);
6327	i += tx_ring->count;
6328	tx_ring->next_to_clean = i;
6329	u64_stats_update_begin(&tx_ring->tx_syncp);
6330	tx_ring->tx_stats.bytes += total_bytes;
6331	tx_ring->tx_stats.packets += total_packets;
6332	u64_stats_update_end(&tx_ring->tx_syncp);
6333	q_vector->tx.total_bytes += total_bytes;
6334	q_vector->tx.total_packets += total_packets;
6335
6336	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
6337		struct e1000_hw *hw = &adapter->hw;
6338
6339		/* Detect a transmit hang in hardware, this serializes the
6340		 * check with the clearing of time_stamp and movement of i
6341		 */
6342		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
6343		if (tx_buffer->next_to_watch &&
6344		    time_after(jiffies, tx_buffer->time_stamp +
6345			       (adapter->tx_timeout_factor * HZ)) &&
6346		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
6347
6348			/* detected Tx unit hang */
6349			dev_err(tx_ring->dev,
6350				"Detected Tx Unit Hang\n"
6351				"  Tx Queue             <%d>\n"
6352				"  TDH                  <%x>\n"
6353				"  TDT                  <%x>\n"
6354				"  next_to_use          <%x>\n"
6355				"  next_to_clean        <%x>\n"
6356				"buffer_info[next_to_clean]\n"
6357				"  time_stamp           <%lx>\n"
6358				"  next_to_watch        <%p>\n"
6359				"  jiffies              <%lx>\n"
6360				"  desc.status          <%x>\n",
6361				tx_ring->queue_index,
6362				rd32(E1000_TDH(tx_ring->reg_idx)),
6363				readl(tx_ring->tail),
6364				tx_ring->next_to_use,
6365				tx_ring->next_to_clean,
6366				tx_buffer->time_stamp,
6367				tx_buffer->next_to_watch,
6368				jiffies,
6369				tx_buffer->next_to_watch->wb.status);
6370			netif_stop_subqueue(tx_ring->netdev,
6371					    tx_ring->queue_index);
6372
6373			/* we are about to reset, no point in enabling stuff */
6374			return true;
6375		}
6376	}
6377
6378#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
6379	if (unlikely(total_packets &&
6380	    netif_carrier_ok(tx_ring->netdev) &&
6381	    igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
6382		/* Make sure that anybody stopping the queue after this
6383		 * sees the new next_to_clean.
6384		 */
6385		smp_mb();
6386		if (__netif_subqueue_stopped(tx_ring->netdev,
6387					     tx_ring->queue_index) &&
6388		    !(test_bit(__IGB_DOWN, &adapter->state))) {
6389			netif_wake_subqueue(tx_ring->netdev,
6390					    tx_ring->queue_index);
6391
6392			u64_stats_update_begin(&tx_ring->tx_syncp);
6393			tx_ring->tx_stats.restart_queue++;
6394			u64_stats_update_end(&tx_ring->tx_syncp);
6395		}
6396	}
6397
6398	return !!budget;
6399}
6400
6401/**
6402 *  igb_reuse_rx_page - page flip buffer and store it back on the ring
6403 *  @rx_ring: rx descriptor ring to store buffers on
6404 *  @old_buff: donor buffer to have page reused
6405 *
6406 *  Synchronizes page for reuse by the adapter
6407 **/
6408static void igb_reuse_rx_page(struct igb_ring *rx_ring,
6409			      struct igb_rx_buffer *old_buff)
6410{
6411	struct igb_rx_buffer *new_buff;
6412	u16 nta = rx_ring->next_to_alloc;
6413
6414	new_buff = &rx_ring->rx_buffer_info[nta];
6415
6416	/* update, and store next to alloc */
6417	nta++;
6418	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
6419
6420	/* transfer page from old buffer to new buffer */
6421	memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer));
6422
6423	/* sync the buffer for use by the device */
6424	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
6425					 old_buff->page_offset,
6426					 IGB_RX_BUFSZ,
6427					 DMA_FROM_DEVICE);
6428}
6429
6430static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
6431				  struct page *page,
6432				  unsigned int truesize)
6433{
6434	/* avoid re-using remote pages */
6435	if (unlikely(page_to_nid(page) != numa_node_id()))
6436		return false;
6437
6438#if (PAGE_SIZE < 8192)
6439	/* if we are only owner of page we can reuse it */
6440	if (unlikely(page_count(page) != 1))
6441		return false;
6442
6443	/* flip page offset to other buffer */
6444	rx_buffer->page_offset ^= IGB_RX_BUFSZ;
6445
6446	/* since we are the only owner of the page and we need to
6447	 * increment it, just set the value to 2 in order to avoid
6448	 * an unnecessary locked operation
6449	 */
6450	atomic_set(&page->_count, 2);
6451#else
6452	/* move offset up to the next cache line */
6453	rx_buffer->page_offset += truesize;
6454
6455	if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
6456		return false;
6457
6458	/* bump ref count on page before it is given to the stack */
6459	get_page(page);
6460#endif
6461
6462	return true;
6463}
6464
6465/**
6466 *  igb_add_rx_frag - Add contents of Rx buffer to sk_buff
6467 *  @rx_ring: rx descriptor ring to transact packets on
6468 *  @rx_buffer: buffer containing page to add
6469 *  @rx_desc: descriptor containing length of buffer written by hardware
6470 *  @skb: sk_buff to place the data into
6471 *
6472 *  This function will add the data contained in rx_buffer->page to the skb.
6473 *  This is done either through a direct copy if the data in the buffer is
6474 *  less than the skb header size, otherwise it will just attach the page as
6475 *  a frag to the skb.
6476 *
6477 *  The function will then update the page offset if necessary and return
6478 *  true if the buffer can be reused by the adapter.
6479 **/
6480static bool igb_add_rx_frag(struct igb_ring *rx_ring,
6481			    struct igb_rx_buffer *rx_buffer,
6482			    union e1000_adv_rx_desc *rx_desc,
6483			    struct sk_buff *skb)
6484{
6485	struct page *page = rx_buffer->page;
6486	unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
6487#if (PAGE_SIZE < 8192)
6488	unsigned int truesize = IGB_RX_BUFSZ;
6489#else
6490	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
6491#endif
6492
6493	if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
6494		unsigned char *va = page_address(page) + rx_buffer->page_offset;
6495
6496		if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6497			igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
6498			va += IGB_TS_HDR_LEN;
6499			size -= IGB_TS_HDR_LEN;
6500		}
6501
6502		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
6503
6504		/* we can reuse buffer as-is, just make sure it is local */
6505		if (likely(page_to_nid(page) == numa_node_id()))
6506			return true;
6507
6508		/* this page cannot be reused so discard it */
6509		put_page(page);
6510		return false;
6511	}
6512
6513	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
6514			rx_buffer->page_offset, size, truesize);
6515
6516	return igb_can_reuse_rx_page(rx_buffer, page, truesize);
6517}
6518
6519static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
6520					   union e1000_adv_rx_desc *rx_desc,
6521					   struct sk_buff *skb)
6522{
6523	struct igb_rx_buffer *rx_buffer;
6524	struct page *page;
6525
6526	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
6527
6528	page = rx_buffer->page;
6529	prefetchw(page);
6530
6531	if (likely(!skb)) {
6532		void *page_addr = page_address(page) +
6533				  rx_buffer->page_offset;
6534
6535		/* prefetch first cache line of first page */
6536		prefetch(page_addr);
6537#if L1_CACHE_BYTES < 128
6538		prefetch(page_addr + L1_CACHE_BYTES);
6539#endif
6540
6541		/* allocate a skb to store the frags */
6542		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6543						IGB_RX_HDR_LEN);
6544		if (unlikely(!skb)) {
6545			rx_ring->rx_stats.alloc_failed++;
6546			return NULL;
6547		}
6548
6549		/* we will be copying header into skb->data in
6550		 * pskb_may_pull so it is in our interest to prefetch
6551		 * it now to avoid a possible cache miss
6552		 */
6553		prefetchw(skb->data);
6554	}
6555
6556	/* we are reusing so sync this buffer for CPU use */
6557	dma_sync_single_range_for_cpu(rx_ring->dev,
6558				      rx_buffer->dma,
6559				      rx_buffer->page_offset,
6560				      IGB_RX_BUFSZ,
6561				      DMA_FROM_DEVICE);
6562
6563	/* pull page into skb */
6564	if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
6565		/* hand second half of page back to the ring */
6566		igb_reuse_rx_page(rx_ring, rx_buffer);
6567	} else {
6568		/* we are not reusing the buffer so unmap it */
6569		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
6570			       PAGE_SIZE, DMA_FROM_DEVICE);
6571	}
6572
6573	/* clear contents of rx_buffer */
6574	rx_buffer->page = NULL;
6575
6576	return skb;
6577}
6578
6579static inline void igb_rx_checksum(struct igb_ring *ring,
6580				   union e1000_adv_rx_desc *rx_desc,
6581				   struct sk_buff *skb)
6582{
6583	skb_checksum_none_assert(skb);
6584
6585	/* Ignore Checksum bit is set */
6586	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
6587		return;
6588
6589	/* Rx checksum disabled via ethtool */
6590	if (!(ring->netdev->features & NETIF_F_RXCSUM))
6591		return;
6592
6593	/* TCP/UDP checksum error bit is set */
6594	if (igb_test_staterr(rx_desc,
6595			     E1000_RXDEXT_STATERR_TCPE |
6596			     E1000_RXDEXT_STATERR_IPE)) {
6597		/* work around errata with sctp packets where the TCPE aka
6598		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
6599		 * packets, (aka let the stack check the crc32c)
6600		 */
6601		if (!((skb->len == 60) &&
6602		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
6603			u64_stats_update_begin(&ring->rx_syncp);
6604			ring->rx_stats.csum_err++;
6605			u64_stats_update_end(&ring->rx_syncp);
6606		}
6607		/* let the stack verify checksum errors */
6608		return;
6609	}
6610	/* It must be a TCP or UDP packet with a valid checksum */
6611	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
6612				      E1000_RXD_STAT_UDPCS))
6613		skb->ip_summed = CHECKSUM_UNNECESSARY;
6614
6615	dev_dbg(ring->dev, "cksum success: bits %08X\n",
6616		le32_to_cpu(rx_desc->wb.upper.status_error));
6617}
6618
6619static inline void igb_rx_hash(struct igb_ring *ring,
6620			       union e1000_adv_rx_desc *rx_desc,
6621			       struct sk_buff *skb)
6622{
6623	if (ring->netdev->features & NETIF_F_RXHASH)
6624		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
6625}
6626
6627/**
6628 *  igb_is_non_eop - process handling of non-EOP buffers
6629 *  @rx_ring: Rx ring being processed
6630 *  @rx_desc: Rx descriptor for current buffer
6631 *
6632 *  This function updates next to clean and prefetches the next Rx
6633 *  descriptor.  If the current buffer is an EOP buffer this function
6634 *  exits returning false, otherwise it returns true to indicate that
6635 *  the frame continues in the following buffer, i.e. that this is a
6636 *  non-EOP buffer.
6637 **/
6638static bool igb_is_non_eop(struct igb_ring *rx_ring,
6639			   union e1000_adv_rx_desc *rx_desc)
6640{
6641	u32 ntc = rx_ring->next_to_clean + 1;
6642
6643	/* fetch, update, and store next to clean */
6644	ntc = (ntc < rx_ring->count) ? ntc : 0;
6645	rx_ring->next_to_clean = ntc;
6646
6647	prefetch(IGB_RX_DESC(rx_ring, ntc));
6648
6649	if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)))
6650		return false;
6651
6652	return true;
6653}
6654
6655/**
6656 *  igb_get_headlen - determine size of header for LRO/GRO
6657 *  @data: pointer to the start of the headers
6658 *  @max_len: total length of section to find headers in
6659 *
6660 *  This function is meant to determine the length of headers that will
6661 *  be recognized by hardware for LRO, and GRO offloads.  The main
6662 *  motivation of doing this is to only perform one pull for IPv4 TCP
6663 *  packets so that we can do basic things like calculating the gso_size
6664 *  based on the average data per packet.
6665 **/
6666static unsigned int igb_get_headlen(unsigned char *data,
6667				    unsigned int max_len)
6668{
6669	union {
6670		unsigned char *network;
6671		/* l2 headers */
6672		struct ethhdr *eth;
6673		struct vlan_hdr *vlan;
6674		/* l3 headers */
6675		struct iphdr *ipv4;
6676		struct ipv6hdr *ipv6;
6677	} hdr;
6678	__be16 protocol;
6679	u8 nexthdr = 0;	/* default to not TCP */
6680	u8 hlen;
6681
6682	/* this should never happen, but better safe than sorry */
6683	if (max_len < ETH_HLEN)
6684		return max_len;
6685
6686	/* initialize network frame pointer */
6687	hdr.network = data;
6688
6689	/* set first protocol and move network header forward */
6690	protocol = hdr.eth->h_proto;
6691	hdr.network += ETH_HLEN;
6692
6693	/* handle any vlan tag if present */
6694	if (protocol == __constant_htons(ETH_P_8021Q)) {
6695		if ((hdr.network - data) > (max_len - VLAN_HLEN))
6696			return max_len;
6697
6698		protocol = hdr.vlan->h_vlan_encapsulated_proto;
6699		hdr.network += VLAN_HLEN;
6700	}
6701
6702	/* handle L3 protocols */
6703	if (protocol == __constant_htons(ETH_P_IP)) {
6704		if ((hdr.network - data) > (max_len - sizeof(struct iphdr)))
6705			return max_len;
6706
6707		/* access ihl as a u8 to avoid unaligned access on ia64 */
6708		hlen = (hdr.network[0] & 0x0F) << 2;
6709
6710		/* verify hlen meets minimum size requirements */
6711		if (hlen < sizeof(struct iphdr))
6712			return hdr.network - data;
6713
6714		/* record next protocol if header is present */
6715		if (!(hdr.ipv4->frag_off & htons(IP_OFFSET)))
6716			nexthdr = hdr.ipv4->protocol;
6717	} else if (protocol == __constant_htons(ETH_P_IPV6)) {
6718		if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr)))
6719			return max_len;
6720
6721		/* record next protocol */
6722		nexthdr = hdr.ipv6->nexthdr;
6723		hlen = sizeof(struct ipv6hdr);
6724	} else {
6725		return hdr.network - data;
6726	}
6727
6728	/* relocate pointer to start of L4 header */
6729	hdr.network += hlen;
6730
6731	/* finally sort out TCP */
6732	if (nexthdr == IPPROTO_TCP) {
6733		if ((hdr.network - data) > (max_len - sizeof(struct tcphdr)))
6734			return max_len;
6735
6736		/* access doff as a u8 to avoid unaligned access on ia64 */
6737		hlen = (hdr.network[12] & 0xF0) >> 2;
6738
6739		/* verify hlen meets minimum size requirements */
6740		if (hlen < sizeof(struct tcphdr))
6741			return hdr.network - data;
6742
6743		hdr.network += hlen;
6744	} else if (nexthdr == IPPROTO_UDP) {
6745		if ((hdr.network - data) > (max_len - sizeof(struct udphdr)))
6746			return max_len;
6747
6748		hdr.network += sizeof(struct udphdr);
6749	}
6750
6751	/* If everything has gone correctly hdr.network should be the
6752	 * data section of the packet and will be the end of the header.
6753	 * If not then it probably represents the end of the last recognized
6754	 * header.
6755	 */
6756	if ((hdr.network - data) < max_len)
6757		return hdr.network - data;
6758	else
6759		return max_len;
6760}
6761
6762/**
6763 *  igb_pull_tail - igb specific version of skb_pull_tail
6764 *  @rx_ring: rx descriptor ring packet is being transacted on
6765 *  @rx_desc: pointer to the EOP Rx descriptor
6766 *  @skb: pointer to current skb being adjusted
6767 *
6768 *  This function is an igb specific version of __pskb_pull_tail.  The
6769 *  main difference between this version and the original function is that
6770 *  this function can make several assumptions about the state of things
6771 *  that allow for significant optimizations versus the standard function.
6772 *  As a result we can do things like drop a frag and maintain an accurate
6773 *  truesize for the skb.
6774 */
6775static void igb_pull_tail(struct igb_ring *rx_ring,
6776			  union e1000_adv_rx_desc *rx_desc,
6777			  struct sk_buff *skb)
6778{
6779	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
6780	unsigned char *va;
6781	unsigned int pull_len;
6782
6783	/* it is valid to use page_address instead of kmap since we are
6784	 * working with pages allocated out of the lowmem pool per
6785	 * alloc_page(GFP_ATOMIC)
6786	 */
6787	va = skb_frag_address(frag);
6788
6789	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6790		/* retrieve timestamp from buffer */
6791		igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
6792
6793		/* update pointers to remove timestamp header */
6794		skb_frag_size_sub(frag, IGB_TS_HDR_LEN);
6795		frag->page_offset += IGB_TS_HDR_LEN;
6796		skb->data_len -= IGB_TS_HDR_LEN;
6797		skb->len -= IGB_TS_HDR_LEN;
6798
6799		/* move va to start of packet data */
6800		va += IGB_TS_HDR_LEN;
6801	}
6802
6803	/* we need the header to contain the greater of either ETH_HLEN or
6804	 * 60 bytes if the skb->len is less than 60 for skb_pad.
6805	 */
6806	pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN);
6807
6808	/* align pull length to size of long to optimize memcpy performance */
6809	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
6810
6811	/* update all of the pointers */
6812	skb_frag_size_sub(frag, pull_len);
6813	frag->page_offset += pull_len;
6814	skb->data_len -= pull_len;
6815	skb->tail += pull_len;
6816}
6817
6818/**
6819 *  igb_cleanup_headers - Correct corrupted or empty headers
6820 *  @rx_ring: rx descriptor ring packet is being transacted on
6821 *  @rx_desc: pointer to the EOP Rx descriptor
6822 *  @skb: pointer to current skb being fixed
6823 *
6824 *  Address the case where we are pulling data in on pages only
6825 *  and as such no data is present in the skb header.
6826 *
6827 *  In addition if skb is not at least 60 bytes we need to pad it so that
6828 *  it is large enough to qualify as a valid Ethernet frame.
6829 *
6830 *  Returns true if an error was encountered and skb was freed.
6831 **/
6832static bool igb_cleanup_headers(struct igb_ring *rx_ring,
6833				union e1000_adv_rx_desc *rx_desc,
6834				struct sk_buff *skb)
6835{
6836	if (unlikely((igb_test_staterr(rx_desc,
6837				       E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
6838		struct net_device *netdev = rx_ring->netdev;
6839		if (!(netdev->features & NETIF_F_RXALL)) {
6840			dev_kfree_skb_any(skb);
6841			return true;
6842		}
6843	}
6844
6845	/* place header in linear portion of buffer */
6846	if (skb_is_nonlinear(skb))
6847		igb_pull_tail(rx_ring, rx_desc, skb);
6848
6849	/* if skb_pad returns an error the skb was freed */
6850	if (unlikely(skb->len < 60)) {
6851		int pad_len = 60 - skb->len;
6852
6853		if (skb_pad(skb, pad_len))
6854			return true;
6855		__skb_put(skb, pad_len);
6856	}
6857
6858	return false;
6859}
6860
6861/**
6862 *  igb_process_skb_fields - Populate skb header fields from Rx descriptor
6863 *  @rx_ring: rx descriptor ring packet is being transacted on
6864 *  @rx_desc: pointer to the EOP Rx descriptor
6865 *  @skb: pointer to current skb being populated
6866 *
6867 *  This function checks the ring, descriptor, and packet information in
6868 *  order to populate the hash, checksum, VLAN, timestamp, protocol, and
6869 *  other fields within the skb.
6870 **/
6871static void igb_process_skb_fields(struct igb_ring *rx_ring,
6872				   union e1000_adv_rx_desc *rx_desc,
6873				   struct sk_buff *skb)
6874{
6875	struct net_device *dev = rx_ring->netdev;
6876
6877	igb_rx_hash(rx_ring, rx_desc, skb);
6878
6879	igb_rx_checksum(rx_ring, rx_desc, skb);
6880
6881	igb_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
6882
6883	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
6884	    igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6885		u16 vid;
6886		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6887		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
6888			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6889		else
6890			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6891
6892		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
6893	}
6894
6895	skb_record_rx_queue(skb, rx_ring->queue_index);
6896
6897	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6898}
6899
6900static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
6901{
6902	struct igb_ring *rx_ring = q_vector->rx.ring;
6903	struct sk_buff *skb = rx_ring->skb;
6904	unsigned int total_bytes = 0, total_packets = 0;
6905	u16 cleaned_count = igb_desc_unused(rx_ring);
6906
6907	do {
6908		union e1000_adv_rx_desc *rx_desc;
6909
6910		/* return some buffers to hardware, one at a time is too slow */
6911		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6912			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6913			cleaned_count = 0;
6914		}
6915
6916		rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
6917
6918		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
6919			break;
6920
6921		/* This memory barrier is needed to keep us from reading
6922		 * any other fields out of the rx_desc until we know the
6923		 * RXD_STAT_DD bit is set
6924		 */
6925		rmb();
6926
6927		/* retrieve a buffer from the ring */
6928		skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
6929
6930		/* exit if we failed to retrieve a buffer */
6931		if (!skb)
6932			break;
6933
6934		cleaned_count++;
6935
6936		/* fetch next buffer in frame if non-eop */
6937		if (igb_is_non_eop(rx_ring, rx_desc))
6938			continue;
6939
6940		/* verify the packet layout is correct */
6941		if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
6942			skb = NULL;
6943			continue;
6944		}
6945
6946		/* probably a little skewed due to removing CRC */
6947		total_bytes += skb->len;
6948
6949		/* populate checksum, timestamp, VLAN, and protocol */
6950		igb_process_skb_fields(rx_ring, rx_desc, skb);
6951
6952		napi_gro_receive(&q_vector->napi, skb);
6953
6954		/* reset skb pointer */
6955		skb = NULL;
6956
6957		/* update budget accounting */
6958		total_packets++;
6959	} while (likely(total_packets < budget));
6960
6961	/* place incomplete frames back on ring for completion */
6962	rx_ring->skb = skb;
6963
6964	u64_stats_update_begin(&rx_ring->rx_syncp);
6965	rx_ring->rx_stats.packets += total_packets;
6966	rx_ring->rx_stats.bytes += total_bytes;
6967	u64_stats_update_end(&rx_ring->rx_syncp);
6968	q_vector->rx.total_packets += total_packets;
6969	q_vector->rx.total_bytes += total_bytes;
6970
6971	if (cleaned_count)
6972		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6973
6974	return (total_packets < budget);
6975}
6976
6977static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6978				  struct igb_rx_buffer *bi)
6979{
6980	struct page *page = bi->page;
6981	dma_addr_t dma;
6982
6983	/* since we are recycling buffers we should seldom need to alloc */
6984	if (likely(page))
6985		return true;
6986
6987	/* alloc new page for storage */
6988	page = __skb_alloc_page(GFP_ATOMIC | __GFP_COLD, NULL);
6989	if (unlikely(!page)) {
6990		rx_ring->rx_stats.alloc_failed++;
6991		return false;
6992	}
6993
6994	/* map page for use */
6995	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
6996
6997	/* if mapping failed free memory back to system since
6998	 * there isn't much point in holding memory we can't use
6999	 */
7000	if (dma_mapping_error(rx_ring->dev, dma)) {
7001		__free_page(page);
7002
7003		rx_ring->rx_stats.alloc_failed++;
7004		return false;
7005	}
7006
7007	bi->dma = dma;
7008	bi->page = page;
7009	bi->page_offset = 0;
7010
7011	return true;
7012}
7013
7014/**
7015 *  igb_alloc_rx_buffers - Replace used receive buffers
7016 *  @rx_ring: rx descriptor ring on which to place new buffers
7017 **/
7018void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
7019{
7020	union e1000_adv_rx_desc *rx_desc;
7021	struct igb_rx_buffer *bi;
7022	u16 i = rx_ring->next_to_use;
7023
7024	/* nothing to do */
7025	if (!cleaned_count)
7026		return;
7027
7028	rx_desc = IGB_RX_DESC(rx_ring, i);
7029	bi = &rx_ring->rx_buffer_info[i];
7030	i -= rx_ring->count;
7031
7032	do {
7033		if (!igb_alloc_mapped_page(rx_ring, bi))
7034			break;
7035
7036		/* Refresh the desc even if buffer_addrs didn't change
7037		 * because each write-back erases this info.
7038		 */
7039		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
7040
7041		rx_desc++;
7042		bi++;
7043		i++;
7044		if (unlikely(!i)) {
7045			rx_desc = IGB_RX_DESC(rx_ring, 0);
7046			bi = rx_ring->rx_buffer_info;
7047			i -= rx_ring->count;
7048		}
7049
7050		/* clear the hdr_addr for the next_to_use descriptor */
7051		rx_desc->read.hdr_addr = 0;
7052
7053		cleaned_count--;
7054	} while (cleaned_count);
7055
7056	i += rx_ring->count;
7057
7058	if (rx_ring->next_to_use != i) {
7059		/* record the next descriptor to use */
7060		rx_ring->next_to_use = i;
7061
7062		/* update next to alloc since we have filled the ring */
7063		rx_ring->next_to_alloc = i;
7064
7065		/* Force memory writes to complete before letting h/w
7066		 * know there are new descriptors to fetch.  (Only
7067		 * applicable for weak-ordered memory model archs,
7068		 * such as IA-64).
7069		 */
7070		wmb();
7071		writel(i, rx_ring->tail);
7072	}
7073}
7074
7075/**
7076 * igb_mii_ioctl - handle MII ioctls on the PHY
7077 * @netdev: network interface device structure
7078 * @ifr: pointer to interface request structure carrying the MII data
7079 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
7080 **/
7081static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7082{
7083	struct igb_adapter *adapter = netdev_priv(netdev);
7084	struct mii_ioctl_data *data = if_mii(ifr);
7085
7086	if (adapter->hw.phy.media_type != e1000_media_type_copper)
7087		return -EOPNOTSUPP;
7088
7089	switch (cmd) {
7090	case SIOCGMIIPHY:
7091		data->phy_id = adapter->hw.phy.addr;
7092		break;
7093	case SIOCGMIIREG:
7094		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
7095		                     &data->val_out))
7096			return -EIO;
7097		break;
7098	case SIOCSMIIREG:
7099	default:
7100		return -EOPNOTSUPP;
7101	}
7102	return 0;
7103}
7104
7105/**
7106 * igb_ioctl - dispatch device-specific ioctl requests
7107 * @netdev: network interface device structure
7108 * @ifr: interface request data
7109 * @cmd: ioctl command
7110 **/
7111static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7112{
7113	switch (cmd) {
7114	case SIOCGMIIPHY:
7115	case SIOCGMIIREG:
7116	case SIOCSMIIREG:
7117		return igb_mii_ioctl(netdev, ifr, cmd);
7118	case SIOCSHWTSTAMP:
7119		return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd);
7120	default:
7121		return -EOPNOTSUPP;
7122	}
7123}
7124
7125s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7126{
7127	struct igb_adapter *adapter = hw->back;
7128
7129	if (pcie_capability_read_word(adapter->pdev, reg, value))
7130		return -E1000_ERR_CONFIG;
7131
7132	return 0;
7133}
7134
7135s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7136{
7137	struct igb_adapter *adapter = hw->back;
7138
7139	if (pcie_capability_write_word(adapter->pdev, reg, *value))
7140		return -E1000_ERR_CONFIG;
7141
7142	return 0;
7143}
7144
7145static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
7146{
7147	struct igb_adapter *adapter = netdev_priv(netdev);
7148	struct e1000_hw *hw = &adapter->hw;
7149	u32 ctrl, rctl;
7150	bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
7151
7152	if (enable) {
7153		/* enable VLAN tag insert/strip */
7154		ctrl = rd32(E1000_CTRL);
7155		ctrl |= E1000_CTRL_VME;
7156		wr32(E1000_CTRL, ctrl);
7157
7158		/* Disable CFI check */
7159		rctl = rd32(E1000_RCTL);
7160		rctl &= ~E1000_RCTL_CFIEN;
7161		wr32(E1000_RCTL, rctl);
7162	} else {
7163		/* disable VLAN tag insert/strip */
7164		ctrl = rd32(E1000_CTRL);
7165		ctrl &= ~E1000_CTRL_VME;
7166		wr32(E1000_CTRL, ctrl);
7167	}
7168
7169	igb_rlpml_set(adapter);
7170}
7171
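/**
 *  igb_vlan_rx_add_vid - add a VLAN id to the hardware filters
 *  @netdev: network interface device structure
 *  @proto: VLAN protocol
 *  @vid: VLAN id to be added
 *
 *  Adds the VLAN to the VLVF pool for the PF and to the VFTA so the PF can
 *  receive the VLAN even without a VLVF entry.
 **/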
7172static int igb_vlan_rx_add_vid(struct net_device *netdev,
7173			       __be16 proto, u16 vid)
7174{
7175	struct igb_adapter *adapter = netdev_priv(netdev);
7176	struct e1000_hw *hw = &adapter->hw;
7177	int pf_id = adapter->vfs_allocated_count;
7178
7179	/* attempt to add filter to vlvf array */
7180	igb_vlvf_set(adapter, vid, true, pf_id);
7181
7182	/* add the filter since PF can receive vlans w/o entry in vlvf */
7183	igb_vfta_set(hw, vid, true);
7184
7185	set_bit(vid, adapter->active_vlans);
7186
7187	return 0;
7188}
7189
7190static int igb_vlan_rx_kill_vid(struct net_device *netdev,
7191				__be16 proto, u16 vid)
7192{
7193	struct igb_adapter *adapter = netdev_priv(netdev);
7194	struct e1000_hw *hw = &adapter->hw;
7195	int pf_id = adapter->vfs_allocated_count;
7196	s32 err;
7197
7198	/* remove vlan from VLVF table array */
7199	err = igb_vlvf_set(adapter, vid, false, pf_id);
7200
7201	/* if vid was not present in VLVF just remove it from table */
7202	if (err)
7203		igb_vfta_set(hw, vid, false);
7204
7205	clear_bit(vid, adapter->active_vlans);
7206
7207	return 0;
7208}
7209
7210static void igb_restore_vlan(struct igb_adapter *adapter)
7211{
7212	u16 vid;
7213
7214	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
7215
7216	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
7217		igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
7218}
7219
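/**
 *  igb_set_spd_dplx - force link speed and duplex
 *  @adapter: board private structure
 *  @spd: requested speed (SPEED_10, SPEED_100 or SPEED_1000)
 *  @dplx: requested duplex (DUPLEX_HALF or DUPLEX_FULL)
 *
 *  Returns 0 on success or -EINVAL for combinations the hardware does not
 *  support.
 **/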
7220int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
7221{
7222	struct pci_dev *pdev = adapter->pdev;
7223	struct e1000_mac_info *mac = &adapter->hw.mac;
7224
7225	mac->autoneg = 0;
7226
7227	/* Make sure dplx is at most 1 bit and lsb of speed is not set
7228	 * for the switch() below to work
7229	 */
7230	if ((spd & 1) || (dplx & ~1))
7231		goto err_inval;
7232
7233	/* Fiber NICs only allow 1000 Mbps Full duplex
7234	 * and 100 Mbps Full duplex for 100BASE-FX SFPs
7235	 */
7236	if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
7237		switch (spd + dplx) {
7238		case SPEED_10 + DUPLEX_HALF:
7239		case SPEED_10 + DUPLEX_FULL:
7240		case SPEED_100 + DUPLEX_HALF:
7241			goto err_inval;
7242		default:
7243			break;
7244		}
7245	}
7246
7247	switch (spd + dplx) {
7248	case SPEED_10 + DUPLEX_HALF:
7249		mac->forced_speed_duplex = ADVERTISE_10_HALF;
7250		break;
7251	case SPEED_10 + DUPLEX_FULL:
7252		mac->forced_speed_duplex = ADVERTISE_10_FULL;
7253		break;
7254	case SPEED_100 + DUPLEX_HALF:
7255		mac->forced_speed_duplex = ADVERTISE_100_HALF;
7256		break;
7257	case SPEED_100 + DUPLEX_FULL:
7258		mac->forced_speed_duplex = ADVERTISE_100_FULL;
7259		break;
7260	case SPEED_1000 + DUPLEX_FULL:
7261		mac->autoneg = 1;
7262		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
7263		break;
7264	case SPEED_1000 + DUPLEX_HALF: /* not supported */
7265	default:
7266		goto err_inval;
7267	}
7268
7269	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
7270	adapter->hw.phy.mdix = AUTO_ALL_MODES;
7271
7272	return 0;
7273
7274err_inval:
7275	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
7276	return -EINVAL;
7277}
7278
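/**
 *  __igb_shutdown - prepare the adapter for a power state change
 *  @pdev: PCI device information struct
 *  @enable_wake: set true when wake-up (WoL or manageability) is required
 *  @runtime: true when called from the runtime-PM suspend path
 *
 *  Detaches the netdev, closes the interface if it was running,
 *  programs the wake-up filters according to the configured WoL
 *  options and finally disables the PCI device.
 **/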
7279static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
7280			  bool runtime)
7281{
7282	struct net_device *netdev = pci_get_drvdata(pdev);
7283	struct igb_adapter *adapter = netdev_priv(netdev);
7284	struct e1000_hw *hw = &adapter->hw;
7285	u32 ctrl, rctl, status;
7286	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
7287#ifdef CONFIG_PM
7288	int retval = 0;
7289#endif
7290
7291	netif_device_detach(netdev);
7292
7293	if (netif_running(netdev))
7294		__igb_close(netdev, true);
7295
7296	igb_clear_interrupt_scheme(adapter);
7297
7298#ifdef CONFIG_PM
7299	retval = pci_save_state(pdev);
7300	if (retval)
7301		return retval;
7302#endif
7303
7304	status = rd32(E1000_STATUS);
7305	if (status & E1000_STATUS_LU)
7306		wufc &= ~E1000_WUFC_LNKC;
7307
7308	if (wufc) {
7309		igb_setup_rctl(adapter);
7310		igb_set_rx_mode(netdev);
7311
7312		/* turn on all-multi mode if wake on multicast is enabled */
7313		if (wufc & E1000_WUFC_MC) {
7314			rctl = rd32(E1000_RCTL);
7315			rctl |= E1000_RCTL_MPE;
7316			wr32(E1000_RCTL, rctl);
7317		}
7318
7319		ctrl = rd32(E1000_CTRL);
7320		/* advertise wake from D3Cold */
7321		#define E1000_CTRL_ADVD3WUC 0x00100000
7322		/* phy power management enable */
7323		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
7324		ctrl |= E1000_CTRL_ADVD3WUC;
7325		wr32(E1000_CTRL, ctrl);
7326
7327		/* Allow time for pending master requests to run */
7328		igb_disable_pcie_master(hw);
7329
7330		wr32(E1000_WUC, E1000_WUC_PME_EN);
7331		wr32(E1000_WUFC, wufc);
7332	} else {
7333		wr32(E1000_WUC, 0);
7334		wr32(E1000_WUFC, 0);
7335	}
7336
7337	*enable_wake = wufc || adapter->en_mng_pt;
7338	if (!*enable_wake)
7339		igb_power_down_link(adapter);
7340	else
7341		igb_power_up_link(adapter);
7342
7343	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
7344	 * would have already happened in close and is redundant.
7345	 */
7346	igb_release_hw_control(adapter);
7347
7348	pci_disable_device(pdev);
7349
7350	return 0;
7351}
7352
7353#ifdef CONFIG_PM
7354#ifdef CONFIG_PM_SLEEP
7355static int igb_suspend(struct device *dev)
7356{
7357	int retval;
7358	bool wake;
7359	struct pci_dev *pdev = to_pci_dev(dev);
7360
7361	retval = __igb_shutdown(pdev, &wake, 0);
7362	if (retval)
7363		return retval;
7364
7365	if (wake) {
7366		pci_prepare_to_sleep(pdev);
7367	} else {
7368		pci_wake_from_d3(pdev, false);
7369		pci_set_power_state(pdev, PCI_D3hot);
7370	}
7371
7372	return 0;
7373}
7374#endif /* CONFIG_PM_SLEEP */
7375
7376static int igb_resume(struct device *dev)
7377{
7378	struct pci_dev *pdev = to_pci_dev(dev);
7379	struct net_device *netdev = pci_get_drvdata(pdev);
7380	struct igb_adapter *adapter = netdev_priv(netdev);
7381	struct e1000_hw *hw = &adapter->hw;
7382	u32 err;
7383
7384	pci_set_power_state(pdev, PCI_D0);
7385	pci_restore_state(pdev);
7386	pci_save_state(pdev);
7387
7388	err = pci_enable_device_mem(pdev);
7389	if (err) {
7390		dev_err(&pdev->dev,
7391			"Cannot enable PCI device from suspend\n");
7392		return err;
7393	}
7394	pci_set_master(pdev);
7395
7396	pci_enable_wake(pdev, PCI_D3hot, 0);
7397	pci_enable_wake(pdev, PCI_D3cold, 0);
7398
7399	if (igb_init_interrupt_scheme(adapter, true)) {
7400		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
7401		return -ENOMEM;
7402	}
7403
7404	igb_reset(adapter);
7405
7406	/* let the f/w know that the h/w is now under the control of the
7407	 * driver.
7408	 */
7409	igb_get_hw_control(adapter);
7410
7411	wr32(E1000_WUS, ~0);
7412
7413	if (netdev->flags & IFF_UP) {
7414		rtnl_lock();
7415		err = __igb_open(netdev, true);
7416		rtnl_unlock();
7417		if (err)
7418			return err;
7419	}
7420
7421	netif_device_attach(netdev);
7422	return 0;
7423}
7424
7425#ifdef CONFIG_PM_RUNTIME
7426static int igb_runtime_idle(struct device *dev)
7427{
7428	struct pci_dev *pdev = to_pci_dev(dev);
7429	struct net_device *netdev = pci_get_drvdata(pdev);
7430	struct igb_adapter *adapter = netdev_priv(netdev);
7431
7432	if (!igb_has_link(adapter))
7433		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
7434
7435	return -EBUSY;
7436}
7437
7438static int igb_runtime_suspend(struct device *dev)
7439{
7440	struct pci_dev *pdev = to_pci_dev(dev);
7441	int retval;
7442	bool wake;
7443
7444	retval = __igb_shutdown(pdev, &wake, 1);
7445	if (retval)
7446		return retval;
7447
7448	if (wake) {
7449		pci_prepare_to_sleep(pdev);
7450	} else {
7451		pci_wake_from_d3(pdev, false);
7452		pci_set_power_state(pdev, PCI_D3hot);
7453	}
7454
7455	return 0;
7456}
7457
7458static int igb_runtime_resume(struct device *dev)
7459{
7460	return igb_resume(dev);
7461}
7462#endif /* CONFIG_PM_RUNTIME */
7463#endif
7464
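/**
 *  igb_shutdown - PCI .shutdown callback
 *  @pdev: PCI device information struct
 *
 *  Quiesces the adapter; when the system is powering off the device is
 *  additionally armed (or disarmed) for wake and put into D3hot.
 **/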
7465static void igb_shutdown(struct pci_dev *pdev)
7466{
7467	bool wake;
7468
7469	__igb_shutdown(pdev, &wake, 0);
7470
7471	if (system_state == SYSTEM_POWER_OFF) {
7472		pci_wake_from_d3(pdev, wake);
7473		pci_set_power_state(pdev, PCI_D3hot);
7474	}
7475}
7476
7477#ifdef CONFIG_PCI_IOV
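/**
 *  igb_sriov_reinit - rebuild queues and interrupts after an SR-IOV change
 *  @dev: PCI device information struct
 *
 *  Closes the interface if it is running, re-derives the queue
 *  configuration and interrupt scheme for the new VF count, and then
 *  reopens the interface.
 **/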
7478static int igb_sriov_reinit(struct pci_dev *dev)
7479{
7480	struct net_device *netdev = pci_get_drvdata(dev);
7481	struct igb_adapter *adapter = netdev_priv(netdev);
7482	struct pci_dev *pdev = adapter->pdev;
7483
7484	rtnl_lock();
7485
7486	if (netif_running(netdev))
7487		igb_close(netdev);
7488
7489	igb_clear_interrupt_scheme(adapter);
7490
7491	igb_init_queue_configuration(adapter);
7492
7493	if (igb_init_interrupt_scheme(adapter, true)) {
7494		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
7495		return -ENOMEM;
7496	}
7497
7498	if (netif_running(netdev))
7499		igb_open(netdev);
7500
7501	rtnl_unlock();
7502
7503	return 0;
7504}
7505
7506static int igb_pci_disable_sriov(struct pci_dev *dev)
7507{
7508	int err = igb_disable_sriov(dev);
7509
7510	if (!err)
7511		err = igb_sriov_reinit(dev);
7512
7513	return err;
7514}
7515
7516static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs)
7517{
7518	int err = igb_enable_sriov(dev, num_vfs);
7519
7520	if (err)
7521		goto out;
7522
7523	err = igb_sriov_reinit(dev);
7524	if (!err)
7525		return num_vfs;
7526
7527out:
7528	return err;
7529}
7530
7531#endif
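/**
 *  igb_pci_sriov_configure - PCI sriov_configure callback
 *  @dev: PCI device information struct
 *  @num_vfs: number of VFs requested, 0 to disable SR-IOV
 *
 *  Enables or disables SR-IOV when CONFIG_PCI_IOV is set; otherwise it
 *  simply returns 0.
 **/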
7532static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
7533{
7534#ifdef CONFIG_PCI_IOV
7535	if (num_vfs == 0)
7536		return igb_pci_disable_sriov(dev);
7537	else
7538		return igb_pci_enable_sriov(dev, num_vfs);
7539#endif
7540	return 0;
7541}
7542
7543#ifdef CONFIG_NET_POLL_CONTROLLER
7544/* Polling 'interrupt' - used by things like netconsole to send skbs
7545 * without having to re-enable interrupts. It's not called while
7546 * the interrupt routine is executing.
7547 */
7548static void igb_netpoll(struct net_device *netdev)
7549{
7550	struct igb_adapter *adapter = netdev_priv(netdev);
7551	struct e1000_hw *hw = &adapter->hw;
7552	struct igb_q_vector *q_vector;
7553	int i;
7554
7555	for (i = 0; i < adapter->num_q_vectors; i++) {
7556		q_vector = adapter->q_vector[i];
7557		if (adapter->flags & IGB_FLAG_HAS_MSIX)
7558			wr32(E1000_EIMC, q_vector->eims_value);
7559		else
7560			igb_irq_disable(adapter);
7561		napi_schedule(&q_vector->napi);
7562	}
7563}
7564#endif /* CONFIG_NET_POLL_CONTROLLER */
7565
7566/**
7567 *  igb_io_error_detected - called when PCI error is detected
7568 *  @pdev: Pointer to PCI device
7569 *  @state: The current pci connection state
7570 *
7571 *  This function is called after a PCI bus error affecting
7572 *  this device has been detected.
7573 **/
7574static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
7575					      pci_channel_state_t state)
7576{
7577	struct net_device *netdev = pci_get_drvdata(pdev);
7578	struct igb_adapter *adapter = netdev_priv(netdev);
7579
7580	netif_device_detach(netdev);
7581
7582	if (state == pci_channel_io_perm_failure)
7583		return PCI_ERS_RESULT_DISCONNECT;
7584
7585	if (netif_running(netdev))
7586		igb_down(adapter);
7587	pci_disable_device(pdev);
7588
7589	/* Request a slot reset. */
7590	return PCI_ERS_RESULT_NEED_RESET;
7591}
7592
7593/**
7594 *  igb_io_slot_reset - called after the pci bus has been reset.
7595 *  @pdev: Pointer to PCI device
7596 *
7597 *  Restart the card from scratch, as if from a cold boot. Implementation
7598 *  resembles the first half of the igb_resume routine.
7599 **/
7600static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
7601{
7602	struct net_device *netdev = pci_get_drvdata(pdev);
7603	struct igb_adapter *adapter = netdev_priv(netdev);
7604	struct e1000_hw *hw = &adapter->hw;
7605	pci_ers_result_t result;
7606	int err;
7607
7608	if (pci_enable_device_mem(pdev)) {
7609		dev_err(&pdev->dev,
7610			"Cannot re-enable PCI device after reset.\n");
7611		result = PCI_ERS_RESULT_DISCONNECT;
7612	} else {
7613		pci_set_master(pdev);
7614		pci_restore_state(pdev);
7615		pci_save_state(pdev);
7616
7617		pci_enable_wake(pdev, PCI_D3hot, 0);
7618		pci_enable_wake(pdev, PCI_D3cold, 0);
7619
7620		igb_reset(adapter);
7621		wr32(E1000_WUS, ~0);
7622		result = PCI_ERS_RESULT_RECOVERED;
7623	}
7624
7625	err = pci_cleanup_aer_uncorrect_error_status(pdev);
7626	if (err) {
7627		dev_err(&pdev->dev,
7628			"pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
7629			err);
7630		/* non-fatal, continue */
7631	}
7632
7633	return result;
7634}
7635
7636/**
7637 *  igb_io_resume - called when traffic can start flowing again.
7638 *  @pdev: Pointer to PCI device
7639 *
7640 *  This callback is called when the error recovery driver tells us that
7641 *  it's OK to resume normal operation. Implementation resembles the
7642 *  second half of the igb_resume routine.
7643 */
7644static void igb_io_resume(struct pci_dev *pdev)
7645{
7646	struct net_device *netdev = pci_get_drvdata(pdev);
7647	struct igb_adapter *adapter = netdev_priv(netdev);
7648
7649	if (netif_running(netdev)) {
7650		if (igb_up(adapter)) {
7651			dev_err(&pdev->dev, "igb_up failed after reset\n");
7652			return;
7653		}
7654	}
7655
7656	netif_device_attach(netdev);
7657
7658	/* let the f/w know that the h/w is now under the control of the
7659	 * driver.
7660	 */
7661	igb_get_hw_control(adapter);
7662}
7663
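/**
 *  igb_rar_set_qsel - write a receive address register with pool select
 *  @adapter: board private structure
 *  @addr: MAC address to program
 *  @index: receive address register to write
 *  @qsel: VMDq pool (queue) to associate with the address
 **/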
7664static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
7665			     u8 qsel)
7666{
7667	u32 rar_low, rar_high;
7668	struct e1000_hw *hw = &adapter->hw;
7669
7670	/* HW expects these in little endian so we reverse the byte order
7671	 * from network order (big endian) to little endian
7672	 */
7673	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7674		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7675	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
7676
7677	/* Indicate to hardware the Address is Valid. */
7678	rar_high |= E1000_RAH_AV;
7679
7680	if (hw->mac.type == e1000_82575)
7681		rar_high |= E1000_RAH_POOL_1 * qsel;
7682	else
7683		rar_high |= E1000_RAH_POOL_1 << qsel;
7684
7685	wr32(E1000_RAL(index), rar_low);
7686	wrfl();
7687	wr32(E1000_RAH(index), rar_high);
7688	wrfl();
7689}
7690
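/**
 *  igb_set_vf_mac - assign a MAC address to a VF
 *  @adapter: board private structure
 *  @vf: VF identifier
 *  @mac_addr: MAC address to assign
 *
 *  Records the address in the VF data and programs it into a RAR entry
 *  taken from the end of the receive address table.
 **/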
7691static int igb_set_vf_mac(struct igb_adapter *adapter,
7692			  int vf, unsigned char *mac_addr)
7693{
7694	struct e1000_hw *hw = &adapter->hw;
7695	/* VF MAC addresses start at the end of the receive address table and
7696	 * move towards the first entry, so a collision should not be possible
7697	 */
7698	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7699
7700	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7701
7702	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7703
7704	return 0;
7705}
7706
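/**
 *  igb_ndo_set_vf_mac - set a VF MAC address on behalf of the PF
 *  @netdev: network interface device structure
 *  @vf: VF identifier
 *  @mac: MAC address to assign to the VF
 *
 *  Validates the address and VF index, marks the VF as having a
 *  PF-assigned MAC and programs the address into the hardware.
 **/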
7707static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7708{
7709	struct igb_adapter *adapter = netdev_priv(netdev);
7710	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7711		return -EINVAL;
7712	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7713	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7714	dev_info(&adapter->pdev->dev,
7715		 "Reload the VF driver to make this change effective.\n");
7716	if (test_bit(__IGB_DOWN, &adapter->state)) {
7717		dev_warn(&adapter->pdev->dev,
7718			 "The VF MAC address has been set, but the PF device is not up.\n");
7719		dev_warn(&adapter->pdev->dev,
7720			 "Bring the PF device up before attempting to use the VF device.\n");
7721	}
7722	return igb_set_vf_mac(adapter, vf, mac);
7723}
7724
7725static int igb_link_mbps(int internal_link_speed)
7726{
7727	switch (internal_link_speed) {
7728	case SPEED_100:
7729		return 100;
7730	case SPEED_1000:
7731		return 1000;
7732	default:
7733		return 0;
7734	}
7735}
7736
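/* Program the per-VF Tx rate limiter (RTTBCNRC) for the given VF.  The
 * rate factor is the ratio of link speed to requested rate, split into
 * an integer and a fractional part; e.g. link_speed = 1000 and
 * tx_rate = 300 gives rf_int = 3 with rf_dec encoding the remaining
 * 100/300.  A tx_rate of 0 disables rate limiting for the VF.
 */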
7737static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7738				  int link_speed)
7739{
7740	int rf_dec, rf_int;
7741	u32 bcnrc_val;
7742
7743	if (tx_rate != 0) {
7744		/* Calculate the rate factor values to set */
7745		rf_int = link_speed / tx_rate;
7746		rf_dec = (link_speed - (rf_int * tx_rate));
7747		rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) /
7748			 tx_rate;
7749
7750		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7751		bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) &
7752			      E1000_RTTBCNRC_RF_INT_MASK);
7753		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7754	} else {
7755		bcnrc_val = 0;
7756	}
7757
7758	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7759	/* Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
7760	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
7761	 */
7762	wr32(E1000_RTTBCNRM, 0x14);
7763	wr32(E1000_RTTBCNRC, bcnrc_val);
7764}
7765
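/* Re-apply the per-VF Tx rate limits, clearing them first if the link
 * speed no longer matches the speed they were calculated for.  The
 * driver only supports VF Tx rate limiting on the 82576.
 */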
7766static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7767{
7768	int actual_link_speed, i;
7769	bool reset_rate = false;
7770
7771	/* VF TX rate limit was not set or not supported */
7772	if ((adapter->vf_rate_link_speed == 0) ||
7773	    (adapter->hw.mac.type != e1000_82576))
7774		return;
7775
7776	actual_link_speed = igb_link_mbps(adapter->link_speed);
7777	if (actual_link_speed != adapter->vf_rate_link_speed) {
7778		reset_rate = true;
7779		adapter->vf_rate_link_speed = 0;
7780		dev_info(&adapter->pdev->dev,
7781			 "Link speed has been changed. VF Transmit rate is disabled\n");
7782	}
7783
7784	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7785		if (reset_rate)
7786			adapter->vf_data[i].tx_rate = 0;
7787
7788		igb_set_vf_rate_limit(&adapter->hw, i,
7789				      adapter->vf_data[i].tx_rate,
7790				      actual_link_speed);
7791	}
7792}
7793
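/**
 *  igb_ndo_set_vf_bw - set a Tx rate limit for a VF
 *  @netdev: network interface device structure
 *  @vf: VF identifier
 *  @tx_rate: Tx rate limit in Mbps, 0 to disable limiting
 *
 *  Only supported by this driver on the 82576, and only while link is
 *  up; the requested rate may not exceed the current link speed.
 **/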
7794static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7795{
7796	struct igb_adapter *adapter = netdev_priv(netdev);
7797	struct e1000_hw *hw = &adapter->hw;
7798	int actual_link_speed;
7799
7800	if (hw->mac.type != e1000_82576)
7801		return -EOPNOTSUPP;
7802
7803	actual_link_speed = igb_link_mbps(adapter->link_speed);
7804	if ((vf >= adapter->vfs_allocated_count) ||
7805	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7806	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7807		return -EINVAL;
7808
7809	adapter->vf_rate_link_speed = actual_link_speed;
7810	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7811	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7812
7813	return 0;
7814}
7815
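/**
 *  igb_ndo_set_vf_spoofchk - enable or disable spoof checking for a VF
 *  @netdev: network interface device structure
 *  @vf: VF identifier
 *  @setting: true to enable MAC and VLAN anti-spoof checking
 **/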
7816static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
7817				   bool setting)
7818{
7819	struct igb_adapter *adapter = netdev_priv(netdev);
7820	struct e1000_hw *hw = &adapter->hw;
7821	u32 reg_val, reg_offset;
7822
7823	if (!adapter->vfs_allocated_count)
7824		return -EOPNOTSUPP;
7825
7826	if (vf >= adapter->vfs_allocated_count)
7827		return -EINVAL;
7828
7829	reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC;
7830	reg_val = rd32(reg_offset);
7831	if (setting)
7832		reg_val |= ((1 << vf) |
7833			    (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)));
7834	else
7835		reg_val &= ~((1 << vf) |
7836			     (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)));
7837	wr32(reg_offset, reg_val);
7838
7839	adapter->vf_data[vf].spoofchk_enabled = setting;
7840	return E1000_SUCCESS;
7841}
7842
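/**
 *  igb_ndo_get_vf_config - report the current configuration of a VF
 *  @netdev: network interface device structure
 *  @vf: VF identifier
 *  @ivi: structure filled with the VF's MAC, VLAN, QoS, Tx rate and
 *        spoof-check settings
 **/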
7843static int igb_ndo_get_vf_config(struct net_device *netdev,
7844				 int vf, struct ifla_vf_info *ivi)
7845{
7846	struct igb_adapter *adapter = netdev_priv(netdev);
7847	if (vf >= adapter->vfs_allocated_count)
7848		return -EINVAL;
7849	ivi->vf = vf;
7850	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7851	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7852	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7853	ivi->qos = adapter->vf_data[vf].pf_qos;
7854	ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
7855	return 0;
7856}
7857
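/* Configure MAC-level virtualization features: VLAN tag handling and
 * replication stripping per MAC type, plus loopback, replication and
 * anti-spoofing when VFs are allocated.  The case fall-through in the
 * switch below is intentional.
 */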
7858static void igb_vmm_control(struct igb_adapter *adapter)
7859{
7860	struct e1000_hw *hw = &adapter->hw;
7861	u32 reg;
7862
7863	switch (hw->mac.type) {
7864	case e1000_82575:
7865	case e1000_i210:
7866	case e1000_i211:
7867	case e1000_i354:
7868	default:
7869		/* replication is not supported for these devices */
7870		return;
7871	case e1000_82576:
7872		/* notify HW that the MAC is adding vlan tags */
7873		reg = rd32(E1000_DTXCTL);
7874		reg |= E1000_DTXCTL_VLAN_ADDED;
7875		wr32(E1000_DTXCTL, reg);
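		/* fall through */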
7876	case e1000_82580:
7877		/* enable replication vlan tag stripping */
7878		reg = rd32(E1000_RPLOLR);
7879		reg |= E1000_RPLOLR_STRVLAN;
7880		wr32(E1000_RPLOLR, reg);
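		/* fall through */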
7881	case e1000_i350:
7882		/* none of the above registers are supported by i350 */
7883		break;
7884	}
7885
7886	if (adapter->vfs_allocated_count) {
7887		igb_vmdq_set_loopback_pf(hw, true);
7888		igb_vmdq_set_replication_pf(hw, true);
7889		igb_vmdq_set_anti_spoofing_pf(hw, true,
7890					      adapter->vfs_allocated_count);
7891	} else {
7892		igb_vmdq_set_loopback_pf(hw, false);
7893		igb_vmdq_set_replication_pf(hw, false);
7894	}
7895}
7896
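/* Configure DMA coalescing thresholds and watermarks when the
 * IGB_FLAG_DMAC flag is set (MAC types newer than the 82580); on the
 * 82580 itself DMA coalescing is explicitly disabled.
 */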
7897static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7898{
7899	struct e1000_hw *hw = &adapter->hw;
7900	u32 dmac_thr;
7901	u16 hwm;
7902
7903	if (hw->mac.type > e1000_82580) {
7904		if (adapter->flags & IGB_FLAG_DMAC) {
7905			u32 reg;
7906
7907			/* force threshold to 0. */
7908			wr32(E1000_DMCTXTH, 0);
7909
7910			/* DMA Coalescing high water mark needs to be greater
7911			 * than the Rx threshold. Set hwm to PBA - max frame
7912			 * size in 16B units, capping it at PBA - 6KB.
7913			 */
7914			hwm = 64 * pba - adapter->max_frame_size / 16;
7915			if (hwm < 64 * (pba - 6))
7916				hwm = 64 * (pba - 6);
7917			reg = rd32(E1000_FCRTC);
7918			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7919			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7920				& E1000_FCRTC_RTH_COAL_MASK);
7921			wr32(E1000_FCRTC, reg);
7922
7923			/* Set the DMA Coalescing Rx threshold to PBA - 2 * max
7924			 * frame size, capping it at PBA - 10KB.
7925			 */
7926			dmac_thr = pba - adapter->max_frame_size / 512;
7927			if (dmac_thr < pba - 10)
7928				dmac_thr = pba - 10;
7929			reg = rd32(E1000_DMACR);
7930			reg &= ~E1000_DMACR_DMACTHR_MASK;
7931			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7932				& E1000_DMACR_DMACTHR_MASK);
7933
7934			/* transition to L0s or L1 if available */
7935			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7936
7937			/* watchdog timer = approx. 1000 usec, in 32 usec intervals */
7938			reg |= (1000 >> 5);
7939
7940			/* Disable BMC-to-OS Watchdog Enable */
7941			if (hw->mac.type != e1000_i354)
7942				reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7943
7944			wr32(E1000_DMACR, reg);
7945
7946			/* no lower threshold to disable
7947			 * coalescing (smart FIFO); UTHRESH = 0
7948			 */
7949			wr32(E1000_DMCRTRH, 0);
7950
7951			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7952
7953			wr32(E1000_DMCTLX, reg);
7954
7955			/* free space in tx packet buffer to wake from
7956			 * DMA coal
7957			 */
7958			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7959			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7960
7961			/* make low power state decision controlled
7962			 * by DMA coal
7963			 */
7964			reg = rd32(E1000_PCIEMISC);
7965			reg &= ~E1000_PCIEMISC_LX_DECISION;
7966			wr32(E1000_PCIEMISC, reg);
7967		} /* end if IGB_FLAG_DMAC is set */
7968	} else if (hw->mac.type == e1000_82580) {
7969		u32 reg = rd32(E1000_PCIEMISC);
7970		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7971		wr32(E1000_DMACR, 0);
7972	}
7973}
7974
7975/**
7976 *  igb_read_i2c_byte - Reads 8 bit word over I2C
7977 *  @hw: pointer to hardware structure
7978 *  @byte_offset: byte offset to read
7979 *  @dev_addr: device address
7980 *  @data: value read
7981 *
7982 *  Performs byte read operation over I2C interface at
7983 *  a specified device address.
7984 **/
7985s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
7986		      u8 dev_addr, u8 *data)
7987{
7988	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
7989	struct i2c_client *this_client = adapter->i2c_client;
7990	s32 status;
7991	u16 swfw_mask = 0;
7992
7993	if (!this_client)
7994		return E1000_ERR_I2C;
7995
7996	swfw_mask = E1000_SWFW_PHY0_SM;
7997
7998	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
7999	    != E1000_SUCCESS)
8000		return E1000_ERR_SWFW_SYNC;
8001
8002	status = i2c_smbus_read_byte_data(this_client, byte_offset);
8003	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
8004
8005	if (status < 0)
8006		return E1000_ERR_I2C;
8007	else {
8008		*data = status;
8009		return E1000_SUCCESS;
8010	}
8011}
8012
8013/**
8014 *  igb_write_i2c_byte - Writes 8 bit word over I2C
8015 *  @hw: pointer to hardware structure
8016 *  @byte_offset: byte offset to write
8017 *  @dev_addr: device address
8018 *  @data: value to write
8019 *
8020 *  Performs byte write operation over I2C interface at
8021 *  a specified device address.
8022 **/
8023s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
8024		       u8 dev_addr, u8 data)
8025{
8026	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
8027	struct i2c_client *this_client = adapter->i2c_client;
8028	s32 status;
8029	u16 swfw_mask = E1000_SWFW_PHY0_SM;
8030
8031	if (!this_client)
8032		return E1000_ERR_I2C;
8033
8034	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS)
8035		return E1000_ERR_SWFW_SYNC;
8036	status = i2c_smbus_write_byte_data(this_client, byte_offset, data);
8037	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
8038
8039	if (status)
8040		return E1000_ERR_I2C;
8041	else
8042		return E1000_SUCCESS;
8044}
8045
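/* Tear down and rebuild the interrupt scheme and queues, closing and
 * reopening the interface if it was running.
 */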
8046int igb_reinit_queues(struct igb_adapter *adapter)
8047{
8048	struct net_device *netdev = adapter->netdev;
8049	struct pci_dev *pdev = adapter->pdev;
8050	int err = 0;
8051
8052	if (netif_running(netdev))
8053		igb_close(netdev);
8054
8055	igb_reset_interrupt_capability(adapter);
8056
8057	if (igb_init_interrupt_scheme(adapter, true)) {
8058		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
8059		return -ENOMEM;
8060	}
8061
8062	if (netif_running(netdev))
8063		err = igb_open(netdev);
8064
8065	return err;
8066}
8067/* igb_main.c */
8068