igb_main.c revision 89eaefb61dc9170237d95b844dd357338fc7225d
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 3
63#define MIN 2
64#define BUILD 10
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
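/* DRV_VERSION expands to "3.2.10-k" */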
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103	/* required last entry */
104	{0, }
105};
106
107MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108
109void igb_reset(struct igb_adapter *);
110static int igb_setup_all_tx_resources(struct igb_adapter *);
111static int igb_setup_all_rx_resources(struct igb_adapter *);
112static void igb_free_all_tx_resources(struct igb_adapter *);
113static void igb_free_all_rx_resources(struct igb_adapter *);
114static void igb_setup_mrqc(struct igb_adapter *);
115static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116static void __devexit igb_remove(struct pci_dev *pdev);
117static void igb_init_hw_timer(struct igb_adapter *adapter);
118static int igb_sw_init(struct igb_adapter *);
119static int igb_open(struct net_device *);
120static int igb_close(struct net_device *);
121static void igb_configure_tx(struct igb_adapter *);
122static void igb_configure_rx(struct igb_adapter *);
123static void igb_clean_all_tx_rings(struct igb_adapter *);
124static void igb_clean_all_rx_rings(struct igb_adapter *);
125static void igb_clean_tx_ring(struct igb_ring *);
126static void igb_clean_rx_ring(struct igb_ring *);
127static void igb_set_rx_mode(struct net_device *);
128static void igb_update_phy_info(unsigned long);
129static void igb_watchdog(unsigned long);
130static void igb_watchdog_task(struct work_struct *);
131static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133						 struct rtnl_link_stats64 *stats);
134static int igb_change_mtu(struct net_device *, int);
135static int igb_set_mac(struct net_device *, void *);
136static void igb_set_uta(struct igb_adapter *adapter);
137static irqreturn_t igb_intr(int irq, void *);
138static irqreturn_t igb_intr_msi(int irq, void *);
139static irqreturn_t igb_msix_other(int irq, void *);
140static irqreturn_t igb_msix_ring(int irq, void *);
141#ifdef CONFIG_IGB_DCA
142static void igb_update_dca(struct igb_q_vector *);
143static void igb_setup_dca(struct igb_adapter *);
144#endif /* CONFIG_IGB_DCA */
145static int igb_poll(struct napi_struct *, int);
146static bool igb_clean_tx_irq(struct igb_q_vector *);
147static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149static void igb_tx_timeout(struct net_device *);
150static void igb_reset_task(struct work_struct *);
151static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152static int igb_vlan_rx_add_vid(struct net_device *, u16);
153static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154static void igb_restore_vlan(struct igb_adapter *);
155static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
156static void igb_ping_all_vfs(struct igb_adapter *);
157static void igb_msg_task(struct igb_adapter *);
158static void igb_vmm_control(struct igb_adapter *);
159static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163			       int vf, u16 vlan, u8 qos);
164static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166				 struct ifla_vf_info *ivi);
167static void igb_check_vf_rate_limit(struct igb_adapter *);
168
169#ifdef CONFIG_PCI_IOV
170static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172static int igb_check_vf_assignment(struct igb_adapter *adapter);
173#endif
174
175#ifdef CONFIG_PM
176#ifdef CONFIG_PM_SLEEP
177static int igb_suspend(struct device *);
178#endif
179static int igb_resume(struct device *);
180#ifdef CONFIG_PM_RUNTIME
181static int igb_runtime_suspend(struct device *dev);
182static int igb_runtime_resume(struct device *dev);
183static int igb_runtime_idle(struct device *dev);
184#endif
185static const struct dev_pm_ops igb_pm_ops = {
186	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
187	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
188			igb_runtime_idle)
189};
190#endif
191static void igb_shutdown(struct pci_dev *);
192#ifdef CONFIG_IGB_DCA
193static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
194static struct notifier_block dca_notifier = {
195	.notifier_call	= igb_notify_dca,
196	.next		= NULL,
197	.priority	= 0
198};
199#endif
200#ifdef CONFIG_NET_POLL_CONTROLLER
201/* for netdump / net console */
202static void igb_netpoll(struct net_device *);
203#endif
204#ifdef CONFIG_PCI_IOV
205static unsigned int max_vfs = 0;
206module_param(max_vfs, uint, 0);
207MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
208                 "per physical function");
209#endif /* CONFIG_PCI_IOV */
210
211static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212		     pci_channel_state_t);
213static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214static void igb_io_resume(struct pci_dev *);
215
216static struct pci_error_handlers igb_err_handler = {
217	.error_detected = igb_io_error_detected,
218	.slot_reset = igb_io_slot_reset,
219	.resume = igb_io_resume,
220};
221
222static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
223
224static struct pci_driver igb_driver = {
225	.name     = igb_driver_name,
226	.id_table = igb_pci_tbl,
227	.probe    = igb_probe,
228	.remove   = __devexit_p(igb_remove),
229#ifdef CONFIG_PM
230	.driver.pm = &igb_pm_ops,
231#endif
232	.shutdown = igb_shutdown,
233	.err_handler = &igb_err_handler
234};
235
236MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238MODULE_LICENSE("GPL");
239MODULE_VERSION(DRV_VERSION);
240
241struct igb_reg_info {
242	u32 ofs;
243	char *name;
244};
245
246static const struct igb_reg_info igb_reg_info_tbl[] = {
247
248	/* General Registers */
249	{E1000_CTRL, "CTRL"},
250	{E1000_STATUS, "STATUS"},
251	{E1000_CTRL_EXT, "CTRL_EXT"},
252
253	/* Interrupt Registers */
254	{E1000_ICR, "ICR"},
255
256	/* RX Registers */
257	{E1000_RCTL, "RCTL"},
258	{E1000_RDLEN(0), "RDLEN"},
259	{E1000_RDH(0), "RDH"},
260	{E1000_RDT(0), "RDT"},
261	{E1000_RXDCTL(0), "RXDCTL"},
262	{E1000_RDBAL(0), "RDBAL"},
263	{E1000_RDBAH(0), "RDBAH"},
264
265	/* TX Registers */
266	{E1000_TCTL, "TCTL"},
267	{E1000_TDBAL(0), "TDBAL"},
268	{E1000_TDBAH(0), "TDBAH"},
269	{E1000_TDLEN(0), "TDLEN"},
270	{E1000_TDH(0), "TDH"},
271	{E1000_TDT(0), "TDT"},
272	{E1000_TXDCTL(0), "TXDCTL"},
273	{E1000_TDFH, "TDFH"},
274	{E1000_TDFT, "TDFT"},
275	{E1000_TDFHS, "TDFHS"},
276	{E1000_TDFPC, "TDFPC"},
277
278	/* List Terminator */
279	{}
280};
281
282/*
283 * igb_regdump - register printout routine
284 */
285static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
286{
287	int n = 0;
288	char rname[16];
289	u32 regs[8];
290
291	switch (reginfo->ofs) {
292	case E1000_RDLEN(0):
293		for (n = 0; n < 4; n++)
294			regs[n] = rd32(E1000_RDLEN(n));
295		break;
296	case E1000_RDH(0):
297		for (n = 0; n < 4; n++)
298			regs[n] = rd32(E1000_RDH(n));
299		break;
300	case E1000_RDT(0):
301		for (n = 0; n < 4; n++)
302			regs[n] = rd32(E1000_RDT(n));
303		break;
304	case E1000_RXDCTL(0):
305		for (n = 0; n < 4; n++)
306			regs[n] = rd32(E1000_RXDCTL(n));
307		break;
308	case E1000_RDBAL(0):
309		for (n = 0; n < 4; n++)
310			regs[n] = rd32(E1000_RDBAL(n));
311		break;
312	case E1000_RDBAH(0):
313		for (n = 0; n < 4; n++)
314			regs[n] = rd32(E1000_RDBAH(n));
315		break;
316	case E1000_TDBAL(0):
317		for (n = 0; n < 4; n++)
318			regs[n] = rd32(E1000_TDBAL(n));
319		break;
320	case E1000_TDBAH(0):
321		for (n = 0; n < 4; n++)
322			regs[n] = rd32(E1000_TDBAH(n));
323		break;
324	case E1000_TDLEN(0):
325		for (n = 0; n < 4; n++)
326			regs[n] = rd32(E1000_TDLEN(n));
327		break;
328	case E1000_TDH(0):
329		for (n = 0; n < 4; n++)
330			regs[n] = rd32(E1000_TDH(n));
331		break;
332	case E1000_TDT(0):
333		for (n = 0; n < 4; n++)
334			regs[n] = rd32(E1000_TDT(n));
335		break;
336	case E1000_TXDCTL(0):
337		for (n = 0; n < 4; n++)
338			regs[n] = rd32(E1000_TXDCTL(n));
339		break;
340	default:
341		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
342		return;
343	}
344
345	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
346	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
347		regs[2], regs[3]);
348}
349
350/*
351 * igb_dump - Print registers, tx-rings and rx-rings
352 */
353static void igb_dump(struct igb_adapter *adapter)
354{
355	struct net_device *netdev = adapter->netdev;
356	struct e1000_hw *hw = &adapter->hw;
357	struct igb_reg_info *reginfo;
358	struct igb_ring *tx_ring;
359	union e1000_adv_tx_desc *tx_desc;
360	struct my_u0 { u64 a; u64 b; } *u0;
361	struct igb_ring *rx_ring;
362	union e1000_adv_rx_desc *rx_desc;
363	u32 staterr;
364	u16 i, n;
365
366	if (!netif_msg_hw(adapter))
367		return;
368
369	/* Print netdevice Info */
370	if (netdev) {
371		dev_info(&adapter->pdev->dev, "Net device Info\n");
372		pr_info("Device Name     state            trans_start      "
373			"last_rx\n");
374		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
375			netdev->state, netdev->trans_start, netdev->last_rx);
376	}
377
378	/* Print Registers */
379	dev_info(&adapter->pdev->dev, "Register Dump\n");
380	pr_info(" Register Name   Value\n");
381	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
382	     reginfo->name; reginfo++) {
383		igb_regdump(hw, reginfo);
384	}
385
386	/* Print TX Ring Summary */
387	if (!netdev || !netif_running(netdev))
388		goto exit;
389
390	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
391	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
392	for (n = 0; n < adapter->num_tx_queues; n++) {
393		struct igb_tx_buffer *buffer_info;
394		tx_ring = adapter->tx_ring[n];
395		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
396		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
397			n, tx_ring->next_to_use, tx_ring->next_to_clean,
398			(u64)buffer_info->dma,
399			buffer_info->length,
400			buffer_info->next_to_watch,
401			(u64)buffer_info->time_stamp);
402	}
403
404	/* Print TX Rings */
405	if (!netif_msg_tx_done(adapter))
406		goto rx_ring_summary;
407
408	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
409
410	/* Transmit Descriptor Formats
411	 *
412	 * Advanced Transmit Descriptor
413	 *   +--------------------------------------------------------------+
414	 * 0 |         Buffer Address [63:0]                                |
415	 *   +--------------------------------------------------------------+
416	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
417	 *   +--------------------------------------------------------------+
418	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
419	 */
420
421	for (n = 0; n < adapter->num_tx_queues; n++) {
422		tx_ring = adapter->tx_ring[n];
423		pr_info("------------------------------------\n");
424		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
425		pr_info("------------------------------------\n");
426		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
427			"[bi->dma       ] leng  ntw timestamp        "
428			"bi->skb\n");
429
430		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
431			const char *next_desc;
432			struct igb_tx_buffer *buffer_info;
433			tx_desc = IGB_TX_DESC(tx_ring, i);
434			buffer_info = &tx_ring->tx_buffer_info[i];
435			u0 = (struct my_u0 *)tx_desc;
436			if (i == tx_ring->next_to_use &&
437			    i == tx_ring->next_to_clean)
438				next_desc = " NTC/U";
439			else if (i == tx_ring->next_to_use)
440				next_desc = " NTU";
441			else if (i == tx_ring->next_to_clean)
442				next_desc = " NTC";
443			else
444				next_desc = "";
445
446			pr_info("T [0x%03X]    %016llX %016llX %016llX"
447				" %04X  %p %016llX %p%s\n", i,
448				le64_to_cpu(u0->a),
449				le64_to_cpu(u0->b),
450				(u64)buffer_info->dma,
451				buffer_info->length,
452				buffer_info->next_to_watch,
453				(u64)buffer_info->time_stamp,
454				buffer_info->skb, next_desc);
455
456			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
457				print_hex_dump(KERN_INFO, "",
458					DUMP_PREFIX_ADDRESS,
459					16, 1, phys_to_virt(buffer_info->dma),
460					buffer_info->length, true);
461		}
462	}
463
464	/* Print RX Rings Summary */
465rx_ring_summary:
466	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
467	pr_info("Queue [NTU] [NTC]\n");
468	for (n = 0; n < adapter->num_rx_queues; n++) {
469		rx_ring = adapter->rx_ring[n];
470		pr_info(" %5d %5X %5X\n",
471			n, rx_ring->next_to_use, rx_ring->next_to_clean);
472	}
473
474	/* Print RX Rings */
475	if (!netif_msg_rx_status(adapter))
476		goto exit;
477
478	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
479
480	/* Advanced Receive Descriptor (Read) Format
481	 *    63                                           1        0
482	 *    +-----------------------------------------------------+
483	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
484	 *    +----------------------------------------------+------+
485	 *  8 |       Header Buffer Address [63:1]           |  DD  |
486	 *    +-----------------------------------------------------+
487	 *
488	 *
489	 * Advanced Receive Descriptor (Write-Back) Format
490	 *
491	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
492	 *   +------------------------------------------------------+
493	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
494	 *   | Checksum   Ident  |   |           |    | Type | Type |
495	 *   +------------------------------------------------------+
496	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
497	 *   +------------------------------------------------------+
498	 *   63       48 47    32 31            20 19               0
499	 */
500
501	for (n = 0; n < adapter->num_rx_queues; n++) {
502		rx_ring = adapter->rx_ring[n];
503		pr_info("------------------------------------\n");
504		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
505		pr_info("------------------------------------\n");
506		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
507			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
508		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
509			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
510
511		for (i = 0; i < rx_ring->count; i++) {
512			const char *next_desc;
513			struct igb_rx_buffer *buffer_info;
514			buffer_info = &rx_ring->rx_buffer_info[i];
515			rx_desc = IGB_RX_DESC(rx_ring, i);
516			u0 = (struct my_u0 *)rx_desc;
517			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
518
519			if (i == rx_ring->next_to_use)
520				next_desc = " NTU";
521			else if (i == rx_ring->next_to_clean)
522				next_desc = " NTC";
523			else
524				next_desc = "";
525
526			if (staterr & E1000_RXD_STAT_DD) {
527				/* Descriptor Done */
528				pr_info("%s[0x%03X]     %016llX %016llX -------"
529					"--------- %p%s\n", "RWB", i,
530					le64_to_cpu(u0->a),
531					le64_to_cpu(u0->b),
532					buffer_info->skb, next_desc);
533			} else {
534				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
535					" %p%s\n", "R  ", i,
536					le64_to_cpu(u0->a),
537					le64_to_cpu(u0->b),
538					(u64)buffer_info->dma,
539					buffer_info->skb, next_desc);
540
541				if (netif_msg_pktdata(adapter)) {
542					print_hex_dump(KERN_INFO, "",
543						DUMP_PREFIX_ADDRESS,
544						16, 1,
545						phys_to_virt(buffer_info->dma),
546						IGB_RX_HDR_LEN, true);
547					print_hex_dump(KERN_INFO, "",
548					  DUMP_PREFIX_ADDRESS,
549					  16, 1,
550					  phys_to_virt(
551					    buffer_info->page_dma +
552					    buffer_info->page_offset),
553					  PAGE_SIZE/2, true);
554				}
555			}
556		}
557	}
558
559exit:
560	return;
561}
562
563
564/**
565 * igb_read_clock - read raw cycle counter (to be used by time counter)
566 */
567static cycle_t igb_read_clock(const struct cyclecounter *tc)
568{
569	struct igb_adapter *adapter =
570		container_of(tc, struct igb_adapter, cycles);
571	struct e1000_hw *hw = &adapter->hw;
572	u64 stamp = 0;
573	int shift = 0;
574
575	/*
576	 * The timestamp latches on lowest register read. For the 82580
577	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
578	 * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
579	 * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
580	if (hw->mac.type >= e1000_82580) {
581		stamp = rd32(E1000_SYSTIMR) >> 8;
582		shift = IGB_82580_TSYNC_SHIFT;
583	}
584
585	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
586	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
587	return stamp;
588}
589
590/**
591 * igb_get_hw_dev - return device
592 * used by hardware layer to print debugging information
593 **/
594struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
595{
596	struct igb_adapter *adapter = hw->back;
597	return adapter->netdev;
598}
599
600/**
601 * igb_init_module - Driver Registration Routine
602 *
603 * igb_init_module is the first routine called when the driver is
604 * loaded. All it does is register with the PCI subsystem.
605 **/
606static int __init igb_init_module(void)
607{
608	int ret;
609	pr_info("%s - version %s\n",
610	       igb_driver_string, igb_driver_version);
611
612	pr_info("%s\n", igb_copyright);
613
614#ifdef CONFIG_IGB_DCA
615	dca_register_notify(&dca_notifier);
616#endif
617	ret = pci_register_driver(&igb_driver);
618	return ret;
619}
620
621module_init(igb_init_module);
622
623/**
624 * igb_exit_module - Driver Exit Cleanup Routine
625 *
626 * igb_exit_module is called just before the driver is removed
627 * from memory.
628 **/
629static void __exit igb_exit_module(void)
630{
631#ifdef CONFIG_IGB_DCA
632	dca_unregister_notify(&dca_notifier);
633#endif
634	pci_unregister_driver(&igb_driver);
635}
636
637module_exit(igb_exit_module);
638
639#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
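/*
 * For reference, Q_IDX_82576(i) for i = 0..7 evaluates to 0, 8, 1, 9, 2, 10,
 * 3, 11 - i.e. it walks the interleaved 0/8, 1/9, ... queue pairing that
 * igb_cache_ring_register() below describes for the VFs.
 */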
640/**
641 * igb_cache_ring_register - Descriptor ring to register mapping
642 * @adapter: board private structure to initialize
643 *
644 * Once we know the feature-set enabled for the device, we'll cache
645 * the register offset the descriptor ring is assigned to.
646 **/
647static void igb_cache_ring_register(struct igb_adapter *adapter)
648{
649	int i = 0, j = 0;
650	u32 rbase_offset = adapter->vfs_allocated_count;
651
652	switch (adapter->hw.mac.type) {
653	case e1000_82576:
654		/* The queues are allocated for virtualization such that VF 0
655		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
656		 * In order to avoid collision we start at the first free queue
657		 * and continue consuming queues in the same sequence
658		 */
659		if (adapter->vfs_allocated_count) {
660			for (; i < adapter->rss_queues; i++)
661				adapter->rx_ring[i]->reg_idx = rbase_offset +
662				                               Q_IDX_82576(i);
663		}
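		/* Fall through - remaining rings get the 1:1 mapping below */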
664	case e1000_82575:
665	case e1000_82580:
666	case e1000_i350:
667	default:
668		for (; i < adapter->num_rx_queues; i++)
669			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
670		for (; j < adapter->num_tx_queues; j++)
671			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
672		break;
673	}
674}
675
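/**
 * igb_free_queues - Free memory for all rings
 * @adapter: board private structure to clean up
 *
 * Free the Tx and Rx ring structures allocated by igb_alloc_queues and
 * reset the queue counts to zero.
 **/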
676static void igb_free_queues(struct igb_adapter *adapter)
677{
678	int i;
679
680	for (i = 0; i < adapter->num_tx_queues; i++) {
681		kfree(adapter->tx_ring[i]);
682		adapter->tx_ring[i] = NULL;
683	}
684	for (i = 0; i < adapter->num_rx_queues; i++) {
685		kfree(adapter->rx_ring[i]);
686		adapter->rx_ring[i] = NULL;
687	}
688	adapter->num_rx_queues = 0;
689	adapter->num_tx_queues = 0;
690}
691
692/**
693 * igb_alloc_queues - Allocate memory for all rings
694 * @adapter: board private structure to initialize
695 *
696 * We allocate one ring per queue at run-time since we don't know the
697 * number of queues at compile-time.
698 **/
699static int igb_alloc_queues(struct igb_adapter *adapter)
700{
701	struct igb_ring *ring;
702	int i;
703	int orig_node = adapter->node;
704
705	for (i = 0; i < adapter->num_tx_queues; i++) {
706		if (orig_node == -1) {
707			int cur_node = next_online_node(adapter->node);
708			if (cur_node == MAX_NUMNODES)
709				cur_node = first_online_node;
710			adapter->node = cur_node;
711		}
712		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
713				    adapter->node);
714		if (!ring)
715			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
716		if (!ring)
717			goto err;
718		ring->count = adapter->tx_ring_count;
719		ring->queue_index = i;
720		ring->dev = &adapter->pdev->dev;
721		ring->netdev = adapter->netdev;
722		ring->numa_node = adapter->node;
723		/* For 82575, context index must be unique per ring. */
724		if (adapter->hw.mac.type == e1000_82575)
725			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
726		adapter->tx_ring[i] = ring;
727	}
728	/* Restore the adapter's original node */
729	adapter->node = orig_node;
730
731	for (i = 0; i < adapter->num_rx_queues; i++) {
732		if (orig_node == -1) {
733			int cur_node = next_online_node(adapter->node);
734			if (cur_node == MAX_NUMNODES)
735				cur_node = first_online_node;
736			adapter->node = cur_node;
737		}
738		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
739				    adapter->node);
740		if (!ring)
741			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
742		if (!ring)
743			goto err;
744		ring->count = adapter->rx_ring_count;
745		ring->queue_index = i;
746		ring->dev = &adapter->pdev->dev;
747		ring->netdev = adapter->netdev;
748		ring->numa_node = adapter->node;
749		/* set flag indicating ring supports SCTP checksum offload */
750		if (adapter->hw.mac.type >= e1000_82576)
751			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
752
753		/* On i350, loopback VLAN packets have the tag byte-swapped. */
754		if (adapter->hw.mac.type == e1000_i350)
755			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
756
757		adapter->rx_ring[i] = ring;
758	}
759	/* Restore the adapter's original node */
760	adapter->node = orig_node;
761
762	igb_cache_ring_register(adapter);
763
764	return 0;
765
766err:
767	/* Restore the adapter's original node */
768	adapter->node = orig_node;
769	igb_free_queues(adapter);
770
771	return -ENOMEM;
772}
773
774/**
775 *  igb_write_ivar - configure ivar for given MSI-X vector
776 *  @hw: pointer to the HW structure
777 *  @msix_vector: vector number we are allocating to a given ring
778 *  @index: row index of IVAR register to write within IVAR table
779 *  @offset: column offset within IVAR, should be a multiple of 8
780 *
781 *  This function is intended to handle the writing of the IVAR register
782 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
783 *  each containing a cause allocation for an Rx and Tx ring, and a
784 *  variable number of rows depending on the number of queues supported.
785 **/
786static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
787			   int index, int offset)
788{
789	u32 ivar = array_rd32(E1000_IVAR0, index);
790
791	/* clear any bits that are currently set */
792	ivar &= ~((u32)0xFF << offset);
793
794	/* write vector and valid bit */
795	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
796
797	array_wr32(E1000_IVAR0, index, ivar);
798}
799
800#define IGB_N0_QUEUE -1
801static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
802{
803	struct igb_adapter *adapter = q_vector->adapter;
804	struct e1000_hw *hw = &adapter->hw;
805	int rx_queue = IGB_N0_QUEUE;
806	int tx_queue = IGB_N0_QUEUE;
807	u32 msixbm = 0;
808
809	if (q_vector->rx.ring)
810		rx_queue = q_vector->rx.ring->reg_idx;
811	if (q_vector->tx.ring)
812		tx_queue = q_vector->tx.ring->reg_idx;
813
814	switch (hw->mac.type) {
815	case e1000_82575:
816		/* The 82575 assigns vectors using a bitmask, which matches the
817		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
818		   or more queues to a vector, we write the appropriate bits
819		   into the MSIXBM register for that vector. */
820		if (rx_queue > IGB_N0_QUEUE)
821			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
822		if (tx_queue > IGB_N0_QUEUE)
823			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
824		if (!adapter->msix_entries && msix_vector == 0)
825			msixbm |= E1000_EIMS_OTHER;
826		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
827		q_vector->eims_value = msixbm;
828		break;
829	case e1000_82576:
830		/*
831		 * 82576 uses a table that essentially consists of 2 columns
832		 * with 8 rows.  The ordering is column-major so we use the
833		 * lower 3 bits as the row index, and the 4th bit as the
834		 * column offset.
835		 */
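		/*
		 * Worked example: rx_queue 10 lands in IVAR row 2 (10 & 0x7)
		 * at column offset 16 ((10 & 0x8) << 1); tx_queue 10 uses the
		 * same row at offset 24.
		 */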
836		if (rx_queue > IGB_N0_QUEUE)
837			igb_write_ivar(hw, msix_vector,
838				       rx_queue & 0x7,
839				       (rx_queue & 0x8) << 1);
840		if (tx_queue > IGB_N0_QUEUE)
841			igb_write_ivar(hw, msix_vector,
842				       tx_queue & 0x7,
843				       ((tx_queue & 0x8) << 1) + 8);
844		q_vector->eims_value = 1 << msix_vector;
845		break;
846	case e1000_82580:
847	case e1000_i350:
848		/*
849		 * On 82580 and newer adapters the scheme is similar to 82576
850		 * however instead of ordering column-major we have things
851		 * ordered row-major.  So we traverse the table by using
852		 * bit 0 as the column offset, and the remaining bits as the
853		 * row index.
854		 */
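		/*
		 * Worked example: rx_queue 3 lands in IVAR row 1 (3 >> 1) at
		 * column offset 16 ((3 & 0x1) << 4); tx_queue 3 uses the same
		 * row at offset 24.
		 */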
855		if (rx_queue > IGB_N0_QUEUE)
856			igb_write_ivar(hw, msix_vector,
857				       rx_queue >> 1,
858				       (rx_queue & 0x1) << 4);
859		if (tx_queue > IGB_N0_QUEUE)
860			igb_write_ivar(hw, msix_vector,
861				       tx_queue >> 1,
862				       ((tx_queue & 0x1) << 4) + 8);
863		q_vector->eims_value = 1 << msix_vector;
864		break;
865	default:
866		BUG();
867		break;
868	}
869
870	/* add q_vector eims value to global eims_enable_mask */
871	adapter->eims_enable_mask |= q_vector->eims_value;
872
873	/* configure q_vector to set itr on first interrupt */
874	q_vector->set_itr = 1;
875}
876
877/**
878 * igb_configure_msix - Configure MSI-X hardware
879 *
880 * igb_configure_msix sets up the hardware to properly
881 * generate MSI-X interrupts.
882 **/
883static void igb_configure_msix(struct igb_adapter *adapter)
884{
885	u32 tmp;
886	int i, vector = 0;
887	struct e1000_hw *hw = &adapter->hw;
888
889	adapter->eims_enable_mask = 0;
890
891	/* set vector for other causes, i.e. link changes */
892	switch (hw->mac.type) {
893	case e1000_82575:
894		tmp = rd32(E1000_CTRL_EXT);
895		/* enable MSI-X PBA support*/
896		tmp |= E1000_CTRL_EXT_PBA_CLR;
897
898		/* Auto-Mask interrupts upon ICR read. */
899		tmp |= E1000_CTRL_EXT_EIAME;
900		tmp |= E1000_CTRL_EXT_IRCA;
901
902		wr32(E1000_CTRL_EXT, tmp);
903
904		/* enable msix_other interrupt */
905		array_wr32(E1000_MSIXBM(0), vector++,
906		                      E1000_EIMS_OTHER);
907		adapter->eims_other = E1000_EIMS_OTHER;
908
909		break;
910
911	case e1000_82576:
912	case e1000_82580:
913	case e1000_i350:
914		/* Turn on MSI-X capability first, or our settings
915		 * won't stick.  And it will take days to debug. */
916		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
917		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
918		                E1000_GPIE_NSICR);
919
920		/* enable msix_other interrupt */
921		adapter->eims_other = 1 << vector;
922		tmp = (vector++ | E1000_IVAR_VALID) << 8;
923
924		wr32(E1000_IVAR_MISC, tmp);
925		break;
926	default:
927		/* do nothing, since nothing else supports MSI-X */
928		break;
929	} /* switch (hw->mac.type) */
930
931	adapter->eims_enable_mask |= adapter->eims_other;
932
933	for (i = 0; i < adapter->num_q_vectors; i++)
934		igb_assign_vector(adapter->q_vector[i], vector++);
935
936	wrfl();
937}
938
939/**
940 * igb_request_msix - Initialize MSI-X interrupts
941 *
942 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
943 * kernel.
944 **/
945static int igb_request_msix(struct igb_adapter *adapter)
946{
947	struct net_device *netdev = adapter->netdev;
948	struct e1000_hw *hw = &adapter->hw;
949	int i, err = 0, vector = 0;
950
951	err = request_irq(adapter->msix_entries[vector].vector,
952	                  igb_msix_other, 0, netdev->name, adapter);
953	if (err)
954		goto out;
955	vector++;
956
957	for (i = 0; i < adapter->num_q_vectors; i++) {
958		struct igb_q_vector *q_vector = adapter->q_vector[i];
959
960		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
961
962		if (q_vector->rx.ring && q_vector->tx.ring)
963			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
964				q_vector->rx.ring->queue_index);
965		else if (q_vector->tx.ring)
966			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
967				q_vector->tx.ring->queue_index);
968		else if (q_vector->rx.ring)
969			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
970				q_vector->rx.ring->queue_index);
971		else
972			sprintf(q_vector->name, "%s-unused", netdev->name);
973
974		err = request_irq(adapter->msix_entries[vector].vector,
975		                  igb_msix_ring, 0, q_vector->name,
976		                  q_vector);
977		if (err)
978			goto out;
979		vector++;
980	}
981
982	igb_configure_msix(adapter);
983	return 0;
984out:
985	return err;
986}
987
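/**
 * igb_reset_interrupt_capability - disable MSI-X or MSI
 * @adapter: board private structure
 *
 * Release whichever interrupt capability (MSI-X or MSI) is currently in use
 * and free the MSI-X entry table.
 **/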
988static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
989{
990	if (adapter->msix_entries) {
991		pci_disable_msix(adapter->pdev);
992		kfree(adapter->msix_entries);
993		adapter->msix_entries = NULL;
994	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
995		pci_disable_msi(adapter->pdev);
996	}
997}
998
999/**
1000 * igb_free_q_vectors - Free memory allocated for interrupt vectors
1001 * @adapter: board private structure to initialize
1002 *
1003 * This function frees the memory allocated to the q_vectors.  In addition if
1004 * NAPI is enabled it will delete any references to the NAPI struct prior
1005 * to freeing the q_vector.
1006 **/
1007static void igb_free_q_vectors(struct igb_adapter *adapter)
1008{
1009	int v_idx;
1010
1011	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1012		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1013		adapter->q_vector[v_idx] = NULL;
1014		if (!q_vector)
1015			continue;
1016		netif_napi_del(&q_vector->napi);
1017		kfree(q_vector);
1018	}
1019	adapter->num_q_vectors = 0;
1020}
1021
1022/**
1023 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1024 *
1025 * This function resets the device so that it has 0 rx queues, tx queues, and
1026 * MSI-X interrupts allocated.
1027 */
1028static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1029{
1030	igb_free_queues(adapter);
1031	igb_free_q_vectors(adapter);
1032	igb_reset_interrupt_capability(adapter);
1033}
1034
1035/**
1036 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1037 *
1038 * Attempt to configure interrupts using the best available
1039 * capabilities of the hardware and kernel.
1040 **/
1041static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1042{
1043	int err;
1044	int numvecs, i;
1045
1046	/* Number of supported queues. */
1047	adapter->num_rx_queues = adapter->rss_queues;
1048	if (adapter->vfs_allocated_count)
1049		adapter->num_tx_queues = 1;
1050	else
1051		adapter->num_tx_queues = adapter->rss_queues;
1052
1053	/* start with one vector for every rx queue */
1054	numvecs = adapter->num_rx_queues;
1055
1056	/* if tx handler is separate add 1 for every tx queue */
1057	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1058		numvecs += adapter->num_tx_queues;
1059
1060	/* store the number of vectors reserved for queues */
1061	adapter->num_q_vectors = numvecs;
1062
1063	/* add 1 vector for link status interrupts */
1064	numvecs++;
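	/*
	 * Example vector budget: 4 RSS queues without IGB_FLAG_QUEUE_PAIRS
	 * request 4 Rx + 4 Tx + 1 link = 9 MSI-X vectors; with queue pairs
	 * enabled the same configuration needs only 4 + 1 = 5.
	 */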
1065	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1066					GFP_KERNEL);
1067	if (!adapter->msix_entries)
1068		goto msi_only;
1069
1070	for (i = 0; i < numvecs; i++)
1071		adapter->msix_entries[i].entry = i;
1072
1073	err = pci_enable_msix(adapter->pdev,
1074			      adapter->msix_entries,
1075			      numvecs);
1076	if (err == 0)
1077		goto out;
1078
1079	igb_reset_interrupt_capability(adapter);
1080
1081	/* If we can't do MSI-X, try MSI */
1082msi_only:
1083#ifdef CONFIG_PCI_IOV
1084	/* disable SR-IOV for non MSI-X configurations */
1085	if (adapter->vf_data) {
1086		struct e1000_hw *hw = &adapter->hw;
1087		/* disable iov and allow time for transactions to clear */
1088		pci_disable_sriov(adapter->pdev);
1089		msleep(500);
1090
1091		kfree(adapter->vf_data);
1092		adapter->vf_data = NULL;
1093		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1094		wrfl();
1095		msleep(100);
1096		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1097	}
1098#endif
1099	adapter->vfs_allocated_count = 0;
1100	adapter->rss_queues = 1;
1101	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1102	adapter->num_rx_queues = 1;
1103	adapter->num_tx_queues = 1;
1104	adapter->num_q_vectors = 1;
1105	if (!pci_enable_msi(adapter->pdev))
1106		adapter->flags |= IGB_FLAG_HAS_MSI;
1107out:
1108	/* Notify the stack of the (possibly) reduced queue counts. */
1109	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1110	return netif_set_real_num_rx_queues(adapter->netdev,
1111					    adapter->num_rx_queues);
1112}
1113
1114/**
1115 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1116 * @adapter: board private structure to initialize
1117 *
1118 * We allocate one q_vector per queue interrupt.  If allocation fails we
1119 * return -ENOMEM.
1120 **/
1121static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1122{
1123	struct igb_q_vector *q_vector;
1124	struct e1000_hw *hw = &adapter->hw;
1125	int v_idx;
1126	int orig_node = adapter->node;
1127
1128	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1129		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1130						adapter->num_tx_queues)) &&
1131		    (adapter->num_rx_queues == v_idx))
1132			adapter->node = orig_node;
1133		if (orig_node == -1) {
1134			int cur_node = next_online_node(adapter->node);
1135			if (cur_node == MAX_NUMNODES)
1136				cur_node = first_online_node;
1137			adapter->node = cur_node;
1138		}
1139		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1140					adapter->node);
1141		if (!q_vector)
1142			q_vector = kzalloc(sizeof(struct igb_q_vector),
1143					   GFP_KERNEL);
1144		if (!q_vector)
1145			goto err_out;
1146		q_vector->adapter = adapter;
1147		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1148		q_vector->itr_val = IGB_START_ITR;
1149		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1150		adapter->q_vector[v_idx] = q_vector;
1151	}
1152	/* Restore the adapter's original node */
1153	adapter->node = orig_node;
1154
1155	return 0;
1156
1157err_out:
1158	/* Restore the adapter's original node */
1159	adapter->node = orig_node;
1160	igb_free_q_vectors(adapter);
1161	return -ENOMEM;
1162}
1163
1164static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1165                                      int ring_idx, int v_idx)
1166{
1167	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1168
1169	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1170	q_vector->rx.ring->q_vector = q_vector;
1171	q_vector->rx.count++;
1172	q_vector->itr_val = adapter->rx_itr_setting;
1173	if (q_vector->itr_val && q_vector->itr_val <= 3)
1174		q_vector->itr_val = IGB_START_ITR;
1175}
1176
1177static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1178                                      int ring_idx, int v_idx)
1179{
1180	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1181
1182	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1183	q_vector->tx.ring->q_vector = q_vector;
1184	q_vector->tx.count++;
1185	q_vector->itr_val = adapter->tx_itr_setting;
1186	q_vector->tx.work_limit = adapter->tx_work_limit;
1187	if (q_vector->itr_val && q_vector->itr_val <= 3)
1188		q_vector->itr_val = IGB_START_ITR;
1189}
1190
1191/**
1192 * igb_map_ring_to_vector - maps allocated queues to vectors
1193 *
1194 * This function maps the recently allocated queues to vectors.
1195 **/
1196static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1197{
1198	int i;
1199	int v_idx = 0;
1200
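	/*
	 * With enough vectors every ring gets its own q_vector; otherwise
	 * Rx ring i and Tx ring i share q_vector i (queue pairing), e.g.
	 * 4 Rx + 4 Tx rings mapped onto 4 vectors.
	 */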
1201	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1202	    (adapter->num_q_vectors < adapter->num_tx_queues))
1203		return -ENOMEM;
1204
1205	if (adapter->num_q_vectors >=
1206	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1207		for (i = 0; i < adapter->num_rx_queues; i++)
1208			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209		for (i = 0; i < adapter->num_tx_queues; i++)
1210			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1211	} else {
1212		for (i = 0; i < adapter->num_rx_queues; i++) {
1213			if (i < adapter->num_tx_queues)
1214				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1215			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1216		}
1217		for (; i < adapter->num_tx_queues; i++)
1218			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1219	}
1220	return 0;
1221}
1222
1223/**
1224 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1225 *
1226 * This function initializes the interrupts and allocates all of the queues.
1227 **/
1228static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1229{
1230	struct pci_dev *pdev = adapter->pdev;
1231	int err;
1232
1233	err = igb_set_interrupt_capability(adapter);
1234	if (err)
1235		return err;
1236
1237	err = igb_alloc_q_vectors(adapter);
1238	if (err) {
1239		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1240		goto err_alloc_q_vectors;
1241	}
1242
1243	err = igb_alloc_queues(adapter);
1244	if (err) {
1245		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1246		goto err_alloc_queues;
1247	}
1248
1249	err = igb_map_ring_to_vector(adapter);
1250	if (err) {
1251		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1252		goto err_map_queues;
1253	}
1254
1255
1256	return 0;
1257err_map_queues:
1258	igb_free_queues(adapter);
1259err_alloc_queues:
1260	igb_free_q_vectors(adapter);
1261err_alloc_q_vectors:
1262	igb_reset_interrupt_capability(adapter);
1263	return err;
1264}
1265
1266/**
1267 * igb_request_irq - initialize interrupts
1268 *
1269 * Attempts to configure interrupts using the best available
1270 * capabilities of the hardware and kernel.
1271 **/
1272static int igb_request_irq(struct igb_adapter *adapter)
1273{
1274	struct net_device *netdev = adapter->netdev;
1275	struct pci_dev *pdev = adapter->pdev;
1276	int err = 0;
1277
1278	if (adapter->msix_entries) {
1279		err = igb_request_msix(adapter);
1280		if (!err)
1281			goto request_done;
1282		/* fall back to MSI */
1283		igb_clear_interrupt_scheme(adapter);
1284		if (!pci_enable_msi(pdev))
1285			adapter->flags |= IGB_FLAG_HAS_MSI;
1286		igb_free_all_tx_resources(adapter);
1287		igb_free_all_rx_resources(adapter);
1288		adapter->num_tx_queues = 1;
1289		adapter->num_rx_queues = 1;
1290		adapter->num_q_vectors = 1;
1291		err = igb_alloc_q_vectors(adapter);
1292		if (err) {
1293			dev_err(&pdev->dev,
1294			        "Unable to allocate memory for vectors\n");
1295			goto request_done;
1296		}
1297		err = igb_alloc_queues(adapter);
1298		if (err) {
1299			dev_err(&pdev->dev,
1300			        "Unable to allocate memory for queues\n");
1301			igb_free_q_vectors(adapter);
1302			goto request_done;
1303		}
1304		igb_setup_all_tx_resources(adapter);
1305		igb_setup_all_rx_resources(adapter);
1306	}
1307
1308	igb_assign_vector(adapter->q_vector[0], 0);
1309
1310	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1311		err = request_irq(pdev->irq, igb_intr_msi, 0,
1312				  netdev->name, adapter);
1313		if (!err)
1314			goto request_done;
1315
1316		/* fall back to legacy interrupts */
1317		igb_reset_interrupt_capability(adapter);
1318		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1319	}
1320
1321	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1322			  netdev->name, adapter);
1323
1324	if (err)
1325		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1326			err);
1327
1328request_done:
1329	return err;
1330}
1331
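/**
 * igb_free_irq - free the interrupt lines claimed by igb_request_irq
 * @adapter: board private structure
 **/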
1332static void igb_free_irq(struct igb_adapter *adapter)
1333{
1334	if (adapter->msix_entries) {
1335		int vector = 0, i;
1336
1337		free_irq(adapter->msix_entries[vector++].vector, adapter);
1338
1339		for (i = 0; i < adapter->num_q_vectors; i++)
1340			free_irq(adapter->msix_entries[vector++].vector,
1341				 adapter->q_vector[i]);
1342	} else {
1343		free_irq(adapter->pdev->irq, adapter);
1344	}
1345}
1346
1347/**
1348 * igb_irq_disable - Mask off interrupt generation on the NIC
1349 * @adapter: board private structure
1350 **/
1351static void igb_irq_disable(struct igb_adapter *adapter)
1352{
1353	struct e1000_hw *hw = &adapter->hw;
1354
1355	/*
1356	 * We need to be careful when disabling interrupts.  The VFs are also
1357	 * mapped into these registers, so clearing the bits can cause issues
1358	 * for the VF drivers; only clear what we set.
1359	 */
1360	if (adapter->msix_entries) {
1361		u32 regval = rd32(E1000_EIAM);
1362		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1363		wr32(E1000_EIMC, adapter->eims_enable_mask);
1364		regval = rd32(E1000_EIAC);
1365		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1366	}
1367
1368	wr32(E1000_IAM, 0);
1369	wr32(E1000_IMC, ~0);
1370	wrfl();
1371	if (adapter->msix_entries) {
1372		int i;
1373		for (i = 0; i < adapter->num_q_vectors; i++)
1374			synchronize_irq(adapter->msix_entries[i].vector);
1375	} else {
1376		synchronize_irq(adapter->pdev->irq);
1377	}
1378}
1379
1380/**
1381 * igb_irq_enable - Enable default interrupt generation settings
1382 * @adapter: board private structure
1383 **/
1384static void igb_irq_enable(struct igb_adapter *adapter)
1385{
1386	struct e1000_hw *hw = &adapter->hw;
1387
1388	if (adapter->msix_entries) {
1389		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1390		u32 regval = rd32(E1000_EIAC);
1391		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1392		regval = rd32(E1000_EIAM);
1393		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1394		wr32(E1000_EIMS, adapter->eims_enable_mask);
1395		if (adapter->vfs_allocated_count) {
1396			wr32(E1000_MBVFIMR, 0xFF);
1397			ims |= E1000_IMS_VMMB;
1398		}
1399		wr32(E1000_IMS, ims);
1400	} else {
1401		wr32(E1000_IMS, IMS_ENABLE_MASK |
1402				E1000_IMS_DRSTA);
1403		wr32(E1000_IAM, IMS_ENABLE_MASK |
1404				E1000_IMS_DRSTA);
1405	}
1406}
1407
1408static void igb_update_mng_vlan(struct igb_adapter *adapter)
1409{
1410	struct e1000_hw *hw = &adapter->hw;
1411	u16 vid = adapter->hw.mng_cookie.vlan_id;
1412	u16 old_vid = adapter->mng_vlan_id;
1413
1414	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1415		/* add VID to filter table */
1416		igb_vfta_set(hw, vid, true);
1417		adapter->mng_vlan_id = vid;
1418	} else {
1419		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1420	}
1421
1422	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1423	    (vid != old_vid) &&
1424	    !test_bit(old_vid, adapter->active_vlans)) {
1425		/* remove VID from filter table */
1426		igb_vfta_set(hw, old_vid, false);
1427	}
1428}
1429
1430/**
1431 * igb_release_hw_control - release control of the h/w to f/w
1432 * @adapter: address of board private structure
1433 *
1434 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1435 * For ASF and Pass Through versions of f/w this means that the
1436 * driver is no longer loaded.
1437 *
1438 **/
1439static void igb_release_hw_control(struct igb_adapter *adapter)
1440{
1441	struct e1000_hw *hw = &adapter->hw;
1442	u32 ctrl_ext;
1443
1444	/* Let firmware take over control of h/w */
1445	ctrl_ext = rd32(E1000_CTRL_EXT);
1446	wr32(E1000_CTRL_EXT,
1447			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1448}
1449
1450/**
1451 * igb_get_hw_control - get control of the h/w from f/w
1452 * @adapter: address of board private structure
1453 *
1454 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1455 * For ASF and Pass Through versions of f/w this means that
1456 * the driver is loaded.
1457 *
1458 **/
1459static void igb_get_hw_control(struct igb_adapter *adapter)
1460{
1461	struct e1000_hw *hw = &adapter->hw;
1462	u32 ctrl_ext;
1463
1464	/* Let firmware know the driver has taken over */
1465	ctrl_ext = rd32(E1000_CTRL_EXT);
1466	wr32(E1000_CTRL_EXT,
1467			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1468}
1469
1470/**
1471 * igb_configure - configure the hardware for RX and TX
1472 * @adapter: private board structure
1473 **/
1474static void igb_configure(struct igb_adapter *adapter)
1475{
1476	struct net_device *netdev = adapter->netdev;
1477	int i;
1478
1479	igb_get_hw_control(adapter);
1480	igb_set_rx_mode(netdev);
1481
1482	igb_restore_vlan(adapter);
1483
1484	igb_setup_tctl(adapter);
1485	igb_setup_mrqc(adapter);
1486	igb_setup_rctl(adapter);
1487
1488	igb_configure_tx(adapter);
1489	igb_configure_rx(adapter);
1490
1491	igb_rx_fifo_flush_82575(&adapter->hw);
1492
1493	/* call igb_desc_unused which always leaves
1494	 * at least 1 descriptor unused to make sure
1495	 * next_to_use != next_to_clean */
1496	for (i = 0; i < adapter->num_rx_queues; i++) {
1497		struct igb_ring *ring = adapter->rx_ring[i];
1498		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1499	}
1500}
1501
1502/**
1503 * igb_power_up_link - Power up the phy/serdes link
1504 * @adapter: address of board private structure
1505 **/
1506void igb_power_up_link(struct igb_adapter *adapter)
1507{
1508	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1509		igb_power_up_phy_copper(&adapter->hw);
1510	else
1511		igb_power_up_serdes_link_82575(&adapter->hw);
1512	igb_reset_phy(&adapter->hw);
1513}
1514
1515/**
1516 * igb_power_down_link - Power down the phy/serdes link
1517 * @adapter: address of board private structure
1518 */
1519static void igb_power_down_link(struct igb_adapter *adapter)
1520{
1521	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1522		igb_power_down_phy_copper_82575(&adapter->hw);
1523	else
1524		igb_shutdown_serdes_link_82575(&adapter->hw);
1525}
1526
1527/**
1528 * igb_up - Open the interface and prepare it to handle traffic
1529 * @adapter: board private structure
1530 **/
1531int igb_up(struct igb_adapter *adapter)
1532{
1533	struct e1000_hw *hw = &adapter->hw;
1534	int i;
1535
1536	/* hardware has been reset, we need to reload some things */
1537	igb_configure(adapter);
1538
1539	clear_bit(__IGB_DOWN, &adapter->state);
1540
1541	for (i = 0; i < adapter->num_q_vectors; i++)
1542		napi_enable(&(adapter->q_vector[i]->napi));
1543
1544	if (adapter->msix_entries)
1545		igb_configure_msix(adapter);
1546	else
1547		igb_assign_vector(adapter->q_vector[0], 0);
1548
1549	/* Clear any pending interrupts. */
1550	rd32(E1000_ICR);
1551	igb_irq_enable(adapter);
1552
1553	/* notify VFs that reset has been completed */
1554	if (adapter->vfs_allocated_count) {
1555		u32 reg_data = rd32(E1000_CTRL_EXT);
1556		reg_data |= E1000_CTRL_EXT_PFRSTD;
1557		wr32(E1000_CTRL_EXT, reg_data);
1558	}
1559
1560	netif_tx_start_all_queues(adapter->netdev);
1561
1562	/* start the watchdog. */
1563	hw->mac.get_link_status = 1;
1564	schedule_work(&adapter->watchdog_task);
1565
1566	return 0;
1567}
1568
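/**
 * igb_down - Quiesce the interface and stop Tx/Rx traffic
 * @adapter: board private structure
 **/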
1569void igb_down(struct igb_adapter *adapter)
1570{
1571	struct net_device *netdev = adapter->netdev;
1572	struct e1000_hw *hw = &adapter->hw;
1573	u32 tctl, rctl;
1574	int i;
1575
1576	/* signal that we're down so the interrupt handler does not
1577	 * reschedule our watchdog timer */
1578	set_bit(__IGB_DOWN, &adapter->state);
1579
1580	/* disable receives in the hardware */
1581	rctl = rd32(E1000_RCTL);
1582	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1583	/* flush and sleep below */
1584
1585	netif_tx_stop_all_queues(netdev);
1586
1587	/* disable transmits in the hardware */
1588	tctl = rd32(E1000_TCTL);
1589	tctl &= ~E1000_TCTL_EN;
1590	wr32(E1000_TCTL, tctl);
1591	/* flush both disables and wait for them to finish */
1592	wrfl();
1593	msleep(10);
1594
1595	for (i = 0; i < adapter->num_q_vectors; i++)
1596		napi_disable(&(adapter->q_vector[i]->napi));
1597
1598	igb_irq_disable(adapter);
1599
1600	del_timer_sync(&adapter->watchdog_timer);
1601	del_timer_sync(&adapter->phy_info_timer);
1602
1603	netif_carrier_off(netdev);
1604
1605	/* record the stats before reset*/
1606	spin_lock(&adapter->stats64_lock);
1607	igb_update_stats(adapter, &adapter->stats64);
1608	spin_unlock(&adapter->stats64_lock);
1609
1610	adapter->link_speed = 0;
1611	adapter->link_duplex = 0;
1612
1613	if (!pci_channel_offline(adapter->pdev))
1614		igb_reset(adapter);
1615	igb_clean_all_tx_rings(adapter);
1616	igb_clean_all_rx_rings(adapter);
1617#ifdef CONFIG_IGB_DCA
1618
1619	/* since we reset the hardware DCA settings were cleared */
1620	igb_setup_dca(adapter);
1621#endif
1622}
1623
1624void igb_reinit_locked(struct igb_adapter *adapter)
1625{
1626	WARN_ON(in_interrupt());
1627	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1628		msleep(1);
1629	igb_down(adapter);
1630	igb_up(adapter);
1631	clear_bit(__IGB_RESETTING, &adapter->state);
1632}
1633
1634void igb_reset(struct igb_adapter *adapter)
1635{
1636	struct pci_dev *pdev = adapter->pdev;
1637	struct e1000_hw *hw = &adapter->hw;
1638	struct e1000_mac_info *mac = &hw->mac;
1639	struct e1000_fc_info *fc = &hw->fc;
1640	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1641	u16 hwm;
1642
1643	/* Repartition Pba for greater than 9k mtu
1644	 * To take effect CTRL.RST is required.
1645	 */
1646	switch (mac->type) {
1647	case e1000_i350:
1648	case e1000_82580:
1649		pba = rd32(E1000_RXPBS);
1650		pba = igb_rxpbs_adjust_82580(pba);
1651		break;
1652	case e1000_82576:
1653		pba = rd32(E1000_RXPBS);
1654		pba &= E1000_RXPBS_SIZE_MASK_82576;
1655		break;
1656	case e1000_82575:
1657	default:
1658		pba = E1000_PBA_34K;
1659		break;
1660	}
1661
1662	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1663	    (mac->type < e1000_82576)) {
1664		/* adjust PBA for jumbo frames */
1665		wr32(E1000_PBA, pba);
1666
1667		/* To maintain wire speed transmits, the Tx FIFO should be
1668		 * large enough to accommodate two full transmit packets,
1669		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1670		 * the Rx FIFO should be large enough to accommodate at least
1671		 * one full receive packet and is similarly rounded up and
1672		 * expressed in KB. */
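		/*
		 * Worked example, assuming a 9018-byte max frame and 16-byte
		 * advanced Tx descriptors:
		 *   min_tx_space = (9018 + 16 - 4) * 2 = 18060 -> 18 KB
		 *   min_rx_space = 9018                       ->  9 KB
		 * after rounding each up to the next 1 KB.
		 */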
1673		pba = rd32(E1000_PBA);
1674		/* upper 16 bits has Tx packet buffer allocation size in KB */
1675		tx_space = pba >> 16;
1676		/* lower 16 bits has Rx packet buffer allocation size in KB */
1677		pba &= 0xffff;
1678		/* The Tx FIFO also stores 16 bytes of information per packet,
1679		 * but don't count the Ethernet FCS because hardware appends it */
1680		min_tx_space = (adapter->max_frame_size +
1681				sizeof(union e1000_adv_tx_desc) -
1682				ETH_FCS_LEN) * 2;
1683		min_tx_space = ALIGN(min_tx_space, 1024);
1684		min_tx_space >>= 10;
1685		/* software strips receive CRC, so leave room for it */
1686		min_rx_space = adapter->max_frame_size;
1687		min_rx_space = ALIGN(min_rx_space, 1024);
1688		min_rx_space >>= 10;
1689
1690		/* If current Tx allocation is less than the min Tx FIFO size,
1691		 * and the min Tx FIFO size is less than the current Rx FIFO
1692		 * allocation, take space away from current Rx allocation */
1693		if (tx_space < min_tx_space &&
1694		    ((min_tx_space - tx_space) < pba)) {
1695			pba = pba - (min_tx_space - tx_space);
1696
1697			/* if short on rx space, rx wins and must trump tx
1698			 * adjustment */
1699			if (pba < min_rx_space)
1700				pba = min_rx_space;
1701		}
1702		wr32(E1000_PBA, pba);
1703	}
1704
1705	/* flow control settings */
1706	/* The high water mark must be low enough to fit one full frame
1707	 * (or the size used for early receive) above it in the Rx FIFO.
1708	 * Set it to the lower of:
1709	 * - 90% of the Rx FIFO size, or
1710	 * - the full Rx FIFO size minus one full frame */
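	/*
	 * Worked example, assuming a 34 KB packet buffer and a 1522-byte max
	 * frame: hwm = min(34816 * 9 / 10, 34816 - 2 * 1522) = 31334, giving
	 * a high water mark of 31328 bytes (16-byte aligned) and a low water
	 * mark of 31312 bytes.
	 */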
1711	hwm = min(((pba << 10) * 9 / 10),
1712			((pba << 10) - 2 * adapter->max_frame_size));
1713
1714	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1715	fc->low_water = fc->high_water - 16;
1716	fc->pause_time = 0xFFFF;
1717	fc->send_xon = 1;
1718	fc->current_mode = fc->requested_mode;
1719
1720	/* disable receive for all VFs and wait one second */
1721	if (adapter->vfs_allocated_count) {
1722		int i;
1723		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1724			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1725
1726		/* ping all the active vfs to let them know we are going down */
1727		igb_ping_all_vfs(adapter);
1728
1729		/* disable transmits and receives */
1730		wr32(E1000_VFRE, 0);
1731		wr32(E1000_VFTE, 0);
1732	}
1733
1734	/* Allow time for pending master requests to run */
1735	hw->mac.ops.reset_hw(hw);
1736	wr32(E1000_WUC, 0);
1737
1738	if (hw->mac.ops.init_hw(hw))
1739		dev_err(&pdev->dev, "Hardware Error\n");
1740
1741	igb_init_dmac(adapter, pba);
1742	if (!netif_running(adapter->netdev))
1743		igb_power_down_link(adapter);
1744
1745	igb_update_mng_vlan(adapter);
1746
1747	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1749
1750	igb_get_phy_info(hw);
1751}
1752
1753static netdev_features_t igb_fix_features(struct net_device *netdev,
1754	netdev_features_t features)
1755{
1756	/*
1757	 * Since there is no support for separate rx/tx vlan accel
1758	 * enable/disable make sure tx flag is always in same state as rx.
1759	 */
1760	if (features & NETIF_F_HW_VLAN_RX)
1761		features |= NETIF_F_HW_VLAN_TX;
1762	else
1763		features &= ~NETIF_F_HW_VLAN_TX;
1764
1765	return features;
1766}
1767
1768static int igb_set_features(struct net_device *netdev,
1769	netdev_features_t features)
1770{
1771	netdev_features_t changed = netdev->features ^ features;
1772	struct igb_adapter *adapter = netdev_priv(netdev);
1773
1774	if (changed & NETIF_F_HW_VLAN_RX)
1775		igb_vlan_mode(netdev, features);
1776
1777	if (!(changed & NETIF_F_RXALL))
1778		return 0;
1779
1780	netdev->features = features;
1781
1782	if (netif_running(netdev))
1783		igb_reinit_locked(adapter);
1784	else
1785		igb_reset(adapter);
1786
1787	return 0;
1788}
1789
1790static const struct net_device_ops igb_netdev_ops = {
1791	.ndo_open		= igb_open,
1792	.ndo_stop		= igb_close,
1793	.ndo_start_xmit		= igb_xmit_frame,
1794	.ndo_get_stats64	= igb_get_stats64,
1795	.ndo_set_rx_mode	= igb_set_rx_mode,
1796	.ndo_set_mac_address	= igb_set_mac,
1797	.ndo_change_mtu		= igb_change_mtu,
1798	.ndo_do_ioctl		= igb_ioctl,
1799	.ndo_tx_timeout		= igb_tx_timeout,
1800	.ndo_validate_addr	= eth_validate_addr,
1801	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1802	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1803	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1804	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1805	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1806	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1807#ifdef CONFIG_NET_POLL_CONTROLLER
1808	.ndo_poll_controller	= igb_netpoll,
1809#endif
1810	.ndo_fix_features	= igb_fix_features,
1811	.ndo_set_features	= igb_set_features,
1812};
1813
1814/**
1815 * igb_probe - Device Initialization Routine
1816 * @pdev: PCI device information struct
1817 * @ent: entry in igb_pci_tbl
1818 *
1819 * Returns 0 on success, negative on failure
1820 *
1821 * igb_probe initializes an adapter identified by a pci_dev structure.
1822 * The OS initialization, configuring of the adapter private structure,
1823 * and a hardware reset occur.
1824 **/
1825static int __devinit igb_probe(struct pci_dev *pdev,
1826			       const struct pci_device_id *ent)
1827{
1828	struct net_device *netdev;
1829	struct igb_adapter *adapter;
1830	struct e1000_hw *hw;
1831	u16 eeprom_data = 0;
1832	s32 ret_val;
1833	static int global_quad_port_a; /* global quad port a indication */
1834	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1835	unsigned long mmio_start, mmio_len;
1836	int err, pci_using_dac;
1837	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1838	u8 part_str[E1000_PBANUM_LENGTH];
1839
1840	/* Catch broken hardware that put the wrong VF device ID in
1841	 * the PCIe SR-IOV capability.
1842	 */
1843	if (pdev->is_virtfn) {
1844		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1845		     pci_name(pdev), pdev->vendor, pdev->device);
1846		return -EINVAL;
1847	}
1848
1849	err = pci_enable_device_mem(pdev);
1850	if (err)
1851		return err;
1852
1853	pci_using_dac = 0;
1854	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1855	if (!err) {
1856		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1857		if (!err)
1858			pci_using_dac = 1;
1859	} else {
1860		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1861		if (err) {
1862			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1863			if (err) {
1864				dev_err(&pdev->dev, "No usable DMA "
1865					"configuration, aborting\n");
1866				goto err_dma;
1867			}
1868		}
1869	}
1870
1871	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1872	                                   IORESOURCE_MEM),
1873	                                   igb_driver_name);
1874	if (err)
1875		goto err_pci_reg;
1876
1877	pci_enable_pcie_error_reporting(pdev);
1878
1879	pci_set_master(pdev);
1880	pci_save_state(pdev);
1881
1882	err = -ENOMEM;
1883	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1884				   IGB_MAX_TX_QUEUES);
1885	if (!netdev)
1886		goto err_alloc_etherdev;
1887
1888	SET_NETDEV_DEV(netdev, &pdev->dev);
1889
1890	pci_set_drvdata(pdev, netdev);
1891	adapter = netdev_priv(netdev);
1892	adapter->netdev = netdev;
1893	adapter->pdev = pdev;
1894	hw = &adapter->hw;
1895	hw->back = adapter;
1896	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1897
1898	mmio_start = pci_resource_start(pdev, 0);
1899	mmio_len = pci_resource_len(pdev, 0);
1900
1901	err = -EIO;
1902	hw->hw_addr = ioremap(mmio_start, mmio_len);
1903	if (!hw->hw_addr)
1904		goto err_ioremap;
1905
1906	netdev->netdev_ops = &igb_netdev_ops;
1907	igb_set_ethtool_ops(netdev);
1908	netdev->watchdog_timeo = 5 * HZ;
1909
1910	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1911
1912	netdev->mem_start = mmio_start;
1913	netdev->mem_end = mmio_start + mmio_len;
1914
1915	/* PCI config space info */
1916	hw->vendor_id = pdev->vendor;
1917	hw->device_id = pdev->device;
1918	hw->revision_id = pdev->revision;
1919	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1920	hw->subsystem_device_id = pdev->subsystem_device;
1921
1922	/* Copy the default MAC, PHY and NVM function pointers */
1923	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1924	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1925	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1926	/* Initialize skew-specific constants */
1927	err = ei->get_invariants(hw);
1928	if (err)
1929		goto err_sw_init;
1930
1931	/* setup the private structure */
1932	err = igb_sw_init(adapter);
1933	if (err)
1934		goto err_sw_init;
1935
1936	igb_get_bus_info_pcie(hw);
1937
1938	hw->phy.autoneg_wait_to_complete = false;
1939
1940	/* Copper options */
1941	if (hw->phy.media_type == e1000_media_type_copper) {
1942		hw->phy.mdix = AUTO_ALL_MODES;
1943		hw->phy.disable_polarity_correction = false;
1944		hw->phy.ms_type = e1000_ms_hw_default;
1945	}
1946
1947	if (igb_check_reset_block(hw))
1948		dev_info(&pdev->dev,
1949			"PHY reset is blocked due to SOL/IDER session.\n");
1950
1951	/*
	 * features is initialized to 0 on allocation, but it might have bits
	 * set by igb_sw_init, so we should use an OR instead of an
	 * assignment.
1955	 */
1956	netdev->features |= NETIF_F_SG |
1957			    NETIF_F_IP_CSUM |
1958			    NETIF_F_IPV6_CSUM |
1959			    NETIF_F_TSO |
1960			    NETIF_F_TSO6 |
1961			    NETIF_F_RXHASH |
1962			    NETIF_F_RXCSUM |
1963			    NETIF_F_HW_VLAN_RX |
1964			    NETIF_F_HW_VLAN_TX;
1965
1966	/* copy netdev features into list of user selectable features */
1967	netdev->hw_features |= netdev->features;
1968	netdev->hw_features |= NETIF_F_RXALL;
1969
1970	/* set this bit last since it cannot be part of hw_features */
1971	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1972
1973	netdev->vlan_features |= NETIF_F_TSO |
1974				 NETIF_F_TSO6 |
1975				 NETIF_F_IP_CSUM |
1976				 NETIF_F_IPV6_CSUM |
1977				 NETIF_F_SG;
1978
1979	netdev->priv_flags |= IFF_SUPP_NOFCS;
1980
1981	if (pci_using_dac) {
1982		netdev->features |= NETIF_F_HIGHDMA;
1983		netdev->vlan_features |= NETIF_F_HIGHDMA;
1984	}
1985
1986	if (hw->mac.type >= e1000_82576) {
1987		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1988		netdev->features |= NETIF_F_SCTP_CSUM;
1989	}
1990
1991	netdev->priv_flags |= IFF_UNICAST_FLT;
1992
1993	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1994
1995	/* before reading the NVM, reset the controller to put the device in a
1996	 * known good starting state */
1997	hw->mac.ops.reset_hw(hw);
1998
1999	/* make sure the NVM is good */
2000	if (hw->nvm.ops.validate(hw) < 0) {
2001		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2002		err = -EIO;
2003		goto err_eeprom;
2004	}
2005
2006	/* copy the MAC address out of the NVM */
2007	if (hw->mac.ops.read_mac_addr(hw))
2008		dev_err(&pdev->dev, "NVM Read Error\n");
2009
2010	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2011	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2012
2013	if (!is_valid_ether_addr(netdev->perm_addr)) {
2014		dev_err(&pdev->dev, "Invalid MAC Address\n");
2015		err = -EIO;
2016		goto err_eeprom;
2017	}
2018
2019	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2020	            (unsigned long) adapter);
2021	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2022	            (unsigned long) adapter);
2023
2024	INIT_WORK(&adapter->reset_task, igb_reset_task);
2025	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2026
2027	/* Initialize link properties that are user-changeable */
2028	adapter->fc_autoneg = true;
2029	hw->mac.autoneg = true;
2030	hw->phy.autoneg_advertised = 0x2f;
2031
2032	hw->fc.requested_mode = e1000_fc_default;
2033	hw->fc.current_mode = e1000_fc_default;
2034
2035	igb_validate_mdi_setting(hw);
2036
	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2038	 * enable the ACPI Magic Packet filter
2039	 */
2040
2041	if (hw->bus.func == 0)
2042		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2043	else if (hw->mac.type >= e1000_82580)
2044		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2045		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2046		                 &eeprom_data);
2047	else if (hw->bus.func == 1)
2048		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2049
2050	if (eeprom_data & eeprom_apme_mask)
2051		adapter->eeprom_wol |= E1000_WUFC_MAG;
2052
2053	/* now that we have the eeprom settings, apply the special cases where
2054	 * the eeprom may be wrong or the board simply won't support wake on
2055	 * lan on a particular port */
2056	switch (pdev->device) {
2057	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2058		adapter->eeprom_wol = 0;
2059		break;
2060	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2061	case E1000_DEV_ID_82576_FIBER:
2062	case E1000_DEV_ID_82576_SERDES:
2063		/* Wake events only supported on port A for dual fiber
2064		 * regardless of eeprom setting */
2065		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2066			adapter->eeprom_wol = 0;
2067		break;
2068	case E1000_DEV_ID_82576_QUAD_COPPER:
2069	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2070		/* if quad port adapter, disable WoL on all but port A */
2071		if (global_quad_port_a != 0)
2072			adapter->eeprom_wol = 0;
2073		else
2074			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2075		/* Reset for multiple quad port adapters */
2076		if (++global_quad_port_a == 4)
2077			global_quad_port_a = 0;
2078		break;
2079	}
2080
2081	/* initialize the wol settings based on the eeprom settings */
2082	adapter->wol = adapter->eeprom_wol;
2083	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2084
2085	/* reset the hardware with the new settings */
2086	igb_reset(adapter);
2087
2088	/* let the f/w know that the h/w is now under the control of the
2089	 * driver. */
2090	igb_get_hw_control(adapter);
2091
2092	strcpy(netdev->name, "eth%d");
2093	err = register_netdev(netdev);
2094	if (err)
2095		goto err_register;
2096
2097	/* carrier off reporting is important to ethtool even BEFORE open */
2098	netif_carrier_off(netdev);
2099
2100#ifdef CONFIG_IGB_DCA
2101	if (dca_add_requester(&pdev->dev) == 0) {
2102		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2103		dev_info(&pdev->dev, "DCA enabled\n");
2104		igb_setup_dca(adapter);
2105	}
2106
2107#endif
2108	/* do hw tstamp init after resetting */
2109	igb_init_hw_timer(adapter);
2110
2111	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2112	/* print bus type/speed/width info */
2113	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2114		 netdev->name,
2115		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2116		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2117		                                            "unknown"),
2118		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2119		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2120		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2121		   "unknown"),
2122		 netdev->dev_addr);
2123
2124	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2125	if (ret_val)
2126		strcpy(part_str, "Unknown");
2127	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2128	dev_info(&pdev->dev,
2129		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2130		adapter->msix_entries ? "MSI-X" :
2131		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2132		adapter->num_rx_queues, adapter->num_tx_queues);
2133	switch (hw->mac.type) {
2134	case e1000_i350:
2135		igb_set_eee_i350(hw);
2136		break;
2137	default:
2138		break;
2139	}
2140
2141	pm_runtime_put_noidle(&pdev->dev);
2142	return 0;
2143
2144err_register:
2145	igb_release_hw_control(adapter);
2146err_eeprom:
2147	if (!igb_check_reset_block(hw))
2148		igb_reset_phy(hw);
2149
2150	if (hw->flash_address)
2151		iounmap(hw->flash_address);
2152err_sw_init:
2153	igb_clear_interrupt_scheme(adapter);
2154	iounmap(hw->hw_addr);
2155err_ioremap:
2156	free_netdev(netdev);
2157err_alloc_etherdev:
2158	pci_release_selected_regions(pdev,
2159	                             pci_select_bars(pdev, IORESOURCE_MEM));
2160err_pci_reg:
2161err_dma:
2162	pci_disable_device(pdev);
2163	return err;
2164}
2165
2166/**
2167 * igb_remove - Device Removal Routine
2168 * @pdev: PCI device information struct
2169 *
2170 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
2172 * Hot-Plug event, or because the driver is going to be removed from
2173 * memory.
2174 **/
2175static void __devexit igb_remove(struct pci_dev *pdev)
2176{
2177	struct net_device *netdev = pci_get_drvdata(pdev);
2178	struct igb_adapter *adapter = netdev_priv(netdev);
2179	struct e1000_hw *hw = &adapter->hw;
2180
2181	pm_runtime_get_noresume(&pdev->dev);
2182
2183	/*
2184	 * The watchdog timer may be rescheduled, so explicitly
	 * prevent the watchdog from being rescheduled.
2186	 */
2187	set_bit(__IGB_DOWN, &adapter->state);
2188	del_timer_sync(&adapter->watchdog_timer);
2189	del_timer_sync(&adapter->phy_info_timer);
2190
2191	cancel_work_sync(&adapter->reset_task);
2192	cancel_work_sync(&adapter->watchdog_task);
2193
2194#ifdef CONFIG_IGB_DCA
2195	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2196		dev_info(&pdev->dev, "DCA disabled\n");
2197		dca_remove_requester(&pdev->dev);
2198		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2199		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2200	}
2201#endif
2202
2203	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2204	 * would have already happened in close and is redundant. */
2205	igb_release_hw_control(adapter);
2206
2207	unregister_netdev(netdev);
2208
2209	igb_clear_interrupt_scheme(adapter);
2210
2211#ifdef CONFIG_PCI_IOV
2212	/* reclaim resources allocated to VFs */
2213	if (adapter->vf_data) {
2214		/* disable iov and allow time for transactions to clear */
2215		if (!igb_check_vf_assignment(adapter)) {
2216			pci_disable_sriov(pdev);
2217			msleep(500);
2218		} else {
2219			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2220		}
2221
2222		kfree(adapter->vf_data);
2223		adapter->vf_data = NULL;
2224		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2225		wrfl();
2226		msleep(100);
2227		dev_info(&pdev->dev, "IOV Disabled\n");
2228	}
2229#endif
2230
2231	iounmap(hw->hw_addr);
2232	if (hw->flash_address)
2233		iounmap(hw->flash_address);
2234	pci_release_selected_regions(pdev,
2235	                             pci_select_bars(pdev, IORESOURCE_MEM));
2236
2237	kfree(adapter->shadow_vfta);
2238	free_netdev(netdev);
2239
2240	pci_disable_pcie_error_reporting(pdev);
2241
2242	pci_disable_device(pdev);
2243}
2244
2245/**
2246 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2247 * @adapter: board private structure to initialize
2248 *
2249 * This function initializes the vf specific data storage and then attempts to
 * allocate the VFs.  The reason for ordering it this way is that it is much
 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2252 * the memory for the VFs.
2253 **/
2254static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2255{
2256#ifdef CONFIG_PCI_IOV
2257	struct pci_dev *pdev = adapter->pdev;
2258	int old_vfs = igb_find_enabled_vfs(adapter);
2259	int i;
2260
2261	if (old_vfs) {
2262		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2263			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2264		adapter->vfs_allocated_count = old_vfs;
2265	}
2266
2267	if (!adapter->vfs_allocated_count)
2268		return;
2269
2270	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2271				sizeof(struct vf_data_storage), GFP_KERNEL);
2272	/* if allocation failed then we do not support SR-IOV */
2273	if (!adapter->vf_data) {
2274		adapter->vfs_allocated_count = 0;
2275		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2276			"Data Storage\n");
2277		goto out;
2278	}
2279
2280	if (!old_vfs) {
2281		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2282			goto err_out;
2283	}
2284	dev_info(&pdev->dev, "%d VFs allocated\n",
2285		 adapter->vfs_allocated_count);
2286	for (i = 0; i < adapter->vfs_allocated_count; i++)
2287		igb_vf_configure(adapter, i);
2288
2289	/* DMA Coalescing is not supported in IOV mode. */
2290	adapter->flags &= ~IGB_FLAG_DMAC;
2291	goto out;
2292err_out:
2293	kfree(adapter->vf_data);
2294	adapter->vf_data = NULL;
2295	adapter->vfs_allocated_count = 0;
2296out:
2297	return;
2298#endif /* CONFIG_PCI_IOV */
2299}
2300
2301/**
2302 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2303 * @adapter: board private structure to initialize
2304 *
2305 * igb_init_hw_timer initializes the function pointer and values for the hw
2306 * timer found in hardware.
2307 **/
2308static void igb_init_hw_timer(struct igb_adapter *adapter)
2309{
2310	struct e1000_hw *hw = &adapter->hw;
2311
2312	switch (hw->mac.type) {
2313	case e1000_i350:
2314	case e1000_82580:
2315		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2316		adapter->cycles.read = igb_read_clock;
2317		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2318		adapter->cycles.mult = 1;
2319		/*
		 * The 82580 timesync updates the system timer by 8ns every 8ns,
2321		 * and the value cannot be shifted.  Instead we need to shift
2322		 * the registers to generate a 64bit timer value.  As a result
2323		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2324		 * 24 in order to generate a larger value for synchronization.
2325		 */
2326		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2327		/* disable system timer temporarily by setting bit 31 */
2328		wr32(E1000_TSAUXC, 0x80000000);
2329		wrfl();
2330
2331		/* Set registers so that rollover occurs soon to test this. */
2332		wr32(E1000_SYSTIMR, 0x00000000);
2333		wr32(E1000_SYSTIML, 0x80000000);
2334		wr32(E1000_SYSTIMH, 0x000000FF);
2335		wrfl();
2336
2337		/* enable system timer by clearing bit 31 */
2338		wr32(E1000_TSAUXC, 0x0);
2339		wrfl();
2340
2341		timecounter_init(&adapter->clock,
2342				 &adapter->cycles,
2343				 ktime_to_ns(ktime_get_real()));
2344		/*
		 * Synchronize our NIC clock against the system wall clock.
		 * Reading a NIC time stamp requires ~3us per sample, and each
		 * sample was pretty stable even under load, so only 10 samples
		 * are required for each offset comparison.
2349		 */
2350		memset(&adapter->compare, 0, sizeof(adapter->compare));
2351		adapter->compare.source = &adapter->clock;
2352		adapter->compare.target = ktime_get_real;
2353		adapter->compare.num_samples = 10;
2354		timecompare_update(&adapter->compare, 0);
2355		break;
2356	case e1000_82576:
2357		/*
2358		 * Initialize hardware timer: we keep it running just in case
2359		 * that some program needs it later on.
2360		 */
2361		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2362		adapter->cycles.read = igb_read_clock;
2363		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2364		adapter->cycles.mult = 1;
2365		/**
2366		 * Scale the NIC clock cycle by a large factor so that
2367		 * relatively small clock corrections can be added or
2368		 * subtracted at each clock tick. The drawbacks of a large
2369		 * factor are a) that the clock register overflows more quickly
2370		 * (not such a big deal) and b) that the increment per tick has
2371		 * to fit into 24 bits.  As a result we need to use a shift of
2372		 * 19 so we can fit a value of 16 into the TIMINCA register.
2373		 */
2374		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2375		wr32(E1000_TIMINCA,
2376		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2377		                (16 << IGB_82576_TSYNC_SHIFT));
2378
2379		/* Set registers so that rollover occurs soon to test this. */
2380		wr32(E1000_SYSTIML, 0x00000000);
2381		wr32(E1000_SYSTIMH, 0xFF800000);
2382		wrfl();
2383
2384		timecounter_init(&adapter->clock,
2385				 &adapter->cycles,
2386				 ktime_to_ns(ktime_get_real()));
2387		/*
		 * Synchronize our NIC clock against the system wall clock.
		 * Reading a NIC time stamp requires ~3us per sample, and each
		 * sample was pretty stable even under load, so only 10 samples
		 * are required for each offset comparison.
2392		 */
2393		memset(&adapter->compare, 0, sizeof(adapter->compare));
2394		adapter->compare.source = &adapter->clock;
2395		adapter->compare.target = ktime_get_real;
2396		adapter->compare.num_samples = 10;
2397		timecompare_update(&adapter->compare, 0);
2398		break;
2399	case e1000_82575:
2400		/* 82575 does not support timesync */
2401	default:
2402		break;
2403	}
2404
2405}
2406
2407/**
2408 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2409 * @adapter: board private structure to initialize
2410 *
2411 * igb_sw_init initializes the Adapter private data structure.
2412 * Fields are initialized based on PCI device information and
2413 * OS network device settings (MTU size).
2414 **/
2415static int __devinit igb_sw_init(struct igb_adapter *adapter)
2416{
2417	struct e1000_hw *hw = &adapter->hw;
2418	struct net_device *netdev = adapter->netdev;
2419	struct pci_dev *pdev = adapter->pdev;
2420
2421	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2422
2423	/* set default ring sizes */
2424	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2425	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2426
2427	/* set default ITR values */
2428	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2429	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2430
2431	/* set default work limits */
2432	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2433
2434	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2435				  VLAN_HLEN;
2436	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
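	/* For example, with the standard 1500-byte MTU this gives a
	 * max_frame_size of 1500 + 14 + 4 + 4 = 1522 bytes and a
	 * min_frame_size of 60 + 4 = 64 bytes.
	 */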
2437
2438	adapter->node = -1;
2439
2440	spin_lock_init(&adapter->stats64_lock);
2441#ifdef CONFIG_PCI_IOV
2442	switch (hw->mac.type) {
2443	case e1000_82576:
2444	case e1000_i350:
2445		if (max_vfs > 7) {
2446			dev_warn(&pdev->dev,
2447				 "Maximum of 7 VFs per PF, using max\n");
2448			adapter->vfs_allocated_count = 7;
2449		} else
2450			adapter->vfs_allocated_count = max_vfs;
2451		break;
2452	default:
2453		break;
2454	}
2455#endif /* CONFIG_PCI_IOV */
2456	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2457	/* i350 cannot do RSS and SR-IOV at the same time */
2458	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2459		adapter->rss_queues = 1;
2460
2461	/*
	 * If rss_queues > 4, or if more than six VFs are going to be
	 * allocated while multiple RSS queues are in use, then we should
	 * combine the queues into queue pairs in order to conserve
	 * interrupts due to the limited supply.
2465	 */
2466	if ((adapter->rss_queues > 4) ||
2467	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2468		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2469
2470	/* Setup and initialize a copy of the hw vlan table array */
2471	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2472				E1000_VLAN_FILTER_TBL_SIZE,
2473				GFP_ATOMIC);
2474
2475	/* This call may decrease the number of queues */
2476	if (igb_init_interrupt_scheme(adapter)) {
2477		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2478		return -ENOMEM;
2479	}
2480
2481	igb_probe_vfs(adapter);
2482
2483	/* Explicitly disable IRQ since the NIC can be in any state. */
2484	igb_irq_disable(adapter);
2485
2486	if (hw->mac.type == e1000_i350)
2487		adapter->flags &= ~IGB_FLAG_DMAC;
2488
2489	set_bit(__IGB_DOWN, &adapter->state);
2490	return 0;
2491}
2492
2493/**
2494 * igb_open - Called when a network interface is made active
2495 * @netdev: network interface device structure
2496 *
2497 * Returns 0 on success, negative value on failure
2498 *
2499 * The open entry point is called when a network interface is made
2500 * active by the system (IFF_UP).  At this point all resources needed
2501 * for transmit and receive operations are allocated, the interrupt
2502 * handler is registered with the OS, the watchdog timer is started,
2503 * and the stack is notified that the interface is ready.
2504 **/
2505static int __igb_open(struct net_device *netdev, bool resuming)
2506{
2507	struct igb_adapter *adapter = netdev_priv(netdev);
2508	struct e1000_hw *hw = &adapter->hw;
2509	struct pci_dev *pdev = adapter->pdev;
2510	int err;
2511	int i;
2512
2513	/* disallow open during test */
2514	if (test_bit(__IGB_TESTING, &adapter->state)) {
2515		WARN_ON(resuming);
2516		return -EBUSY;
2517	}
2518
2519	if (!resuming)
2520		pm_runtime_get_sync(&pdev->dev);
2521
2522	netif_carrier_off(netdev);
2523
2524	/* allocate transmit descriptors */
2525	err = igb_setup_all_tx_resources(adapter);
2526	if (err)
2527		goto err_setup_tx;
2528
2529	/* allocate receive descriptors */
2530	err = igb_setup_all_rx_resources(adapter);
2531	if (err)
2532		goto err_setup_rx;
2533
2534	igb_power_up_link(adapter);
2535
2536	/* before we allocate an interrupt, we must be ready to handle it.
2537	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call request_irq, so we have to set up our
2539	 * clean_rx handler before we do so.  */
2540	igb_configure(adapter);
2541
2542	err = igb_request_irq(adapter);
2543	if (err)
2544		goto err_req_irq;
2545
2546	/* From here on the code is the same as igb_up() */
2547	clear_bit(__IGB_DOWN, &adapter->state);
2548
2549	for (i = 0; i < adapter->num_q_vectors; i++)
2550		napi_enable(&(adapter->q_vector[i]->napi));
2551
2552	/* Clear any pending interrupts. */
2553	rd32(E1000_ICR);
2554
2555	igb_irq_enable(adapter);
2556
2557	/* notify VFs that reset has been completed */
2558	if (adapter->vfs_allocated_count) {
2559		u32 reg_data = rd32(E1000_CTRL_EXT);
2560		reg_data |= E1000_CTRL_EXT_PFRSTD;
2561		wr32(E1000_CTRL_EXT, reg_data);
2562	}
2563
2564	netif_tx_start_all_queues(netdev);
2565
2566	if (!resuming)
2567		pm_runtime_put(&pdev->dev);
2568
2569	/* start the watchdog. */
2570	hw->mac.get_link_status = 1;
2571	schedule_work(&adapter->watchdog_task);
2572
2573	return 0;
2574
2575err_req_irq:
2576	igb_release_hw_control(adapter);
2577	igb_power_down_link(adapter);
2578	igb_free_all_rx_resources(adapter);
2579err_setup_rx:
2580	igb_free_all_tx_resources(adapter);
2581err_setup_tx:
2582	igb_reset(adapter);
2583	if (!resuming)
2584		pm_runtime_put(&pdev->dev);
2585
2586	return err;
2587}
2588
2589static int igb_open(struct net_device *netdev)
2590{
2591	return __igb_open(netdev, false);
2592}
2593
2594/**
2595 * igb_close - Disables a network interface
2596 * @netdev: network interface device structure
2597 *
2598 * Returns 0, this is not allowed to fail
2599 *
2600 * The close entry point is called when an interface is de-activated
2601 * by the OS.  The hardware is still under the driver's control, but
2602 * needs to be disabled.  A global MAC reset is issued to stop the
2603 * hardware, and all transmit and receive resources are freed.
2604 **/
2605static int __igb_close(struct net_device *netdev, bool suspending)
2606{
2607	struct igb_adapter *adapter = netdev_priv(netdev);
2608	struct pci_dev *pdev = adapter->pdev;
2609
2610	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2611
2612	if (!suspending)
2613		pm_runtime_get_sync(&pdev->dev);
2614
2615	igb_down(adapter);
2616	igb_free_irq(adapter);
2617
2618	igb_free_all_tx_resources(adapter);
2619	igb_free_all_rx_resources(adapter);
2620
2621	if (!suspending)
2622		pm_runtime_put_sync(&pdev->dev);
2623	return 0;
2624}
2625
2626static int igb_close(struct net_device *netdev)
2627{
2628	return __igb_close(netdev, false);
2629}
2630
2631/**
2632 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2633 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2634 *
2635 * Return 0 on success, negative on failure
2636 **/
2637int igb_setup_tx_resources(struct igb_ring *tx_ring)
2638{
2639	struct device *dev = tx_ring->dev;
2640	int orig_node = dev_to_node(dev);
2641	int size;
2642
2643	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2644	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2645	if (!tx_ring->tx_buffer_info)
2646		tx_ring->tx_buffer_info = vzalloc(size);
2647	if (!tx_ring->tx_buffer_info)
2648		goto err;
2649
2650	/* round up to nearest 4K */
2651	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2652	tx_ring->size = ALIGN(tx_ring->size, 4096);
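	/* Illustrative example: a ring of 256 descriptors at 16 bytes per
	 * advanced Tx descriptor is exactly 4096 bytes, so the 4K alignment
	 * above leaves it unchanged.
	 */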
2653
2654	set_dev_node(dev, tx_ring->numa_node);
2655	tx_ring->desc = dma_alloc_coherent(dev,
2656					   tx_ring->size,
2657					   &tx_ring->dma,
2658					   GFP_KERNEL);
2659	set_dev_node(dev, orig_node);
2660	if (!tx_ring->desc)
2661		tx_ring->desc = dma_alloc_coherent(dev,
2662						   tx_ring->size,
2663						   &tx_ring->dma,
2664						   GFP_KERNEL);
2665
2666	if (!tx_ring->desc)
2667		goto err;
2668
2669	tx_ring->next_to_use = 0;
2670	tx_ring->next_to_clean = 0;
2671
2672	return 0;
2673
2674err:
2675	vfree(tx_ring->tx_buffer_info);
2676	dev_err(dev,
2677		"Unable to allocate memory for the transmit descriptor ring\n");
2678	return -ENOMEM;
2679}
2680
2681/**
2682 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2683 *				  (Descriptors) for all queues
2684 * @adapter: board private structure
2685 *
2686 * Return 0 on success, negative on failure
2687 **/
2688static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2689{
2690	struct pci_dev *pdev = adapter->pdev;
2691	int i, err = 0;
2692
2693	for (i = 0; i < adapter->num_tx_queues; i++) {
2694		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2695		if (err) {
2696			dev_err(&pdev->dev,
2697				"Allocation for Tx Queue %u failed\n", i);
2698			for (i--; i >= 0; i--)
2699				igb_free_tx_resources(adapter->tx_ring[i]);
2700			break;
2701		}
2702	}
2703
2704	return err;
2705}
2706
2707/**
2708 * igb_setup_tctl - configure the transmit control registers
2709 * @adapter: Board private structure
2710 **/
2711void igb_setup_tctl(struct igb_adapter *adapter)
2712{
2713	struct e1000_hw *hw = &adapter->hw;
2714	u32 tctl;
2715
2716	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2717	wr32(E1000_TXDCTL(0), 0);
2718
2719	/* Program the Transmit Control Register */
2720	tctl = rd32(E1000_TCTL);
2721	tctl &= ~E1000_TCTL_CT;
2722	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2723		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2724
2725	igb_config_collision_dist(hw);
2726
2727	/* Enable transmits */
2728	tctl |= E1000_TCTL_EN;
2729
2730	wr32(E1000_TCTL, tctl);
2731}
2732
2733/**
2734 * igb_configure_tx_ring - Configure transmit ring after Reset
2735 * @adapter: board private structure
2736 * @ring: tx ring to configure
2737 *
2738 * Configure a transmit ring after a reset.
2739 **/
2740void igb_configure_tx_ring(struct igb_adapter *adapter,
2741                           struct igb_ring *ring)
2742{
2743	struct e1000_hw *hw = &adapter->hw;
2744	u32 txdctl = 0;
2745	u64 tdba = ring->dma;
2746	int reg_idx = ring->reg_idx;
2747
2748	/* disable the queue */
2749	wr32(E1000_TXDCTL(reg_idx), 0);
2750	wrfl();
2751	mdelay(10);
2752
2753	wr32(E1000_TDLEN(reg_idx),
2754	                ring->count * sizeof(union e1000_adv_tx_desc));
2755	wr32(E1000_TDBAL(reg_idx),
2756	                tdba & 0x00000000ffffffffULL);
2757	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2758
2759	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2760	wr32(E1000_TDH(reg_idx), 0);
2761	writel(0, ring->tail);
2762
2763	txdctl |= IGB_TX_PTHRESH;
2764	txdctl |= IGB_TX_HTHRESH << 8;
2765	txdctl |= IGB_TX_WTHRESH << 16;
2766
2767	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2768	wr32(E1000_TXDCTL(reg_idx), txdctl);
2769
2770	netdev_tx_reset_queue(txring_txq(ring));
2771}
2772
2773/**
2774 * igb_configure_tx - Configure transmit Unit after Reset
2775 * @adapter: board private structure
2776 *
2777 * Configure the Tx unit of the MAC after a reset.
2778 **/
2779static void igb_configure_tx(struct igb_adapter *adapter)
2780{
2781	int i;
2782
2783	for (i = 0; i < adapter->num_tx_queues; i++)
2784		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2785}
2786
2787/**
2788 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2789 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2790 *
2791 * Returns 0 on success, negative on failure
2792 **/
2793int igb_setup_rx_resources(struct igb_ring *rx_ring)
2794{
2795	struct device *dev = rx_ring->dev;
2796	int orig_node = dev_to_node(dev);
2797	int size, desc_len;
2798
2799	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2800	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2801	if (!rx_ring->rx_buffer_info)
2802		rx_ring->rx_buffer_info = vzalloc(size);
2803	if (!rx_ring->rx_buffer_info)
2804		goto err;
2805
2806	desc_len = sizeof(union e1000_adv_rx_desc);
2807
2808	/* Round up to nearest 4K */
2809	rx_ring->size = rx_ring->count * desc_len;
2810	rx_ring->size = ALIGN(rx_ring->size, 4096);
2811
2812	set_dev_node(dev, rx_ring->numa_node);
2813	rx_ring->desc = dma_alloc_coherent(dev,
2814					   rx_ring->size,
2815					   &rx_ring->dma,
2816					   GFP_KERNEL);
2817	set_dev_node(dev, orig_node);
2818	if (!rx_ring->desc)
2819		rx_ring->desc = dma_alloc_coherent(dev,
2820						   rx_ring->size,
2821						   &rx_ring->dma,
2822						   GFP_KERNEL);
2823
2824	if (!rx_ring->desc)
2825		goto err;
2826
2827	rx_ring->next_to_clean = 0;
2828	rx_ring->next_to_use = 0;
2829
2830	return 0;
2831
2832err:
2833	vfree(rx_ring->rx_buffer_info);
2834	rx_ring->rx_buffer_info = NULL;
2835	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2836		" ring\n");
2837	return -ENOMEM;
2838}
2839
2840/**
2841 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2842 *				  (Descriptors) for all queues
2843 * @adapter: board private structure
2844 *
2845 * Return 0 on success, negative on failure
2846 **/
2847static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2848{
2849	struct pci_dev *pdev = adapter->pdev;
2850	int i, err = 0;
2851
2852	for (i = 0; i < adapter->num_rx_queues; i++) {
2853		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2854		if (err) {
2855			dev_err(&pdev->dev,
2856				"Allocation for Rx Queue %u failed\n", i);
2857			for (i--; i >= 0; i--)
2858				igb_free_rx_resources(adapter->rx_ring[i]);
2859			break;
2860		}
2861	}
2862
2863	return err;
2864}
2865
2866/**
2867 * igb_setup_mrqc - configure the multiple receive queue control registers
2868 * @adapter: Board private structure
2869 **/
2870static void igb_setup_mrqc(struct igb_adapter *adapter)
2871{
2872	struct e1000_hw *hw = &adapter->hw;
2873	u32 mrqc, rxcsum;
2874	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2875	union e1000_reta {
2876		u32 dword;
2877		u8  bytes[4];
2878	} reta;
2879	static const u8 rsshash[40] = {
2880		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2881		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2882		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2883		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2884
2885	/* Fill out hash function seeds */
2886	for (j = 0; j < 10; j++) {
2887		u32 rsskey = rsshash[(j * 4)];
2888		rsskey |= rsshash[(j * 4) + 1] << 8;
2889		rsskey |= rsshash[(j * 4) + 2] << 16;
2890		rsskey |= rsshash[(j * 4) + 3] << 24;
2891		array_wr32(E1000_RSSRK(0), j, rsskey);
2892	}
2893
2894	num_rx_queues = adapter->rss_queues;
2895
2896	if (adapter->vfs_allocated_count) {
		/* 82575 and 82576 support 2 RSS queues for VMDq */
2898		switch (hw->mac.type) {
2899		case e1000_i350:
2900		case e1000_82580:
2901			num_rx_queues = 1;
2902			shift = 0;
2903			break;
2904		case e1000_82576:
2905			shift = 3;
2906			num_rx_queues = 2;
2907			break;
2908		case e1000_82575:
2909			shift = 2;
2910			shift2 = 6;
2911		default:
2912			break;
2913		}
2914	} else {
2915		if (hw->mac.type == e1000_82575)
2916			shift = 6;
2917	}
2918
2919	for (j = 0; j < (32 * 4); j++) {
2920		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2921		if (shift2)
2922			reta.bytes[j & 3] |= num_rx_queues << shift2;
2923		if ((j & 3) == 3)
2924			wr32(E1000_RETA(j >> 2), reta.dword);
2925	}
2926
2927	/*
2928	 * Disable raw packet checksumming so that RSS hash is placed in
2929	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2930	 * offloads as they are enabled by default
2931	 */
2932	rxcsum = rd32(E1000_RXCSUM);
2933	rxcsum |= E1000_RXCSUM_PCSD;
2934
2935	if (adapter->hw.mac.type >= e1000_82576)
2936		/* Enable Receive Checksum Offload for SCTP */
2937		rxcsum |= E1000_RXCSUM_CRCOFL;
2938
2939	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2940	wr32(E1000_RXCSUM, rxcsum);
2941
2942	/* If VMDq is enabled then we set the appropriate mode for that, else
2943	 * we default to RSS so that an RSS hash is calculated per packet even
2944	 * if we are only using one queue */
2945	if (adapter->vfs_allocated_count) {
2946		if (hw->mac.type > e1000_82575) {
2947			/* Set the default pool for the PF's first queue */
2948			u32 vtctl = rd32(E1000_VT_CTL);
2949			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2950				   E1000_VT_CTL_DISABLE_DEF_POOL);
2951			vtctl |= adapter->vfs_allocated_count <<
2952				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2953			wr32(E1000_VT_CTL, vtctl);
2954		}
2955		if (adapter->rss_queues > 1)
2956			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2957		else
2958			mrqc = E1000_MRQC_ENABLE_VMDQ;
2959	} else {
2960		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2961	}
2962	igb_vmm_control(adapter);
2963
2964	/*
2965	 * Generate RSS hash based on TCP port numbers and/or
2966	 * IPv4/v6 src and dst addresses since UDP cannot be
2967	 * hashed reliably due to IP fragmentation
2968	 */
2969	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2970		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2971		E1000_MRQC_RSS_FIELD_IPV6 |
2972		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2973		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2974
2975	wr32(E1000_MRQC, mrqc);
2976}
2977
2978/**
2979 * igb_setup_rctl - configure the receive control registers
2980 * @adapter: Board private structure
2981 **/
2982void igb_setup_rctl(struct igb_adapter *adapter)
2983{
2984	struct e1000_hw *hw = &adapter->hw;
2985	u32 rctl;
2986
2987	rctl = rd32(E1000_RCTL);
2988
2989	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2990	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2991
2992	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2993		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2994
2995	/*
2996	 * enable stripping of CRC. It's unlikely this will break BMC
2997	 * redirection as it did with e1000. Newer features require
2998	 * that the HW strips the CRC.
2999	 */
3000	rctl |= E1000_RCTL_SECRC;
3001
3002	/* disable store bad packets and clear size bits. */
3003	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3004
3005	/* enable LPE to prevent packets larger than max_frame_size */
3006	rctl |= E1000_RCTL_LPE;
3007
3008	/* disable queue 0 to prevent tail write w/o re-config */
3009	wr32(E1000_RXDCTL(0), 0);
3010
3011	/* Attention!!!  For SR-IOV PF driver operations you must enable
3012	 * queue drop for all VF and PF queues to prevent head of line blocking
3013	 * if an un-trusted VF does not provide descriptors to hardware.
3014	 */
3015	if (adapter->vfs_allocated_count) {
3016		/* set all queue drop enable bits */
3017		wr32(E1000_QDE, ALL_QUEUES);
3018	}
3019
3020	/* This is useful for sniffing bad packets. */
3021	if (adapter->netdev->features & NETIF_F_RXALL) {
3022		/* UPE and MPE will be handled by normal PROMISC logic
		 * in igb_set_rx_mode */
3024		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3025			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3026			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3027
3028		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3029			  E1000_RCTL_DPF | /* Allow filtered pause */
3030			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3031		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3032		 * and that breaks VLANs.
3033		 */
3034	}
3035
3036	wr32(E1000_RCTL, rctl);
3037}
3038
3039static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3040                                   int vfn)
3041{
3042	struct e1000_hw *hw = &adapter->hw;
3043	u32 vmolr;
3044
	/* if it isn't the PF, check to see if VFs are enabled and
	 * increase the size to support VLAN tags */
3047	if (vfn < adapter->vfs_allocated_count &&
3048	    adapter->vf_data[vfn].vlans_enabled)
3049		size += VLAN_TAG_SIZE;
3050
3051	vmolr = rd32(E1000_VMOLR(vfn));
3052	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3053	vmolr |= size | E1000_VMOLR_LPE;
3054	wr32(E1000_VMOLR(vfn), vmolr);
3055
3056	return 0;
3057}
3058
3059/**
3060 * igb_rlpml_set - set maximum receive packet size
3061 * @adapter: board private structure
3062 *
3063 * Configure maximum receivable packet size.
3064 **/
3065static void igb_rlpml_set(struct igb_adapter *adapter)
3066{
3067	u32 max_frame_size = adapter->max_frame_size;
3068	struct e1000_hw *hw = &adapter->hw;
3069	u16 pf_id = adapter->vfs_allocated_count;
3070
3071	if (pf_id) {
3072		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3073		/*
3074		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3075		 * to our max jumbo frame size, in case we need to enable
3076		 * jumbo frames on one of the rings later.
3077		 * This will not pass over-length frames into the default
3078		 * queue because it's gated by the VMOLR.RLPML.
3079		 */
3080		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3081	}
3082
3083	wr32(E1000_RLPML, max_frame_size);
3084}
3085
3086static inline void igb_set_vmolr(struct igb_adapter *adapter,
3087				 int vfn, bool aupe)
3088{
3089	struct e1000_hw *hw = &adapter->hw;
3090	u32 vmolr;
3091
3092	/*
	 * This register exists only on 82576 and newer, so on older hardware
3094	 * we should exit and do nothing
3095	 */
3096	if (hw->mac.type < e1000_82576)
3097		return;
3098
3099	vmolr = rd32(E1000_VMOLR(vfn));
3100	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3101	if (aupe)
3102		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3103	else
3104		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3105
3106	/* clear all bits that might not be set */
3107	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3108
3109	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3110		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3111	/*
3112	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3113	 * multicast packets
3114	 */
3115	if (vfn <= adapter->vfs_allocated_count)
3116		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3117
3118	wr32(E1000_VMOLR(vfn), vmolr);
3119}
3120
3121/**
3122 * igb_configure_rx_ring - Configure a receive ring after Reset
3123 * @adapter: board private structure
3124 * @ring: receive ring to be configured
3125 *
3126 * Configure the Rx unit of the MAC after a reset.
3127 **/
3128void igb_configure_rx_ring(struct igb_adapter *adapter,
3129                           struct igb_ring *ring)
3130{
3131	struct e1000_hw *hw = &adapter->hw;
3132	u64 rdba = ring->dma;
3133	int reg_idx = ring->reg_idx;
3134	u32 srrctl = 0, rxdctl = 0;
3135
3136	/* disable the queue */
3137	wr32(E1000_RXDCTL(reg_idx), 0);
3138
3139	/* Set DMA base address registers */
3140	wr32(E1000_RDBAL(reg_idx),
3141	     rdba & 0x00000000ffffffffULL);
3142	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3143	wr32(E1000_RDLEN(reg_idx),
3144	               ring->count * sizeof(union e1000_adv_rx_desc));
3145
3146	/* initialize head and tail */
3147	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3148	wr32(E1000_RDH(reg_idx), 0);
3149	writel(0, ring->tail);
3150
3151	/* set descriptor configuration */
3152	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3153#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3154	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3155#else
3156	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3157#endif
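	/* Illustrative sketch (assumes 4 KB pages and that
	 * E1000_SRRCTL_BSIZEPKT_SHIFT expresses the packet buffer size in
	 * 1 KB units): PAGE_SIZE / 2 = 2048 bytes encodes as 2, i.e. a
	 * 2 KB packet buffer per descriptor.
	 */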
3158	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3159	if (hw->mac.type >= e1000_82580)
3160		srrctl |= E1000_SRRCTL_TIMESTAMP;
3161	/* Only set Drop Enable if we are supporting multiple queues */
3162	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3163		srrctl |= E1000_SRRCTL_DROP_EN;
3164
3165	wr32(E1000_SRRCTL(reg_idx), srrctl);
3166
3167	/* set filtering for VMDQ pools */
3168	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3169
3170	rxdctl |= IGB_RX_PTHRESH;
3171	rxdctl |= IGB_RX_HTHRESH << 8;
3172	rxdctl |= IGB_RX_WTHRESH << 16;
3173
3174	/* enable receive descriptor fetching */
3175	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3176	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3177}
3178
3179/**
3180 * igb_configure_rx - Configure receive Unit after Reset
3181 * @adapter: board private structure
3182 *
3183 * Configure the Rx unit of the MAC after a reset.
3184 **/
3185static void igb_configure_rx(struct igb_adapter *adapter)
3186{
3187	int i;
3188
3189	/* set UTA to appropriate mode */
3190	igb_set_uta(adapter);
3191
3192	/* set the correct pool for the PF default MAC address in entry 0 */
3193	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3194	                 adapter->vfs_allocated_count);
3195
3196	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3197	 * the Base and Length of the Rx Descriptor Ring */
3198	for (i = 0; i < adapter->num_rx_queues; i++)
3199		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3200}
3201
3202/**
3203 * igb_free_tx_resources - Free Tx Resources per Queue
3204 * @tx_ring: Tx descriptor ring for a specific queue
3205 *
3206 * Free all transmit software resources
3207 **/
3208void igb_free_tx_resources(struct igb_ring *tx_ring)
3209{
3210	igb_clean_tx_ring(tx_ring);
3211
3212	vfree(tx_ring->tx_buffer_info);
3213	tx_ring->tx_buffer_info = NULL;
3214
3215	/* if not set, then don't free */
3216	if (!tx_ring->desc)
3217		return;
3218
3219	dma_free_coherent(tx_ring->dev, tx_ring->size,
3220			  tx_ring->desc, tx_ring->dma);
3221
3222	tx_ring->desc = NULL;
3223}
3224
3225/**
3226 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3227 * @adapter: board private structure
3228 *
3229 * Free all transmit software resources
3230 **/
3231static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3232{
3233	int i;
3234
3235	for (i = 0; i < adapter->num_tx_queues; i++)
3236		igb_free_tx_resources(adapter->tx_ring[i]);
3237}
3238
3239void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3240				    struct igb_tx_buffer *tx_buffer)
3241{
3242	if (tx_buffer->skb) {
3243		dev_kfree_skb_any(tx_buffer->skb);
3244		if (tx_buffer->dma)
3245			dma_unmap_single(ring->dev,
3246					 tx_buffer->dma,
3247					 tx_buffer->length,
3248					 DMA_TO_DEVICE);
3249	} else if (tx_buffer->dma) {
3250		dma_unmap_page(ring->dev,
3251			       tx_buffer->dma,
3252			       tx_buffer->length,
3253			       DMA_TO_DEVICE);
3254	}
3255	tx_buffer->next_to_watch = NULL;
3256	tx_buffer->skb = NULL;
3257	tx_buffer->dma = 0;
3258	/* buffer_info must be completely set up in the transmit path */
3259}
3260
3261/**
3262 * igb_clean_tx_ring - Free Tx Buffers
3263 * @tx_ring: ring to be cleaned
3264 **/
3265static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3266{
3267	struct igb_tx_buffer *buffer_info;
3268	unsigned long size;
3269	u16 i;
3270
3271	if (!tx_ring->tx_buffer_info)
3272		return;
3273	/* Free all the Tx ring sk_buffs */
3274
3275	for (i = 0; i < tx_ring->count; i++) {
3276		buffer_info = &tx_ring->tx_buffer_info[i];
3277		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3278	}
3279
3280	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3281	memset(tx_ring->tx_buffer_info, 0, size);
3282
3283	/* Zero out the descriptor ring */
3284	memset(tx_ring->desc, 0, tx_ring->size);
3285
3286	tx_ring->next_to_use = 0;
3287	tx_ring->next_to_clean = 0;
3288}
3289
3290/**
3291 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3292 * @adapter: board private structure
3293 **/
3294static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3295{
3296	int i;
3297
3298	for (i = 0; i < adapter->num_tx_queues; i++)
3299		igb_clean_tx_ring(adapter->tx_ring[i]);
3300}
3301
3302/**
3303 * igb_free_rx_resources - Free Rx Resources
3304 * @rx_ring: ring to clean the resources from
3305 *
3306 * Free all receive software resources
3307 **/
3308void igb_free_rx_resources(struct igb_ring *rx_ring)
3309{
3310	igb_clean_rx_ring(rx_ring);
3311
3312	vfree(rx_ring->rx_buffer_info);
3313	rx_ring->rx_buffer_info = NULL;
3314
3315	/* if not set, then don't free */
3316	if (!rx_ring->desc)
3317		return;
3318
3319	dma_free_coherent(rx_ring->dev, rx_ring->size,
3320			  rx_ring->desc, rx_ring->dma);
3321
3322	rx_ring->desc = NULL;
3323}
3324
3325/**
3326 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3327 * @adapter: board private structure
3328 *
3329 * Free all receive software resources
3330 **/
3331static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3332{
3333	int i;
3334
3335	for (i = 0; i < adapter->num_rx_queues; i++)
3336		igb_free_rx_resources(adapter->rx_ring[i]);
3337}
3338
3339/**
3340 * igb_clean_rx_ring - Free Rx Buffers per Queue
3341 * @rx_ring: ring to free buffers from
3342 **/
3343static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3344{
3345	unsigned long size;
3346	u16 i;
3347
3348	if (!rx_ring->rx_buffer_info)
3349		return;
3350
3351	/* Free all the Rx ring sk_buffs */
3352	for (i = 0; i < rx_ring->count; i++) {
3353		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3354		if (buffer_info->dma) {
3355			dma_unmap_single(rx_ring->dev,
3356			                 buffer_info->dma,
3357					 IGB_RX_HDR_LEN,
3358					 DMA_FROM_DEVICE);
3359			buffer_info->dma = 0;
3360		}
3361
3362		if (buffer_info->skb) {
3363			dev_kfree_skb(buffer_info->skb);
3364			buffer_info->skb = NULL;
3365		}
3366		if (buffer_info->page_dma) {
3367			dma_unmap_page(rx_ring->dev,
3368			               buffer_info->page_dma,
3369				       PAGE_SIZE / 2,
3370				       DMA_FROM_DEVICE);
3371			buffer_info->page_dma = 0;
3372		}
3373		if (buffer_info->page) {
3374			put_page(buffer_info->page);
3375			buffer_info->page = NULL;
3376			buffer_info->page_offset = 0;
3377		}
3378	}
3379
3380	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3381	memset(rx_ring->rx_buffer_info, 0, size);
3382
3383	/* Zero out the descriptor ring */
3384	memset(rx_ring->desc, 0, rx_ring->size);
3385
3386	rx_ring->next_to_clean = 0;
3387	rx_ring->next_to_use = 0;
3388}
3389
3390/**
3391 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3392 * @adapter: board private structure
3393 **/
3394static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3395{
3396	int i;
3397
3398	for (i = 0; i < adapter->num_rx_queues; i++)
3399		igb_clean_rx_ring(adapter->rx_ring[i]);
3400}
3401
3402/**
3403 * igb_set_mac - Change the Ethernet Address of the NIC
3404 * @netdev: network interface device structure
3405 * @p: pointer to an address structure
3406 *
3407 * Returns 0 on success, negative on failure
3408 **/
3409static int igb_set_mac(struct net_device *netdev, void *p)
3410{
3411	struct igb_adapter *adapter = netdev_priv(netdev);
3412	struct e1000_hw *hw = &adapter->hw;
3413	struct sockaddr *addr = p;
3414
3415	if (!is_valid_ether_addr(addr->sa_data))
3416		return -EADDRNOTAVAIL;
3417
3418	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3419	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3420
3421	/* set the correct pool for the new PF MAC address in entry 0 */
3422	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3423	                 adapter->vfs_allocated_count);
3424
3425	return 0;
3426}
3427
3428/**
3429 * igb_write_mc_addr_list - write multicast addresses to MTA
3430 * @netdev: network interface device structure
3431 *
3432 * Writes multicast address list to the MTA hash table.
3433 * Returns: -ENOMEM on failure
3434 *                0 on no addresses written
3435 *                X on writing X addresses to MTA
3436 **/
3437static int igb_write_mc_addr_list(struct net_device *netdev)
3438{
3439	struct igb_adapter *adapter = netdev_priv(netdev);
3440	struct e1000_hw *hw = &adapter->hw;
3441	struct netdev_hw_addr *ha;
3442	u8  *mta_list;
3443	int i;
3444
3445	if (netdev_mc_empty(netdev)) {
3446		/* nothing to program, so clear mc list */
3447		igb_update_mc_addr_list(hw, NULL, 0);
3448		igb_restore_vf_multicasts(adapter);
3449		return 0;
3450	}
3451
3452	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3453	if (!mta_list)
3454		return -ENOMEM;
3455
3456	/* The shared function expects a packed array of only addresses. */
3457	i = 0;
3458	netdev_for_each_mc_addr(ha, netdev)
3459		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3460
3461	igb_update_mc_addr_list(hw, mta_list, i);
3462	kfree(mta_list);
3463
3464	return netdev_mc_count(netdev);
3465}
3466
3467/**
3468 * igb_write_uc_addr_list - write unicast addresses to RAR table
3469 * @netdev: network interface device structure
3470 *
3471 * Writes unicast address list to the RAR table.
3472 * Returns: -ENOMEM on failure/insufficient address space
3473 *                0 on no addresses written
3474 *                X on writing X addresses to the RAR table
3475 **/
3476static int igb_write_uc_addr_list(struct net_device *netdev)
3477{
3478	struct igb_adapter *adapter = netdev_priv(netdev);
3479	struct e1000_hw *hw = &adapter->hw;
3480	unsigned int vfn = adapter->vfs_allocated_count;
3481	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3482	int count = 0;
3483
	/* return -ENOMEM if there are not enough RAR entries for all addresses */
3485	if (netdev_uc_count(netdev) > rar_entries)
3486		return -ENOMEM;
3487
3488	if (!netdev_uc_empty(netdev) && rar_entries) {
3489		struct netdev_hw_addr *ha;
3490
3491		netdev_for_each_uc_addr(ha, netdev) {
3492			if (!rar_entries)
3493				break;
3494			igb_rar_set_qsel(adapter, ha->addr,
3495			                 rar_entries--,
3496			                 vfn);
3497			count++;
3498		}
3499	}
3500	/* write the addresses in reverse order to avoid write combining */
3501	for (; rar_entries > 0 ; rar_entries--) {
3502		wr32(E1000_RAH(rar_entries), 0);
3503		wr32(E1000_RAL(rar_entries), 0);
3504	}
3505	wrfl();
3506
3507	return count;
3508}
3509
3510/**
3511 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3512 * @netdev: network interface device structure
3513 *
3514 * The set_rx_mode entry point is called whenever the unicast or multicast
3515 * address lists or the network interface flags are updated.  This routine is
3516 * responsible for configuring the hardware for proper unicast, multicast,
3517 * promiscuous mode, and all-multi behavior.
3518 **/
3519static void igb_set_rx_mode(struct net_device *netdev)
3520{
3521	struct igb_adapter *adapter = netdev_priv(netdev);
3522	struct e1000_hw *hw = &adapter->hw;
3523	unsigned int vfn = adapter->vfs_allocated_count;
3524	u32 rctl, vmolr = 0;
3525	int count;
3526
3527	/* Check for Promiscuous and All Multicast modes */
3528	rctl = rd32(E1000_RCTL);
3529
	/* clear the affected bits */
3531	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3532
3533	if (netdev->flags & IFF_PROMISC) {
3534		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3535		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3536	} else {
3537		if (netdev->flags & IFF_ALLMULTI) {
3538			rctl |= E1000_RCTL_MPE;
3539			vmolr |= E1000_VMOLR_MPME;
3540		} else {
3541			/*
3542			 * Write addresses to the MTA, if the attempt fails
3543			 * then we should just turn on promiscuous mode so
3544			 * that we can at least receive multicast traffic
3545			 */
3546			count = igb_write_mc_addr_list(netdev);
3547			if (count < 0) {
3548				rctl |= E1000_RCTL_MPE;
3549				vmolr |= E1000_VMOLR_MPME;
3550			} else if (count) {
3551				vmolr |= E1000_VMOLR_ROMPE;
3552			}
3553		}
3554		/*
3555		 * Write addresses to available RAR registers, if there is not
3556		 * sufficient space to store all the addresses then enable
3557		 * unicast promiscuous mode
3558		 */
3559		count = igb_write_uc_addr_list(netdev);
3560		if (count < 0) {
3561			rctl |= E1000_RCTL_UPE;
3562			vmolr |= E1000_VMOLR_ROPE;
3563		}
3564		rctl |= E1000_RCTL_VFE;
3565	}
3566	wr32(E1000_RCTL, rctl);
3567
3568	/*
3569	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3570	 * the VMOLR to enable the appropriate modes.  Without this workaround
3571	 * we will have issues with VLAN tag stripping not being done for frames
3572	 * that are only arriving because we are the default pool
3573	 */
3574	if (hw->mac.type < e1000_82576)
3575		return;
3576
3577	vmolr |= rd32(E1000_VMOLR(vfn)) &
3578	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3579	wr32(E1000_VMOLR(vfn), vmolr);
3580	igb_restore_vf_multicasts(adapter);
3581}
3582
3583static void igb_check_wvbr(struct igb_adapter *adapter)
3584{
3585	struct e1000_hw *hw = &adapter->hw;
3586	u32 wvbr = 0;
3587
3588	switch (hw->mac.type) {
3589	case e1000_82576:
3590	case e1000_i350:
3591		if (!(wvbr = rd32(E1000_WVBR)))
3592			return;
3593		break;
3594	default:
3595		break;
3596	}
3597
3598	adapter->wvbr |= wvbr;
3599}
3600
3601#define IGB_STAGGERED_QUEUE_OFFSET 8
3602
3603static void igb_spoof_check(struct igb_adapter *adapter)
3604{
3605	int j;
3606
3607	if (!adapter->wvbr)
3608		return;
3609
3610	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3611		if (adapter->wvbr & (1 << j) ||
3612		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3613			dev_warn(&adapter->pdev->dev,
3614				"Spoof event(s) detected on VF %d\n", j);
3615			adapter->wvbr &=
3616				~((1 << j) |
3617				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3618		}
3619	}
3620}
3621
3622/* Need to wait a few seconds after link up to get diagnostic information from
3623 * the phy */
3624static void igb_update_phy_info(unsigned long data)
3625{
3626	struct igb_adapter *adapter = (struct igb_adapter *) data;
3627	igb_get_phy_info(&adapter->hw);
3628}
3629
3630/**
3631 * igb_has_link - check shared code for link and determine up/down
3632 * @adapter: pointer to driver private info
3633 **/
3634bool igb_has_link(struct igb_adapter *adapter)
3635{
3636	struct e1000_hw *hw = &adapter->hw;
3637	bool link_active = false;
3638	s32 ret_val = 0;
3639
3640	/* get_link_status is set on LSC (link status) interrupt or
3641	 * rx sequence error interrupt.  get_link_status will stay
3642	 * true until the e1000_check_for_link establishes link
3643	 * for copper adapters ONLY
3644	 */
3645	switch (hw->phy.media_type) {
3646	case e1000_media_type_copper:
3647		if (hw->mac.get_link_status) {
3648			ret_val = hw->mac.ops.check_for_link(hw);
3649			link_active = !hw->mac.get_link_status;
3650		} else {
3651			link_active = true;
3652		}
3653		break;
3654	case e1000_media_type_internal_serdes:
3655		ret_val = hw->mac.ops.check_for_link(hw);
3656		link_active = hw->mac.serdes_has_link;
3657		break;
3658	default:
3659	case e1000_media_type_unknown:
3660		break;
3661	}
3662
3663	return link_active;
3664}
3665
3666static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3667{
3668	bool ret = false;
3669	u32 ctrl_ext, thstat;
3670
3671	/* check for thermal sensor event on i350, copper only */
3672	if (hw->mac.type == e1000_i350) {
3673		thstat = rd32(E1000_THSTAT);
3674		ctrl_ext = rd32(E1000_CTRL_EXT);
3675
3676		if ((hw->phy.media_type == e1000_media_type_copper) &&
3677		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3678			ret = !!(thstat & event);
3679		}
3680	}
3681
3682	return ret;
3683}
3684
3685/**
3686 * igb_watchdog - Timer Call-back
3687 * @data: pointer to adapter cast into an unsigned long
3688 **/
3689static void igb_watchdog(unsigned long data)
3690{
3691	struct igb_adapter *adapter = (struct igb_adapter *)data;
3692	/* Do the rest outside of interrupt context */
3693	schedule_work(&adapter->watchdog_task);
3694}
3695
3696static void igb_watchdog_task(struct work_struct *work)
3697{
3698	struct igb_adapter *adapter = container_of(work,
3699	                                           struct igb_adapter,
3700                                                   watchdog_task);
3701	struct e1000_hw *hw = &adapter->hw;
3702	struct net_device *netdev = adapter->netdev;
3703	u32 link;
3704	int i;
3705
3706	link = igb_has_link(adapter);
3707	if (link) {
3708		/* Cancel scheduled suspend requests. */
3709		pm_runtime_resume(netdev->dev.parent);
3710
3711		if (!netif_carrier_ok(netdev)) {
3712			u32 ctrl;
3713			hw->mac.ops.get_speed_and_duplex(hw,
3714			                                 &adapter->link_speed,
3715			                                 &adapter->link_duplex);
3716
3717			ctrl = rd32(E1000_CTRL);
3718			/* Link status message must follow this format */
3719			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3720			       "Duplex, Flow Control: %s\n",
3721			       netdev->name,
3722			       adapter->link_speed,
3723			       adapter->link_duplex == FULL_DUPLEX ?
3724			       "Full" : "Half",
3725			       (ctrl & E1000_CTRL_TFCE) &&
3726			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3727			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3728			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3729
3730			/* check for thermal sensor event */
3731			if (igb_thermal_sensor_event(hw,
3732			    E1000_THSTAT_LINK_THROTTLE)) {
3733				netdev_info(netdev, "The network adapter link "
3734					    "speed was downshifted because it "
3735					    "overheated\n");
3736			}
3737
3738			/* adjust timeout factor according to speed/duplex */
3739			adapter->tx_timeout_factor = 1;
3740			switch (adapter->link_speed) {
3741			case SPEED_10:
3742				adapter->tx_timeout_factor = 14;
3743				break;
3744			case SPEED_100:
3745				/* maybe add some timeout factor ? */
3746				break;
3747			}
3748
3749			netif_carrier_on(netdev);
3750
3751			igb_ping_all_vfs(adapter);
3752			igb_check_vf_rate_limit(adapter);
3753
3754			/* link state has changed, schedule phy info update */
3755			if (!test_bit(__IGB_DOWN, &adapter->state))
3756				mod_timer(&adapter->phy_info_timer,
3757					  round_jiffies(jiffies + 2 * HZ));
3758		}
3759	} else {
3760		if (netif_carrier_ok(netdev)) {
3761			adapter->link_speed = 0;
3762			adapter->link_duplex = 0;
3763
3764			/* check for thermal sensor event */
3765			if (igb_thermal_sensor_event(hw,
3766			    E1000_THSTAT_PWR_DOWN)) {
3767				netdev_err(netdev, "The network adapter was "
3768					   "stopped because it overheated\n");
3769			}
3770
3771			/* Link status message must follow this format */
3772			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3773			       netdev->name);
3774			netif_carrier_off(netdev);
3775
3776			igb_ping_all_vfs(adapter);
3777
3778			/* link state has changed, schedule phy info update */
3779			if (!test_bit(__IGB_DOWN, &adapter->state))
3780				mod_timer(&adapter->phy_info_timer,
3781					  round_jiffies(jiffies + 2 * HZ));
3782
3783			pm_schedule_suspend(netdev->dev.parent,
3784					    MSEC_PER_SEC * 5);
3785		}
3786	}
3787
3788	spin_lock(&adapter->stats64_lock);
3789	igb_update_stats(adapter, &adapter->stats64);
3790	spin_unlock(&adapter->stats64_lock);
3791
3792	for (i = 0; i < adapter->num_tx_queues; i++) {
3793		struct igb_ring *tx_ring = adapter->tx_ring[i];
3794		if (!netif_carrier_ok(netdev)) {
3795			/* We've lost link, so the controller stops DMA,
3796			 * but we've got queued Tx work that's never going
3797			 * to get done, so reset controller to flush Tx.
3798			 * (Do the reset outside of interrupt context). */
3799			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3800				adapter->tx_timeout_count++;
3801				schedule_work(&adapter->reset_task);
3802				/* return immediately since reset is imminent */
3803				return;
3804			}
3805		}
3806
3807		/* Force detection of hung controller every watchdog period */
3808		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3809	}
3810
3811	/* Cause software interrupt to ensure rx ring is cleaned */
3812	if (adapter->msix_entries) {
3813		u32 eics = 0;
3814		for (i = 0; i < adapter->num_q_vectors; i++)
3815			eics |= adapter->q_vector[i]->eims_value;
3816		wr32(E1000_EICS, eics);
3817	} else {
3818		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3819	}
3820
3821	igb_spoof_check(adapter);
3822
3823	/* Reset the timer */
3824	if (!test_bit(__IGB_DOWN, &adapter->state))
3825		mod_timer(&adapter->watchdog_timer,
3826			  round_jiffies(jiffies + 2 * HZ));
3827}
3828
3829enum latency_range {
3830	lowest_latency = 0,
3831	low_latency = 1,
3832	bulk_latency = 2,
3833	latency_invalid = 255
3834};
3835
3836/**
3837 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3838 *
3839 *      Stores a new ITR value based strictly on packet size.  This
3840 *      algorithm is less sophisticated than that used in igb_update_itr,
3841 *      due to the difficulty of synchronizing statistics across multiple
3842 *      receive rings.  The divisors and thresholds used by this function
3843 *      were determined based on theoretical maximum wire speed and testing
3844 *      data, in order to minimize response time while increasing bulk
3845 *      throughput.
3846 *      This functionality is controlled by the InterruptThrottleRate module
3847 *      parameter (see igb_param.c)
3848 *      NOTE:  This function is called only when operating in a multiqueue
3849 *             receive environment.
3850 * @q_vector: pointer to q_vector
3851 **/
3852static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3853{
3854	int new_val = q_vector->itr_val;
3855	int avg_wire_size = 0;
3856	struct igb_adapter *adapter = q_vector->adapter;
3857	unsigned int packets;
3858
3859	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3860	 * ints/sec - ITR timer value of 120 ticks.
3861	 */
3862	if (adapter->link_speed != SPEED_1000) {
3863		new_val = IGB_4K_ITR;
3864		goto set_itr_val;
3865	}
3866
3867	packets = q_vector->rx.total_packets;
3868	if (packets)
3869		avg_wire_size = q_vector->rx.total_bytes / packets;
3870
3871	packets = q_vector->tx.total_packets;
3872	if (packets)
3873		avg_wire_size = max_t(u32, avg_wire_size,
3874				      q_vector->tx.total_bytes / packets);
3875
3876	/* if avg_wire_size isn't set no work was done */
3877	if (!avg_wire_size)
3878		goto clear_counts;
3879
3880	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3881	avg_wire_size += 24;
3882
3883	/* Don't starve jumbo frames */
3884	avg_wire_size = min(avg_wire_size, 3000);
3885
3886	/* Give a little boost to mid-size frames */
3887	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3888		new_val = avg_wire_size / 3;
3889	else
3890		new_val = avg_wire_size / 2;
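	/* Worked example: 100 frames totalling 60000 bytes give an
	 * avg_wire_size of 600 + 24 = 624; that falls in the 300..1200
	 * mid-size band above, so new_val = 624 / 3 = 208.  A larger value
	 * means a longer interrupt interval, i.e. fewer interrupts per
	 * second.
	 */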
3891
3892	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3893	if (new_val < IGB_20K_ITR &&
3894	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3895	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3896		new_val = IGB_20K_ITR;
3897
3898set_itr_val:
3899	if (new_val != q_vector->itr_val) {
3900		q_vector->itr_val = new_val;
3901		q_vector->set_itr = 1;
3902	}
3903clear_counts:
3904	q_vector->rx.total_bytes = 0;
3905	q_vector->rx.total_packets = 0;
3906	q_vector->tx.total_bytes = 0;
3907	q_vector->tx.total_packets = 0;
3908}
3909
3910/**
3911 * igb_update_itr - update the dynamic ITR value based on statistics
3912 *      Stores a new ITR value based on packets and byte
3913 *      counts during the last interrupt.  The advantage of per interrupt
3914 *      computation is faster updates and more accurate ITR for the current
3915 *      traffic pattern.  Constants in this function were computed
3916 *      based on theoretical maximum wire speed and thresholds were set based
3917 *      on testing data as well as attempting to minimize response time
3918 *      while increasing bulk throughput.
3919 *      This functionality is controlled by the InterruptThrottleRate module
3920 *      parameter (see igb_param.c)
3921 *      NOTE:  These calculations are only valid when operating in a single-
3922 *             queue environment.
3923 * @q_vector: pointer to q_vector
3924 * @ring_container: ring info to update the itr for
3925 **/
3926static void igb_update_itr(struct igb_q_vector *q_vector,
3927			   struct igb_ring_container *ring_container)
3928{
3929	unsigned int packets = ring_container->total_packets;
3930	unsigned int bytes = ring_container->total_bytes;
3931	u8 itrval = ring_container->itr;
3932
3933	/* no packets, exit with status unchanged */
3934	if (packets == 0)
3935		return;
3936
3937	switch (itrval) {
3938	case lowest_latency:
3939		/* handle TSO and jumbo frames */
3940		if (bytes/packets > 8000)
3941			itrval = bulk_latency;
3942		else if ((packets < 5) && (bytes > 512))
3943			itrval = low_latency;
3944		break;
3945	case low_latency:  /* 50 usec aka 20000 ints/s */
3946		if (bytes > 10000) {
3947			/* this if handles the TSO accounting */
3948			if (bytes/packets > 8000) {
3949				itrval = bulk_latency;
3950			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3951				itrval = bulk_latency;
3952			} else if (packets > 35) {
3953				itrval = lowest_latency;
3954			}
3955		} else if (bytes/packets > 2000) {
3956			itrval = bulk_latency;
3957		} else if (packets <= 2 && bytes < 512) {
3958			itrval = lowest_latency;
3959		}
3960		break;
3961	case bulk_latency: /* 250 usec aka 4000 ints/s */
3962		if (bytes > 25000) {
3963			if (packets > 35)
3964				itrval = low_latency;
3965		} else if (bytes < 1500) {
3966			itrval = low_latency;
3967		}
3968		break;
3969	}
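	/* Worked example: a ring in the low_latency state that saw 40
	 * packets totalling 20000 bytes (500 bytes/packet) hits the
	 * "packets > 35" test above and moves to lowest_latency for the
	 * next interval.
	 */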
3970
3971	/* clear work counters since we have the values we need */
3972	ring_container->total_bytes = 0;
3973	ring_container->total_packets = 0;
3974
3975	/* write updated itr to ring container */
3976	ring_container->itr = itrval;
3977}
3978
3979static void igb_set_itr(struct igb_q_vector *q_vector)
3980{
3981	struct igb_adapter *adapter = q_vector->adapter;
3982	u32 new_itr = q_vector->itr_val;
3983	u8 current_itr = 0;
3984
3985	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3986	if (adapter->link_speed != SPEED_1000) {
3987		current_itr = 0;
3988		new_itr = IGB_4K_ITR;
3989		goto set_itr_now;
3990	}
3991
3992	igb_update_itr(q_vector, &q_vector->tx);
3993	igb_update_itr(q_vector, &q_vector->rx);
3994
3995	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3996
3997	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3998	if (current_itr == lowest_latency &&
3999	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4000	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4001		current_itr = low_latency;
4002
4003	switch (current_itr) {
4004	/* counts and packets in update_itr are dependent on these numbers */
4005	case lowest_latency:
4006		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4007		break;
4008	case low_latency:
4009		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4010		break;
4011	case bulk_latency:
4012		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4013		break;
4014	default:
4015		break;
4016	}
4017
4018set_itr_now:
4019	if (new_itr != q_vector->itr_val) {
4020		/* this attempts to bias the interrupt rate towards Bulk
4021		 * by adding intermediate steps when interrupt rate is
4022		 * increasing */
4023		new_itr = new_itr > q_vector->itr_val ?
4024		             max((new_itr * q_vector->itr_val) /
4025		                 (new_itr + (q_vector->itr_val >> 2)),
4026				 new_itr) :
4027			     new_itr;
4028		/* Don't write the value here; it resets the adapter's
4029		 * internal timer, and causes us to delay far longer than
4030		 * we should between interrupts.  Instead, we write the ITR
4031		 * value at the beginning of the next interrupt so the timing
4032		 * ends up being correct.
4033		 */
4034		q_vector->itr_val = new_itr;
4035		q_vector->set_itr = 1;
4036	}
4037}
4038
4039static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4040			    u32 type_tucmd, u32 mss_l4len_idx)
4041{
4042	struct e1000_adv_tx_context_desc *context_desc;
4043	u16 i = tx_ring->next_to_use;
4044
4045	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4046
4047	i++;
4048	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4049
4050	/* set bits to identify this as an advanced context descriptor */
4051	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4052
4053	/* For 82575, context index must be unique per ring. */
4054	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4055		mss_l4len_idx |= tx_ring->reg_idx << 4;
4056
4057	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4058	context_desc->seqnum_seed	= 0;
4059	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4060	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4061}
4062
4063static int igb_tso(struct igb_ring *tx_ring,
4064		   struct igb_tx_buffer *first,
4065		   u8 *hdr_len)
4066{
4067	struct sk_buff *skb = first->skb;
4068	u32 vlan_macip_lens, type_tucmd;
4069	u32 mss_l4len_idx, l4len;
4070
4071	if (!skb_is_gso(skb))
4072		return 0;
4073
4074	if (skb_header_cloned(skb)) {
4075		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4076		if (err)
4077			return err;
4078	}
4079
4080	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4081	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4082
4083	if (first->protocol == __constant_htons(ETH_P_IP)) {
4084		struct iphdr *iph = ip_hdr(skb);
4085		iph->tot_len = 0;
4086		iph->check = 0;
4087		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4088							 iph->daddr, 0,
4089							 IPPROTO_TCP,
4090							 0);
4091		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4092		first->tx_flags |= IGB_TX_FLAGS_TSO |
4093				   IGB_TX_FLAGS_CSUM |
4094				   IGB_TX_FLAGS_IPV4;
4095	} else if (skb_is_gso_v6(skb)) {
4096		ipv6_hdr(skb)->payload_len = 0;
4097		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4098						       &ipv6_hdr(skb)->daddr,
4099						       0, IPPROTO_TCP, 0);
4100		first->tx_flags |= IGB_TX_FLAGS_TSO |
4101				   IGB_TX_FLAGS_CSUM;
4102	}
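	/* In both cases above the TCP checksum field is seeded with the
	 * pseudo-header sum computed over a zero length, so the hardware can
	 * fold the real per-segment length into the checksum it writes for
	 * each segment.
	 */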
4103
4104	/* compute header lengths */
4105	l4len = tcp_hdrlen(skb);
4106	*hdr_len = skb_transport_offset(skb) + l4len;
4107
4108	/* update gso size and bytecount with header size */
4109	first->gso_segs = skb_shinfo(skb)->gso_segs;
4110	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4111
4112	/* MSS L4LEN IDX */
4113	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4114	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4115
4116	/* VLAN MACLEN IPLEN */
4117	vlan_macip_lens = skb_network_header_len(skb);
4118	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4119	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4120
4121	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4122
4123	return 1;
4124}
4125
4126static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4127{
4128	struct sk_buff *skb = first->skb;
4129	u32 vlan_macip_lens = 0;
4130	u32 mss_l4len_idx = 0;
4131	u32 type_tucmd = 0;
4132
4133	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4134		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4135			return;
4136	} else {
4137		u8 l4_hdr = 0;
4138		switch (first->protocol) {
4139		case __constant_htons(ETH_P_IP):
4140			vlan_macip_lens |= skb_network_header_len(skb);
4141			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4142			l4_hdr = ip_hdr(skb)->protocol;
4143			break;
4144		case __constant_htons(ETH_P_IPV6):
4145			vlan_macip_lens |= skb_network_header_len(skb);
4146			l4_hdr = ipv6_hdr(skb)->nexthdr;
4147			break;
4148		default:
4149			if (unlikely(net_ratelimit())) {
4150				dev_warn(tx_ring->dev,
4151				 "partial checksum but proto=%x!\n",
4152				 first->protocol);
4153			}
4154			break;
4155		}
4156
4157		switch (l4_hdr) {
4158		case IPPROTO_TCP:
4159			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4160			mss_l4len_idx = tcp_hdrlen(skb) <<
4161					E1000_ADVTXD_L4LEN_SHIFT;
4162			break;
4163		case IPPROTO_SCTP:
4164			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4165			mss_l4len_idx = sizeof(struct sctphdr) <<
4166					E1000_ADVTXD_L4LEN_SHIFT;
4167			break;
4168		case IPPROTO_UDP:
4169			mss_l4len_idx = sizeof(struct udphdr) <<
4170					E1000_ADVTXD_L4LEN_SHIFT;
4171			break;
4172		default:
4173			if (unlikely(net_ratelimit())) {
4174				dev_warn(tx_ring->dev,
4175				 "partial checksum but l4 proto=%x!\n",
4176				 l4_hdr);
4177			}
4178			break;
4179		}
4180
4181		/* update TX checksum flag */
4182		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4183	}
4184
4185	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4186	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4187
4188	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4189}
4190
4191static __le32 igb_tx_cmd_type(u32 tx_flags)
4192{
4193	/* set type for advanced descriptor with frame checksum insertion */
4194	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4195				      E1000_ADVTXD_DCMD_IFCS |
4196				      E1000_ADVTXD_DCMD_DEXT);
4197
4198	/* set HW vlan bit if vlan is present */
4199	if (tx_flags & IGB_TX_FLAGS_VLAN)
4200		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4201
4202	/* set timestamp bit if present */
4203	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4204		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4205
4206	/* set segmentation bits for TSO */
4207	if (tx_flags & IGB_TX_FLAGS_TSO)
4208		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4209
4210	return cmd_type;
4211}
4212
4213static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4214				 union e1000_adv_tx_desc *tx_desc,
4215				 u32 tx_flags, unsigned int paylen)
4216{
4217	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4218
4219	/* 82575 requires a unique index per ring if any offload is enabled */
4220	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4221	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4222		olinfo_status |= tx_ring->reg_idx << 4;
4223
4224	/* insert L4 checksum */
4225	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4226		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4227
4228		/* insert IPv4 checksum */
4229		if (tx_flags & IGB_TX_FLAGS_IPV4)
4230			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4231	}
4232
4233	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4234}
4235
4236/*
4237 * The largest size we can write to the descriptor is 65535.  In order to
4238 * maintain a power of two alignment we have to limit ourselves to 32K.
4239 */
4240#define IGB_MAX_TXD_PWR	15
4241#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
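/* For example, igb_tx_map() below posts a 45000-byte fragment as one
 * 32768-byte descriptor followed by one 12232-byte descriptor.
 */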
4242
4243static void igb_tx_map(struct igb_ring *tx_ring,
4244		       struct igb_tx_buffer *first,
4245		       const u8 hdr_len)
4246{
4247	struct sk_buff *skb = first->skb;
4248	struct igb_tx_buffer *tx_buffer_info;
4249	union e1000_adv_tx_desc *tx_desc;
4250	dma_addr_t dma;
4251	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4252	unsigned int data_len = skb->data_len;
4253	unsigned int size = skb_headlen(skb);
4254	unsigned int paylen = skb->len - hdr_len;
4255	__le32 cmd_type;
4256	u32 tx_flags = first->tx_flags;
4257	u16 i = tx_ring->next_to_use;
4258
4259	tx_desc = IGB_TX_DESC(tx_ring, i);
4260
4261	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4262	cmd_type = igb_tx_cmd_type(tx_flags);
4263
4264	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4265	if (dma_mapping_error(tx_ring->dev, dma))
4266		goto dma_error;
4267
4268	/* record length, and DMA address */
4269	first->length = size;
4270	first->dma = dma;
4271	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4272
4273	for (;;) {
4274		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4275			tx_desc->read.cmd_type_len =
4276				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4277
4278			i++;
4279			tx_desc++;
4280			if (i == tx_ring->count) {
4281				tx_desc = IGB_TX_DESC(tx_ring, 0);
4282				i = 0;
4283			}
4284
4285			dma += IGB_MAX_DATA_PER_TXD;
4286			size -= IGB_MAX_DATA_PER_TXD;
4287
4288			tx_desc->read.olinfo_status = 0;
4289			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4290		}
4291
4292		if (likely(!data_len))
4293			break;
4294
4295		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4296
4297		i++;
4298		tx_desc++;
4299		if (i == tx_ring->count) {
4300			tx_desc = IGB_TX_DESC(tx_ring, 0);
4301			i = 0;
4302		}
4303
4304		size = skb_frag_size(frag);
4305		data_len -= size;
4306
4307		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4308				   size, DMA_TO_DEVICE);
4309		if (dma_mapping_error(tx_ring->dev, dma))
4310			goto dma_error;
4311
4312		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4313		tx_buffer_info->length = size;
4314		tx_buffer_info->dma = dma;
4315
4316		tx_desc->read.olinfo_status = 0;
4317		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4318
4319		frag++;
4320	}
4321
4322	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4323
4324	/* write last descriptor with RS and EOP bits */
4325	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4326	if (unlikely(skb->no_fcs))
4327		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4328	tx_desc->read.cmd_type_len = cmd_type;
4329
4330	/* set the timestamp */
4331	first->time_stamp = jiffies;
4332
4333	/*
4334	 * Force memory writes to complete before letting h/w know there
4335	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4336	 * memory model archs, such as IA-64).
4337	 *
4338	 * We also need this memory barrier to make certain all of the
4339	 * status bits have been updated before next_to_watch is written.
4340	 */
4341	wmb();
4342
4343	/* set next_to_watch value indicating a packet is present */
4344	first->next_to_watch = tx_desc;
4345
4346	i++;
4347	if (i == tx_ring->count)
4348		i = 0;
4349
4350	tx_ring->next_to_use = i;
4351
4352	writel(i, tx_ring->tail);
4353
4354	/* we need this if more than one processor can write to our tail
4355	 * at a time; it synchronizes IO on IA64/Altix systems */
4356	mmiowb();
4357
4358	return;
4359
4360dma_error:
4361	dev_err(tx_ring->dev, "TX DMA map failed\n");
4362
4363	/* clear dma mappings for failed tx_buffer_info map */
4364	for (;;) {
4365		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4366		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4367		if (tx_buffer_info == first)
4368			break;
4369		if (i == 0)
4370			i = tx_ring->count;
4371		i--;
4372	}
4373
4374	tx_ring->next_to_use = i;
4375}
4376
4377static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4378{
4379	struct net_device *netdev = tx_ring->netdev;
4380
4381	netif_stop_subqueue(netdev, tx_ring->queue_index);
4382
4383	/* Herbert's original patch had:
4384	 *  smp_mb__after_netif_stop_queue();
4385	 * but since that doesn't exist yet, just open code it. */
4386	smp_mb();
4387
4388	/* We need to check again in case another CPU has just
4389	 * made room available. */
4390	if (igb_desc_unused(tx_ring) < size)
4391		return -EBUSY;
4392
4393	/* A reprieve! */
4394	netif_wake_subqueue(netdev, tx_ring->queue_index);
4395
4396	u64_stats_update_begin(&tx_ring->tx_syncp2);
4397	tx_ring->tx_stats.restart_queue2++;
4398	u64_stats_update_end(&tx_ring->tx_syncp2);
4399
4400	return 0;
4401}
4402
4403static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4404{
4405	if (igb_desc_unused(tx_ring) >= size)
4406		return 0;
4407	return __igb_maybe_stop_tx(tx_ring, size);
4408}
4409
4410netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4411				struct igb_ring *tx_ring)
4412{
4413	struct igb_tx_buffer *first;
4414	int tso;
4415	u32 tx_flags = 0;
4416	__be16 protocol = vlan_get_protocol(skb);
4417	u8 hdr_len = 0;
4418
4419	/* need: 1 descriptor per page,
4420	 *       + 2 desc gap to keep tail from touching head,
4421	 *       + 1 desc for skb->data,
4422	 *       + 1 desc for context descriptor,
4423	 * otherwise try next time */
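	/* e.g. an skb with 3 page fragments reserves 3 + 1 (skb->data) +
	 * 1 (context) + 2 (gap) = 7 descriptors, hence nr_frags + 4.
	 */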
4424	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4425		/* this is a hard error */
4426		return NETDEV_TX_BUSY;
4427	}
4428
4429	/* record the location of the first descriptor for this packet */
4430	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4431	first->skb = skb;
4432	first->bytecount = skb->len;
4433	first->gso_segs = 1;
4434
4435	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4436		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4437		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4438	}
4439
4440	if (vlan_tx_tag_present(skb)) {
4441		tx_flags |= IGB_TX_FLAGS_VLAN;
4442		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4443	}
4444
4445	/* record initial flags and protocol */
4446	first->tx_flags = tx_flags;
4447	first->protocol = protocol;
4448
4449	tso = igb_tso(tx_ring, first, &hdr_len);
4450	if (tso < 0)
4451		goto out_drop;
4452	else if (!tso)
4453		igb_tx_csum(tx_ring, first);
4454
4455	igb_tx_map(tx_ring, first, hdr_len);
4456
4457	/* Make sure there is space in the ring for the next send. */
4458	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4459
4460	return NETDEV_TX_OK;
4461
4462out_drop:
4463	igb_unmap_and_free_tx_resource(tx_ring, first);
4464
4465	return NETDEV_TX_OK;
4466}
4467
4468static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4469						    struct sk_buff *skb)
4470{
4471	unsigned int r_idx = skb->queue_mapping;
4472
4473	if (r_idx >= adapter->num_tx_queues)
4474		r_idx = r_idx % adapter->num_tx_queues;
4475
4476	return adapter->tx_ring[r_idx];
4477}
4478
4479static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4480				  struct net_device *netdev)
4481{
4482	struct igb_adapter *adapter = netdev_priv(netdev);
4483
4484	if (test_bit(__IGB_DOWN, &adapter->state)) {
4485		dev_kfree_skb_any(skb);
4486		return NETDEV_TX_OK;
4487	}
4488
4489	if (skb->len <= 0) {
4490		dev_kfree_skb_any(skb);
4491		return NETDEV_TX_OK;
4492	}
4493
4494	/*
4495	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4496	 * in order to meet this minimum size requirement.
4497	 */
4498	if (skb->len < 17) {
4499		if (skb_padto(skb, 17))
4500			return NETDEV_TX_OK;
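		/* skb_padto() zero-fills the added bytes but does not update
		 * skb->len, so account for the padding by hand.
		 */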
4501		skb->len = 17;
4502	}
4503
4504	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4505}
4506
4507/**
4508 * igb_tx_timeout - Respond to a Tx Hang
4509 * @netdev: network interface device structure
4510 **/
4511static void igb_tx_timeout(struct net_device *netdev)
4512{
4513	struct igb_adapter *adapter = netdev_priv(netdev);
4514	struct e1000_hw *hw = &adapter->hw;
4515
4516	/* Do the reset outside of interrupt context */
4517	adapter->tx_timeout_count++;
4518
4519	if (hw->mac.type >= e1000_82580)
4520		hw->dev_spec._82575.global_device_reset = true;
4521
4522	schedule_work(&adapter->reset_task);
4523	wr32(E1000_EICS,
4524	     (adapter->eims_enable_mask & ~adapter->eims_other));
4525}
4526
4527static void igb_reset_task(struct work_struct *work)
4528{
4529	struct igb_adapter *adapter;
4530	adapter = container_of(work, struct igb_adapter, reset_task);
4531
4532	igb_dump(adapter);
4533	netdev_err(adapter->netdev, "Reset adapter\n");
4534	igb_reinit_locked(adapter);
4535}
4536
4537/**
4538 * igb_get_stats64 - Get System Network Statistics
4539 * @netdev: network interface device structure
4540 * @stats: rtnl_link_stats64 pointer
4541 *
4542 **/
4543static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4544						 struct rtnl_link_stats64 *stats)
4545{
4546	struct igb_adapter *adapter = netdev_priv(netdev);
4547
4548	spin_lock(&adapter->stats64_lock);
4549	igb_update_stats(adapter, &adapter->stats64);
4550	memcpy(stats, &adapter->stats64, sizeof(*stats));
4551	spin_unlock(&adapter->stats64_lock);
4552
4553	return stats;
4554}
4555
4556/**
4557 * igb_change_mtu - Change the Maximum Transfer Unit
4558 * @netdev: network interface device structure
4559 * @new_mtu: new value for maximum frame size
4560 *
4561 * Returns 0 on success, negative on failure
4562 **/
4563static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4564{
4565	struct igb_adapter *adapter = netdev_priv(netdev);
4566	struct pci_dev *pdev = adapter->pdev;
4567	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4568
4569	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4570		dev_err(&pdev->dev, "Invalid MTU setting\n");
4571		return -EINVAL;
4572	}
4573
4574#define MAX_STD_JUMBO_FRAME_SIZE 9238
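	/* 9238 = 9216-byte MTU + ETH_HLEN (14) + ETH_FCS_LEN (4) +
	 * VLAN_HLEN (4), which is why the message below quotes 9216.
	 */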
4575	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4576		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4577		return -EINVAL;
4578	}
4579
4580	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4581		msleep(1);
4582
4583	/* igb_down has a dependency on max_frame_size */
4584	adapter->max_frame_size = max_frame;
4585
4586	if (netif_running(netdev))
4587		igb_down(adapter);
4588
4589	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4590		 netdev->mtu, new_mtu);
4591	netdev->mtu = new_mtu;
4592
4593	if (netif_running(netdev))
4594		igb_up(adapter);
4595	else
4596		igb_reset(adapter);
4597
4598	clear_bit(__IGB_RESETTING, &adapter->state);
4599
4600	return 0;
4601}
4602
4603/**
4604 * igb_update_stats - Update the board statistics counters
4605 * @adapter: board private structure
4606 **/
4607
4608void igb_update_stats(struct igb_adapter *adapter,
4609		      struct rtnl_link_stats64 *net_stats)
4610{
4611	struct e1000_hw *hw = &adapter->hw;
4612	struct pci_dev *pdev = adapter->pdev;
4613	u32 reg, mpc;
4614	u16 phy_tmp;
4615	int i;
4616	u64 bytes, packets;
4617	unsigned int start;
4618	u64 _bytes, _packets;
4619
4620#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4621
4622	/*
4623	 * Prevent stats update while adapter is being reset, or if the pci
4624	 * connection is down.
4625	 */
4626	if (adapter->link_speed == 0)
4627		return;
4628	if (pci_channel_offline(pdev))
4629		return;
4630
4631	bytes = 0;
4632	packets = 0;
4633	for (i = 0; i < adapter->num_rx_queues; i++) {
4634		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4635		struct igb_ring *ring = adapter->rx_ring[i];
4636
4637		ring->rx_stats.drops += rqdpc_tmp;
4638		net_stats->rx_fifo_errors += rqdpc_tmp;
4639
4640		do {
4641			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4642			_bytes = ring->rx_stats.bytes;
4643			_packets = ring->rx_stats.packets;
4644		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4645		bytes += _bytes;
4646		packets += _packets;
4647	}
4648
4649	net_stats->rx_bytes = bytes;
4650	net_stats->rx_packets = packets;
4651
4652	bytes = 0;
4653	packets = 0;
4654	for (i = 0; i < adapter->num_tx_queues; i++) {
4655		struct igb_ring *ring = adapter->tx_ring[i];
4656		do {
4657			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4658			_bytes = ring->tx_stats.bytes;
4659			_packets = ring->tx_stats.packets;
4660		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4661		bytes += _bytes;
4662		packets += _packets;
4663	}
4664	net_stats->tx_bytes = bytes;
4665	net_stats->tx_packets = packets;
4666
4667	/* read stats registers */
4668	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4669	adapter->stats.gprc += rd32(E1000_GPRC);
4670	adapter->stats.gorc += rd32(E1000_GORCL);
4671	rd32(E1000_GORCH); /* clear GORCL */
4672	adapter->stats.bprc += rd32(E1000_BPRC);
4673	adapter->stats.mprc += rd32(E1000_MPRC);
4674	adapter->stats.roc += rd32(E1000_ROC);
4675
4676	adapter->stats.prc64 += rd32(E1000_PRC64);
4677	adapter->stats.prc127 += rd32(E1000_PRC127);
4678	adapter->stats.prc255 += rd32(E1000_PRC255);
4679	adapter->stats.prc511 += rd32(E1000_PRC511);
4680	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4681	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4682	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4683	adapter->stats.sec += rd32(E1000_SEC);
4684
4685	mpc = rd32(E1000_MPC);
4686	adapter->stats.mpc += mpc;
4687	net_stats->rx_fifo_errors += mpc;
4688	adapter->stats.scc += rd32(E1000_SCC);
4689	adapter->stats.ecol += rd32(E1000_ECOL);
4690	adapter->stats.mcc += rd32(E1000_MCC);
4691	adapter->stats.latecol += rd32(E1000_LATECOL);
4692	adapter->stats.dc += rd32(E1000_DC);
4693	adapter->stats.rlec += rd32(E1000_RLEC);
4694	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4695	adapter->stats.xontxc += rd32(E1000_XONTXC);
4696	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4697	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4698	adapter->stats.fcruc += rd32(E1000_FCRUC);
4699	adapter->stats.gptc += rd32(E1000_GPTC);
4700	adapter->stats.gotc += rd32(E1000_GOTCL);
4701	rd32(E1000_GOTCH); /* clear GOTCL */
4702	adapter->stats.rnbc += rd32(E1000_RNBC);
4703	adapter->stats.ruc += rd32(E1000_RUC);
4704	adapter->stats.rfc += rd32(E1000_RFC);
4705	adapter->stats.rjc += rd32(E1000_RJC);
4706	adapter->stats.tor += rd32(E1000_TORH);
4707	adapter->stats.tot += rd32(E1000_TOTH);
4708	adapter->stats.tpr += rd32(E1000_TPR);
4709
4710	adapter->stats.ptc64 += rd32(E1000_PTC64);
4711	adapter->stats.ptc127 += rd32(E1000_PTC127);
4712	adapter->stats.ptc255 += rd32(E1000_PTC255);
4713	adapter->stats.ptc511 += rd32(E1000_PTC511);
4714	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4715	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4716
4717	adapter->stats.mptc += rd32(E1000_MPTC);
4718	adapter->stats.bptc += rd32(E1000_BPTC);
4719
4720	adapter->stats.tpt += rd32(E1000_TPT);
4721	adapter->stats.colc += rd32(E1000_COLC);
4722
4723	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4724	/* read internal phy specific stats */
4725	reg = rd32(E1000_CTRL_EXT);
4726	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4727		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4728		adapter->stats.tncrs += rd32(E1000_TNCRS);
4729	}
4730
4731	adapter->stats.tsctc += rd32(E1000_TSCTC);
4732	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4733
4734	adapter->stats.iac += rd32(E1000_IAC);
4735	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4736	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4737	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4738	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4739	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4740	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4741	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4742	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4743
4744	/* Fill out the OS statistics structure */
4745	net_stats->multicast = adapter->stats.mprc;
4746	net_stats->collisions = adapter->stats.colc;
4747
4748	/* Rx Errors */
4749
4750	/* RLEC on some newer hardware can be incorrect so build
4751	 * our own version based on RUC and ROC */
4752	net_stats->rx_errors = adapter->stats.rxerrc +
4753		adapter->stats.crcerrs + adapter->stats.algnerrc +
4754		adapter->stats.ruc + adapter->stats.roc +
4755		adapter->stats.cexterr;
4756	net_stats->rx_length_errors = adapter->stats.ruc +
4757				      adapter->stats.roc;
4758	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4759	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4760	net_stats->rx_missed_errors = adapter->stats.mpc;
4761
4762	/* Tx Errors */
4763	net_stats->tx_errors = adapter->stats.ecol +
4764			       adapter->stats.latecol;
4765	net_stats->tx_aborted_errors = adapter->stats.ecol;
4766	net_stats->tx_window_errors = adapter->stats.latecol;
4767	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4768
4769	/* Tx Dropped needs to be maintained elsewhere */
4770
4771	/* Phy Stats */
4772	if (hw->phy.media_type == e1000_media_type_copper) {
4773		if ((adapter->link_speed == SPEED_1000) &&
4774		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4775			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4776			adapter->phy_stats.idle_errors += phy_tmp;
4777		}
4778	}
4779
4780	/* Management Stats */
4781	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4782	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4783	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4784
4785	/* OS2BMC Stats */
4786	reg = rd32(E1000_MANC);
4787	if (reg & E1000_MANC_EN_BMC2OS) {
4788		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4789		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4790		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4791		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4792	}
4793}
4794
4795static irqreturn_t igb_msix_other(int irq, void *data)
4796{
4797	struct igb_adapter *adapter = data;
4798	struct e1000_hw *hw = &adapter->hw;
4799	u32 icr = rd32(E1000_ICR);
4800	/* reading ICR causes bit 31 of EICR to be cleared */
4801
4802	if (icr & E1000_ICR_DRSTA)
4803		schedule_work(&adapter->reset_task);
4804
4805	if (icr & E1000_ICR_DOUTSYNC) {
4806		/* HW is reporting DMA is out of sync */
4807		adapter->stats.doosync++;
4808		/* The DMA Out of Sync is also an indication of a spoof event
4809		 * in IOV mode. Check the Wrong VM Behavior register to
4810		 * see if it is really a spoof event. */
4811		igb_check_wvbr(adapter);
4812	}
4813
4814	/* Check for a mailbox event */
4815	if (icr & E1000_ICR_VMMB)
4816		igb_msg_task(adapter);
4817
4818	if (icr & E1000_ICR_LSC) {
4819		hw->mac.get_link_status = 1;
4820		/* guard against interrupt when we're going down */
4821		if (!test_bit(__IGB_DOWN, &adapter->state))
4822			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4823	}
4824
4825	wr32(E1000_EIMS, adapter->eims_other);
4826
4827	return IRQ_HANDLED;
4828}
4829
4830static void igb_write_itr(struct igb_q_vector *q_vector)
4831{
4832	struct igb_adapter *adapter = q_vector->adapter;
4833	u32 itr_val = q_vector->itr_val & 0x7FFC;
4834
4835	if (!q_vector->set_itr)
4836		return;
4837
4838	if (!itr_val)
4839		itr_val = 0x4;
4840
4841	if (adapter->hw.mac.type == e1000_82575)
4842		itr_val |= itr_val << 16;
4843	else
4844		itr_val |= E1000_EITR_CNT_IGNR;
4845
4846	writel(itr_val, q_vector->itr_register);
4847	q_vector->set_itr = 0;
4848}
4849
4850static irqreturn_t igb_msix_ring(int irq, void *data)
4851{
4852	struct igb_q_vector *q_vector = data;
4853
4854	/* Write the ITR value calculated from the previous interrupt. */
4855	igb_write_itr(q_vector);
4856
4857	napi_schedule(&q_vector->napi);
4858
4859	return IRQ_HANDLED;
4860}
4861
4862#ifdef CONFIG_IGB_DCA
4863static void igb_update_dca(struct igb_q_vector *q_vector)
4864{
4865	struct igb_adapter *adapter = q_vector->adapter;
4866	struct e1000_hw *hw = &adapter->hw;
4867	int cpu = get_cpu();
4868
4869	if (q_vector->cpu == cpu)
4870		goto out_no_update;
4871
4872	if (q_vector->tx.ring) {
4873		int q = q_vector->tx.ring->reg_idx;
4874		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4875		if (hw->mac.type == e1000_82575) {
4876			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4877			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4878		} else {
4879			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4880			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4881			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4882		}
4883		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4884		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4885	}
4886	if (q_vector->rx.ring) {
4887		int q = q_vector->rx.ring->reg_idx;
4888		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4889		if (hw->mac.type == e1000_82575) {
4890			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4891			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4892		} else {
4893			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4894			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4895			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4896		}
4897		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4898		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4899		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4900		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4901	}
4902	q_vector->cpu = cpu;
4903out_no_update:
4904	put_cpu();
4905}
4906
4907static void igb_setup_dca(struct igb_adapter *adapter)
4908{
4909	struct e1000_hw *hw = &adapter->hw;
4910	int i;
4911
4912	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4913		return;
4914
4915	/* Always use CB2 mode, difference is masked in the CB driver. */
4916	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4917
4918	for (i = 0; i < adapter->num_q_vectors; i++) {
4919		adapter->q_vector[i]->cpu = -1;
4920		igb_update_dca(adapter->q_vector[i]);
4921	}
4922}
4923
4924static int __igb_notify_dca(struct device *dev, void *data)
4925{
4926	struct net_device *netdev = dev_get_drvdata(dev);
4927	struct igb_adapter *adapter = netdev_priv(netdev);
4928	struct pci_dev *pdev = adapter->pdev;
4929	struct e1000_hw *hw = &adapter->hw;
4930	unsigned long event = *(unsigned long *)data;
4931
4932	switch (event) {
4933	case DCA_PROVIDER_ADD:
4934		/* if already enabled, don't do it again */
4935		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4936			break;
4937		if (dca_add_requester(dev) == 0) {
4938			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4939			dev_info(&pdev->dev, "DCA enabled\n");
4940			igb_setup_dca(adapter);
4941			break;
4942		}
4943		/* Fall Through since DCA is disabled. */
4944	case DCA_PROVIDER_REMOVE:
4945		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4946			/* without this a class_device is left
4947			 * hanging around in the sysfs model */
4948			dca_remove_requester(dev);
4949			dev_info(&pdev->dev, "DCA disabled\n");
4950			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4951			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4952		}
4953		break;
4954	}
4955
4956	return 0;
4957}
4958
4959static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4960                          void *p)
4961{
4962	int ret_val;
4963
4964	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4965	                                 __igb_notify_dca);
4966
4967	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4968}
4969#endif /* CONFIG_IGB_DCA */
4970
4971#ifdef CONFIG_PCI_IOV
4972static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4973{
4974	unsigned char mac_addr[ETH_ALEN];
4975	struct pci_dev *pdev = adapter->pdev;
4976	struct e1000_hw *hw = &adapter->hw;
4977	struct pci_dev *pvfdev;
4978	unsigned int device_id;
4979	u16 thisvf_devfn;
4980
4981	random_ether_addr(mac_addr);
4982	igb_set_vf_mac(adapter, vf, mac_addr);
4983
4984	switch (adapter->hw.mac.type) {
4985	case e1000_82576:
4986		device_id = IGB_82576_VF_DEV_ID;
4987		/* VF Stride for 82576 is 2 */
4988		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4989			(pdev->devfn & 1);
4990		break;
4991	case e1000_i350:
4992		device_id = IGB_I350_VF_DEV_ID;
4993		/* VF Stride for I350 is 4 */
4994		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4995				(pdev->devfn & 3);
4996		break;
4997	default:
4998		device_id = 0;
4999		thisvf_devfn = 0;
5000		break;
5001	}
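	/* For example, with the 82576 stride of 2 a PF at devfn 0 expects
	 * VF 3 at devfn 0x80 + (3 << 1) = 0x86, which the loop below matches.
	 */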
5002
5003	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5004	while (pvfdev) {
5005		if (pvfdev->devfn == thisvf_devfn)
5006			break;
5007		pvfdev = pci_get_device(hw->vendor_id,
5008					device_id, pvfdev);
5009	}
5010
5011	if (pvfdev)
5012		adapter->vf_data[vf].vfdev = pvfdev;
5013	else
5014		dev_err(&pdev->dev,
5015			"Couldn't find pci dev ptr for VF %4.4x\n",
5016			thisvf_devfn);
5017	return pvfdev != NULL;
5018}
5019
5020static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5021{
5022	struct e1000_hw *hw = &adapter->hw;
5023	struct pci_dev *pdev = adapter->pdev;
5024	struct pci_dev *pvfdev;
5025	u16 vf_devfn = 0;
5026	u16 vf_stride;
5027	unsigned int device_id;
5028	int vfs_found = 0;
5029
5030	switch (adapter->hw.mac.type) {
5031	case e1000_82576:
5032		device_id = IGB_82576_VF_DEV_ID;
5033		/* VF Stride for 82576 is 2 */
5034		vf_stride = 2;
5035		break;
5036	case e1000_i350:
5037		device_id = IGB_I350_VF_DEV_ID;
5038		/* VF Stride for I350 is 4 */
5039		vf_stride = 4;
5040		break;
5041	default:
5042		device_id = 0;
5043		vf_stride = 0;
5044		break;
5045	}
5046
5047	vf_devfn = pdev->devfn + 0x80;
5048	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5049	while (pvfdev) {
5050		if (pvfdev->devfn == vf_devfn &&
5051		    (pvfdev->bus->number >= pdev->bus->number))
5052			vfs_found++;
5053		vf_devfn += vf_stride;
5054		pvfdev = pci_get_device(hw->vendor_id,
5055					device_id, pvfdev);
5056	}
5057
5058	return vfs_found;
5059}
5060
5061static int igb_check_vf_assignment(struct igb_adapter *adapter)
5062{
5063	int i;
5064	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5065		if (adapter->vf_data[i].vfdev) {
5066			if (adapter->vf_data[i].vfdev->dev_flags &
5067			    PCI_DEV_FLAGS_ASSIGNED)
5068				return true;
5069		}
5070	}
5071	return false;
5072}
5073
5074#endif
5075static void igb_ping_all_vfs(struct igb_adapter *adapter)
5076{
5077	struct e1000_hw *hw = &adapter->hw;
5078	u32 ping;
5079	int i;
5080
5081	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5082		ping = E1000_PF_CONTROL_MSG;
5083		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5084			ping |= E1000_VT_MSGTYPE_CTS;
5085		igb_write_mbx(hw, &ping, 1, i);
5086	}
5087}
5088
5089static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5090{
5091	struct e1000_hw *hw = &adapter->hw;
5092	u32 vmolr = rd32(E1000_VMOLR(vf));
5093	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5094
5095	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5096	                    IGB_VF_FLAG_MULTI_PROMISC);
5097	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5098
5099	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5100		vmolr |= E1000_VMOLR_MPME;
5101		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5102		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5103	} else {
5104		/*
5105		 * if we have hashes and we are clearing a multicast promisc
5106		 * flag we need to write the hashes to the MTA as this step
5107		 * was previously skipped
5108		 */
5109		if (vf_data->num_vf_mc_hashes > 30) {
5110			vmolr |= E1000_VMOLR_MPME;
5111		} else if (vf_data->num_vf_mc_hashes) {
5112			int j;
5113			vmolr |= E1000_VMOLR_ROMPE;
5114			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5115				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5116		}
5117	}
5118
5119	wr32(E1000_VMOLR(vf), vmolr);
5120
5121	/* there are flags left unprocessed, likely not supported */
5122	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5123		return -EINVAL;
5124
5125	return 0;
5126
5127}
5128
5129static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5130				  u32 *msgbuf, u32 vf)
5131{
5132	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5133	u16 *hash_list = (u16 *)&msgbuf[1];
5134	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5135	int i;
5136
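	/* The mailbox message packs the hash count into the E1000_VT_MSGINFO
	 * bits of word 0 and the 16-bit hash values themselves starting at
	 * word 1, which is what is unpacked above.
	 */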
5137	/* salt away the number of multicast addresses assigned
5138	 * to this VF for later use to restore when the PF multicast
5139	 * list changes
5140	 */
5141	vf_data->num_vf_mc_hashes = n;
5142
5143	/* only up to 30 hash values supported */
5144	if (n > 30)
5145		n = 30;
5146
5147	/* store the hashes for later use */
5148	for (i = 0; i < n; i++)
5149		vf_data->vf_mc_hashes[i] = hash_list[i];
5150
5151	/* Flush and reset the mta with the new values */
5152	igb_set_rx_mode(adapter->netdev);
5153
5154	return 0;
5155}
5156
5157static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5158{
5159	struct e1000_hw *hw = &adapter->hw;
5160	struct vf_data_storage *vf_data;
5161	int i, j;
5162
5163	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5164		u32 vmolr = rd32(E1000_VMOLR(i));
5165		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5166
5167		vf_data = &adapter->vf_data[i];
5168
5169		if ((vf_data->num_vf_mc_hashes > 30) ||
5170		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5171			vmolr |= E1000_VMOLR_MPME;
5172		} else if (vf_data->num_vf_mc_hashes) {
5173			vmolr |= E1000_VMOLR_ROMPE;
5174			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5175				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5176		}
5177		wr32(E1000_VMOLR(i), vmolr);
5178	}
5179}
5180
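/* Each VLVF entry pairs a VLAN ID with a pool-select bitmap.  Clearing this
 * VF's pool bit drops it from every entry, and any entry whose pool bitmap
 * becomes empty is removed from the VFTA as well.
 */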
5181static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5182{
5183	struct e1000_hw *hw = &adapter->hw;
5184	u32 pool_mask, reg, vid;
5185	int i;
5186
5187	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5188
5189	/* Find the vlan filter for this id */
5190	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5191		reg = rd32(E1000_VLVF(i));
5192
5193		/* remove the vf from the pool */
5194		reg &= ~pool_mask;
5195
5196		/* if pool is empty then remove entry from vfta */
5197		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5198		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5199			reg = 0;
5200			vid = reg & E1000_VLVF_VLANID_MASK;
5201			igb_vfta_set(hw, vid, false);
5202		}
5203
5204		wr32(E1000_VLVF(i), reg);
5205	}
5206
5207	adapter->vf_data[vf].vlans_enabled = 0;
5208}
5209
5210static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5211{
5212	struct e1000_hw *hw = &adapter->hw;
5213	u32 reg, i;
5214
5215	/* The vlvf table only exists on 82576 hardware and newer */
5216	if (hw->mac.type < e1000_82576)
5217		return -1;
5218
5219	/* we only need to do this if VMDq is enabled */
5220	if (!adapter->vfs_allocated_count)
5221		return -1;
5222
5223	/* Find the vlan filter for this id */
5224	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5225		reg = rd32(E1000_VLVF(i));
5226		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5227		    vid == (reg & E1000_VLVF_VLANID_MASK))
5228			break;
5229	}
5230
5231	if (add) {
5232		if (i == E1000_VLVF_ARRAY_SIZE) {
5233			/* Did not find a matching VLAN ID entry that was
5234			 * enabled.  Search for a free filter entry, i.e.
5235			 * one without the enable bit set
5236			 */
5237			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5238				reg = rd32(E1000_VLVF(i));
5239				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5240					break;
5241			}
5242		}
5243		if (i < E1000_VLVF_ARRAY_SIZE) {
5244			/* Found an enabled/available entry */
5245			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5246
5247			/* if !enabled we need to set this up in vfta */
5248			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5249				/* add VID to filter table */
5250				igb_vfta_set(hw, vid, true);
5251				reg |= E1000_VLVF_VLANID_ENABLE;
5252			}
5253			reg &= ~E1000_VLVF_VLANID_MASK;
5254			reg |= vid;
5255			wr32(E1000_VLVF(i), reg);
5256
5257			/* do not modify RLPML for PF devices */
5258			if (vf >= adapter->vfs_allocated_count)
5259				return 0;
5260
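			/* the first VLAN enabled for this VF grows its
			 * per-pool max frame size (RLPML) by 4 bytes to make
			 * room for the VLAN tag
			 */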
5261			if (!adapter->vf_data[vf].vlans_enabled) {
5262				u32 size;
5263				reg = rd32(E1000_VMOLR(vf));
5264				size = reg & E1000_VMOLR_RLPML_MASK;
5265				size += 4;
5266				reg &= ~E1000_VMOLR_RLPML_MASK;
5267				reg |= size;
5268				wr32(E1000_VMOLR(vf), reg);
5269			}
5270
5271			adapter->vf_data[vf].vlans_enabled++;
5272		}
5273	} else {
5274		if (i < E1000_VLVF_ARRAY_SIZE) {
5275			/* remove vf from the pool */
5276			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5277			/* if pool is empty then remove entry from vfta */
5278			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5279				reg = 0;
5280				igb_vfta_set(hw, vid, false);
5281			}
5282			wr32(E1000_VLVF(i), reg);
5283
5284			/* do not modify RLPML for PF devices */
5285			if (vf >= adapter->vfs_allocated_count)
5286				return 0;
5287
5288			adapter->vf_data[vf].vlans_enabled--;
5289			if (!adapter->vf_data[vf].vlans_enabled) {
5290				u32 size;
5291				reg = rd32(E1000_VMOLR(vf));
5292				size = reg & E1000_VMOLR_RLPML_MASK;
5293				size -= 4;
5294				reg &= ~E1000_VMOLR_RLPML_MASK;
5295				reg |= size;
5296				wr32(E1000_VMOLR(vf), reg);
5297			}
5298		}
5299	}
5300	return 0;
5301}
5302
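/* Program the VM VLAN insert register for a VF: a non-zero vid together with
 * E1000_VMVIR_VLANA_DEFAULT selects that vid as the VF's default (port) VLAN,
 * while writing zero clears it.
 */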
5303static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5304{
5305	struct e1000_hw *hw = &adapter->hw;
5306
5307	if (vid)
5308		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5309	else
5310		wr32(E1000_VMVIR(vf), 0);
5311}
5312
5313static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5314			       int vf, u16 vlan, u8 qos)
5315{
5316	int err = 0;
5317	struct igb_adapter *adapter = netdev_priv(netdev);
5318
5319	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5320		return -EINVAL;
5321	if (vlan || qos) {
5322		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5323		if (err)
5324			goto out;
5325		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5326		igb_set_vmolr(adapter, vf, !vlan);
5327		adapter->vf_data[vf].pf_vlan = vlan;
5328		adapter->vf_data[vf].pf_qos = qos;
5329		dev_info(&adapter->pdev->dev,
5330			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5331		if (test_bit(__IGB_DOWN, &adapter->state)) {
5332			dev_warn(&adapter->pdev->dev,
5333				 "The VF VLAN has been set,"
5334				 " but the PF device is not up.\n");
5335			dev_warn(&adapter->pdev->dev,
5336				 "Bring the PF device up before"
5337				 " attempting to use the VF device.\n");
5338		}
5339	} else {
5340		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5341				   false, vf);
5342		igb_set_vmvir(adapter, vlan, vf);
5343		igb_set_vmolr(adapter, vf, true);
5344		adapter->vf_data[vf].pf_vlan = 0;
5345		adapter->vf_data[vf].pf_qos = 0;
5346	}
5347out:
5348	return err;
5349}
5350
5351static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5352{
5353	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5354	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5355
5356	return igb_vlvf_set(adapter, vid, add, vf);
5357}
5358
5359static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5360{
5361	/* clear flags - except flag that indicates PF has set the MAC */
5362	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5363	adapter->vf_data[vf].last_nack = jiffies;
5364
5365	/* reset offloads to defaults */
5366	igb_set_vmolr(adapter, vf, true);
5367
5368	/* reset vlans for device */
5369	igb_clear_vf_vfta(adapter, vf);
5370	if (adapter->vf_data[vf].pf_vlan)
5371		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5372				    adapter->vf_data[vf].pf_vlan,
5373				    adapter->vf_data[vf].pf_qos);
5374	else
5375		igb_clear_vf_vfta(adapter, vf);
5376
5377	/* reset multicast table array for vf */
5378	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5379
5380	/* Flush and reset the mta with the new values */
5381	igb_set_rx_mode(adapter->netdev);
5382}
5383
5384static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5385{
5386	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5387
5388	/* generate a new mac address as we were hotplug removed/added */
5389	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5390		random_ether_addr(vf_mac);
5391
5392	/* process remaining reset events */
5393	igb_vf_reset(adapter, vf);
5394}
5395
5396static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5397{
5398	struct e1000_hw *hw = &adapter->hw;
5399	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5400	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5401	u32 reg, msgbuf[3];
5402	u8 *addr = (u8 *)(&msgbuf[1]);
5403
5404	/* process all the same items cleared in a function level reset */
5405	igb_vf_reset(adapter, vf);
5406
5407	/* set vf mac address */
5408	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5409
5410	/* enable transmit and receive for vf */
5411	reg = rd32(E1000_VFTE);
5412	wr32(E1000_VFTE, reg | (1 << vf));
5413	reg = rd32(E1000_VFRE);
5414	wr32(E1000_VFRE, reg | (1 << vf));
5415
5416	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5417
5418	/* reply to reset with ack and vf mac address */
5419	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5420	memcpy(addr, vf_mac, 6);
5421	igb_write_mbx(hw, msgbuf, 3, vf);
5422}
5423
5424static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5425{
5426	/*
5427	 * The VF MAC Address is stored in a packed array of bytes
5428	 * starting at the second 32 bit word of the msg array
5429	 */
5430	unsigned char *addr = (unsigned char *)&msg[1];
5431	int err = -1;
5432
5433	if (is_valid_ether_addr(addr))
5434		err = igb_set_vf_mac(adapter, vf, addr);
5435
5436	return err;
5437}
5438
5439static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5440{
5441	struct e1000_hw *hw = &adapter->hw;
5442	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5443	u32 msg = E1000_VT_MSGTYPE_NACK;
5444
5445	/* if device isn't clear to send it shouldn't be reading either */
5446	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5447	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5448		igb_write_mbx(hw, &msg, 1, vf);
5449		vf_data->last_nack = jiffies;
5450	}
5451}
5452
5453static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5454{
5455	struct pci_dev *pdev = adapter->pdev;
5456	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5457	struct e1000_hw *hw = &adapter->hw;
5458	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5459	s32 retval;
5460
5461	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5462
5463	if (retval) {
5464		/* if receive failed, revoke VF CTS status and restart init */
5465		dev_err(&pdev->dev, "Error receiving message from VF\n");
5466		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5467		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5468			return;
5469		goto out;
5470	}
5471
5472	/* this is a message we already processed, do nothing */
5473	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5474		return;
5475
5476	/*
5477	 * until the vf completes a reset it should not be
5478	 * allowed to start any configuration.
5479	 */
5480
5481	if (msgbuf[0] == E1000_VF_RESET) {
5482		igb_vf_reset_msg(adapter, vf);
5483		return;
5484	}
5485
5486	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5487		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5488			return;
5489		retval = -1;
5490		goto out;
5491	}
5492
5493	switch ((msgbuf[0] & 0xFFFF)) {
5494	case E1000_VF_SET_MAC_ADDR:
5495		retval = -EINVAL;
5496		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5497			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5498		else
5499			dev_warn(&pdev->dev,
5500				 "VF %d attempted to override administratively "
5501				 "set MAC address\nReload the VF driver to "
5502				 "resume operations\n", vf);
5503		break;
5504	case E1000_VF_SET_PROMISC:
5505		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5506		break;
5507	case E1000_VF_SET_MULTICAST:
5508		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5509		break;
5510	case E1000_VF_SET_LPE:
5511		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5512		break;
5513	case E1000_VF_SET_VLAN:
5514		retval = -1;
5515		if (vf_data->pf_vlan)
5516			dev_warn(&pdev->dev,
5517				 "VF %d attempted to override administratively "
5518				 "set VLAN tag\nReload the VF driver to "
5519				 "resume operations\n", vf);
5520		else
5521			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5522		break;
5523	default:
5524		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5525		retval = -1;
5526		break;
5527	}
5528
5529	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5530out:
5531	/* notify the VF of the results of what it sent us */
5532	if (retval)
5533		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5534	else
5535		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5536
5537	igb_write_mbx(hw, msgbuf, 1, vf);
5538}
5539
5540static void igb_msg_task(struct igb_adapter *adapter)
5541{
5542	struct e1000_hw *hw = &adapter->hw;
5543	u32 vf;
5544
5545	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5546		/* process any reset requests */
5547		if (!igb_check_for_rst(hw, vf))
5548			igb_vf_reset_event(adapter, vf);
5549
5550		/* process any messages pending */
5551		if (!igb_check_for_msg(hw, vf))
5552			igb_rcv_msg_from_vf(adapter, vf);
5553
5554		/* process any acks */
5555		if (!igb_check_for_ack(hw, vf))
5556			igb_rcv_ack_from_vf(adapter, vf);
5557	}
5558}
5559
5560/**
5561 *  igb_set_uta - Set unicast filter table address
5562 *  @adapter: board private structure
5563 *
5564 *  The unicast table address is a register array of 32-bit registers.
5565 *  The table is meant to be used in a way similar to how the MTA is used
5566 *  however due to certain limitations in the hardware it is necessary to
5567 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5568 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5569 **/
5570static void igb_set_uta(struct igb_adapter *adapter)
5571{
5572	struct e1000_hw *hw = &adapter->hw;
5573	int i;
5574
5575	/* The UTA table only exists on 82576 hardware and newer */
5576	if (hw->mac.type < e1000_82576)
5577		return;
5578
5579	/* we only need to do this if VMDq is enabled */
5580	if (!adapter->vfs_allocated_count)
5581		return;
5582
5583	for (i = 0; i < hw->mac.uta_reg_count; i++)
5584		array_wr32(E1000_UTA, i, ~0);
5585}
5586
5587/**
5588 * igb_intr_msi - Interrupt Handler
5589 * @irq: interrupt number
5590 * @data: pointer to a network interface device structure
5591 **/
5592static irqreturn_t igb_intr_msi(int irq, void *data)
5593{
5594	struct igb_adapter *adapter = data;
5595	struct igb_q_vector *q_vector = adapter->q_vector[0];
5596	struct e1000_hw *hw = &adapter->hw;
5597	/* read ICR disables interrupts using IAM */
5598	u32 icr = rd32(E1000_ICR);
5599
5600	igb_write_itr(q_vector);
5601
5602	if (icr & E1000_ICR_DRSTA)
5603		schedule_work(&adapter->reset_task);
5604
5605	if (icr & E1000_ICR_DOUTSYNC) {
5606		/* HW is reporting DMA is out of sync */
5607		adapter->stats.doosync++;
5608	}
5609
5610	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5611		hw->mac.get_link_status = 1;
5612		if (!test_bit(__IGB_DOWN, &adapter->state))
5613			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5614	}
5615
5616	napi_schedule(&q_vector->napi);
5617
5618	return IRQ_HANDLED;
5619}
5620
5621/**
5622 * igb_intr - Legacy Interrupt Handler
5623 * @irq: interrupt number
5624 * @data: pointer to a network interface device structure
5625 **/
5626static irqreturn_t igb_intr(int irq, void *data)
5627{
5628	struct igb_adapter *adapter = data;
5629	struct igb_q_vector *q_vector = adapter->q_vector[0];
5630	struct e1000_hw *hw = &adapter->hw;
5631	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5632	 * need for the IMC write */
5633	u32 icr = rd32(E1000_ICR);
5634
5635	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5636	 * not set, then the adapter didn't send an interrupt */
5637	if (!(icr & E1000_ICR_INT_ASSERTED))
5638		return IRQ_NONE;
5639
5640	igb_write_itr(q_vector);
5641
5642	if (icr & E1000_ICR_DRSTA)
5643		schedule_work(&adapter->reset_task);
5644
5645	if (icr & E1000_ICR_DOUTSYNC) {
5646		/* HW is reporting DMA is out of sync */
5647		adapter->stats.doosync++;
5648	}
5649
5650	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5651		hw->mac.get_link_status = 1;
5652		/* guard against interrupt when we're going down */
5653		if (!test_bit(__IGB_DOWN, &adapter->state))
5654			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5655	}
5656
5657	napi_schedule(&q_vector->napi);
5658
5659	return IRQ_HANDLED;
5660}
5661
5662static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5663{
5664	struct igb_adapter *adapter = q_vector->adapter;
5665	struct e1000_hw *hw = &adapter->hw;
5666
5667	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5668	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5669		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5670			igb_set_itr(q_vector);
5671		else
5672			igb_update_ring_itr(q_vector);
5673	}
5674
5675	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5676		if (adapter->msix_entries)
5677			wr32(E1000_EIMS, q_vector->eims_value);
5678		else
5679			igb_irq_enable(adapter);
5680	}
5681}
5682
5683/**
5684 * igb_poll - NAPI Rx polling callback
5685 * @napi: napi polling structure
5686 * @budget: count of how many packets we should handle
5687 **/
5688static int igb_poll(struct napi_struct *napi, int budget)
5689{
5690	struct igb_q_vector *q_vector = container_of(napi,
5691	                                             struct igb_q_vector,
5692	                                             napi);
5693	bool clean_complete = true;
5694
5695#ifdef CONFIG_IGB_DCA
5696	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5697		igb_update_dca(q_vector);
5698#endif
5699	if (q_vector->tx.ring)
5700		clean_complete = igb_clean_tx_irq(q_vector);
5701
5702	if (q_vector->rx.ring)
5703		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5704
5705	/* If all work not completed, return budget and keep polling */
5706	if (!clean_complete)
5707		return budget;
5708
5709	/* If not enough Rx work done, exit the polling mode */
5710	napi_complete(napi);
5711	igb_ring_irq_enable(q_vector);
5712
5713	return 0;
5714}
5715
5716/**
5717 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5718 * @adapter: board private structure
5719 * @shhwtstamps: timestamp structure to update
5720 * @regval: unsigned 64bit system time value.
5721 *
5722 * We need to convert the system time value stored in the RX/TXSTMP registers
5723 * into a hwtstamp which can be used by the upper level timestamping functions
5724 */
5725static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5726                                   struct skb_shared_hwtstamps *shhwtstamps,
5727                                   u64 regval)
5728{
5729	u64 ns;
5730
5731	/*
5732	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, so shift this up
5733	 * by 24 bits to match the clock shift we set up earlier.
5734	 */
5735	if (adapter->hw.mac.type >= e1000_82580)
5736		regval <<= IGB_82580_TSYNC_SHIFT;
5737
5738	ns = timecounter_cyc2time(&adapter->clock, regval);
5739	timecompare_update(&adapter->compare, ns);
5740	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5741	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5742	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5743}
5744
5745/**
5746 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5747 * @q_vector: pointer to q_vector containing needed info
5748 * @buffer_info: pointer to igb_tx_buffer structure
5749 *
5750 * If we were asked to do hardware stamping and such a time stamp is
5751 * available, then it must have been for this skb here because we only
5752 * allow one such packet into the queue.
5753 */
5754static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5755			    struct igb_tx_buffer *buffer_info)
5756{
5757	struct igb_adapter *adapter = q_vector->adapter;
5758	struct e1000_hw *hw = &adapter->hw;
5759	struct skb_shared_hwtstamps shhwtstamps;
5760	u64 regval;
5761
5762	/* if skb does not support hw timestamp or TX stamp not valid exit */
5763	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5764	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5765		return;
5766
5767	regval = rd32(E1000_TXSTMPL);
5768	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5769
5770	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5771	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5772}
5773
5774/**
5775 * igb_clean_tx_irq - Reclaim resources after transmit completes
5776 * @q_vector: pointer to q_vector containing needed info
5777 * returns true if ring is completely cleaned
5778 **/
5779static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5780{
5781	struct igb_adapter *adapter = q_vector->adapter;
5782	struct igb_ring *tx_ring = q_vector->tx.ring;
5783	struct igb_tx_buffer *tx_buffer;
5784	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5785	unsigned int total_bytes = 0, total_packets = 0;
5786	unsigned int budget = q_vector->tx.work_limit;
5787	unsigned int i = tx_ring->next_to_clean;
5788
5789	if (test_bit(__IGB_DOWN, &adapter->state))
5790		return true;
5791
5792	tx_buffer = &tx_ring->tx_buffer_info[i];
5793	tx_desc = IGB_TX_DESC(tx_ring, i);
5794	i -= tx_ring->count;
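	/* track i as a negative offset from the end of the ring so that
	 * the wrap check below reduces to a simple !i test
	 */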
5795
5796	for (; budget; budget--) {
5797		eop_desc = tx_buffer->next_to_watch;
5798
5799		/* prevent any other reads prior to eop_desc */
5800		rmb();
5801
5802		/* if next_to_watch is not set then there is no work pending */
5803		if (!eop_desc)
5804			break;
5805
5806		/* if DD is not set pending work has not been completed */
5807		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5808			break;
5809
5810		/* clear next_to_watch to prevent false hangs */
5811		tx_buffer->next_to_watch = NULL;
5812
5813		/* update the statistics for this packet */
5814		total_bytes += tx_buffer->bytecount;
5815		total_packets += tx_buffer->gso_segs;
5816
5817		/* retrieve hardware timestamp */
5818		igb_tx_hwtstamp(q_vector, tx_buffer);
5819
5820		/* free the skb */
5821		dev_kfree_skb_any(tx_buffer->skb);
5822		tx_buffer->skb = NULL;
5823
5824		/* unmap skb header data */
5825		dma_unmap_single(tx_ring->dev,
5826				 tx_buffer->dma,
5827				 tx_buffer->length,
5828				 DMA_TO_DEVICE);
5829
5830		/* clear last DMA location and unmap remaining buffers */
5831		while (tx_desc != eop_desc) {
5832			tx_buffer->dma = 0;
5833
5834			tx_buffer++;
5835			tx_desc++;
5836			i++;
5837			if (unlikely(!i)) {
5838				i -= tx_ring->count;
5839				tx_buffer = tx_ring->tx_buffer_info;
5840				tx_desc = IGB_TX_DESC(tx_ring, 0);
5841			}
5842
5843			/* unmap any remaining paged data */
5844			if (tx_buffer->dma) {
5845				dma_unmap_page(tx_ring->dev,
5846					       tx_buffer->dma,
5847					       tx_buffer->length,
5848					       DMA_TO_DEVICE);
5849			}
5850		}
5851
5852		/* clear last DMA location */
5853		tx_buffer->dma = 0;
5854
5855		/* move us one more past the eop_desc for start of next pkt */
5856		tx_buffer++;
5857		tx_desc++;
5858		i++;
5859		if (unlikely(!i)) {
5860			i -= tx_ring->count;
5861			tx_buffer = tx_ring->tx_buffer_info;
5862			tx_desc = IGB_TX_DESC(tx_ring, 0);
5863		}
5864	}
5865
5866	netdev_tx_completed_queue(txring_txq(tx_ring),
5867				  total_packets, total_bytes);
5868	i += tx_ring->count;
5869	tx_ring->next_to_clean = i;
5870	u64_stats_update_begin(&tx_ring->tx_syncp);
5871	tx_ring->tx_stats.bytes += total_bytes;
5872	tx_ring->tx_stats.packets += total_packets;
5873	u64_stats_update_end(&tx_ring->tx_syncp);
5874	q_vector->tx.total_bytes += total_bytes;
5875	q_vector->tx.total_packets += total_packets;
5876
5877	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5878		struct e1000_hw *hw = &adapter->hw;
5879
5880		eop_desc = tx_buffer->next_to_watch;
5881
5882		/* Detect a transmit hang in hardware, this serializes the
5883		/* Detect a transmit hang in hardware; this serializes the
5884		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5885		if (eop_desc &&
5886		    time_after(jiffies, tx_buffer->time_stamp +
5887			       (adapter->tx_timeout_factor * HZ)) &&
5888		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5889
5890			/* detected Tx unit hang */
5891			dev_err(tx_ring->dev,
5892				"Detected Tx Unit Hang\n"
5893				"  Tx Queue             <%d>\n"
5894				"  TDH                  <%x>\n"
5895				"  TDT                  <%x>\n"
5896				"  next_to_use          <%x>\n"
5897				"  next_to_clean        <%x>\n"
5898				"buffer_info[next_to_clean]\n"
5899				"  time_stamp           <%lx>\n"
5900				"  next_to_watch        <%p>\n"
5901				"  jiffies              <%lx>\n"
5902				"  desc.status          <%x>\n",
5903				tx_ring->queue_index,
5904				rd32(E1000_TDH(tx_ring->reg_idx)),
5905				readl(tx_ring->tail),
5906				tx_ring->next_to_use,
5907				tx_ring->next_to_clean,
5908				tx_buffer->time_stamp,
5909				eop_desc,
5910				jiffies,
5911				eop_desc->wb.status);
5912			netif_stop_subqueue(tx_ring->netdev,
5913					    tx_ring->queue_index);
5914
5915			/* we are about to reset, no point in enabling stuff */
5916			return true;
5917		}
5918	}
5919
5920	if (unlikely(total_packets &&
5921		     netif_carrier_ok(tx_ring->netdev) &&
5922		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5923		/* Make sure that anybody stopping the queue after this
5924		 * sees the new next_to_clean.
5925		 */
5926		smp_mb();
5927		if (__netif_subqueue_stopped(tx_ring->netdev,
5928					     tx_ring->queue_index) &&
5929		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5930			netif_wake_subqueue(tx_ring->netdev,
5931					    tx_ring->queue_index);
5932
5933			u64_stats_update_begin(&tx_ring->tx_syncp);
5934			tx_ring->tx_stats.restart_queue++;
5935			u64_stats_update_end(&tx_ring->tx_syncp);
5936		}
5937	}
5938
5939	return !!budget;
5940}
5941
5942static inline void igb_rx_checksum(struct igb_ring *ring,
5943				   union e1000_adv_rx_desc *rx_desc,
5944				   struct sk_buff *skb)
5945{
5946	skb_checksum_none_assert(skb);
5947
5948	/* Ignore Checksum bit is set */
5949	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5950		return;
5951
5952	/* Rx checksum disabled via ethtool */
5953	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5954		return;
5955
5956	/* TCP/UDP checksum error bit is set */
5957	if (igb_test_staterr(rx_desc,
5958			     E1000_RXDEXT_STATERR_TCPE |
5959			     E1000_RXDEXT_STATERR_IPE)) {
5960		/*
5961		 * work around errata with sctp packets where the TCPE aka
5962		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5963		 * packets (i.e. let the stack check the crc32c)
5964		 */
5965		if (!((skb->len == 60) &&
5966		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5967			u64_stats_update_begin(&ring->rx_syncp);
5968			ring->rx_stats.csum_err++;
5969			u64_stats_update_end(&ring->rx_syncp);
5970		}
5971		/* let the stack verify checksum errors */
5972		return;
5973	}
5974	/* It must be a TCP or UDP packet with a valid checksum */
5975	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5976				      E1000_RXD_STAT_UDPCS))
5977		skb->ip_summed = CHECKSUM_UNNECESSARY;
5978
5979	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5980		le32_to_cpu(rx_desc->wb.upper.status_error));
5981}
5982
5983static inline void igb_rx_hash(struct igb_ring *ring,
5984			       union e1000_adv_rx_desc *rx_desc,
5985			       struct sk_buff *skb)
5986{
5987	if (ring->netdev->features & NETIF_F_RXHASH)
5988		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5989}
5990
5991static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5992			    union e1000_adv_rx_desc *rx_desc,
5993			    struct sk_buff *skb)
5994{
5995	struct igb_adapter *adapter = q_vector->adapter;
5996	struct e1000_hw *hw = &adapter->hw;
5997	u64 regval;
5998
5999	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6000				       E1000_RXDADV_STAT_TS))
6001		return;
6002
6003	/*
6004	 * If this bit is set, then the RX registers contain the time stamp. No
6005	 * other packet will be time stamped until we read these registers, so
6006	 * read the registers to make them available again. Because only one
6007	 * packet can be time stamped at a time, we know that the register
6008	 * values must belong to this one here and therefore we don't need to
6009	 * compare any of the additional attributes stored for it.
6010	 *
6011	 * If nothing went wrong, then it should have a shared tx_flags that we
6012	 * can turn into a skb_shared_hwtstamps.
6013	 */
6014	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6015		u32 *stamp = (u32 *)skb->data;
6016		regval = le32_to_cpu(*(stamp + 2));
6017		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6018		skb_pull(skb, IGB_TS_HDR_LEN);
6019	} else {
6020		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6021			return;
6022
6023		regval = rd32(E1000_RXSTMPL);
6024		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6025	}
6026
6027	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6028}
6029
6030static void igb_rx_vlan(struct igb_ring *ring,
6031			union e1000_adv_rx_desc *rx_desc,
6032			struct sk_buff *skb)
6033{
6034	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6035		u16 vid;
6036		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6037		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6038			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6039		else
6040			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6041
6042		__vlan_hwaccel_put_tag(skb, vid);
6043	}
6044}
6045
6046static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6047{
6048	/* HW will not DMA in data larger than the given buffer, even if it
6049	 * parses the (NFS, of course) header to be larger.  In that case, it
6050	 * fills the header buffer and spills the rest into the page.
6051	 */
6052	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6053	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6054	if (hlen > IGB_RX_HDR_LEN)
6055		hlen = IGB_RX_HDR_LEN;
6056	return hlen;
6057}
6058
6059static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6060{
6061	struct igb_ring *rx_ring = q_vector->rx.ring;
6062	union e1000_adv_rx_desc *rx_desc;
6063	const int current_node = numa_node_id();
6064	unsigned int total_bytes = 0, total_packets = 0;
6065	u16 cleaned_count = igb_desc_unused(rx_ring);
6066	u16 i = rx_ring->next_to_clean;
6067
6068	rx_desc = IGB_RX_DESC(rx_ring, i);
6069
6070	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6071		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6072		struct sk_buff *skb = buffer_info->skb;
6073		union e1000_adv_rx_desc *next_rxd;
6074
6075		buffer_info->skb = NULL;
6076		prefetch(skb->data);
6077
6078		i++;
6079		if (i == rx_ring->count)
6080			i = 0;
6081
6082		next_rxd = IGB_RX_DESC(rx_ring, i);
6083		prefetch(next_rxd);
6084
6085		/*
6086		 * This memory barrier is needed to keep us from reading
6087		 * any other fields out of the rx_desc until we know the
6088		 * RXD_STAT_DD bit is set
6089		 */
6090		rmb();
6091
6092		if (!skb_is_nonlinear(skb)) {
6093			__skb_put(skb, igb_get_hlen(rx_desc));
6094			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6095					 IGB_RX_HDR_LEN,
6096					 DMA_FROM_DEVICE);
6097			buffer_info->dma = 0;
6098		}
6099
6100		if (rx_desc->wb.upper.length) {
6101			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6102
6103			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6104						buffer_info->page,
6105						buffer_info->page_offset,
6106						length);
6107
6108			skb->len += length;
6109			skb->data_len += length;
6110			skb->truesize += PAGE_SIZE / 2;
6111
6112			if ((page_count(buffer_info->page) != 1) ||
6113			    (page_to_nid(buffer_info->page) != current_node))
6114				buffer_info->page = NULL;
6115			else
6116				get_page(buffer_info->page);
6117
6118			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6119				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6120			buffer_info->page_dma = 0;
6121		}
6122
6123		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6124			struct igb_rx_buffer *next_buffer;
6125			next_buffer = &rx_ring->rx_buffer_info[i];
6126			buffer_info->skb = next_buffer->skb;
6127			buffer_info->dma = next_buffer->dma;
6128			next_buffer->skb = skb;
6129			next_buffer->dma = 0;
6130			goto next_desc;
6131		}
6132
6133		if (unlikely((igb_test_staterr(rx_desc,
6134					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6135			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6136			dev_kfree_skb_any(skb);
6137			goto next_desc;
6138		}
6139
6140		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6141		igb_rx_hash(rx_ring, rx_desc, skb);
6142		igb_rx_checksum(rx_ring, rx_desc, skb);
6143		igb_rx_vlan(rx_ring, rx_desc, skb);
6144
6145		total_bytes += skb->len;
6146		total_packets++;
6147
6148		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6149
6150		napi_gro_receive(&q_vector->napi, skb);
6151
6152		budget--;
6153next_desc:
6154		if (!budget)
6155			break;
6156
6157		cleaned_count++;
6158		/* return some buffers to hardware, one at a time is too slow */
6159		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6160			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6161			cleaned_count = 0;
6162		}
6163
6164		/* use prefetched values */
6165		rx_desc = next_rxd;
6166	}
6167
6168	rx_ring->next_to_clean = i;
6169	u64_stats_update_begin(&rx_ring->rx_syncp);
6170	rx_ring->rx_stats.packets += total_packets;
6171	rx_ring->rx_stats.bytes += total_bytes;
6172	u64_stats_update_end(&rx_ring->rx_syncp);
6173	q_vector->rx.total_packets += total_packets;
6174	q_vector->rx.total_bytes += total_bytes;
6175
6176	if (cleaned_count)
6177		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6178
6179	return !!budget;
6180}
6181
6182static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6183				 struct igb_rx_buffer *bi)
6184{
6185	struct sk_buff *skb = bi->skb;
6186	dma_addr_t dma = bi->dma;
6187
6188	if (dma)
6189		return true;
6190
6191	if (likely(!skb)) {
6192		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6193						IGB_RX_HDR_LEN);
6194		bi->skb = skb;
6195		if (!skb) {
6196			rx_ring->rx_stats.alloc_failed++;
6197			return false;
6198		}
6199
6200		/* initialize skb for ring */
6201		skb_record_rx_queue(skb, rx_ring->queue_index);
6202	}
6203
6204	dma = dma_map_single(rx_ring->dev, skb->data,
6205			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6206
6207	if (dma_mapping_error(rx_ring->dev, dma)) {
6208		rx_ring->rx_stats.alloc_failed++;
6209		return false;
6210	}
6211
6212	bi->dma = dma;
6213	return true;
6214}
6215
6216static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6217				  struct igb_rx_buffer *bi)
6218{
6219	struct page *page = bi->page;
6220	dma_addr_t page_dma = bi->page_dma;
6221	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6222
6223	if (page_dma)
6224		return true;
6225
6226	if (!page) {
6227		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6228		bi->page = page;
6229		if (unlikely(!page)) {
6230			rx_ring->rx_stats.alloc_failed++;
6231			return false;
6232		}
6233	}
6234
6235	page_dma = dma_map_page(rx_ring->dev, page,
6236				page_offset, PAGE_SIZE / 2,
6237				DMA_FROM_DEVICE);
6238
6239	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6240		rx_ring->rx_stats.alloc_failed++;
6241		return false;
6242	}
6243
6244	bi->page_dma = page_dma;
6245	bi->page_offset = page_offset;
6246	return true;
6247}
6248
6249/**
6250 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6251 * @rx_ring: Rx descriptor ring on which to place the new buffers
 * @cleaned_count: number of buffers to replace
6252 **/
6253void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6254{
6255	union e1000_adv_rx_desc *rx_desc;
6256	struct igb_rx_buffer *bi;
6257	u16 i = rx_ring->next_to_use;
6258
6259	rx_desc = IGB_RX_DESC(rx_ring, i);
6260	bi = &rx_ring->rx_buffer_info[i];
6261	i -= rx_ring->count;
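	/* as in the Tx clean path, track i as a negative offset so ring
	 * wrap reduces to a simple !i test
	 */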
6262
6263	while (cleaned_count--) {
6264		if (!igb_alloc_mapped_skb(rx_ring, bi))
6265			break;
6266
6267		/* Refresh the desc even if buffer_addrs didn't change
6268		 * because each write-back erases this info. */
6269		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6270
6271		if (!igb_alloc_mapped_page(rx_ring, bi))
6272			break;
6273
6274		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6275
6276		rx_desc++;
6277		bi++;
6278		i++;
6279		if (unlikely(!i)) {
6280			rx_desc = IGB_RX_DESC(rx_ring, 0);
6281			bi = rx_ring->rx_buffer_info;
6282			i -= rx_ring->count;
6283		}
6284
6285		/* clear the hdr_addr for the next_to_use descriptor */
6286		rx_desc->read.hdr_addr = 0;
6287	}
6288
6289	i += rx_ring->count;
6290
6291	if (rx_ring->next_to_use != i) {
6292		rx_ring->next_to_use = i;
6293
6294		/* Force memory writes to complete before letting h/w
6295		 * know there are new descriptors to fetch.  (Only
6296		 * applicable for weak-ordered memory model archs,
6297		 * such as IA-64). */
6298		wmb();
6299		writel(i, rx_ring->tail);
6300	}
6301}
6302
6303/**
6304 * igb_mii_ioctl - handle MII register ioctls
6305 * @netdev: network interface device structure
6306 * @ifr: pointer to interface request structure
6307 * @cmd: ioctl command to execute
6308 **/
6309static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6310{
6311	struct igb_adapter *adapter = netdev_priv(netdev);
6312	struct mii_ioctl_data *data = if_mii(ifr);
6313
6314	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6315		return -EOPNOTSUPP;
6316
6317	switch (cmd) {
6318	case SIOCGMIIPHY:
6319		data->phy_id = adapter->hw.phy.addr;
6320		break;
6321	case SIOCGMIIREG:
6322		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6323		                     &data->val_out))
6324			return -EIO;
6325		break;
6326	case SIOCSMIIREG:
6327	default:
6328		return -EOPNOTSUPP;
6329	}
6330	return 0;
6331}
6332
6333/**
6334 * igb_hwtstamp_ioctl - control hardware time stamping
6335 * @netdev: network interface device structure
6336 * @ifr: pointer to interface request structure
6337 * @cmd: ioctl command to execute
6338 *
6339 * Outgoing time stamping can be enabled and disabled. Play nice and
6340 * disable it when requested, although it shouldn't cause any overhead
6341 * when no packet needs it. At most one packet in the queue may be
6342 * marked for time stamping, otherwise it would be impossible to tell
6343 * for sure to which packet the hardware time stamp belongs.
6344 *
6345 * Incoming time stamping has to be configured via the hardware
6346 * filters. Not all combinations are supported, in particular event
6347 * type has to be specified. Matching the kind of event packet is
6348 * not supported, with the exception of "all V2 events regardless of
6349 * level 2 or 4".
6350 *
6351 **/
6352static int igb_hwtstamp_ioctl(struct net_device *netdev,
6353			      struct ifreq *ifr, int cmd)
6354{
6355	struct igb_adapter *adapter = netdev_priv(netdev);
6356	struct e1000_hw *hw = &adapter->hw;
6357	struct hwtstamp_config config;
6358	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6359	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6360	u32 tsync_rx_cfg = 0;
6361	bool is_l4 = false;
6362	bool is_l2 = false;
6363	u32 regval;
6364
6365	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6366		return -EFAULT;
6367
6368	/* reserved for future extensions */
6369	if (config.flags)
6370		return -EINVAL;
6371
6372	switch (config.tx_type) {
6373	case HWTSTAMP_TX_OFF:
6374		tsync_tx_ctl = 0;
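		/* fall through - OFF only clears the Tx timestamp enable bit */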
6375	case HWTSTAMP_TX_ON:
6376		break;
6377	default:
6378		return -ERANGE;
6379	}
6380
6381	switch (config.rx_filter) {
6382	case HWTSTAMP_FILTER_NONE:
6383		tsync_rx_ctl = 0;
6384		break;
6385	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6386	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6387	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6388	case HWTSTAMP_FILTER_ALL:
6389		/*
6390		 * register TSYNCRXCFG must be set, therefore it is not
6391		 * possible to time stamp both Sync and Delay_Req messages
6392		 * => fall back to time stamping all packets
6393		 */
6394		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6395		config.rx_filter = HWTSTAMP_FILTER_ALL;
6396		break;
6397	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6398		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6399		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6400		is_l4 = true;
6401		break;
6402	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6403		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6404		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6405		is_l4 = true;
6406		break;
6407	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6408	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6409		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6410		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6411		is_l2 = true;
6412		is_l4 = true;
6413		config.rx_filter = HWTSTAMP_FILTER_SOME;
6414		break;
6415	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6416	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6417		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6418		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6419		is_l2 = true;
6420		is_l4 = true;
6421		config.rx_filter = HWTSTAMP_FILTER_SOME;
6422		break;
6423	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6424	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6425	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6426		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6427		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6428		is_l2 = true;
6429		is_l4 = true;
6430		break;
6431	default:
6432		return -ERANGE;
6433	}
6434
6435	if (hw->mac.type == e1000_82575) {
6436		if (tsync_rx_ctl || tsync_tx_ctl)
6437			return -EINVAL;
6438		return 0;
6439	}
6440
6441	/*
6442	 * Per-packet timestamping only works if all packets are
6443	 * timestamped, so enable timestamping in all packets as
6444	 * long as one rx filter was configured.
6445	 */
6446	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6447		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6448		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6449	}
6450
6451	/* enable/disable TX */
6452	regval = rd32(E1000_TSYNCTXCTL);
6453	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6454	regval |= tsync_tx_ctl;
6455	wr32(E1000_TSYNCTXCTL, regval);
6456
6457	/* enable/disable RX */
6458	regval = rd32(E1000_TSYNCRXCTL);
6459	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6460	regval |= tsync_rx_ctl;
6461	wr32(E1000_TSYNCRXCTL, regval);
6462
6463	/* define which PTP packets are time stamped */
6464	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6465
6466	/* define ethertype filter for timestamped packets */
6467	if (is_l2)
6468		wr32(E1000_ETQF(3),
6469		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6470		                 E1000_ETQF_1588 | /* enable timestamping */
6471		                 ETH_P_1588));     /* 1588 eth protocol type */
6472	else
6473		wr32(E1000_ETQF(3), 0);
6474
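/* 319 is the UDP destination port used by PTP (IEEE 1588) event messages */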
6475#define PTP_PORT 319
6476	/* L4 Queue Filter[3]: filter by destination port and protocol */
6477	if (is_l4) {
6478		u32 ftqf = (IPPROTO_UDP /* UDP */
6479			| E1000_FTQF_VF_BP /* VF not compared */
6480			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6481			| E1000_FTQF_MASK); /* mask all inputs */
6482		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6483
6484		wr32(E1000_IMIR(3), htons(PTP_PORT));
6485		wr32(E1000_IMIREXT(3),
6486		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6487		if (hw->mac.type == e1000_82576) {
6488			/* enable source port check */
6489			wr32(E1000_SPQF(3), htons(PTP_PORT));
6490			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6491		}
6492		wr32(E1000_FTQF(3), ftqf);
6493	} else {
6494		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6495	}
6496	wrfl();
6497
6498	adapter->hwtstamp_config = config;
6499
6500	/* clear TX/RX time stamp registers, just to be sure */
6501	regval = rd32(E1000_TXSTMPH);
6502	regval = rd32(E1000_RXSTMPH);
6503
6504	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6505		-EFAULT : 0;
6506}
6507
6508/**
6509 * igb_ioctl - handle device-specific ioctl calls
6510 * @netdev: network interface device structure
6511 * @ifr: pointer to interface request structure
6512 * @cmd: ioctl command to execute
6513 **/
6514static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6515{
6516	switch (cmd) {
6517	case SIOCGMIIPHY:
6518	case SIOCGMIIREG:
6519	case SIOCSMIIREG:
6520		return igb_mii_ioctl(netdev, ifr, cmd);
6521	case SIOCSHWTSTAMP:
6522		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6523	default:
6524		return -EOPNOTSUPP;
6525	}
6526}
6527
6528s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6529{
6530	struct igb_adapter *adapter = hw->back;
6531	u16 cap_offset;
6532
6533	cap_offset = adapter->pdev->pcie_cap;
6534	if (!cap_offset)
6535		return -E1000_ERR_CONFIG;
6536
6537	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6538
6539	return 0;
6540}
6541
6542s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6543{
6544	struct igb_adapter *adapter = hw->back;
6545	u16 cap_offset;
6546
6547	cap_offset = adapter->pdev->pcie_cap;
6548	if (!cap_offset)
6549		return -E1000_ERR_CONFIG;
6550
6551	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6552
6553	return 0;
6554}
6555
6556static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6557{
6558	struct igb_adapter *adapter = netdev_priv(netdev);
6559	struct e1000_hw *hw = &adapter->hw;
6560	u32 ctrl, rctl;
6561	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6562
6563	if (enable) {
6564		/* enable VLAN tag insert/strip */
6565		ctrl = rd32(E1000_CTRL);
6566		ctrl |= E1000_CTRL_VME;
6567		wr32(E1000_CTRL, ctrl);
6568
6569		/* Disable CFI check */
6570		rctl = rd32(E1000_RCTL);
6571		rctl &= ~E1000_RCTL_CFIEN;
6572		wr32(E1000_RCTL, rctl);
6573	} else {
6574		/* disable VLAN tag insert/strip */
6575		ctrl = rd32(E1000_CTRL);
6576		ctrl &= ~E1000_CTRL_VME;
6577		wr32(E1000_CTRL, ctrl);
6578	}
6579
6580	igb_rlpml_set(adapter);
6581}
6582
6583static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6584{
6585	struct igb_adapter *adapter = netdev_priv(netdev);
6586	struct e1000_hw *hw = &adapter->hw;
6587	int pf_id = adapter->vfs_allocated_count;
6588
6589	/* attempt to add filter to vlvf array */
6590	igb_vlvf_set(adapter, vid, true, pf_id);
6591
6592	/* add the filter since PF can receive vlans w/o entry in vlvf */
6593	igb_vfta_set(hw, vid, true);
6594
6595	set_bit(vid, adapter->active_vlans);
6596
6597	return 0;
6598}
6599
6600static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6601{
6602	struct igb_adapter *adapter = netdev_priv(netdev);
6603	struct e1000_hw *hw = &adapter->hw;
6604	int pf_id = adapter->vfs_allocated_count;
6605	s32 err;
6606
6607	/* remove vlan from VLVF table array */
6608	err = igb_vlvf_set(adapter, vid, false, pf_id);
6609
6610	/* if vid was not present in VLVF just remove it from table */
6611	if (err)
6612		igb_vfta_set(hw, vid, false);
6613
6614	clear_bit(vid, adapter->active_vlans);
6615
6616	return 0;
6617}
6618
6619static void igb_restore_vlan(struct igb_adapter *adapter)
6620{
6621	u16 vid;
6622
6623	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6624
6625	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6626		igb_vlan_rx_add_vid(adapter->netdev, vid);
6627}
6628
6629int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6630{
6631	struct pci_dev *pdev = adapter->pdev;
6632	struct e1000_mac_info *mac = &adapter->hw.mac;
6633
6634	mac->autoneg = 0;
6635
6636	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6637	 * for the switch() below to work */
6638	if ((spd & 1) || (dplx & ~1))
6639		goto err_inval;
6640
6641	/* Fiber NICs only allow 1000 Mbps Full duplex */
6642	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6643	    (spd != SPEED_1000 || dplx != DUPLEX_FULL))
6645		goto err_inval;
6646
6647	switch (spd + dplx) {
6648	case SPEED_10 + DUPLEX_HALF:
6649		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6650		break;
6651	case SPEED_10 + DUPLEX_FULL:
6652		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6653		break;
6654	case SPEED_100 + DUPLEX_HALF:
6655		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6656		break;
6657	case SPEED_100 + DUPLEX_FULL:
6658		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6659		break;
6660	case SPEED_1000 + DUPLEX_FULL:
6661		mac->autoneg = 1;
6662		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6663		break;
6664	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6665	default:
6666		goto err_inval;
6667	}
6668	return 0;
6669
6670err_inval:
6671	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6672	return -EINVAL;
6673}
6674
6675static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6676			  bool runtime)
6677{
6678	struct net_device *netdev = pci_get_drvdata(pdev);
6679	struct igb_adapter *adapter = netdev_priv(netdev);
6680	struct e1000_hw *hw = &adapter->hw;
6681	u32 ctrl, rctl, status;
6682	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6683#ifdef CONFIG_PM
6684	int retval = 0;
6685#endif
6686
6687	netif_device_detach(netdev);
6688
6689	if (netif_running(netdev))
6690		__igb_close(netdev, true);
6691
6692	igb_clear_interrupt_scheme(adapter);
6693
6694#ifdef CONFIG_PM
6695	retval = pci_save_state(pdev);
6696	if (retval)
6697		return retval;
6698#endif
6699
6700	status = rd32(E1000_STATUS);
6701	if (status & E1000_STATUS_LU)
6702		wufc &= ~E1000_WUFC_LNKC;
6703
6704	if (wufc) {
6705		igb_setup_rctl(adapter);
6706		igb_set_rx_mode(netdev);
6707
6708		/* turn on all-multi mode if wake on multicast is enabled */
6709		if (wufc & E1000_WUFC_MC) {
6710			rctl = rd32(E1000_RCTL);
6711			rctl |= E1000_RCTL_MPE;
6712			wr32(E1000_RCTL, rctl);
6713		}
6714
6715		ctrl = rd32(E1000_CTRL);
6716		/* advertise wake from D3Cold */
6717		#define E1000_CTRL_ADVD3WUC 0x00100000
6718		/* phy power management enable */
6719		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6720		ctrl |= E1000_CTRL_ADVD3WUC;
6721		wr32(E1000_CTRL, ctrl);
6722
6723		/* Allow time for pending master requests to run */
6724		igb_disable_pcie_master(hw);
6725
6726		wr32(E1000_WUC, E1000_WUC_PME_EN);
6727		wr32(E1000_WUFC, wufc);
6728	} else {
6729		wr32(E1000_WUC, 0);
6730		wr32(E1000_WUFC, 0);
6731	}
6732
6733	*enable_wake = wufc || adapter->en_mng_pt;
6734	if (!*enable_wake)
6735		igb_power_down_link(adapter);
6736	else
6737		igb_power_up_link(adapter);
6738
6739	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6740	 * would have already happened in close and is redundant. */
6741	igb_release_hw_control(adapter);
6742
6743	pci_disable_device(pdev);
6744
6745	return 0;
6746}
6747
6748#ifdef CONFIG_PM
6749#ifdef CONFIG_PM_SLEEP
6750static int igb_suspend(struct device *dev)
6751{
6752	int retval;
6753	bool wake;
6754	struct pci_dev *pdev = to_pci_dev(dev);
6755
6756	retval = __igb_shutdown(pdev, &wake, 0);
6757	if (retval)
6758		return retval;
6759
6760	if (wake) {
6761		pci_prepare_to_sleep(pdev);
6762	} else {
6763		pci_wake_from_d3(pdev, false);
6764		pci_set_power_state(pdev, PCI_D3hot);
6765	}
6766
6767	return 0;
6768}
6769#endif /* CONFIG_PM_SLEEP */
6770
6771static int igb_resume(struct device *dev)
6772{
6773	struct pci_dev *pdev = to_pci_dev(dev);
6774	struct net_device *netdev = pci_get_drvdata(pdev);
6775	struct igb_adapter *adapter = netdev_priv(netdev);
6776	struct e1000_hw *hw = &adapter->hw;
6777	u32 err;
6778
6779	pci_set_power_state(pdev, PCI_D0);
6780	pci_restore_state(pdev);
6781	pci_save_state(pdev);
6782
6783	err = pci_enable_device_mem(pdev);
6784	if (err) {
6785		dev_err(&pdev->dev,
6786			"igb: Cannot enable PCI device from suspend\n");
6787		return err;
6788	}
6789	pci_set_master(pdev);
6790
6791	pci_enable_wake(pdev, PCI_D3hot, 0);
6792	pci_enable_wake(pdev, PCI_D3cold, 0);
6793
6794	if (!rtnl_is_locked()) {
6795		/*
6796		 * shut up ASSERT_RTNL() warning in
6797		 * netif_set_real_num_tx/rx_queues.
6798		 */
6799		rtnl_lock();
6800		err = igb_init_interrupt_scheme(adapter);
6801		rtnl_unlock();
6802	} else {
6803		err = igb_init_interrupt_scheme(adapter);
6804	}
6805	if (err) {
6806		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6807		return -ENOMEM;
6808	}
6809
6810	igb_reset(adapter);
6811
6812	/* let the f/w know that the h/w is now under the control of the
6813	 * driver. */
6814	igb_get_hw_control(adapter);
6815
6816	wr32(E1000_WUS, ~0);
6817
6818	if (netdev->flags & IFF_UP) {
6819		err = __igb_open(netdev, true);
6820		if (err)
6821			return err;
6822	}
6823
6824	netif_device_attach(netdev);
6825	return 0;
6826}
6827
6828#ifdef CONFIG_PM_RUNTIME
6829static int igb_runtime_idle(struct device *dev)
6830{
6831	struct pci_dev *pdev = to_pci_dev(dev);
6832	struct net_device *netdev = pci_get_drvdata(pdev);
6833	struct igb_adapter *adapter = netdev_priv(netdev);
6834
6835	if (!igb_has_link(adapter))
6836		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6837
6838	return -EBUSY;
6839}
6840
6841static int igb_runtime_suspend(struct device *dev)
6842{
6843	struct pci_dev *pdev = to_pci_dev(dev);
6844	int retval;
6845	bool wake;
6846
6847	retval = __igb_shutdown(pdev, &wake, 1);
6848	if (retval)
6849		return retval;
6850
6851	if (wake) {
6852		pci_prepare_to_sleep(pdev);
6853	} else {
6854		pci_wake_from_d3(pdev, false);
6855		pci_set_power_state(pdev, PCI_D3hot);
6856	}
6857
6858	return 0;
6859}
6860
6861static int igb_runtime_resume(struct device *dev)
6862{
6863	return igb_resume(dev);
6864}
6865#endif /* CONFIG_PM_RUNTIME */
6866#endif
6867
6868static void igb_shutdown(struct pci_dev *pdev)
6869{
6870	bool wake;
6871
6872	__igb_shutdown(pdev, &wake, 0);
6873
6874	if (system_state == SYSTEM_POWER_OFF) {
6875		pci_wake_from_d3(pdev, wake);
6876		pci_set_power_state(pdev, PCI_D3hot);
6877	}
6878}
6879
6880#ifdef CONFIG_NET_POLL_CONTROLLER
6881/*
6882 * Polling 'interrupt' - used by things like netconsole to send skbs
6883 * without having to re-enable interrupts. It's not called while
6884 * the interrupt routine is executing.
6885 */
6886static void igb_netpoll(struct net_device *netdev)
6887{
6888	struct igb_adapter *adapter = netdev_priv(netdev);
6889	struct e1000_hw *hw = &adapter->hw;
6890	struct igb_q_vector *q_vector;
6891	int i;
6892
6893	for (i = 0; i < adapter->num_q_vectors; i++) {
6894		q_vector = adapter->q_vector[i];
6895		if (adapter->msix_entries)
6896			wr32(E1000_EIMC, q_vector->eims_value);
6897		else
6898			igb_irq_disable(adapter);
6899		napi_schedule(&q_vector->napi);
6900	}
6901}
6902#endif /* CONFIG_NET_POLL_CONTROLLER */
6903
6904/**
6905 * igb_io_error_detected - called when PCI error is detected
6906 * @pdev: Pointer to PCI device
6907 * @state: The current pci connection state
6908 *
6909 * This function is called after a PCI bus error affecting
6910 * this device has been detected.
6911 */
6912static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6913					      pci_channel_state_t state)
6914{
6915	struct net_device *netdev = pci_get_drvdata(pdev);
6916	struct igb_adapter *adapter = netdev_priv(netdev);
6917
6918	netif_device_detach(netdev);
6919
6920	if (state == pci_channel_io_perm_failure)
6921		return PCI_ERS_RESULT_DISCONNECT;
6922
6923	if (netif_running(netdev))
6924		igb_down(adapter);
6925	pci_disable_device(pdev);
6926
6927	/* Request a slot reset. */
6928	return PCI_ERS_RESULT_NEED_RESET;
6929}
6930
6931/**
6932 * igb_io_slot_reset - called after the pci bus has been reset.
6933 * @pdev: Pointer to PCI device
6934 *
6935 * Restart the card from scratch, as if from a cold-boot. Implementation
6936 * resembles the first-half of the igb_resume routine.
6937 */
6938static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6939{
6940	struct net_device *netdev = pci_get_drvdata(pdev);
6941	struct igb_adapter *adapter = netdev_priv(netdev);
6942	struct e1000_hw *hw = &adapter->hw;
6943	pci_ers_result_t result;
6944	int err;
6945
6946	if (pci_enable_device_mem(pdev)) {
6947		dev_err(&pdev->dev,
6948			"Cannot re-enable PCI device after reset.\n");
6949		result = PCI_ERS_RESULT_DISCONNECT;
6950	} else {
6951		pci_set_master(pdev);
6952		pci_restore_state(pdev);
6953		pci_save_state(pdev);
6954
6955		pci_enable_wake(pdev, PCI_D3hot, 0);
6956		pci_enable_wake(pdev, PCI_D3cold, 0);
6957
6958		igb_reset(adapter);
6959		wr32(E1000_WUS, ~0);
6960		result = PCI_ERS_RESULT_RECOVERED;
6961	}
6962
6963	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6964	if (err) {
6965		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6966		        "failed 0x%0x\n", err);
6967		/* non-fatal, continue */
6968	}
6969
6970	return result;
6971}
6972
6973/**
6974 * igb_io_resume - called when traffic can start flowing again.
6975 * @pdev: Pointer to PCI device
6976 *
6977 * This callback is called when the error recovery driver tells us that
6978 * its OK to resume normal operation. Implementation resembles the
6979 * second-half of the igb_resume routine.
6980 */
6981static void igb_io_resume(struct pci_dev *pdev)
6982{
6983	struct net_device *netdev = pci_get_drvdata(pdev);
6984	struct igb_adapter *adapter = netdev_priv(netdev);
6985
6986	if (netif_running(netdev)) {
6987		if (igb_up(adapter)) {
6988			dev_err(&pdev->dev, "igb_up failed after reset\n");
6989			return;
6990		}
6991	}
6992
6993	netif_device_attach(netdev);
6994
6995	/* let the f/w know that the h/w is now under the control of the
6996	 * driver. */
6997	igb_get_hw_control(adapter);
6998}
6999
7000static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
7001                             u8 qsel)
7002{
7003	u32 rar_low, rar_high;
7004	struct e1000_hw *hw = &adapter->hw;
7005
7006	/* HW expects these in little endian so we reverse the byte order
7007	 * from network order (big endian) to little endian
7008	 */
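	/* e.g. for 00:11:22:33:44:55 this yields rar_low = 0x33221100 and
	 * the low 16 bits of rar_high = 0x5544
	 */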
7009	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7010	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7011	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
7012
7013	/* Indicate to hardware the Address is Valid. */
7014	rar_high |= E1000_RAH_AV;
7015
7016	if (hw->mac.type == e1000_82575)
7017		rar_high |= E1000_RAH_POOL_1 * qsel;
7018	else
7019		rar_high |= E1000_RAH_POOL_1 << qsel;
7020
7021	wr32(E1000_RAL(index), rar_low);
7022	wrfl();
7023	wr32(E1000_RAH(index), rar_high);
7024	wrfl();
7025}
7026
7027static int igb_set_vf_mac(struct igb_adapter *adapter,
7028                          int vf, unsigned char *mac_addr)
7029{
7030	struct e1000_hw *hw = &adapter->hw;
7031	/* VF MAC addresses start at the end of the receive addresses and move
7032	 * towards the first, so a collision should not be possible */
7033	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7034
7035	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7036
7037	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7038
7039	return 0;
7040}
7041
7042static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7043{
7044	struct igb_adapter *adapter = netdev_priv(netdev);
7045	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7046		return -EINVAL;
7047	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7048	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7049	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7050				      " change effective.");
7051	if (test_bit(__IGB_DOWN, &adapter->state)) {
7052		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7053			 " but the PF device is not up.\n");
7054		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7055			 " attempting to use the VF device.\n");
7056	}
7057	return igb_set_vf_mac(adapter, vf, mac);
7058}
7059
7060static int igb_link_mbps(int internal_link_speed)
7061{
7062	switch (internal_link_speed) {
7063	case SPEED_100:
7064		return 100;
7065	case SPEED_1000:
7066		return 1000;
7067	default:
7068		return 0;
7069	}
7070}
7071
7072static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7073				  int link_speed)
7074{
7075	int rf_dec, rf_int;
7076	u32 bcnrc_val;
7077
7078	if (tx_rate != 0) {
7079		/* Calculate the rate factor values to set */
7080		rf_int = link_speed / tx_rate;
7081		rf_dec = (link_speed - (rf_int * tx_rate));
7082		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
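		/*
		 * Example: a 1000 Mbps link limited to 300 Mbps gives
		 * rf_int = 3 and rf_dec = (100 << E1000_RTTBCNRC_RF_INT_SHIFT) / 300,
		 * i.e. a fixed-point rate factor of roughly 3.33.
		 */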
7083
7084		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7085		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7086		               E1000_RTTBCNRC_RF_INT_MASK);
7087		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7088	} else {
7089		bcnrc_val = 0;
7090	}
7091
7092	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7093	wr32(E1000_RTTBCNRC, bcnrc_val);
7094}
7095
7096static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7097{
7098	int actual_link_speed, i;
7099	bool reset_rate = false;
7100
7101	/* VF TX rate limit was not set or not supported */
7102	if ((adapter->vf_rate_link_speed == 0) ||
7103	    (adapter->hw.mac.type != e1000_82576))
7104		return;
7105
7106	actual_link_speed = igb_link_mbps(adapter->link_speed);
7107	if (actual_link_speed != adapter->vf_rate_link_speed) {
7108		reset_rate = true;
7109		adapter->vf_rate_link_speed = 0;
7110		dev_info(&adapter->pdev->dev,
7111		         "Link speed has been changed. VF Transmit "
7112		         "rate is disabled\n");
7113	}
7114
7115	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7116		if (reset_rate)
7117			adapter->vf_data[i].tx_rate = 0;
7118
7119		igb_set_vf_rate_limit(&adapter->hw, i,
7120		                      adapter->vf_data[i].tx_rate,
7121		                      actual_link_speed);
7122	}
7123}
7124
7125static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7126{
7127	struct igb_adapter *adapter = netdev_priv(netdev);
7128	struct e1000_hw *hw = &adapter->hw;
7129	int actual_link_speed;
7130
7131	if (hw->mac.type != e1000_82576)
7132		return -EOPNOTSUPP;
7133
7134	actual_link_speed = igb_link_mbps(adapter->link_speed);
7135	if ((vf >= adapter->vfs_allocated_count) ||
7136	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7137	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7138		return -EINVAL;
7139
7140	adapter->vf_rate_link_speed = actual_link_speed;
7141	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7142	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7143
7144	return 0;
7145}
7146
7147static int igb_ndo_get_vf_config(struct net_device *netdev,
7148				 int vf, struct ifla_vf_info *ivi)
7149{
7150	struct igb_adapter *adapter = netdev_priv(netdev);
7151	if (vf >= adapter->vfs_allocated_count)
7152		return -EINVAL;
7153	ivi->vf = vf;
7154	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7155	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7156	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7157	ivi->qos = adapter->vf_data[vf].pf_qos;
7158	return 0;
7159}
7160
7161static void igb_vmm_control(struct igb_adapter *adapter)
7162{
7163	struct e1000_hw *hw = &adapter->hw;
7164	u32 reg;
7165
7166	switch (hw->mac.type) {
7167	case e1000_82575:
7168	default:
7169		/* replication is not supported for 82575 */
7170		return;
7171	case e1000_82576:
7172		/* notify HW that the MAC is adding vlan tags */
7173		reg = rd32(E1000_DTXCTL);
7174		reg |= E1000_DTXCTL_VLAN_ADDED;
7175		wr32(E1000_DTXCTL, reg);
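		/* fall through - 82576 also needs the replication vlan-stripping setup below */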
7176	case e1000_82580:
7177		/* enable replication vlan tag stripping */
7178		reg = rd32(E1000_RPLOLR);
7179		reg |= E1000_RPLOLR_STRVLAN;
7180		wr32(E1000_RPLOLR, reg);
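		/* fall through */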
7181	case e1000_i350:
7182		/* none of the above registers are supported by i350 */
7183		break;
7184	}
7185
7186	if (adapter->vfs_allocated_count) {
7187		igb_vmdq_set_loopback_pf(hw, true);
7188		igb_vmdq_set_replication_pf(hw, true);
7189		igb_vmdq_set_anti_spoofing_pf(hw, true,
7190						adapter->vfs_allocated_count);
7191	} else {
7192		igb_vmdq_set_loopback_pf(hw, false);
7193		igb_vmdq_set_replication_pf(hw, false);
7194	}
7195}
7196
7197static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7198{
7199	struct e1000_hw *hw = &adapter->hw;
7200	u32 dmac_thr;
7201	u16 hwm;
7202
7203	if (hw->mac.type > e1000_82580) {
7204		if (adapter->flags & IGB_FLAG_DMAC) {
7205			u32 reg;
7206
7207			/* force threshold to 0. */
7208			wr32(E1000_DMCTXTH, 0);
7209
7210			/*
7211			 * DMA Coalescing high water mark needs to be greater
7212			 * than the Rx threshold. Set hwm to PBA - max frame
7213			 * size in 16B units, capping it at PBA - 6KB.
7214			 */
7215			hwm = 64 * pba - adapter->max_frame_size / 16;
7216			if (hwm < 64 * (pba - 6))
7217				hwm = 64 * (pba - 6);
7218			reg = rd32(E1000_FCRTC);
7219			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7220			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7221				& E1000_FCRTC_RTH_COAL_MASK);
7222			wr32(E1000_FCRTC, reg);
7223
7224			/*
7225			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7226			 * frame size, capping it at PBA - 10KB.
7227			 */
7228			dmac_thr = pba - adapter->max_frame_size / 512;
7229			if (dmac_thr < pba - 10)
7230				dmac_thr = pba - 10;
7231			reg = rd32(E1000_DMACR);
7232			reg &= ~E1000_DMACR_DMACTHR_MASK;
7233			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7234				& E1000_DMACR_DMACTHR_MASK);
7235
7236			/* transition to L0s or L1 if available */
7237			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7238
7239			/* watchdog timer: ~1000 usec, in 32 usec increments */
7240			reg |= (1000 >> 5);
7241			wr32(E1000_DMACR, reg);
7242
7243			/*
7244			 * no lower threshold to disable
7245			 * coalescing (smart FIFO) - UTRESH=0
7246			 */
7247			wr32(E1000_DMCRTRH, 0);
7248
7249			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7250
7251			wr32(E1000_DMCTLX, reg);
7252
7253			/*
7254			 * free space in tx packet buffer to wake from
7255			 * DMA coal
7256			 */
7257			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7258			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7259
7260			/*
7261			 * make low power state decision controlled
7262			 * by DMA coal
7263			 */
7264			reg = rd32(E1000_PCIEMISC);
7265			reg &= ~E1000_PCIEMISC_LX_DECISION;
7266			wr32(E1000_PCIEMISC, reg);
7267		} /* endif adapter->dmac is not disabled */
7268	} else if (hw->mac.type == e1000_82580) {
7269		u32 reg = rd32(E1000_PCIEMISC);
7270		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7271		wr32(E1000_DMACR, 0);
7272	}
7273}
7274
7275/* igb_main.c */
7276