igb_main.c revision 3dbdf96928dcdece134113c8ffa137f1a3b5dd88
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 4
63#define MIN 0
64#define BUILD 1
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
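/* With the values above, DRV_VERSION expands to the string "4.0.1-k" */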
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
103	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
104	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
105	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
106	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
107	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
108	/* required last entry */
109	{0, }
110};
111
112MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
113
114void igb_reset(struct igb_adapter *);
115static int igb_setup_all_tx_resources(struct igb_adapter *);
116static int igb_setup_all_rx_resources(struct igb_adapter *);
117static void igb_free_all_tx_resources(struct igb_adapter *);
118static void igb_free_all_rx_resources(struct igb_adapter *);
119static void igb_setup_mrqc(struct igb_adapter *);
120static int igb_probe(struct pci_dev *, const struct pci_device_id *);
121static void __devexit igb_remove(struct pci_dev *pdev);
122static int igb_sw_init(struct igb_adapter *);
123static int igb_open(struct net_device *);
124static int igb_close(struct net_device *);
125static void igb_configure_tx(struct igb_adapter *);
126static void igb_configure_rx(struct igb_adapter *);
127static void igb_clean_all_tx_rings(struct igb_adapter *);
128static void igb_clean_all_rx_rings(struct igb_adapter *);
129static void igb_clean_tx_ring(struct igb_ring *);
130static void igb_clean_rx_ring(struct igb_ring *);
131static void igb_set_rx_mode(struct net_device *);
132static void igb_update_phy_info(unsigned long);
133static void igb_watchdog(unsigned long);
134static void igb_watchdog_task(struct work_struct *);
135static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
136static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
137						 struct rtnl_link_stats64 *stats);
138static int igb_change_mtu(struct net_device *, int);
139static int igb_set_mac(struct net_device *, void *);
140static void igb_set_uta(struct igb_adapter *adapter);
141static irqreturn_t igb_intr(int irq, void *);
142static irqreturn_t igb_intr_msi(int irq, void *);
143static irqreturn_t igb_msix_other(int irq, void *);
144static irqreturn_t igb_msix_ring(int irq, void *);
145#ifdef CONFIG_IGB_DCA
146static void igb_update_dca(struct igb_q_vector *);
147static void igb_setup_dca(struct igb_adapter *);
148#endif /* CONFIG_IGB_DCA */
149static int igb_poll(struct napi_struct *, int);
150static bool igb_clean_tx_irq(struct igb_q_vector *);
151static bool igb_clean_rx_irq(struct igb_q_vector *, int);
152static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
153static void igb_tx_timeout(struct net_device *);
154static void igb_reset_task(struct work_struct *);
155static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
156static int igb_vlan_rx_add_vid(struct net_device *, u16);
157static int igb_vlan_rx_kill_vid(struct net_device *, u16);
158static void igb_restore_vlan(struct igb_adapter *);
159static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
160static void igb_ping_all_vfs(struct igb_adapter *);
161static void igb_msg_task(struct igb_adapter *);
162static void igb_vmm_control(struct igb_adapter *);
163static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
164static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
165static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
166static int igb_ndo_set_vf_vlan(struct net_device *netdev,
167			       int vf, u16 vlan, u8 qos);
168static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
169static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
170				 struct ifla_vf_info *ivi);
171static void igb_check_vf_rate_limit(struct igb_adapter *);
172
173#ifdef CONFIG_PCI_IOV
174static int igb_vf_configure(struct igb_adapter *adapter, int vf);
175static bool igb_vfs_are_assigned(struct igb_adapter *adapter);
176#endif
177
178#ifdef CONFIG_PM
179#ifdef CONFIG_PM_SLEEP
180static int igb_suspend(struct device *);
181#endif
182static int igb_resume(struct device *);
183#ifdef CONFIG_PM_RUNTIME
184static int igb_runtime_suspend(struct device *dev);
185static int igb_runtime_resume(struct device *dev);
186static int igb_runtime_idle(struct device *dev);
187#endif
188static const struct dev_pm_ops igb_pm_ops = {
189	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
190	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
191			igb_runtime_idle)
192};
193#endif
194static void igb_shutdown(struct pci_dev *);
195#ifdef CONFIG_IGB_DCA
196static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
197static struct notifier_block dca_notifier = {
198	.notifier_call	= igb_notify_dca,
199	.next		= NULL,
200	.priority	= 0
201};
202#endif
203#ifdef CONFIG_NET_POLL_CONTROLLER
204/* for netdump / net console */
205static void igb_netpoll(struct net_device *);
206#endif
207#ifdef CONFIG_PCI_IOV
208static unsigned int max_vfs = 0;
209module_param(max_vfs, uint, 0);
210MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
211                 "per physical function");
212#endif /* CONFIG_PCI_IOV */
213
214static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
215		     pci_channel_state_t);
216static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
217static void igb_io_resume(struct pci_dev *);
218
219static struct pci_error_handlers igb_err_handler = {
220	.error_detected = igb_io_error_detected,
221	.slot_reset = igb_io_slot_reset,
222	.resume = igb_io_resume,
223};
224
225static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
226
227static struct pci_driver igb_driver = {
228	.name     = igb_driver_name,
229	.id_table = igb_pci_tbl,
230	.probe    = igb_probe,
231	.remove   = __devexit_p(igb_remove),
232#ifdef CONFIG_PM
233	.driver.pm = &igb_pm_ops,
234#endif
235	.shutdown = igb_shutdown,
236	.err_handler = &igb_err_handler
237};
238
239MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
240MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
241MODULE_LICENSE("GPL");
242MODULE_VERSION(DRV_VERSION);
243
244#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
245static int debug = -1;
246module_param(debug, int, 0);
247MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
248
249struct igb_reg_info {
250	u32 ofs;
251	char *name;
252};
253
254static const struct igb_reg_info igb_reg_info_tbl[] = {
255
256	/* General Registers */
257	{E1000_CTRL, "CTRL"},
258	{E1000_STATUS, "STATUS"},
259	{E1000_CTRL_EXT, "CTRL_EXT"},
260
261	/* Interrupt Registers */
262	{E1000_ICR, "ICR"},
263
264	/* RX Registers */
265	{E1000_RCTL, "RCTL"},
266	{E1000_RDLEN(0), "RDLEN"},
267	{E1000_RDH(0), "RDH"},
268	{E1000_RDT(0), "RDT"},
269	{E1000_RXDCTL(0), "RXDCTL"},
270	{E1000_RDBAL(0), "RDBAL"},
271	{E1000_RDBAH(0), "RDBAH"},
272
273	/* TX Registers */
274	{E1000_TCTL, "TCTL"},
275	{E1000_TDBAL(0), "TDBAL"},
276	{E1000_TDBAH(0), "TDBAH"},
277	{E1000_TDLEN(0), "TDLEN"},
278	{E1000_TDH(0), "TDH"},
279	{E1000_TDT(0), "TDT"},
280	{E1000_TXDCTL(0), "TXDCTL"},
281	{E1000_TDFH, "TDFH"},
282	{E1000_TDFT, "TDFT"},
283	{E1000_TDFHS, "TDFHS"},
284	{E1000_TDFPC, "TDFPC"},
285
286	/* List Terminator */
287	{}
288};
289
290/*
291 * igb_regdump - register printout routine
292 */
293static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
294{
295	int n = 0;
296	char rname[16];
297	u32 regs[8];
298
299	switch (reginfo->ofs) {
300	case E1000_RDLEN(0):
301		for (n = 0; n < 4; n++)
302			regs[n] = rd32(E1000_RDLEN(n));
303		break;
304	case E1000_RDH(0):
305		for (n = 0; n < 4; n++)
306			regs[n] = rd32(E1000_RDH(n));
307		break;
308	case E1000_RDT(0):
309		for (n = 0; n < 4; n++)
310			regs[n] = rd32(E1000_RDT(n));
311		break;
312	case E1000_RXDCTL(0):
313		for (n = 0; n < 4; n++)
314			regs[n] = rd32(E1000_RXDCTL(n));
315		break;
316	case E1000_RDBAL(0):
317		for (n = 0; n < 4; n++)
318			regs[n] = rd32(E1000_RDBAL(n));
319		break;
320	case E1000_RDBAH(0):
321		for (n = 0; n < 4; n++)
322			regs[n] = rd32(E1000_RDBAH(n));
323		break;
324	case E1000_TDBAL(0):
325		for (n = 0; n < 4; n++)
326			regs[n] = rd32(E1000_TDBAL(n));
327		break;
328	case E1000_TDBAH(0):
329		for (n = 0; n < 4; n++)
330			regs[n] = rd32(E1000_TDBAH(n));
331		break;
332	case E1000_TDLEN(0):
333		for (n = 0; n < 4; n++)
334			regs[n] = rd32(E1000_TDLEN(n));
335		break;
336	case E1000_TDH(0):
337		for (n = 0; n < 4; n++)
338			regs[n] = rd32(E1000_TDH(n));
339		break;
340	case E1000_TDT(0):
341		for (n = 0; n < 4; n++)
342			regs[n] = rd32(E1000_TDT(n));
343		break;
344	case E1000_TXDCTL(0):
345		for (n = 0; n < 4; n++)
346			regs[n] = rd32(E1000_TXDCTL(n));
347		break;
348	default:
349		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
350		return;
351	}
352
353	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
354	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
355		regs[2], regs[3]);
356}
357
358/*
359 * igb_dump - Print registers, tx-rings and rx-rings
360 */
361static void igb_dump(struct igb_adapter *adapter)
362{
363	struct net_device *netdev = adapter->netdev;
364	struct e1000_hw *hw = &adapter->hw;
365	struct igb_reg_info *reginfo;
366	struct igb_ring *tx_ring;
367	union e1000_adv_tx_desc *tx_desc;
368	struct my_u0 { u64 a; u64 b; } *u0;
369	struct igb_ring *rx_ring;
370	union e1000_adv_rx_desc *rx_desc;
371	u32 staterr;
372	u16 i, n;
373
374	if (!netif_msg_hw(adapter))
375		return;
376
377	/* Print netdevice Info */
378	if (netdev) {
379		dev_info(&adapter->pdev->dev, "Net device Info\n");
380		pr_info("Device Name     state            trans_start      "
381			"last_rx\n");
382		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
383			netdev->state, netdev->trans_start, netdev->last_rx);
384	}
385
386	/* Print Registers */
387	dev_info(&adapter->pdev->dev, "Register Dump\n");
388	pr_info(" Register Name   Value\n");
389	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
390	     reginfo->name; reginfo++) {
391		igb_regdump(hw, reginfo);
392	}
393
394	/* Print TX Ring Summary */
395	if (!netdev || !netif_running(netdev))
396		goto exit;
397
398	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
399	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
400	for (n = 0; n < adapter->num_tx_queues; n++) {
401		struct igb_tx_buffer *buffer_info;
402		tx_ring = adapter->tx_ring[n];
403		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
404		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
405			n, tx_ring->next_to_use, tx_ring->next_to_clean,
406			(u64)buffer_info->dma,
407			buffer_info->length,
408			buffer_info->next_to_watch,
409			(u64)buffer_info->time_stamp);
410	}
411
412	/* Print TX Rings */
413	if (!netif_msg_tx_done(adapter))
414		goto rx_ring_summary;
415
416	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
417
418	/* Transmit Descriptor Formats
419	 *
420	 * Advanced Transmit Descriptor
421	 *   +--------------------------------------------------------------+
422	 * 0 |         Buffer Address [63:0]                                |
423	 *   +--------------------------------------------------------------+
424	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
425	 *   +--------------------------------------------------------------+
426	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
427	 */
428
429	for (n = 0; n < adapter->num_tx_queues; n++) {
430		tx_ring = adapter->tx_ring[n];
431		pr_info("------------------------------------\n");
432		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
433		pr_info("------------------------------------\n");
434		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
435			"[bi->dma       ] leng  ntw timestamp        "
436			"bi->skb\n");
437
438		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
439			const char *next_desc;
440			struct igb_tx_buffer *buffer_info;
441			tx_desc = IGB_TX_DESC(tx_ring, i);
442			buffer_info = &tx_ring->tx_buffer_info[i];
443			u0 = (struct my_u0 *)tx_desc;
444			if (i == tx_ring->next_to_use &&
445			    i == tx_ring->next_to_clean)
446				next_desc = " NTC/U";
447			else if (i == tx_ring->next_to_use)
448				next_desc = " NTU";
449			else if (i == tx_ring->next_to_clean)
450				next_desc = " NTC";
451			else
452				next_desc = "";
453
454			pr_info("T [0x%03X]    %016llX %016llX %016llX"
455				" %04X  %p %016llX %p%s\n", i,
456				le64_to_cpu(u0->a),
457				le64_to_cpu(u0->b),
458				(u64)buffer_info->dma,
459				buffer_info->length,
460				buffer_info->next_to_watch,
461				(u64)buffer_info->time_stamp,
462				buffer_info->skb, next_desc);
463
464			if (netif_msg_pktdata(adapter) && buffer_info->skb)
465				print_hex_dump(KERN_INFO, "",
466					DUMP_PREFIX_ADDRESS,
467					16, 1, buffer_info->skb->data,
468					buffer_info->length, true);
469		}
470	}
471
472	/* Print RX Rings Summary */
473rx_ring_summary:
474	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
475	pr_info("Queue [NTU] [NTC]\n");
476	for (n = 0; n < adapter->num_rx_queues; n++) {
477		rx_ring = adapter->rx_ring[n];
478		pr_info(" %5d %5X %5X\n",
479			n, rx_ring->next_to_use, rx_ring->next_to_clean);
480	}
481
482	/* Print RX Rings */
483	if (!netif_msg_rx_status(adapter))
484		goto exit;
485
486	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
487
488	/* Advanced Receive Descriptor (Read) Format
489	 *    63                                           1        0
490	 *    +-----------------------------------------------------+
491	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
492	 *    +----------------------------------------------+------+
493	 *  8 |       Header Buffer Address [63:1]           |  DD  |
494	 *    +-----------------------------------------------------+
495	 *
496	 *
497	 * Advanced Receive Descriptor (Write-Back) Format
498	 *
499	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
500	 *   +------------------------------------------------------+
501	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
502	 *   | Checksum   Ident  |   |           |    | Type | Type |
503	 *   +------------------------------------------------------+
504	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
505	 *   +------------------------------------------------------+
506	 *   63       48 47    32 31            20 19               0
507	 */
508
509	for (n = 0; n < adapter->num_rx_queues; n++) {
510		rx_ring = adapter->rx_ring[n];
511		pr_info("------------------------------------\n");
512		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
513		pr_info("------------------------------------\n");
514		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
515			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
516		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
517			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
518
519		for (i = 0; i < rx_ring->count; i++) {
520			const char *next_desc;
521			struct igb_rx_buffer *buffer_info;
522			buffer_info = &rx_ring->rx_buffer_info[i];
523			rx_desc = IGB_RX_DESC(rx_ring, i);
524			u0 = (struct my_u0 *)rx_desc;
525			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
526
527			if (i == rx_ring->next_to_use)
528				next_desc = " NTU";
529			else if (i == rx_ring->next_to_clean)
530				next_desc = " NTC";
531			else
532				next_desc = "";
533
534			if (staterr & E1000_RXD_STAT_DD) {
535				/* Descriptor Done */
536				pr_info("%s[0x%03X]     %016llX %016llX -------"
537					"--------- %p%s\n", "RWB", i,
538					le64_to_cpu(u0->a),
539					le64_to_cpu(u0->b),
540					buffer_info->skb, next_desc);
541			} else {
542				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
543					" %p%s\n", "R  ", i,
544					le64_to_cpu(u0->a),
545					le64_to_cpu(u0->b),
546					(u64)buffer_info->dma,
547					buffer_info->skb, next_desc);
548
549				if (netif_msg_pktdata(adapter) &&
550				    buffer_info->dma && buffer_info->skb) {
551					print_hex_dump(KERN_INFO, "",
552						  DUMP_PREFIX_ADDRESS,
553						  16, 1, buffer_info->skb->data,
554						  IGB_RX_HDR_LEN, true);
555					print_hex_dump(KERN_INFO, "",
556					  DUMP_PREFIX_ADDRESS,
557					  16, 1,
558					  page_address(buffer_info->page) +
559						      buffer_info->page_offset,
560					  PAGE_SIZE/2, true);
561				}
562			}
563		}
564	}
565
566exit:
567	return;
568}
569
570/**
571 * igb_get_hw_dev - return the net_device associated with the hardware
572 * used by the hardware layer to print debugging information
573 **/
574struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
575{
576	struct igb_adapter *adapter = hw->back;
577	return adapter->netdev;
578}
579
580/**
581 * igb_init_module - Driver Registration Routine
582 *
583 * igb_init_module is the first routine called when the driver is
584 * loaded. All it does is register with the PCI subsystem.
585 **/
586static int __init igb_init_module(void)
587{
588	int ret;
589	pr_info("%s - version %s\n",
590	       igb_driver_string, igb_driver_version);
591
592	pr_info("%s\n", igb_copyright);
593
594#ifdef CONFIG_IGB_DCA
595	dca_register_notify(&dca_notifier);
596#endif
597	ret = pci_register_driver(&igb_driver);
598	return ret;
599}
600
601module_init(igb_init_module);
602
603/**
604 * igb_exit_module - Driver Exit Cleanup Routine
605 *
606 * igb_exit_module is called just before the driver is removed
607 * from memory.
608 **/
609static void __exit igb_exit_module(void)
610{
611#ifdef CONFIG_IGB_DCA
612	dca_unregister_notify(&dca_notifier);
613#endif
614	pci_unregister_driver(&igb_driver);
615}
616
617module_exit(igb_exit_module);
618
619#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
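/* Q_IDX_82576 interleaves queue indices for the VF layout described below:
 * 0->0, 1->8, 2->1, 3->9, 4->2, 5->10, 6->3, 7->11
 */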
620/**
621 * igb_cache_ring_register - Descriptor ring to register mapping
622 * @adapter: board private structure to initialize
623 *
624 * Once we know the feature-set enabled for the device, we'll cache
625 * the register offset the descriptor ring is assigned to.
626 **/
627static void igb_cache_ring_register(struct igb_adapter *adapter)
628{
629	int i = 0, j = 0;
630	u32 rbase_offset = adapter->vfs_allocated_count;
631
632	switch (adapter->hw.mac.type) {
633	case e1000_82576:
634		/* The queues are allocated for virtualization such that VF 0
635		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
636		 * In order to avoid collision we start at the first free queue
637		 * and continue consuming queues in the same sequence
638		 */
639		if (adapter->vfs_allocated_count) {
640			for (; i < adapter->rss_queues; i++)
641				adapter->rx_ring[i]->reg_idx = rbase_offset +
642				                               Q_IDX_82576(i);
643		}
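		/* fall through - Tx rings (and any Rx rings not claimed above)
		 * use the default mapping below
		 */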
644	case e1000_82575:
645	case e1000_82580:
646	case e1000_i350:
647	case e1000_i210:
648	case e1000_i211:
649	default:
650		for (; i < adapter->num_rx_queues; i++)
651			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
652		for (; j < adapter->num_tx_queues; j++)
653			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
654		break;
655	}
656}
657
658static void igb_free_queues(struct igb_adapter *adapter)
659{
660	int i;
661
662	for (i = 0; i < adapter->num_tx_queues; i++) {
663		kfree(adapter->tx_ring[i]);
664		adapter->tx_ring[i] = NULL;
665	}
666	for (i = 0; i < adapter->num_rx_queues; i++) {
667		kfree(adapter->rx_ring[i]);
668		adapter->rx_ring[i] = NULL;
669	}
670	adapter->num_rx_queues = 0;
671	adapter->num_tx_queues = 0;
672}
673
674/**
675 * igb_alloc_queues - Allocate memory for all rings
676 * @adapter: board private structure to initialize
677 *
678 * We allocate one ring per queue at run-time since we don't know the
679 * number of queues at compile-time.
680 **/
681static int igb_alloc_queues(struct igb_adapter *adapter)
682{
683	struct igb_ring *ring;
684	int i;
685	int orig_node = adapter->node;
686
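	/* If no NUMA node was requested (adapter->node == -1), spread the
	 * ring allocations round-robin across the online nodes.
	 */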
687	for (i = 0; i < adapter->num_tx_queues; i++) {
688		if (orig_node == -1) {
689			int cur_node = next_online_node(adapter->node);
690			if (cur_node == MAX_NUMNODES)
691				cur_node = first_online_node;
692			adapter->node = cur_node;
693		}
694		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
695				    adapter->node);
696		if (!ring)
697			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
698		if (!ring)
699			goto err;
700		ring->count = adapter->tx_ring_count;
701		ring->queue_index = i;
702		ring->dev = &adapter->pdev->dev;
703		ring->netdev = adapter->netdev;
704		ring->numa_node = adapter->node;
705		/* For 82575, context index must be unique per ring. */
706		if (adapter->hw.mac.type == e1000_82575)
707			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
708		adapter->tx_ring[i] = ring;
709	}
710	/* Restore the adapter's original node */
711	adapter->node = orig_node;
712
713	for (i = 0; i < adapter->num_rx_queues; i++) {
714		if (orig_node == -1) {
715			int cur_node = next_online_node(adapter->node);
716			if (cur_node == MAX_NUMNODES)
717				cur_node = first_online_node;
718			adapter->node = cur_node;
719		}
720		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
721				    adapter->node);
722		if (!ring)
723			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
724		if (!ring)
725			goto err;
726		ring->count = adapter->rx_ring_count;
727		ring->queue_index = i;
728		ring->dev = &adapter->pdev->dev;
729		ring->netdev = adapter->netdev;
730		ring->numa_node = adapter->node;
731		/* set flag indicating ring supports SCTP checksum offload */
732		if (adapter->hw.mac.type >= e1000_82576)
733			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
734
735		/*
736		 * On i350, i210, and i211, loopback VLAN packets
737		 * have the tag byte-swapped.
738		 */
739		if (adapter->hw.mac.type >= e1000_i350)
740			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
741
742		adapter->rx_ring[i] = ring;
743	}
744	/* Restore the adapter's original node */
745	adapter->node = orig_node;
746
747	igb_cache_ring_register(adapter);
748
749	return 0;
750
751err:
752	/* Restore the adapter's original node */
753	adapter->node = orig_node;
754	igb_free_queues(adapter);
755
756	return -ENOMEM;
757}
758
759/**
760 *  igb_write_ivar - configure ivar for given MSI-X vector
761 *  @hw: pointer to the HW structure
762 *  @msix_vector: vector number we are allocating to a given ring
763 *  @index: row index of IVAR register to write within IVAR table
764 *  @offset: column offset in IVAR, should be a multiple of 8
765 *
766 *  This function is intended to handle the writing of the IVAR register
767 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
768 *  each containing a cause allocation for an Rx and Tx ring, and a
769 *  variable number of rows depending on the number of queues supported.
770 **/
771static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
772			   int index, int offset)
773{
774	u32 ivar = array_rd32(E1000_IVAR0, index);
775
776	/* clear any bits that are currently set */
777	ivar &= ~((u32)0xFF << offset);
778
779	/* write vector and valid bit */
780	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
781
782	array_wr32(E1000_IVAR0, index, ivar);
783}
784
785#define IGB_N0_QUEUE -1
786static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
787{
788	struct igb_adapter *adapter = q_vector->adapter;
789	struct e1000_hw *hw = &adapter->hw;
790	int rx_queue = IGB_N0_QUEUE;
791	int tx_queue = IGB_N0_QUEUE;
792	u32 msixbm = 0;
793
794	if (q_vector->rx.ring)
795		rx_queue = q_vector->rx.ring->reg_idx;
796	if (q_vector->tx.ring)
797		tx_queue = q_vector->tx.ring->reg_idx;
798
799	switch (hw->mac.type) {
800	case e1000_82575:
801		/* The 82575 assigns vectors using a bitmask, which matches the
802		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
803		   or more queues to a vector, we write the appropriate bits
804		   into the MSIXBM register for that vector. */
805		if (rx_queue > IGB_N0_QUEUE)
806			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
807		if (tx_queue > IGB_N0_QUEUE)
808			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
809		if (!adapter->msix_entries && msix_vector == 0)
810			msixbm |= E1000_EIMS_OTHER;
811		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
812		q_vector->eims_value = msixbm;
813		break;
814	case e1000_82576:
815		/*
816		 * 82576 uses a table that essentially consists of 2 columns
817		 * with 8 rows.  The ordering is column-major so we use the
818		 * lower 3 bits as the row index, and the 4th bit as the
819		 * column offset.
820		 */
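		/* e.g. Rx queue 9 lands in IVAR row 1 at bit offset 16, and the
		 * matching Tx entry for queue 9 in the same row at offset 24
		 */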
821		if (rx_queue > IGB_N0_QUEUE)
822			igb_write_ivar(hw, msix_vector,
823				       rx_queue & 0x7,
824				       (rx_queue & 0x8) << 1);
825		if (tx_queue > IGB_N0_QUEUE)
826			igb_write_ivar(hw, msix_vector,
827				       tx_queue & 0x7,
828				       ((tx_queue & 0x8) << 1) + 8);
829		q_vector->eims_value = 1 << msix_vector;
830		break;
831	case e1000_82580:
832	case e1000_i350:
833	case e1000_i210:
834	case e1000_i211:
835		/*
836		 * On 82580 and newer adapters the scheme is similar to 82576
837		 * however instead of ordering column-major we have things
838		 * ordered row-major.  So we traverse the table by using
839		 * bit 0 as the column offset, and the remaining bits as the
840		 * row index.
841		 */
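		/* e.g. Rx queue 5 lands in IVAR row 2 at bit offset 16, and the
		 * matching Tx entry for queue 5 at offset 24
		 */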
842		if (rx_queue > IGB_N0_QUEUE)
843			igb_write_ivar(hw, msix_vector,
844				       rx_queue >> 1,
845				       (rx_queue & 0x1) << 4);
846		if (tx_queue > IGB_N0_QUEUE)
847			igb_write_ivar(hw, msix_vector,
848				       tx_queue >> 1,
849				       ((tx_queue & 0x1) << 4) + 8);
850		q_vector->eims_value = 1 << msix_vector;
851		break;
852	default:
853		BUG();
854		break;
855	}
856
857	/* add q_vector eims value to global eims_enable_mask */
858	adapter->eims_enable_mask |= q_vector->eims_value;
859
860	/* configure q_vector to set itr on first interrupt */
861	q_vector->set_itr = 1;
862}
863
864/**
865 * igb_configure_msix - Configure MSI-X hardware
866 *
867 * igb_configure_msix sets up the hardware to properly
868 * generate MSI-X interrupts.
869 **/
870static void igb_configure_msix(struct igb_adapter *adapter)
871{
872	u32 tmp;
873	int i, vector = 0;
874	struct e1000_hw *hw = &adapter->hw;
875
876	adapter->eims_enable_mask = 0;
877
878	/* set vector for other causes, i.e. link changes */
879	switch (hw->mac.type) {
880	case e1000_82575:
881		tmp = rd32(E1000_CTRL_EXT);
882		/* enable MSI-X PBA support */
883		tmp |= E1000_CTRL_EXT_PBA_CLR;
884
885		/* Auto-Mask interrupts upon ICR read. */
886		tmp |= E1000_CTRL_EXT_EIAME;
887		tmp |= E1000_CTRL_EXT_IRCA;
888
889		wr32(E1000_CTRL_EXT, tmp);
890
891		/* enable msix_other interrupt */
892		array_wr32(E1000_MSIXBM(0), vector++,
893		                      E1000_EIMS_OTHER);
894		adapter->eims_other = E1000_EIMS_OTHER;
895
896		break;
897
898	case e1000_82576:
899	case e1000_82580:
900	case e1000_i350:
901	case e1000_i210:
902	case e1000_i211:
903		/* Turn on MSI-X capability first, or our settings
904		 * won't stick.  And it will take days to debug. */
905		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
906		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
907		                E1000_GPIE_NSICR);
908
909		/* enable msix_other interrupt */
910		adapter->eims_other = 1 << vector;
911		tmp = (vector++ | E1000_IVAR_VALID) << 8;
912
913		wr32(E1000_IVAR_MISC, tmp);
914		break;
915	default:
916		/* do nothing, since nothing else supports MSI-X */
917		break;
918	} /* switch (hw->mac.type) */
919
920	adapter->eims_enable_mask |= adapter->eims_other;
921
922	for (i = 0; i < adapter->num_q_vectors; i++)
923		igb_assign_vector(adapter->q_vector[i], vector++);
924
925	wrfl();
926}
927
928/**
929 * igb_request_msix - Initialize MSI-X interrupts
930 *
931 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
932 * kernel.
933 **/
934static int igb_request_msix(struct igb_adapter *adapter)
935{
936	struct net_device *netdev = adapter->netdev;
937	struct e1000_hw *hw = &adapter->hw;
938	int i, err = 0, vector = 0;
939
940	err = request_irq(adapter->msix_entries[vector].vector,
941	                  igb_msix_other, 0, netdev->name, adapter);
942	if (err)
943		goto out;
944	vector++;
945
946	for (i = 0; i < adapter->num_q_vectors; i++) {
947		struct igb_q_vector *q_vector = adapter->q_vector[i];
948
949		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
950
951		if (q_vector->rx.ring && q_vector->tx.ring)
952			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
953				q_vector->rx.ring->queue_index);
954		else if (q_vector->tx.ring)
955			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
956				q_vector->tx.ring->queue_index);
957		else if (q_vector->rx.ring)
958			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
959				q_vector->rx.ring->queue_index);
960		else
961			sprintf(q_vector->name, "%s-unused", netdev->name);
962
963		err = request_irq(adapter->msix_entries[vector].vector,
964		                  igb_msix_ring, 0, q_vector->name,
965		                  q_vector);
966		if (err)
967			goto out;
968		vector++;
969	}
970
971	igb_configure_msix(adapter);
972	return 0;
973out:
974	return err;
975}
976
977static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
978{
979	if (adapter->msix_entries) {
980		pci_disable_msix(adapter->pdev);
981		kfree(adapter->msix_entries);
982		adapter->msix_entries = NULL;
983	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
984		pci_disable_msi(adapter->pdev);
985	}
986}
987
988/**
989 * igb_free_q_vectors - Free memory allocated for interrupt vectors
990 * @adapter: board private structure to initialize
991 *
992 * This function frees the memory allocated to the q_vectors.  In addition if
993 * NAPI is enabled it will delete any references to the NAPI struct prior
994 * to freeing the q_vector.
995 **/
996static void igb_free_q_vectors(struct igb_adapter *adapter)
997{
998	int v_idx;
999
1000	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1001		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1002		adapter->q_vector[v_idx] = NULL;
1003		if (!q_vector)
1004			continue;
1005		netif_napi_del(&q_vector->napi);
1006		kfree(q_vector);
1007	}
1008	adapter->num_q_vectors = 0;
1009}
1010
1011/**
1012 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1013 *
1014 * This function resets the device so that it has 0 rx queues, tx queues, and
1015 * MSI-X interrupts allocated.
1016 */
1017static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1018{
1019	igb_free_queues(adapter);
1020	igb_free_q_vectors(adapter);
1021	igb_reset_interrupt_capability(adapter);
1022}
1023
1024/**
1025 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1026 *
1027 * Attempt to configure interrupts using the best available
1028 * capabilities of the hardware and kernel.
1029 **/
1030static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1031{
1032	int err;
1033	int numvecs, i;
1034
1035	/* Number of supported queues. */
1036	adapter->num_rx_queues = adapter->rss_queues;
1037	if (adapter->vfs_allocated_count)
1038		adapter->num_tx_queues = 1;
1039	else
1040		adapter->num_tx_queues = adapter->rss_queues;
1041
1042	/* start with one vector for every rx queue */
1043	numvecs = adapter->num_rx_queues;
1044
1045	/* if tx handler is separate add 1 for every tx queue */
1046	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1047		numvecs += adapter->num_tx_queues;
1048
1049	/* store the number of vectors reserved for queues */
1050	adapter->num_q_vectors = numvecs;
1051
1052	/* add 1 vector for link status interrupts */
1053	numvecs++;
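	/* e.g. 4 RSS queues with queue pairing enabled give 4 queue vectors
	 * plus this one for link/other, so numvecs = 5
	 */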
1054	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1055					GFP_KERNEL);
1056
1057	if (!adapter->msix_entries)
1058		goto msi_only;
1059
1060	for (i = 0; i < numvecs; i++)
1061		adapter->msix_entries[i].entry = i;
1062
1063	err = pci_enable_msix(adapter->pdev,
1064			      adapter->msix_entries,
1065			      numvecs);
1066	if (err == 0)
1067		goto out;
1068
1069	igb_reset_interrupt_capability(adapter);
1070
1071	/* If we can't do MSI-X, try MSI */
1072msi_only:
1073#ifdef CONFIG_PCI_IOV
1074	/* disable SR-IOV for non MSI-X configurations */
1075	if (adapter->vf_data) {
1076		struct e1000_hw *hw = &adapter->hw;
1077		/* disable iov and allow time for transactions to clear */
1078		pci_disable_sriov(adapter->pdev);
1079		msleep(500);
1080
1081		kfree(adapter->vf_data);
1082		adapter->vf_data = NULL;
1083		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1084		wrfl();
1085		msleep(100);
1086		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1087	}
1088#endif
1089	adapter->vfs_allocated_count = 0;
1090	adapter->rss_queues = 1;
1091	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1092	adapter->num_rx_queues = 1;
1093	adapter->num_tx_queues = 1;
1094	adapter->num_q_vectors = 1;
1095	if (!pci_enable_msi(adapter->pdev))
1096		adapter->flags |= IGB_FLAG_HAS_MSI;
1097out:
1098	/* Notify the stack of the (possibly) reduced queue counts. */
1099	rtnl_lock();
1100	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1101	err = netif_set_real_num_rx_queues(adapter->netdev,
1102		adapter->num_rx_queues);
1103	rtnl_unlock();
1104	return err;
1105}
1106
1107/**
1108 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1109 * @adapter: board private structure to initialize
1110 *
1111 * We allocate one q_vector per queue interrupt.  If allocation fails we
1112 * return -ENOMEM.
1113 **/
1114static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1115{
1116	struct igb_q_vector *q_vector;
1117	struct e1000_hw *hw = &adapter->hw;
1118	int v_idx;
1119	int orig_node = adapter->node;
1120
1121	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1122		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1123						adapter->num_tx_queues)) &&
1124		    (adapter->num_rx_queues == v_idx))
1125			adapter->node = orig_node;
1126		if (orig_node == -1) {
1127			int cur_node = next_online_node(adapter->node);
1128			if (cur_node == MAX_NUMNODES)
1129				cur_node = first_online_node;
1130			adapter->node = cur_node;
1131		}
1132		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1133					adapter->node);
1134		if (!q_vector)
1135			q_vector = kzalloc(sizeof(struct igb_q_vector),
1136					   GFP_KERNEL);
1137		if (!q_vector)
1138			goto err_out;
1139		q_vector->adapter = adapter;
1140		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1141		q_vector->itr_val = IGB_START_ITR;
1142		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1143		adapter->q_vector[v_idx] = q_vector;
1144	}
1145	/* Restore the adapter's original node */
1146	adapter->node = orig_node;
1147
1148	return 0;
1149
1150err_out:
1151	/* Restore the adapter's original node */
1152	adapter->node = orig_node;
1153	igb_free_q_vectors(adapter);
1154	return -ENOMEM;
1155}
1156
1157static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1158                                      int ring_idx, int v_idx)
1159{
1160	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1161
1162	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1163	q_vector->rx.ring->q_vector = q_vector;
1164	q_vector->rx.count++;
1165	q_vector->itr_val = adapter->rx_itr_setting;
1166	if (q_vector->itr_val && q_vector->itr_val <= 3)
1167		q_vector->itr_val = IGB_START_ITR;
1168}
1169
1170static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1171                                      int ring_idx, int v_idx)
1172{
1173	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1174
1175	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1176	q_vector->tx.ring->q_vector = q_vector;
1177	q_vector->tx.count++;
1178	q_vector->itr_val = adapter->tx_itr_setting;
1179	q_vector->tx.work_limit = adapter->tx_work_limit;
1180	if (q_vector->itr_val && q_vector->itr_val <= 3)
1181		q_vector->itr_val = IGB_START_ITR;
1182}
1183
1184/**
1185 * igb_map_ring_to_vector - maps allocated queues to vectors
1186 *
1187 * This function maps the recently allocated queues to vectors.
1188 **/
1189static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1190{
1191	int i;
1192	int v_idx = 0;
1193
1194	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1195	    (adapter->num_q_vectors < adapter->num_tx_queues))
1196		return -ENOMEM;
1197
1198	if (adapter->num_q_vectors >=
1199	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1200		for (i = 0; i < adapter->num_rx_queues; i++)
1201			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1202		for (i = 0; i < adapter->num_tx_queues; i++)
1203			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1204	} else {
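		/* fewer vectors than rings: pair an Rx ring and a Tx ring on
		 * the same vector where possible (queue pairing)
		 */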
1205		for (i = 0; i < adapter->num_rx_queues; i++) {
1206			if (i < adapter->num_tx_queues)
1207				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1208			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209		}
1210		for (; i < adapter->num_tx_queues; i++)
1211			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1212	}
1213	return 0;
1214}
1215
1216/**
1217 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1218 *
1219 * This function initializes the interrupts and allocates all of the queues.
1220 **/
1221static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1222{
1223	struct pci_dev *pdev = adapter->pdev;
1224	int err;
1225
1226	err = igb_set_interrupt_capability(adapter);
1227	if (err)
1228		return err;
1229
1230	err = igb_alloc_q_vectors(adapter);
1231	if (err) {
1232		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1233		goto err_alloc_q_vectors;
1234	}
1235
1236	err = igb_alloc_queues(adapter);
1237	if (err) {
1238		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1239		goto err_alloc_queues;
1240	}
1241
1242	err = igb_map_ring_to_vector(adapter);
1243	if (err) {
1244		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1245		goto err_map_queues;
1246	}
1247
1248
1249	return 0;
1250err_map_queues:
1251	igb_free_queues(adapter);
1252err_alloc_queues:
1253	igb_free_q_vectors(adapter);
1254err_alloc_q_vectors:
1255	igb_reset_interrupt_capability(adapter);
1256	return err;
1257}
1258
1259/**
1260 * igb_request_irq - initialize interrupts
1261 *
1262 * Attempts to configure interrupts using the best available
1263 * capabilities of the hardware and kernel.
1264 **/
1265static int igb_request_irq(struct igb_adapter *adapter)
1266{
1267	struct net_device *netdev = adapter->netdev;
1268	struct pci_dev *pdev = adapter->pdev;
1269	int err = 0;
1270
1271	if (adapter->msix_entries) {
1272		err = igb_request_msix(adapter);
1273		if (!err)
1274			goto request_done;
1275		/* fall back to MSI */
1276		igb_clear_interrupt_scheme(adapter);
1277		if (!pci_enable_msi(pdev))
1278			adapter->flags |= IGB_FLAG_HAS_MSI;
1279		igb_free_all_tx_resources(adapter);
1280		igb_free_all_rx_resources(adapter);
1281		adapter->num_tx_queues = 1;
1282		adapter->num_rx_queues = 1;
1283		adapter->num_q_vectors = 1;
1284		err = igb_alloc_q_vectors(adapter);
1285		if (err) {
1286			dev_err(&pdev->dev,
1287			        "Unable to allocate memory for vectors\n");
1288			goto request_done;
1289		}
1290		err = igb_alloc_queues(adapter);
1291		if (err) {
1292			dev_err(&pdev->dev,
1293			        "Unable to allocate memory for queues\n");
1294			igb_free_q_vectors(adapter);
1295			goto request_done;
1296		}
1297		igb_setup_all_tx_resources(adapter);
1298		igb_setup_all_rx_resources(adapter);
1299	}
1300
1301	igb_assign_vector(adapter->q_vector[0], 0);
1302
1303	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1304		err = request_irq(pdev->irq, igb_intr_msi, 0,
1305				  netdev->name, adapter);
1306		if (!err)
1307			goto request_done;
1308
1309		/* fall back to legacy interrupts */
1310		igb_reset_interrupt_capability(adapter);
1311		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1312	}
1313
1314	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1315			  netdev->name, adapter);
1316
1317	if (err)
1318		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1319			err);
1320
1321request_done:
1322	return err;
1323}
1324
1325static void igb_free_irq(struct igb_adapter *adapter)
1326{
1327	if (adapter->msix_entries) {
1328		int vector = 0, i;
1329
1330		free_irq(adapter->msix_entries[vector++].vector, adapter);
1331
1332		for (i = 0; i < adapter->num_q_vectors; i++)
1333			free_irq(adapter->msix_entries[vector++].vector,
1334				 adapter->q_vector[i]);
1335	} else {
1336		free_irq(adapter->pdev->irq, adapter);
1337	}
1338}
1339
1340/**
1341 * igb_irq_disable - Mask off interrupt generation on the NIC
1342 * @adapter: board private structure
1343 **/
1344static void igb_irq_disable(struct igb_adapter *adapter)
1345{
1346	struct e1000_hw *hw = &adapter->hw;
1347
1348	/*
1349	 * we need to be careful when disabling interrupts.  The VFs are also
1350	 * mapped into these registers, and clearing the bits can cause
1351	 * issues for the VF drivers, so we only clear what we set
1352	 */
1353	if (adapter->msix_entries) {
1354		u32 regval = rd32(E1000_EIAM);
1355		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1356		wr32(E1000_EIMC, adapter->eims_enable_mask);
1357		regval = rd32(E1000_EIAC);
1358		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1359	}
1360
1361	wr32(E1000_IAM, 0);
1362	wr32(E1000_IMC, ~0);
1363	wrfl();
1364	if (adapter->msix_entries) {
1365		int i;
1366		for (i = 0; i < adapter->num_q_vectors; i++)
1367			synchronize_irq(adapter->msix_entries[i].vector);
1368	} else {
1369		synchronize_irq(adapter->pdev->irq);
1370	}
1371}
1372
1373/**
1374 * igb_irq_enable - Enable default interrupt generation settings
1375 * @adapter: board private structure
1376 **/
1377static void igb_irq_enable(struct igb_adapter *adapter)
1378{
1379	struct e1000_hw *hw = &adapter->hw;
1380
1381	if (adapter->msix_entries) {
1382		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1383		u32 regval = rd32(E1000_EIAC);
1384		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1385		regval = rd32(E1000_EIAM);
1386		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1387		wr32(E1000_EIMS, adapter->eims_enable_mask);
1388		if (adapter->vfs_allocated_count) {
1389			wr32(E1000_MBVFIMR, 0xFF);
1390			ims |= E1000_IMS_VMMB;
1391		}
1392		wr32(E1000_IMS, ims);
1393	} else {
1394		wr32(E1000_IMS, IMS_ENABLE_MASK |
1395				E1000_IMS_DRSTA);
1396		wr32(E1000_IAM, IMS_ENABLE_MASK |
1397				E1000_IMS_DRSTA);
1398	}
1399}
1400
1401static void igb_update_mng_vlan(struct igb_adapter *adapter)
1402{
1403	struct e1000_hw *hw = &adapter->hw;
1404	u16 vid = adapter->hw.mng_cookie.vlan_id;
1405	u16 old_vid = adapter->mng_vlan_id;
1406
1407	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1408		/* add VID to filter table */
1409		igb_vfta_set(hw, vid, true);
1410		adapter->mng_vlan_id = vid;
1411	} else {
1412		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1413	}
1414
1415	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1416	    (vid != old_vid) &&
1417	    !test_bit(old_vid, adapter->active_vlans)) {
1418		/* remove VID from filter table */
1419		igb_vfta_set(hw, old_vid, false);
1420	}
1421}
1422
1423/**
1424 * igb_release_hw_control - release control of the h/w to f/w
1425 * @adapter: address of board private structure
1426 *
1427 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1428 * For ASF and Pass Through versions of f/w this means that the
1429 * driver is no longer loaded.
1430 *
1431 **/
1432static void igb_release_hw_control(struct igb_adapter *adapter)
1433{
1434	struct e1000_hw *hw = &adapter->hw;
1435	u32 ctrl_ext;
1436
1437	/* Let firmware take over control of h/w */
1438	ctrl_ext = rd32(E1000_CTRL_EXT);
1439	wr32(E1000_CTRL_EXT,
1440			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1441}
1442
1443/**
1444 * igb_get_hw_control - get control of the h/w from f/w
1445 * @adapter: address of board private structure
1446 *
1447 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1448 * For ASF and Pass Through versions of f/w this means that
1449 * the driver is loaded.
1450 *
1451 **/
1452static void igb_get_hw_control(struct igb_adapter *adapter)
1453{
1454	struct e1000_hw *hw = &adapter->hw;
1455	u32 ctrl_ext;
1456
1457	/* Let firmware know the driver has taken over */
1458	ctrl_ext = rd32(E1000_CTRL_EXT);
1459	wr32(E1000_CTRL_EXT,
1460			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1461}
1462
1463/**
1464 * igb_configure - configure the hardware for RX and TX
1465 * @adapter: private board structure
1466 **/
1467static void igb_configure(struct igb_adapter *adapter)
1468{
1469	struct net_device *netdev = adapter->netdev;
1470	int i;
1471
1472	igb_get_hw_control(adapter);
1473	igb_set_rx_mode(netdev);
1474
1475	igb_restore_vlan(adapter);
1476
1477	igb_setup_tctl(adapter);
1478	igb_setup_mrqc(adapter);
1479	igb_setup_rctl(adapter);
1480
1481	igb_configure_tx(adapter);
1482	igb_configure_rx(adapter);
1483
1484	igb_rx_fifo_flush_82575(&adapter->hw);
1485
1486	/* call igb_desc_unused which always leaves
1487	 * at least 1 descriptor unused to make sure
1488	 * next_to_use != next_to_clean */
1489	for (i = 0; i < adapter->num_rx_queues; i++) {
1490		struct igb_ring *ring = adapter->rx_ring[i];
1491		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1492	}
1493}
1494
1495/**
1496 * igb_power_up_link - Power up the phy/serdes link
1497 * @adapter: address of board private structure
1498 **/
1499void igb_power_up_link(struct igb_adapter *adapter)
1500{
1501	igb_reset_phy(&adapter->hw);
1502
1503	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1504		igb_power_up_phy_copper(&adapter->hw);
1505	else
1506		igb_power_up_serdes_link_82575(&adapter->hw);
1507}
1508
1509/**
1510 * igb_power_down_link - Power down the phy/serdes link
1511 * @adapter: address of board private structure
1512 */
1513static void igb_power_down_link(struct igb_adapter *adapter)
1514{
1515	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1516		igb_power_down_phy_copper_82575(&adapter->hw);
1517	else
1518		igb_shutdown_serdes_link_82575(&adapter->hw);
1519}
1520
1521/**
1522 * igb_up - Open the interface and prepare it to handle traffic
1523 * @adapter: board private structure
1524 **/
1525int igb_up(struct igb_adapter *adapter)
1526{
1527	struct e1000_hw *hw = &adapter->hw;
1528	int i;
1529
1530	/* hardware has been reset, we need to reload some things */
1531	igb_configure(adapter);
1532
1533	clear_bit(__IGB_DOWN, &adapter->state);
1534
1535	for (i = 0; i < adapter->num_q_vectors; i++)
1536		napi_enable(&(adapter->q_vector[i]->napi));
1537
1538	if (adapter->msix_entries)
1539		igb_configure_msix(adapter);
1540	else
1541		igb_assign_vector(adapter->q_vector[0], 0);
1542
1543	/* Clear any pending interrupts. */
1544	rd32(E1000_ICR);
1545	igb_irq_enable(adapter);
1546
1547	/* notify VFs that reset has been completed */
1548	if (adapter->vfs_allocated_count) {
1549		u32 reg_data = rd32(E1000_CTRL_EXT);
1550		reg_data |= E1000_CTRL_EXT_PFRSTD;
1551		wr32(E1000_CTRL_EXT, reg_data);
1552	}
1553
1554	netif_tx_start_all_queues(adapter->netdev);
1555
1556	/* start the watchdog. */
1557	hw->mac.get_link_status = 1;
1558	schedule_work(&adapter->watchdog_task);
1559
1560	return 0;
1561}
1562
1563void igb_down(struct igb_adapter *adapter)
1564{
1565	struct net_device *netdev = adapter->netdev;
1566	struct e1000_hw *hw = &adapter->hw;
1567	u32 tctl, rctl;
1568	int i;
1569
1570	/* signal that we're down so the interrupt handler does not
1571	 * reschedule our watchdog timer */
1572	set_bit(__IGB_DOWN, &adapter->state);
1573
1574	/* disable receives in the hardware */
1575	rctl = rd32(E1000_RCTL);
1576	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1577	/* flush and sleep below */
1578
1579	netif_tx_stop_all_queues(netdev);
1580
1581	/* disable transmits in the hardware */
1582	tctl = rd32(E1000_TCTL);
1583	tctl &= ~E1000_TCTL_EN;
1584	wr32(E1000_TCTL, tctl);
1585	/* flush both disables and wait for them to finish */
1586	wrfl();
1587	msleep(10);
1588
1589	for (i = 0; i < adapter->num_q_vectors; i++)
1590		napi_disable(&(adapter->q_vector[i]->napi));
1591
1592	igb_irq_disable(adapter);
1593
1594	del_timer_sync(&adapter->watchdog_timer);
1595	del_timer_sync(&adapter->phy_info_timer);
1596
1597	netif_carrier_off(netdev);
1598
1599	/* record the stats before reset */
1600	spin_lock(&adapter->stats64_lock);
1601	igb_update_stats(adapter, &adapter->stats64);
1602	spin_unlock(&adapter->stats64_lock);
1603
1604	adapter->link_speed = 0;
1605	adapter->link_duplex = 0;
1606
1607	if (!pci_channel_offline(adapter->pdev))
1608		igb_reset(adapter);
1609	igb_clean_all_tx_rings(adapter);
1610	igb_clean_all_rx_rings(adapter);
1611#ifdef CONFIG_IGB_DCA
1612
1613	/* since we reset the hardware, DCA settings were cleared */
1614	igb_setup_dca(adapter);
1615#endif
1616}
1617
1618void igb_reinit_locked(struct igb_adapter *adapter)
1619{
1620	WARN_ON(in_interrupt());
1621	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1622		msleep(1);
1623	igb_down(adapter);
1624	igb_up(adapter);
1625	clear_bit(__IGB_RESETTING, &adapter->state);
1626}
1627
1628void igb_reset(struct igb_adapter *adapter)
1629{
1630	struct pci_dev *pdev = adapter->pdev;
1631	struct e1000_hw *hw = &adapter->hw;
1632	struct e1000_mac_info *mac = &hw->mac;
1633	struct e1000_fc_info *fc = &hw->fc;
1634	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1635	u16 hwm;
1636
1637	/* Repartition the PBA for MTUs greater than 9K.
1638	 * CTRL.RST is required for the change to take effect.
1639	 */
1640	switch (mac->type) {
1641	case e1000_i350:
1642	case e1000_82580:
1643		pba = rd32(E1000_RXPBS);
1644		pba = igb_rxpbs_adjust_82580(pba);
1645		break;
1646	case e1000_82576:
1647		pba = rd32(E1000_RXPBS);
1648		pba &= E1000_RXPBS_SIZE_MASK_82576;
1649		break;
1650	case e1000_82575:
1651	case e1000_i210:
1652	case e1000_i211:
1653	default:
1654		pba = E1000_PBA_34K;
1655		break;
1656	}
1657
1658	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1659	    (mac->type < e1000_82576)) {
1660		/* adjust PBA for jumbo frames */
1661		wr32(E1000_PBA, pba);
1662
1663		/* To maintain wire speed transmits, the Tx FIFO should be
1664		 * large enough to accommodate two full transmit packets,
1665		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1666		 * the Rx FIFO should be large enough to accommodate at least
1667		 * one full receive packet and is similarly rounded up and
1668		 * expressed in KB. */
1669		pba = rd32(E1000_PBA);
1670		/* upper 16 bits has Tx packet buffer allocation size in KB */
1671		tx_space = pba >> 16;
1672		/* lower 16 bits has Rx packet buffer allocation size in KB */
1673		pba &= 0xffff;
1674		/* the Tx FIFO also stores 16 bytes of information about each Tx
1675		 * packet, but don't include the Ethernet FCS because hardware appends it */
1676		min_tx_space = (adapter->max_frame_size +
1677				sizeof(union e1000_adv_tx_desc) -
1678				ETH_FCS_LEN) * 2;
1679		min_tx_space = ALIGN(min_tx_space, 1024);
1680		min_tx_space >>= 10;
1681		/* software strips receive CRC, so leave room for it */
1682		min_rx_space = adapter->max_frame_size;
1683		min_rx_space = ALIGN(min_rx_space, 1024);
1684		min_rx_space >>= 10;
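		/* e.g. for a ~9K jumbo frame this works out to roughly 18KB of
		 * minimum Tx space and 9KB of minimum Rx space
		 */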
1685
1686		/* If current Tx allocation is less than the min Tx FIFO size,
1687		 * and the min Tx FIFO size is less than the current Rx FIFO
1688		 * allocation, take space away from current Rx allocation */
1689		if (tx_space < min_tx_space &&
1690		    ((min_tx_space - tx_space) < pba)) {
1691			pba = pba - (min_tx_space - tx_space);
1692
1693			/* if short on rx space, rx wins and must trump tx
1694			 * adjustment */
1695			if (pba < min_rx_space)
1696				pba = min_rx_space;
1697		}
1698		wr32(E1000_PBA, pba);
1699	}
1700
1701	/* flow control settings */
1702	/* The high water mark must be low enough to fit one full frame
1703	 * (or the size used for early receive) above it in the Rx FIFO.
1704	 * Set it to the lower of:
1705	 * - 90% of the Rx FIFO size, or
1706	 * - the full Rx FIFO size minus one full frame */
1707	hwm = min(((pba << 10) * 9 / 10),
1708			((pba << 10) - 2 * adapter->max_frame_size));
1709
1710	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1711	fc->low_water = fc->high_water - 16;
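	/* e.g. with a 34KB PBA and a 1522-byte max frame, hwm = min(31334,
	 * 31772) = 31334, giving high_water = 31328 and low_water = 31312
	 */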
1712	fc->pause_time = 0xFFFF;
1713	fc->send_xon = 1;
1714	fc->current_mode = fc->requested_mode;
1715
1716	/* disable receive for all VFs and wait one second */
1717	if (adapter->vfs_allocated_count) {
1718		int i;
1719		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1720			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1721
1722		/* ping all the active VFs to let them know we are going down */
1723		igb_ping_all_vfs(adapter);
1724
1725		/* disable transmits and receives */
1726		wr32(E1000_VFRE, 0);
1727		wr32(E1000_VFTE, 0);
1728	}
1729
1730	/* Allow time for pending master requests to run */
1731	hw->mac.ops.reset_hw(hw);
1732	wr32(E1000_WUC, 0);
1733
1734	if (hw->mac.ops.init_hw(hw))
1735		dev_err(&pdev->dev, "Hardware Error\n");
1736
1737	/*
1738	 * Flow control settings reset on hardware reset, so guarantee flow
1739	 * control is off when forcing speed.
1740	 */
1741	if (!hw->mac.autoneg)
1742		igb_force_mac_fc(hw);
1743
1744	igb_init_dmac(adapter, pba);
1745	if (!netif_running(adapter->netdev))
1746		igb_power_down_link(adapter);
1747
1748	igb_update_mng_vlan(adapter);
1749
1750	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1751	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1752
1753#ifdef CONFIG_IGB_PTP
1754	/* Re-enable PTP, where applicable. */
1755	igb_ptp_reset(adapter);
1756#endif /* CONFIG_IGB_PTP */
1757
1758	igb_get_phy_info(hw);
1759}
1760
1761static netdev_features_t igb_fix_features(struct net_device *netdev,
1762	netdev_features_t features)
1763{
1764	/*
1765	 * Since there is no support for separate rx/tx vlan accel
1766	 * enable/disable make sure tx flag is always in same state as rx.
1767	 */
1768	if (features & NETIF_F_HW_VLAN_RX)
1769		features |= NETIF_F_HW_VLAN_TX;
1770	else
1771		features &= ~NETIF_F_HW_VLAN_TX;
1772
1773	return features;
1774}
1775
1776static int igb_set_features(struct net_device *netdev,
1777	netdev_features_t features)
1778{
1779	netdev_features_t changed = netdev->features ^ features;
1780	struct igb_adapter *adapter = netdev_priv(netdev);
1781
1782	if (changed & NETIF_F_HW_VLAN_RX)
1783		igb_vlan_mode(netdev, features);
1784
1785	if (!(changed & NETIF_F_RXALL))
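	/* Of the feature changes handled here, only NETIF_F_RXALL requires the
	 * reinit/reset below; other bits need no further hardware changes.
	 */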
1786		return 0;
1787
1788	netdev->features = features;
1789
1790	if (netif_running(netdev))
1791		igb_reinit_locked(adapter);
1792	else
1793		igb_reset(adapter);
1794
1795	return 0;
1796}
1797
1798static const struct net_device_ops igb_netdev_ops = {
1799	.ndo_open		= igb_open,
1800	.ndo_stop		= igb_close,
1801	.ndo_start_xmit		= igb_xmit_frame,
1802	.ndo_get_stats64	= igb_get_stats64,
1803	.ndo_set_rx_mode	= igb_set_rx_mode,
1804	.ndo_set_mac_address	= igb_set_mac,
1805	.ndo_change_mtu		= igb_change_mtu,
1806	.ndo_do_ioctl		= igb_ioctl,
1807	.ndo_tx_timeout		= igb_tx_timeout,
1808	.ndo_validate_addr	= eth_validate_addr,
1809	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1810	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1811	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1812	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1813	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1814	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1815#ifdef CONFIG_NET_POLL_CONTROLLER
1816	.ndo_poll_controller	= igb_netpoll,
1817#endif
1818	.ndo_fix_features	= igb_fix_features,
1819	.ndo_set_features	= igb_set_features,
1820};
1821
1822/**
1823 * igb_set_fw_version - Configure version string for ethtool
1824 * @adapter: adapter struct
1825 *
1826 **/
1827void igb_set_fw_version(struct igb_adapter *adapter)
1828{
1829	struct e1000_hw *hw = &adapter->hw;
1830	u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset;
1831	u16 major, build, patch, fw_version;
1832	u32 etrack_id;
1833
1834	hw->nvm.ops.read(hw, 5, 1, &fw_version);
1835	if (adapter->hw.mac.type != e1000_i211) {
1836		hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verh);
1837		hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verl);
1838		etrack_id = (eeprom_verh << IGB_ETRACK_SHIFT) | eeprom_verl;
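		/* eeprom_verh/verl are the two halves of the 32-bit eTrack ID
		 * reported by ethtool; e.g. (hypothetical values, assuming
		 * IGB_ETRACK_SHIFT is 16) verh = 0x8000 and verl = 0x0898
		 * combine to 0x80000898.
		 */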
1839
1840		/* combo image version needs to be found */
1841		hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
1842		if ((comb_offset != 0x0) &&
1843		    (comb_offset != IGB_NVM_VER_INVALID)) {
1844			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
1845					 + 1), 1, &comb_verh);
1846			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
1847					 1, &comb_verl);
1848
1849			/* Only display the Option ROM version if it exists and is valid */
1850			if ((comb_verh && comb_verl) &&
1851			    ((comb_verh != IGB_NVM_VER_INVALID) &&
1852			     (comb_verl != IGB_NVM_VER_INVALID))) {
1853				major = comb_verl >> IGB_COMB_VER_SHFT;
1854				build = (comb_verl << IGB_COMB_VER_SHFT) |
1855					(comb_verh >> IGB_COMB_VER_SHFT);
1856				patch = comb_verh & IGB_COMB_VER_MASK;
1857				snprintf(adapter->fw_version,
1858					 sizeof(adapter->fw_version),
1859					 "%d.%d%d, 0x%08x, %d.%d.%d",
1860					 (fw_version & IGB_MAJOR_MASK) >>
1861					 IGB_MAJOR_SHIFT,
1862					 (fw_version & IGB_MINOR_MASK) >>
1863					 IGB_MINOR_SHIFT,
1864					 (fw_version & IGB_BUILD_MASK),
1865					 etrack_id, major, build, patch);
1866				goto out;
1867			}
1868		}
1869		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1870			 "%d.%d%d, 0x%08x",
1871			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1872			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1873			 (fw_version & IGB_BUILD_MASK), etrack_id);
1874	} else {
1875		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1876			 "%d.%d%d",
1877			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1878			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1879			 (fw_version & IGB_BUILD_MASK));
1880	}
1881out:
1882	return;
1883}
1884
1885/**
1886 * igb_probe - Device Initialization Routine
1887 * @pdev: PCI device information struct
1888 * @ent: entry in igb_pci_tbl
1889 *
1890 * Returns 0 on success, negative on failure
1891 *
1892 * igb_probe initializes an adapter identified by a pci_dev structure.
1893 * The OS initialization, configuring of the adapter private structure,
1894 * and a hardware reset occur.
1895 **/
1896static int __devinit igb_probe(struct pci_dev *pdev,
1897			       const struct pci_device_id *ent)
1898{
1899	struct net_device *netdev;
1900	struct igb_adapter *adapter;
1901	struct e1000_hw *hw;
1902	u16 eeprom_data = 0;
1903	s32 ret_val;
1904	static int global_quad_port_a; /* global quad port a indication */
1905	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1906	unsigned long mmio_start, mmio_len;
1907	int err, pci_using_dac;
1908	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1909	u8 part_str[E1000_PBANUM_LENGTH];
1910
1911	/* Catch broken hardware that put the wrong VF device ID in
1912	 * the PCIe SR-IOV capability.
1913	 */
1914	if (pdev->is_virtfn) {
1915		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1916			pci_name(pdev), pdev->vendor, pdev->device);
1917		return -EINVAL;
1918	}
1919
1920	err = pci_enable_device_mem(pdev);
1921	if (err)
1922		return err;
1923
1924	pci_using_dac = 0;
1925	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1926	if (!err) {
1927		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1928		if (!err)
1929			pci_using_dac = 1;
1930	} else {
1931		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1932		if (err) {
1933			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1934			if (err) {
1935				dev_err(&pdev->dev, "No usable DMA "
1936					"configuration, aborting\n");
1937				goto err_dma;
1938			}
1939		}
1940	}
1941
1942	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1943	                                   IORESOURCE_MEM),
1944	                                   igb_driver_name);
1945	if (err)
1946		goto err_pci_reg;
1947
1948	pci_enable_pcie_error_reporting(pdev);
1949
1950	pci_set_master(pdev);
1951	pci_save_state(pdev);
1952
1953	err = -ENOMEM;
1954	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1955				   IGB_MAX_TX_QUEUES);
1956	if (!netdev)
1957		goto err_alloc_etherdev;
1958
1959	SET_NETDEV_DEV(netdev, &pdev->dev);
1960
1961	pci_set_drvdata(pdev, netdev);
1962	adapter = netdev_priv(netdev);
1963	adapter->netdev = netdev;
1964	adapter->pdev = pdev;
1965	hw = &adapter->hw;
1966	hw->back = adapter;
1967	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1968
1969	mmio_start = pci_resource_start(pdev, 0);
1970	mmio_len = pci_resource_len(pdev, 0);
1971
1972	err = -EIO;
1973	hw->hw_addr = ioremap(mmio_start, mmio_len);
1974	if (!hw->hw_addr)
1975		goto err_ioremap;
1976
1977	netdev->netdev_ops = &igb_netdev_ops;
1978	igb_set_ethtool_ops(netdev);
1979	netdev->watchdog_timeo = 5 * HZ;
1980
1981	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1982
1983	netdev->mem_start = mmio_start;
1984	netdev->mem_end = mmio_start + mmio_len;
1985
1986	/* PCI config space info */
1987	hw->vendor_id = pdev->vendor;
1988	hw->device_id = pdev->device;
1989	hw->revision_id = pdev->revision;
1990	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1991	hw->subsystem_device_id = pdev->subsystem_device;
1992
1993	/* Copy the default MAC, PHY and NVM function pointers */
1994	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1995	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1996	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1997	/* Initialize skew-specific constants */
1998	err = ei->get_invariants(hw);
1999	if (err)
2000		goto err_sw_init;
2001
2002	/* setup the private structure */
2003	err = igb_sw_init(adapter);
2004	if (err)
2005		goto err_sw_init;
2006
2007	igb_get_bus_info_pcie(hw);
2008
2009	hw->phy.autoneg_wait_to_complete = false;
2010
2011	/* Copper options */
2012	if (hw->phy.media_type == e1000_media_type_copper) {
2013		hw->phy.mdix = AUTO_ALL_MODES;
2014		hw->phy.disable_polarity_correction = false;
2015		hw->phy.ms_type = e1000_ms_hw_default;
2016	}
2017
2018	if (igb_check_reset_block(hw))
2019		dev_info(&pdev->dev,
2020			"PHY reset is blocked due to SOL/IDER session.\n");
2021
2022	/*
2023	 * features is initialized to 0 at allocation, but it may already have
2024	 * bits set by igb_sw_init, so OR in the new flags instead of
2025	 * assigning them.
2026	 */
2027	netdev->features |= NETIF_F_SG |
2028			    NETIF_F_IP_CSUM |
2029			    NETIF_F_IPV6_CSUM |
2030			    NETIF_F_TSO |
2031			    NETIF_F_TSO6 |
2032			    NETIF_F_RXHASH |
2033			    NETIF_F_RXCSUM |
2034			    NETIF_F_HW_VLAN_RX |
2035			    NETIF_F_HW_VLAN_TX;
2036
2037	/* copy netdev features into list of user selectable features */
2038	netdev->hw_features |= netdev->features;
2039	netdev->hw_features |= NETIF_F_RXALL;
2040
2041	/* set this bit last since it cannot be part of hw_features */
2042	netdev->features |= NETIF_F_HW_VLAN_FILTER;
2043
2044	netdev->vlan_features |= NETIF_F_TSO |
2045				 NETIF_F_TSO6 |
2046				 NETIF_F_IP_CSUM |
2047				 NETIF_F_IPV6_CSUM |
2048				 NETIF_F_SG;
2049
2050	netdev->priv_flags |= IFF_SUPP_NOFCS;
2051
2052	if (pci_using_dac) {
2053		netdev->features |= NETIF_F_HIGHDMA;
2054		netdev->vlan_features |= NETIF_F_HIGHDMA;
2055	}
2056
2057	if (hw->mac.type >= e1000_82576) {
2058		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2059		netdev->features |= NETIF_F_SCTP_CSUM;
2060	}
2061
2062	netdev->priv_flags |= IFF_UNICAST_FLT;
2063
2064	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2065
2066	/* before reading the NVM, reset the controller to put the device in a
2067	 * known good starting state */
2068	hw->mac.ops.reset_hw(hw);
2069
2070	/*
2071	 * make sure the NVM is good; i211 parts have special NVM that
2072	 * doesn't contain a checksum
2073	 */
2074	if (hw->mac.type != e1000_i211) {
2075		if (hw->nvm.ops.validate(hw) < 0) {
2076			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2077			err = -EIO;
2078			goto err_eeprom;
2079		}
2080	}
2081
2082	/* copy the MAC address out of the NVM */
2083	if (hw->mac.ops.read_mac_addr(hw))
2084		dev_err(&pdev->dev, "NVM Read Error\n");
2085
2086	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2087	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2088
2089	if (!is_valid_ether_addr(netdev->perm_addr)) {
2090		dev_err(&pdev->dev, "Invalid MAC Address\n");
2091		err = -EIO;
2092		goto err_eeprom;
2093	}
2094
2095	/* get firmware version for ethtool -i */
2096	igb_set_fw_version(adapter);
2097
2098	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2099	            (unsigned long) adapter);
2100	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2101	            (unsigned long) adapter);
2102
2103	INIT_WORK(&adapter->reset_task, igb_reset_task);
2104	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2105
2106	/* Initialize link properties that are user-changeable */
2107	adapter->fc_autoneg = true;
2108	hw->mac.autoneg = true;
2109	hw->phy.autoneg_advertised = 0x2f;
2110
2111	hw->fc.requested_mode = e1000_fc_default;
2112	hw->fc.current_mode = e1000_fc_default;
2113
2114	igb_validate_mdi_setting(hw);
2115
2116	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2117	 * enable the ACPI Magic Packet filter.
2118	 */
2119
2120	if (hw->bus.func == 0)
2121		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2122	else if (hw->mac.type >= e1000_82580)
2123		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2124		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2125		                 &eeprom_data);
2126	else if (hw->bus.func == 1)
2127		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2128
2129	if (eeprom_data & eeprom_apme_mask)
2130		adapter->eeprom_wol |= E1000_WUFC_MAG;
2131
2132	/* now that we have the eeprom settings, apply the special cases where
2133	 * the eeprom may be wrong or the board simply won't support wake on
2134	 * lan on a particular port */
2135	switch (pdev->device) {
2136	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2137		adapter->eeprom_wol = 0;
2138		break;
2139	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2140	case E1000_DEV_ID_82576_FIBER:
2141	case E1000_DEV_ID_82576_SERDES:
2142		/* Wake events only supported on port A for dual fiber
2143		 * regardless of eeprom setting */
2144		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2145			adapter->eeprom_wol = 0;
2146		break;
2147	case E1000_DEV_ID_82576_QUAD_COPPER:
2148	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2149		/* if quad port adapter, disable WoL on all but port A */
2150		if (global_quad_port_a != 0)
2151			adapter->eeprom_wol = 0;
2152		else
2153			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2154		/* Reset for multiple quad port adapters */
2155		if (++global_quad_port_a == 4)
2156			global_quad_port_a = 0;
2157		break;
2158	}
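	/* e.g. on an 82576 quad-port copper adapter only the first function
	 * probed keeps its EEPROM WoL setting and is flagged as port A;
	 * global_quad_port_a wraps back to zero after four ports so a second
	 * quad-port adapter is treated the same way.
	 */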
2159
2160	/* initialize the wol settings based on the eeprom settings */
2161	adapter->wol = adapter->eeprom_wol;
2162	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2163
2164	/* reset the hardware with the new settings */
2165	igb_reset(adapter);
2166
2167	/* let the f/w know that the h/w is now under the control of the
2168	 * driver. */
2169	igb_get_hw_control(adapter);
2170
2171	strcpy(netdev->name, "eth%d");
2172	err = register_netdev(netdev);
2173	if (err)
2174		goto err_register;
2175
2176	/* carrier off reporting is important to ethtool even BEFORE open */
2177	netif_carrier_off(netdev);
2178
2179#ifdef CONFIG_IGB_DCA
2180	if (dca_add_requester(&pdev->dev) == 0) {
2181		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2182		dev_info(&pdev->dev, "DCA enabled\n");
2183		igb_setup_dca(adapter);
2184	}
2185
2186#endif
2187
2188#ifdef CONFIG_IGB_PTP
2189	/* do hw tstamp init after resetting */
2190	igb_ptp_init(adapter);
2191#endif /* CONFIG_IGB_PTP */
2192
2193	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2194	/* print bus type/speed/width info */
2195	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2196		 netdev->name,
2197		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2198		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2199		                                            "unknown"),
2200		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2201		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2202		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2203		   "unknown"),
2204		 netdev->dev_addr);
2205
2206	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2207	if (ret_val)
2208		strcpy(part_str, "Unknown");
2209	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2210	dev_info(&pdev->dev,
2211		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2212		adapter->msix_entries ? "MSI-X" :
2213		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2214		adapter->num_rx_queues, adapter->num_tx_queues);
2215	switch (hw->mac.type) {
2216	case e1000_i350:
2217	case e1000_i210:
2218	case e1000_i211:
2219		igb_set_eee_i350(hw);
2220		break;
2221	default:
2222		break;
2223	}
2224
2225	pm_runtime_put_noidle(&pdev->dev);
2226	return 0;
2227
2228err_register:
2229	igb_release_hw_control(adapter);
2230err_eeprom:
2231	if (!igb_check_reset_block(hw))
2232		igb_reset_phy(hw);
2233
2234	if (hw->flash_address)
2235		iounmap(hw->flash_address);
2236err_sw_init:
2237	igb_clear_interrupt_scheme(adapter);
2238	iounmap(hw->hw_addr);
2239err_ioremap:
2240	free_netdev(netdev);
2241err_alloc_etherdev:
2242	pci_release_selected_regions(pdev,
2243	                             pci_select_bars(pdev, IORESOURCE_MEM));
2244err_pci_reg:
2245err_dma:
2246	pci_disable_device(pdev);
2247	return err;
2248}
2249
2250/**
2251 * igb_remove - Device Removal Routine
2252 * @pdev: PCI device information struct
2253 *
2254 * igb_remove is called by the PCI subsystem to alert the driver
2255	 * that it should release a PCI device.  This could be caused by a
2256 * Hot-Plug event, or because the driver is going to be removed from
2257 * memory.
2258 **/
2259static void __devexit igb_remove(struct pci_dev *pdev)
2260{
2261	struct net_device *netdev = pci_get_drvdata(pdev);
2262	struct igb_adapter *adapter = netdev_priv(netdev);
2263	struct e1000_hw *hw = &adapter->hw;
2264
2265	pm_runtime_get_noresume(&pdev->dev);
2266#ifdef CONFIG_IGB_PTP
2267	igb_ptp_stop(adapter);
2268#endif /* CONFIG_IGB_PTP */
2269
2270	/*
2271	 * The watchdog timer may be rescheduled, so explicitly
2272	 * disable watchdog from being rescheduled.
2273	 */
2274	set_bit(__IGB_DOWN, &adapter->state);
2275	del_timer_sync(&adapter->watchdog_timer);
2276	del_timer_sync(&adapter->phy_info_timer);
2277
2278	cancel_work_sync(&adapter->reset_task);
2279	cancel_work_sync(&adapter->watchdog_task);
2280
2281#ifdef CONFIG_IGB_DCA
2282	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2283		dev_info(&pdev->dev, "DCA disabled\n");
2284		dca_remove_requester(&pdev->dev);
2285		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2286		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2287	}
2288#endif
2289
2290	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2291	 * would have already happened in close and is redundant. */
2292	igb_release_hw_control(adapter);
2293
2294	unregister_netdev(netdev);
2295
2296	igb_clear_interrupt_scheme(adapter);
2297
2298#ifdef CONFIG_PCI_IOV
2299	/* reclaim resources allocated to VFs */
2300	if (adapter->vf_data) {
2301		/* disable iov and allow time for transactions to clear */
2302		if (igb_vfs_are_assigned(adapter)) {
2303			dev_info(&pdev->dev, "Unloading driver while VFs are assigned - VFs will not be deallocated\n");
2304		} else {
2305			pci_disable_sriov(pdev);
2306			msleep(500);
2307		}
2308
2309		kfree(adapter->vf_data);
2310		adapter->vf_data = NULL;
2311		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2312		wrfl();
2313		msleep(100);
2314		dev_info(&pdev->dev, "IOV Disabled\n");
2315	}
2316#endif
2317
2318	iounmap(hw->hw_addr);
2319	if (hw->flash_address)
2320		iounmap(hw->flash_address);
2321	pci_release_selected_regions(pdev,
2322	                             pci_select_bars(pdev, IORESOURCE_MEM));
2323
2324	kfree(adapter->shadow_vfta);
2325	free_netdev(netdev);
2326
2327	pci_disable_pcie_error_reporting(pdev);
2328
2329	pci_disable_device(pdev);
2330}
2331
2332/**
2333 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2334 * @adapter: board private structure to initialize
2335 *
2336	 * This function initializes the VF-specific data storage and then attempts to
2337	 * allocate the VFs.  The reason for this ordering is that it is much more
2338	 * expensive, time-wise, to disable SR-IOV than it is to allocate and free
2339	 * the memory for the VFs.
2340 **/
2341static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2342{
2343#ifdef CONFIG_PCI_IOV
2344	struct pci_dev *pdev = adapter->pdev;
2345	struct e1000_hw *hw = &adapter->hw;
2346	int old_vfs = pci_num_vf(adapter->pdev);
2347	int i;
2348
2349	/* Virtualization features not supported on i210 family. */
2350	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2351		return;
2352
2353	if (old_vfs) {
2354		dev_info(&pdev->dev, "%d pre-allocated VFs found - overriding "
2355			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2356		adapter->vfs_allocated_count = old_vfs;
2357	}
2358
2359	if (!adapter->vfs_allocated_count)
2360		return;
2361
2362	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2363				sizeof(struct vf_data_storage), GFP_KERNEL);
2364
2365	/* if allocation failed then we do not support SR-IOV */
2366	if (!adapter->vf_data) {
2367		adapter->vfs_allocated_count = 0;
2368		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2369			"Data Storage\n");
2370		goto out;
2371	}
2372
2373	if (!old_vfs) {
2374		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2375			goto err_out;
2376	}
2377	dev_info(&pdev->dev, "%d VFs allocated\n",
2378		 adapter->vfs_allocated_count);
2379	for (i = 0; i < adapter->vfs_allocated_count; i++)
2380		igb_vf_configure(adapter, i);
2381
2382	/* DMA Coalescing is not supported in IOV mode. */
2383	adapter->flags &= ~IGB_FLAG_DMAC;
2384	goto out;
2385err_out:
2386	kfree(adapter->vf_data);
2387	adapter->vf_data = NULL;
2388	adapter->vfs_allocated_count = 0;
2389out:
2390	return;
2391#endif /* CONFIG_PCI_IOV */
2392}
2393
2394/**
2395 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2396 * @adapter: board private structure to initialize
2397 *
2398 * igb_sw_init initializes the Adapter private data structure.
2399 * Fields are initialized based on PCI device information and
2400 * OS network device settings (MTU size).
2401 **/
2402static int __devinit igb_sw_init(struct igb_adapter *adapter)
2403{
2404	struct e1000_hw *hw = &adapter->hw;
2405	struct net_device *netdev = adapter->netdev;
2406	struct pci_dev *pdev = adapter->pdev;
2407	u32 max_rss_queues;
2408
2409	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2410
2411	/* set default ring sizes */
2412	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2413	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2414
2415	/* set default ITR values */
2416	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2417	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2418
2419	/* set default work limits */
2420	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2421
2422	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2423				  VLAN_HLEN;
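	/* e.g. the default 1500-byte MTU yields a max_frame_size of 1522
	 * bytes: 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN).
	 */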
2424	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2425
2426	adapter->node = -1;
2427
2428	spin_lock_init(&adapter->stats64_lock);
2429#ifdef CONFIG_PCI_IOV
2430	switch (hw->mac.type) {
2431	case e1000_82576:
2432	case e1000_i350:
2433		if (max_vfs > 7) {
2434			dev_warn(&pdev->dev,
2435				 "Maximum of 7 VFs per PF, using max\n");
2436			adapter->vfs_allocated_count = 7;
2437		} else
2438			adapter->vfs_allocated_count = max_vfs;
2439		break;
2440	default:
2441		break;
2442	}
2443#endif /* CONFIG_PCI_IOV */
2444
2445	/* Determine the maximum number of RSS queues supported. */
2446	switch (hw->mac.type) {
2447	case e1000_i211:
2448		max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2449		break;
2450	case e1000_82575:
2451	case e1000_i210:
2452		max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2453		break;
2454	case e1000_i350:
2455		/* I350 cannot do RSS and SR-IOV at the same time */
2456		if (!!adapter->vfs_allocated_count) {
2457			max_rss_queues = 1;
2458			break;
2459		}
2460		/* fall through */
2461	case e1000_82576:
2462		if (!!adapter->vfs_allocated_count) {
2463			max_rss_queues = 2;
2464			break;
2465		}
2466		/* fall through */
2467	case e1000_82580:
2468	default:
2469		max_rss_queues = IGB_MAX_RX_QUEUES;
2470		break;
2471	}
2472
2473	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
2474
2475	/* Determine if we need to pair queues. */
2476	switch (hw->mac.type) {
2477	case e1000_82575:
2478	case e1000_i211:
2479		/* Device supports enough interrupts without queue pairing. */
2480		break;
2481	case e1000_82576:
2482		/*
2483		 * If VFs are going to be allocated with RSS queues then we
2484		 * should pair the queues in order to conserve interrupts due
2485		 * to limited supply.
2486		 */
2487		if ((adapter->rss_queues > 1) &&
2488		    (adapter->vfs_allocated_count > 6))
2489			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2490		/* fall through */
2491	case e1000_82580:
2492	case e1000_i350:
2493	case e1000_i210:
2494	default:
2495		/*
2496		 * If rss_queues > half of max_rss_queues, pair the queues in
2497		 * order to conserve interrupts due to limited supply.
2498		 */
2499		if (adapter->rss_queues > (max_rss_queues / 2))
2500			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2501		break;
2502	}
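	/* Example (illustrative only): an 82580 with eight online CPUs gets
	 * rss_queues = min(max_rss_queues, 8); whenever that is more than
	 * half of max_rss_queues, IGB_FLAG_QUEUE_PAIRS makes a Tx and an Rx
	 * ring share a single interrupt vector.
	 */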
2503
2504	/* Setup and initialize a copy of the hw vlan table array */
2505	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2506				E1000_VLAN_FILTER_TBL_SIZE,
2507				GFP_ATOMIC);
2508
2509	/* This call may decrease the number of queues */
2510	if (igb_init_interrupt_scheme(adapter)) {
2511		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2512		return -ENOMEM;
2513	}
2514
2515	igb_probe_vfs(adapter);
2516
2517	/* Explicitly disable IRQ since the NIC can be in any state. */
2518	igb_irq_disable(adapter);
2519
2520	if (hw->mac.type >= e1000_i350)
2521		adapter->flags &= ~IGB_FLAG_DMAC;
2522
2523	set_bit(__IGB_DOWN, &adapter->state);
2524	return 0;
2525}
2526
2527/**
2528 * igb_open - Called when a network interface is made active
2529 * @netdev: network interface device structure
2530 *
2531 * Returns 0 on success, negative value on failure
2532 *
2533 * The open entry point is called when a network interface is made
2534 * active by the system (IFF_UP).  At this point all resources needed
2535 * for transmit and receive operations are allocated, the interrupt
2536 * handler is registered with the OS, the watchdog timer is started,
2537 * and the stack is notified that the interface is ready.
2538 **/
2539static int __igb_open(struct net_device *netdev, bool resuming)
2540{
2541	struct igb_adapter *adapter = netdev_priv(netdev);
2542	struct e1000_hw *hw = &adapter->hw;
2543	struct pci_dev *pdev = adapter->pdev;
2544	int err;
2545	int i;
2546
2547	/* disallow open during test */
2548	if (test_bit(__IGB_TESTING, &adapter->state)) {
2549		WARN_ON(resuming);
2550		return -EBUSY;
2551	}
2552
2553	if (!resuming)
2554		pm_runtime_get_sync(&pdev->dev);
2555
2556	netif_carrier_off(netdev);
2557
2558	/* allocate transmit descriptors */
2559	err = igb_setup_all_tx_resources(adapter);
2560	if (err)
2561		goto err_setup_tx;
2562
2563	/* allocate receive descriptors */
2564	err = igb_setup_all_rx_resources(adapter);
2565	if (err)
2566		goto err_setup_rx;
2567
2568	igb_power_up_link(adapter);
2569
2570	/* before we allocate an interrupt, we must be ready to handle it.
2571	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2572	 * as soon as we call pci_request_irq, so we have to set up our
2573	 * clean_rx handler before we do so.  */
2574	igb_configure(adapter);
2575
2576	err = igb_request_irq(adapter);
2577	if (err)
2578		goto err_req_irq;
2579
2580	/* From here on the code is the same as igb_up() */
2581	clear_bit(__IGB_DOWN, &adapter->state);
2582
2583	for (i = 0; i < adapter->num_q_vectors; i++)
2584		napi_enable(&(adapter->q_vector[i]->napi));
2585
2586	/* Clear any pending interrupts. */
2587	rd32(E1000_ICR);
2588
2589	igb_irq_enable(adapter);
2590
2591	/* notify VFs that reset has been completed */
2592	if (adapter->vfs_allocated_count) {
2593		u32 reg_data = rd32(E1000_CTRL_EXT);
2594		reg_data |= E1000_CTRL_EXT_PFRSTD;
2595		wr32(E1000_CTRL_EXT, reg_data);
2596	}
2597
2598	netif_tx_start_all_queues(netdev);
2599
2600	if (!resuming)
2601		pm_runtime_put(&pdev->dev);
2602
2603	/* start the watchdog. */
2604	hw->mac.get_link_status = 1;
2605	schedule_work(&adapter->watchdog_task);
2606
2607	return 0;
2608
2609err_req_irq:
2610	igb_release_hw_control(adapter);
2611	igb_power_down_link(adapter);
2612	igb_free_all_rx_resources(adapter);
2613err_setup_rx:
2614	igb_free_all_tx_resources(adapter);
2615err_setup_tx:
2616	igb_reset(adapter);
2617	if (!resuming)
2618		pm_runtime_put(&pdev->dev);
2619
2620	return err;
2621}
2622
2623static int igb_open(struct net_device *netdev)
2624{
2625	return __igb_open(netdev, false);
2626}
2627
2628/**
2629 * igb_close - Disables a network interface
2630 * @netdev: network interface device structure
2631 *
2632 * Returns 0, this is not allowed to fail
2633 *
2634 * The close entry point is called when an interface is de-activated
2635 * by the OS.  The hardware is still under the driver's control, but
2636 * needs to be disabled.  A global MAC reset is issued to stop the
2637 * hardware, and all transmit and receive resources are freed.
2638 **/
2639static int __igb_close(struct net_device *netdev, bool suspending)
2640{
2641	struct igb_adapter *adapter = netdev_priv(netdev);
2642	struct pci_dev *pdev = adapter->pdev;
2643
2644	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2645
2646	if (!suspending)
2647		pm_runtime_get_sync(&pdev->dev);
2648
2649	igb_down(adapter);
2650	igb_free_irq(adapter);
2651
2652	igb_free_all_tx_resources(adapter);
2653	igb_free_all_rx_resources(adapter);
2654
2655	if (!suspending)
2656		pm_runtime_put_sync(&pdev->dev);
2657	return 0;
2658}
2659
2660static int igb_close(struct net_device *netdev)
2661{
2662	return __igb_close(netdev, false);
2663}
2664
2665/**
2666 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2667 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2668 *
2669 * Return 0 on success, negative on failure
2670 **/
2671int igb_setup_tx_resources(struct igb_ring *tx_ring)
2672{
2673	struct device *dev = tx_ring->dev;
2674	int orig_node = dev_to_node(dev);
2675	int size;
2676
2677	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2678	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2679	if (!tx_ring->tx_buffer_info)
2680		tx_ring->tx_buffer_info = vzalloc(size);
2681	if (!tx_ring->tx_buffer_info)
2682		goto err;
2683
2684	/* round up to nearest 4K */
2685	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2686	tx_ring->size = ALIGN(tx_ring->size, 4096);
2687
2688	set_dev_node(dev, tx_ring->numa_node);
2689	tx_ring->desc = dma_alloc_coherent(dev,
2690					   tx_ring->size,
2691					   &tx_ring->dma,
2692					   GFP_KERNEL);
2693	set_dev_node(dev, orig_node);
2694	if (!tx_ring->desc)
2695		tx_ring->desc = dma_alloc_coherent(dev,
2696						   tx_ring->size,
2697						   &tx_ring->dma,
2698						   GFP_KERNEL);
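	/* The descriptor allocation is deliberately tried twice: first with
	 * the ring's preferred NUMA node selected via set_dev_node(), then,
	 * if that fails, again after the device's original node has been
	 * restored so any node may satisfy the request.
	 */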
2699
2700	if (!tx_ring->desc)
2701		goto err;
2702
2703	tx_ring->next_to_use = 0;
2704	tx_ring->next_to_clean = 0;
2705
2706	return 0;
2707
2708err:
2709	vfree(tx_ring->tx_buffer_info);
2710	dev_err(dev,
2711		"Unable to allocate memory for the transmit descriptor ring\n");
2712	return -ENOMEM;
2713}
2714
2715/**
2716 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2717 *				  (Descriptors) for all queues
2718 * @adapter: board private structure
2719 *
2720 * Return 0 on success, negative on failure
2721 **/
2722static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2723{
2724	struct pci_dev *pdev = adapter->pdev;
2725	int i, err = 0;
2726
2727	for (i = 0; i < adapter->num_tx_queues; i++) {
2728		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2729		if (err) {
2730			dev_err(&pdev->dev,
2731				"Allocation for Tx Queue %u failed\n", i);
2732			for (i--; i >= 0; i--)
2733				igb_free_tx_resources(adapter->tx_ring[i]);
2734			break;
2735		}
2736	}
2737
2738	return err;
2739}
2740
2741/**
2742 * igb_setup_tctl - configure the transmit control registers
2743 * @adapter: Board private structure
2744 **/
2745void igb_setup_tctl(struct igb_adapter *adapter)
2746{
2747	struct e1000_hw *hw = &adapter->hw;
2748	u32 tctl;
2749
2750	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2751	wr32(E1000_TXDCTL(0), 0);
2752
2753	/* Program the Transmit Control Register */
2754	tctl = rd32(E1000_TCTL);
2755	tctl &= ~E1000_TCTL_CT;
2756	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2757		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2758
2759	igb_config_collision_dist(hw);
2760
2761	/* Enable transmits */
2762	tctl |= E1000_TCTL_EN;
2763
2764	wr32(E1000_TCTL, tctl);
2765}
2766
2767/**
2768 * igb_configure_tx_ring - Configure transmit ring after Reset
2769 * @adapter: board private structure
2770 * @ring: tx ring to configure
2771 *
2772 * Configure a transmit ring after a reset.
2773 **/
2774void igb_configure_tx_ring(struct igb_adapter *adapter,
2775                           struct igb_ring *ring)
2776{
2777	struct e1000_hw *hw = &adapter->hw;
2778	u32 txdctl = 0;
2779	u64 tdba = ring->dma;
2780	int reg_idx = ring->reg_idx;
2781
2782	/* disable the queue */
2783	wr32(E1000_TXDCTL(reg_idx), 0);
2784	wrfl();
2785	mdelay(10);
2786
2787	wr32(E1000_TDLEN(reg_idx),
2788	                ring->count * sizeof(union e1000_adv_tx_desc));
2789	wr32(E1000_TDBAL(reg_idx),
2790	                tdba & 0x00000000ffffffffULL);
2791	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2792
2793	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2794	wr32(E1000_TDH(reg_idx), 0);
2795	writel(0, ring->tail);
2796
2797	txdctl |= IGB_TX_PTHRESH;
2798	txdctl |= IGB_TX_HTHRESH << 8;
2799	txdctl |= IGB_TX_WTHRESH << 16;
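	/* The three values above land in separate TXDCTL fields (prefetch,
	 * host and write-back thresholds at bits 0, 8 and 16, as shifted);
	 * IGB_TX_PTHRESH/HTHRESH/WTHRESH are the driver's default thresholds.
	 */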
2800
2801	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2802	wr32(E1000_TXDCTL(reg_idx), txdctl);
2803}
2804
2805/**
2806 * igb_configure_tx - Configure transmit Unit after Reset
2807 * @adapter: board private structure
2808 *
2809 * Configure the Tx unit of the MAC after a reset.
2810 **/
2811static void igb_configure_tx(struct igb_adapter *adapter)
2812{
2813	int i;
2814
2815	for (i = 0; i < adapter->num_tx_queues; i++)
2816		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2817}
2818
2819/**
2820 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2821 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2822 *
2823 * Returns 0 on success, negative on failure
2824 **/
2825int igb_setup_rx_resources(struct igb_ring *rx_ring)
2826{
2827	struct device *dev = rx_ring->dev;
2828	int orig_node = dev_to_node(dev);
2829	int size, desc_len;
2830
2831	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2832	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2833	if (!rx_ring->rx_buffer_info)
2834		rx_ring->rx_buffer_info = vzalloc(size);
2835	if (!rx_ring->rx_buffer_info)
2836		goto err;
2837
2838	desc_len = sizeof(union e1000_adv_rx_desc);
2839
2840	/* Round up to nearest 4K */
2841	rx_ring->size = rx_ring->count * desc_len;
2842	rx_ring->size = ALIGN(rx_ring->size, 4096);
2843
2844	set_dev_node(dev, rx_ring->numa_node);
2845	rx_ring->desc = dma_alloc_coherent(dev,
2846					   rx_ring->size,
2847					   &rx_ring->dma,
2848					   GFP_KERNEL);
2849	set_dev_node(dev, orig_node);
2850	if (!rx_ring->desc)
2851		rx_ring->desc = dma_alloc_coherent(dev,
2852						   rx_ring->size,
2853						   &rx_ring->dma,
2854						   GFP_KERNEL);
2855
2856	if (!rx_ring->desc)
2857		goto err;
2858
2859	rx_ring->next_to_clean = 0;
2860	rx_ring->next_to_use = 0;
2861
2862	return 0;
2863
2864err:
2865	vfree(rx_ring->rx_buffer_info);
2866	rx_ring->rx_buffer_info = NULL;
2867	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2868		" ring\n");
2869	return -ENOMEM;
2870}
2871
2872/**
2873 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2874 *				  (Descriptors) for all queues
2875 * @adapter: board private structure
2876 *
2877 * Return 0 on success, negative on failure
2878 **/
2879static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2880{
2881	struct pci_dev *pdev = adapter->pdev;
2882	int i, err = 0;
2883
2884	for (i = 0; i < adapter->num_rx_queues; i++) {
2885		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2886		if (err) {
2887			dev_err(&pdev->dev,
2888				"Allocation for Rx Queue %u failed\n", i);
2889			for (i--; i >= 0; i--)
2890				igb_free_rx_resources(adapter->rx_ring[i]);
2891			break;
2892		}
2893	}
2894
2895	return err;
2896}
2897
2898/**
2899 * igb_setup_mrqc - configure the multiple receive queue control registers
2900 * @adapter: Board private structure
2901 **/
2902static void igb_setup_mrqc(struct igb_adapter *adapter)
2903{
2904	struct e1000_hw *hw = &adapter->hw;
2905	u32 mrqc, rxcsum;
2906	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2907	union e1000_reta {
2908		u32 dword;
2909		u8  bytes[4];
2910	} reta;
2911	static const u8 rsshash[40] = {
2912		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2913		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2914		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2915		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2916
2917	/* Fill out hash function seeds */
2918	for (j = 0; j < 10; j++) {
2919		u32 rsskey = rsshash[(j * 4)];
2920		rsskey |= rsshash[(j * 4) + 1] << 8;
2921		rsskey |= rsshash[(j * 4) + 2] << 16;
2922		rsskey |= rsshash[(j * 4) + 3] << 24;
2923		array_wr32(E1000_RSSRK(0), j, rsskey);
2924	}
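	/* Each RSSRK register takes four key bytes, least-significant byte
	 * first; e.g. the first dword written above is 0xda565a6d
	 * (bytes 0x6d, 0x5a, 0x56, 0xda).
	 */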
2925
2926	num_rx_queues = adapter->rss_queues;
2927
2928	if (adapter->vfs_allocated_count) {
2929		/* 82575 and 82576 support 2 RSS queues for VMDq */
2930		switch (hw->mac.type) {
2931		case e1000_i350:
2932		case e1000_82580:
2933			num_rx_queues = 1;
2934			shift = 0;
2935			break;
2936		case e1000_82576:
2937			shift = 3;
2938			num_rx_queues = 2;
2939			break;
2940		case e1000_82575:
2941			shift = 2;
2942			shift2 = 6;
2943		default:
2944			break;
2945		}
2946	} else {
2947		if (hw->mac.type == e1000_82575)
2948			shift = 6;
2949	}
2950
2951	for (j = 0; j < (32 * 4); j++) {
2952		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2953		if (shift2)
2954			reta.bytes[j & 3] |= num_rx_queues << shift2;
2955		if ((j & 3) == 3)
2956			wr32(E1000_RETA(j >> 2), reta.dword);
2957	}
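	/* Example: with four RSS queues, no VFs and shift = 0, the 128
	 * redirection-table entries simply cycle 0,1,2,3,... and are written
	 * to the RETA registers four entries per dword.
	 */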
2958
2959	/*
2960	 * Disable raw packet checksumming so that the RSS hash is placed in
2961	 * the descriptor on writeback.  There is no need to enable TCP/UDP/IP
2962	 * checksum offloads, as they are enabled by default.
2963	 */
2964	rxcsum = rd32(E1000_RXCSUM);
2965	rxcsum |= E1000_RXCSUM_PCSD;
2966
2967	if (adapter->hw.mac.type >= e1000_82576)
2968		/* Enable Receive Checksum Offload for SCTP */
2969		rxcsum |= E1000_RXCSUM_CRCOFL;
2970
2971	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2972	wr32(E1000_RXCSUM, rxcsum);
2973	/*
2974	 * Generate the RSS hash based on TCP port numbers and/or
2975	 * IPv4/v6 source and destination addresses, since UDP cannot be
2976	 * hashed reliably due to IP fragmentation.
2977	 */
2978
2979	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2980	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
2981	       E1000_MRQC_RSS_FIELD_IPV6 |
2982	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
2983	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2984
2985	/* If VMDq is enabled then we set the appropriate mode for that, else
2986	 * we default to RSS so that an RSS hash is calculated per packet even
2987	 * if we are only using one queue */
2988	if (adapter->vfs_allocated_count) {
2989		if (hw->mac.type > e1000_82575) {
2990			/* Set the default pool for the PF's first queue */
2991			u32 vtctl = rd32(E1000_VT_CTL);
2992			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2993				   E1000_VT_CTL_DISABLE_DEF_POOL);
2994			vtctl |= adapter->vfs_allocated_count <<
2995				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2996			wr32(E1000_VT_CTL, vtctl);
2997		}
2998		if (adapter->rss_queues > 1)
2999			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
3000		else
3001			mrqc |= E1000_MRQC_ENABLE_VMDQ;
3002	} else {
3003		if (hw->mac.type != e1000_i211)
3004			mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
3005	}
3006	igb_vmm_control(adapter);
3007
3008	wr32(E1000_MRQC, mrqc);
3009}
3010
3011/**
3012 * igb_setup_rctl - configure the receive control registers
3013 * @adapter: Board private structure
3014 **/
3015void igb_setup_rctl(struct igb_adapter *adapter)
3016{
3017	struct e1000_hw *hw = &adapter->hw;
3018	u32 rctl;
3019
3020	rctl = rd32(E1000_RCTL);
3021
3022	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3023	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3024
3025	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3026		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3027
3028	/*
3029	 * enable stripping of CRC. It's unlikely this will break BMC
3030	 * redirection as it did with e1000. Newer features require
3031	 * that the HW strips the CRC.
3032	 */
3033	rctl |= E1000_RCTL_SECRC;
3034
3035	/* disable store bad packets and clear size bits. */
3036	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3037
3038	/* enable LPE to prevent packets larger than max_frame_size */
3039	rctl |= E1000_RCTL_LPE;
3040
3041	/* disable queue 0 to prevent tail write w/o re-config */
3042	wr32(E1000_RXDCTL(0), 0);
3043
3044	/* Attention!!!  For SR-IOV PF driver operations you must enable
3045	 * queue drop for all VF and PF queues to prevent head of line blocking
3046	 * if an un-trusted VF does not provide descriptors to hardware.
3047	 */
3048	if (adapter->vfs_allocated_count) {
3049		/* set all queue drop enable bits */
3050		wr32(E1000_QDE, ALL_QUEUES);
3051	}
3052
3053	/* This is useful for sniffing bad packets. */
3054	if (adapter->netdev->features & NETIF_F_RXALL) {
3055		/* UPE and MPE will be handled by normal PROMISC logic
3056		 * in igb_set_rx_mode */
3057		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3058			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3059			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3060
3061		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3062			  E1000_RCTL_DPF | /* Allow filtered pause */
3063			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3064		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3065		 * and that breaks VLANs.
3066		 */
3067	}
3068
3069	wr32(E1000_RCTL, rctl);
3070}
3071
3072static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3073                                   int vfn)
3074{
3075	struct e1000_hw *hw = &adapter->hw;
3076	u32 vmolr;
3077
3078	/* if it isn't the PF, check whether the VF has VLANs enabled and,
3079	 * if so, increase the size to allow room for a VLAN tag */
3080	if (vfn < adapter->vfs_allocated_count &&
3081	    adapter->vf_data[vfn].vlans_enabled)
3082		size += VLAN_TAG_SIZE;
3083
3084	vmolr = rd32(E1000_VMOLR(vfn));
3085	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3086	vmolr |= size | E1000_VMOLR_LPE;
3087	wr32(E1000_VMOLR(vfn), vmolr);
3088
3089	return 0;
3090}
3091
3092/**
3093 * igb_rlpml_set - set maximum receive packet size
3094 * @adapter: board private structure
3095 *
3096 * Configure maximum receivable packet size.
3097 **/
3098static void igb_rlpml_set(struct igb_adapter *adapter)
3099{
3100	u32 max_frame_size = adapter->max_frame_size;
3101	struct e1000_hw *hw = &adapter->hw;
3102	u16 pf_id = adapter->vfs_allocated_count;
3103
3104	if (pf_id) {
3105		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3106		/*
3107		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3108		 * to our max jumbo frame size, in case we need to enable
3109		 * jumbo frames on one of the rings later.
3110		 * This will not pass over-length frames into the default
3111		 * queue because it's gated by the VMOLR.RLPML.
3112		 */
3113		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3114	}
3115
3116	wr32(E1000_RLPML, max_frame_size);
3117}
3118
3119static inline void igb_set_vmolr(struct igb_adapter *adapter,
3120				 int vfn, bool aupe)
3121{
3122	struct e1000_hw *hw = &adapter->hw;
3123	u32 vmolr;
3124
3125	/*
3126	 * This register exists only on 82576 and newer, so on older hardware
3127	 * exit and do nothing
3128	 */
3129	if (hw->mac.type < e1000_82576)
3130		return;
3131
3132	vmolr = rd32(E1000_VMOLR(vfn));
3133	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3134	if (aupe)
3135		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3136	else
3137		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3138
3139	/* clear all bits that might not be set */
3140	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3141
3142	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3143		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3144	/*
3145	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3146	 * multicast packets
3147	 */
3148	if (vfn <= adapter->vfs_allocated_count)
3149		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3150
3151	wr32(E1000_VMOLR(vfn), vmolr);
3152}
3153
3154/**
3155 * igb_configure_rx_ring - Configure a receive ring after Reset
3156 * @adapter: board private structure
3157 * @ring: receive ring to be configured
3158 *
3159 * Configure the Rx unit of the MAC after a reset.
3160 **/
3161void igb_configure_rx_ring(struct igb_adapter *adapter,
3162                           struct igb_ring *ring)
3163{
3164	struct e1000_hw *hw = &adapter->hw;
3165	u64 rdba = ring->dma;
3166	int reg_idx = ring->reg_idx;
3167	u32 srrctl = 0, rxdctl = 0;
3168
3169	/* disable the queue */
3170	wr32(E1000_RXDCTL(reg_idx), 0);
3171
3172	/* Set DMA base address registers */
3173	wr32(E1000_RDBAL(reg_idx),
3174	     rdba & 0x00000000ffffffffULL);
3175	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3176	wr32(E1000_RDLEN(reg_idx),
3177	               ring->count * sizeof(union e1000_adv_rx_desc));
3178
3179	/* initialize head and tail */
3180	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3181	wr32(E1000_RDH(reg_idx), 0);
3182	writel(0, ring->tail);
3183
3184	/* set descriptor configuration */
3185	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3186#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3187	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3188#else
3189	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3190#endif
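	/* With a typical 4 KB PAGE_SIZE this selects a 2 KB packet buffer
	 * (2048 bytes expressed in the register's units via
	 * E1000_SRRCTL_BSIZEPKT_SHIFT, presumably 1 KB granularity), while
	 * IGB_RX_HDR_LEN sizes the header buffer for the header-split
	 * descriptor format selected on the next line.
	 */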
3191	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3192#ifdef CONFIG_IGB_PTP
3193	if (hw->mac.type >= e1000_82580)
3194		srrctl |= E1000_SRRCTL_TIMESTAMP;
3195#endif /* CONFIG_IGB_PTP */
3196	/* Only set Drop Enable if we are supporting multiple queues */
3197	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3198		srrctl |= E1000_SRRCTL_DROP_EN;
3199
3200	wr32(E1000_SRRCTL(reg_idx), srrctl);
3201
3202	/* set filtering for VMDQ pools */
3203	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3204
3205	rxdctl |= IGB_RX_PTHRESH;
3206	rxdctl |= IGB_RX_HTHRESH << 8;
3207	rxdctl |= IGB_RX_WTHRESH << 16;
3208
3209	/* enable receive descriptor fetching */
3210	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3211	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3212}
3213
3214/**
3215 * igb_configure_rx - Configure receive Unit after Reset
3216 * @adapter: board private structure
3217 *
3218 * Configure the Rx unit of the MAC after a reset.
3219 **/
3220static void igb_configure_rx(struct igb_adapter *adapter)
3221{
3222	int i;
3223
3224	/* set UTA to appropriate mode */
3225	igb_set_uta(adapter);
3226
3227	/* set the correct pool for the PF default MAC address in entry 0 */
3228	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3229	                 adapter->vfs_allocated_count);
3230
3231	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3232	 * the Base and Length of the Rx Descriptor Ring */
3233	for (i = 0; i < adapter->num_rx_queues; i++)
3234		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3235}
3236
3237/**
3238 * igb_free_tx_resources - Free Tx Resources per Queue
3239 * @tx_ring: Tx descriptor ring for a specific queue
3240 *
3241 * Free all transmit software resources
3242 **/
3243void igb_free_tx_resources(struct igb_ring *tx_ring)
3244{
3245	igb_clean_tx_ring(tx_ring);
3246
3247	vfree(tx_ring->tx_buffer_info);
3248	tx_ring->tx_buffer_info = NULL;
3249
3250	/* if not set, then don't free */
3251	if (!tx_ring->desc)
3252		return;
3253
3254	dma_free_coherent(tx_ring->dev, tx_ring->size,
3255			  tx_ring->desc, tx_ring->dma);
3256
3257	tx_ring->desc = NULL;
3258}
3259
3260/**
3261 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3262 * @adapter: board private structure
3263 *
3264 * Free all transmit software resources
3265 **/
3266static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3267{
3268	int i;
3269
3270	for (i = 0; i < adapter->num_tx_queues; i++)
3271		igb_free_tx_resources(adapter->tx_ring[i]);
3272}
3273
3274void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3275				    struct igb_tx_buffer *tx_buffer)
3276{
3277	if (tx_buffer->skb) {
3278		dev_kfree_skb_any(tx_buffer->skb);
3279		if (tx_buffer->dma)
3280			dma_unmap_single(ring->dev,
3281					 tx_buffer->dma,
3282					 tx_buffer->length,
3283					 DMA_TO_DEVICE);
3284	} else if (tx_buffer->dma) {
3285		dma_unmap_page(ring->dev,
3286			       tx_buffer->dma,
3287			       tx_buffer->length,
3288			       DMA_TO_DEVICE);
3289	}
3290	tx_buffer->next_to_watch = NULL;
3291	tx_buffer->skb = NULL;
3292	tx_buffer->dma = 0;
3293	/* buffer_info must be completely set up in the transmit path */
3294}
3295
3296/**
3297 * igb_clean_tx_ring - Free Tx Buffers
3298 * @tx_ring: ring to be cleaned
3299 **/
3300static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3301{
3302	struct igb_tx_buffer *buffer_info;
3303	unsigned long size;
3304	u16 i;
3305
3306	if (!tx_ring->tx_buffer_info)
3307		return;
3308	/* Free all the Tx ring sk_buffs */
3309
3310	for (i = 0; i < tx_ring->count; i++) {
3311		buffer_info = &tx_ring->tx_buffer_info[i];
3312		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3313	}
3314
3315	netdev_tx_reset_queue(txring_txq(tx_ring));
3316
3317	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3318	memset(tx_ring->tx_buffer_info, 0, size);
3319
3320	/* Zero out the descriptor ring */
3321	memset(tx_ring->desc, 0, tx_ring->size);
3322
3323	tx_ring->next_to_use = 0;
3324	tx_ring->next_to_clean = 0;
3325}
3326
3327/**
3328 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3329 * @adapter: board private structure
3330 **/
3331static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3332{
3333	int i;
3334
3335	for (i = 0; i < adapter->num_tx_queues; i++)
3336		igb_clean_tx_ring(adapter->tx_ring[i]);
3337}
3338
3339/**
3340 * igb_free_rx_resources - Free Rx Resources
3341 * @rx_ring: ring to clean the resources from
3342 *
3343 * Free all receive software resources
3344 **/
3345void igb_free_rx_resources(struct igb_ring *rx_ring)
3346{
3347	igb_clean_rx_ring(rx_ring);
3348
3349	vfree(rx_ring->rx_buffer_info);
3350	rx_ring->rx_buffer_info = NULL;
3351
3352	/* if not set, then don't free */
3353	if (!rx_ring->desc)
3354		return;
3355
3356	dma_free_coherent(rx_ring->dev, rx_ring->size,
3357			  rx_ring->desc, rx_ring->dma);
3358
3359	rx_ring->desc = NULL;
3360}
3361
3362/**
3363 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3364 * @adapter: board private structure
3365 *
3366 * Free all receive software resources
3367 **/
3368static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3369{
3370	int i;
3371
3372	for (i = 0; i < adapter->num_rx_queues; i++)
3373		igb_free_rx_resources(adapter->rx_ring[i]);
3374}
3375
3376/**
3377 * igb_clean_rx_ring - Free Rx Buffers per Queue
3378 * @rx_ring: ring to free buffers from
3379 **/
3380static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3381{
3382	unsigned long size;
3383	u16 i;
3384
3385	if (!rx_ring->rx_buffer_info)
3386		return;
3387
3388	/* Free all the Rx ring sk_buffs */
3389	for (i = 0; i < rx_ring->count; i++) {
3390		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3391		if (buffer_info->dma) {
3392			dma_unmap_single(rx_ring->dev,
3393			                 buffer_info->dma,
3394					 IGB_RX_HDR_LEN,
3395					 DMA_FROM_DEVICE);
3396			buffer_info->dma = 0;
3397		}
3398
3399		if (buffer_info->skb) {
3400			dev_kfree_skb(buffer_info->skb);
3401			buffer_info->skb = NULL;
3402		}
3403		if (buffer_info->page_dma) {
3404			dma_unmap_page(rx_ring->dev,
3405			               buffer_info->page_dma,
3406				       PAGE_SIZE / 2,
3407				       DMA_FROM_DEVICE);
3408			buffer_info->page_dma = 0;
3409		}
3410		if (buffer_info->page) {
3411			put_page(buffer_info->page);
3412			buffer_info->page = NULL;
3413			buffer_info->page_offset = 0;
3414		}
3415	}
3416
3417	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3418	memset(rx_ring->rx_buffer_info, 0, size);
3419
3420	/* Zero out the descriptor ring */
3421	memset(rx_ring->desc, 0, rx_ring->size);
3422
3423	rx_ring->next_to_clean = 0;
3424	rx_ring->next_to_use = 0;
3425}
3426
3427/**
3428 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3429 * @adapter: board private structure
3430 **/
3431static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3432{
3433	int i;
3434
3435	for (i = 0; i < adapter->num_rx_queues; i++)
3436		igb_clean_rx_ring(adapter->rx_ring[i]);
3437}
3438
3439/**
3440 * igb_set_mac - Change the Ethernet Address of the NIC
3441 * @netdev: network interface device structure
3442 * @p: pointer to an address structure
3443 *
3444 * Returns 0 on success, negative on failure
3445 **/
3446static int igb_set_mac(struct net_device *netdev, void *p)
3447{
3448	struct igb_adapter *adapter = netdev_priv(netdev);
3449	struct e1000_hw *hw = &adapter->hw;
3450	struct sockaddr *addr = p;
3451
3452	if (!is_valid_ether_addr(addr->sa_data))
3453		return -EADDRNOTAVAIL;
3454
3455	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3456	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3457
3458	/* set the correct pool for the new PF MAC address in entry 0 */
3459	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3460	                 adapter->vfs_allocated_count);
3461
3462	return 0;
3463}
3464
3465/**
3466 * igb_write_mc_addr_list - write multicast addresses to MTA
3467 * @netdev: network interface device structure
3468 *
3469 * Writes multicast address list to the MTA hash table.
3470 * Returns: -ENOMEM on failure
3471 *                0 on no addresses written
3472 *                X on writing X addresses to MTA
3473 **/
3474static int igb_write_mc_addr_list(struct net_device *netdev)
3475{
3476	struct igb_adapter *adapter = netdev_priv(netdev);
3477	struct e1000_hw *hw = &adapter->hw;
3478	struct netdev_hw_addr *ha;
3479	u8  *mta_list;
3480	int i;
3481
3482	if (netdev_mc_empty(netdev)) {
3483		/* nothing to program, so clear mc list */
3484		igb_update_mc_addr_list(hw, NULL, 0);
3485		igb_restore_vf_multicasts(adapter);
3486		return 0;
3487	}
3488
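	/* 6 bytes (ETH_ALEN) are reserved for each multicast address below */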
3489	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3490	if (!mta_list)
3491		return -ENOMEM;
3492
3493	/* The shared function expects a packed array of only addresses. */
3494	i = 0;
3495	netdev_for_each_mc_addr(ha, netdev)
3496		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3497
3498	igb_update_mc_addr_list(hw, mta_list, i);
3499	kfree(mta_list);
3500
3501	return netdev_mc_count(netdev);
3502}
3503
3504/**
3505 * igb_write_uc_addr_list - write unicast addresses to RAR table
3506 * @netdev: network interface device structure
3507 *
3508 * Writes unicast address list to the RAR table.
3509 * Returns: -ENOMEM on failure/insufficient address space
3510 *                0 on no addresses written
3511 *                X on writing X addresses to the RAR table
3512 **/
3513static int igb_write_uc_addr_list(struct net_device *netdev)
3514{
3515	struct igb_adapter *adapter = netdev_priv(netdev);
3516	struct e1000_hw *hw = &adapter->hw;
3517	unsigned int vfn = adapter->vfs_allocated_count;
3518	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
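	/* Entry 0 of the RAR table holds the PF default MAC address and, in
	 * this driver, the top entries are used for VF MAC addresses (one per
	 * VF), so only 'rar_entries' slots remain for extra unicast filters.
	 */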
3519	int count = 0;
3520
3521	/* return -ENOMEM if there are not enough RAR entries for all addresses */
3522	if (netdev_uc_count(netdev) > rar_entries)
3523		return -ENOMEM;
3524
3525	if (!netdev_uc_empty(netdev) && rar_entries) {
3526		struct netdev_hw_addr *ha;
3527
3528		netdev_for_each_uc_addr(ha, netdev) {
3529			if (!rar_entries)
3530				break;
3531			igb_rar_set_qsel(adapter, ha->addr,
3532			                 rar_entries--,
3533			                 vfn);
3534			count++;
3535		}
3536	}
3537	/* clear the unused RAR entries in reverse order to avoid write combining */
3538	for (; rar_entries > 0 ; rar_entries--) {
3539		wr32(E1000_RAH(rar_entries), 0);
3540		wr32(E1000_RAL(rar_entries), 0);
3541	}
3542	wrfl();
3543
3544	return count;
3545}
3546
3547/**
3548 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3549 * @netdev: network interface device structure
3550 *
3551 * The set_rx_mode entry point is called whenever the unicast or multicast
3552 * address lists or the network interface flags are updated.  This routine is
3553 * responsible for configuring the hardware for proper unicast, multicast,
3554 * promiscuous mode, and all-multi behavior.
3555 **/
3556static void igb_set_rx_mode(struct net_device *netdev)
3557{
3558	struct igb_adapter *adapter = netdev_priv(netdev);
3559	struct e1000_hw *hw = &adapter->hw;
3560	unsigned int vfn = adapter->vfs_allocated_count;
3561	u32 rctl, vmolr = 0;
3562	int count;
3563
3564	/* Check for Promiscuous and All Multicast modes */
3565	rctl = rd32(E1000_RCTL);
3566
3567	/* clear the affected bits */
3568	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3569
3570	if (netdev->flags & IFF_PROMISC) {
3571		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3572		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3573	} else {
3574		if (netdev->flags & IFF_ALLMULTI) {
3575			rctl |= E1000_RCTL_MPE;
3576			vmolr |= E1000_VMOLR_MPME;
3577		} else {
3578			/*
3579			 * Write addresses to the MTA; if the attempt fails,
3580			 * fall back to multicast promiscuous mode so that we
3581			 * can at least receive multicast traffic
3582			 */
3583			count = igb_write_mc_addr_list(netdev);
3584			if (count < 0) {
3585				rctl |= E1000_RCTL_MPE;
3586				vmolr |= E1000_VMOLR_MPME;
3587			} else if (count) {
3588				vmolr |= E1000_VMOLR_ROMPE;
3589			}
3590		}
3591		/*
3592		 * Write addresses to the available RAR registers; if there is
3593		 * not sufficient space to store all of the addresses, enable
3594		 * unicast promiscuous mode instead
3595		 */
3596		count = igb_write_uc_addr_list(netdev);
3597		if (count < 0) {
3598			rctl |= E1000_RCTL_UPE;
3599			vmolr |= E1000_VMOLR_ROPE;
3600		}
3601		rctl |= E1000_RCTL_VFE;
3602	}
3603	wr32(E1000_RCTL, rctl);
3604
3605	/*
3606	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3607	 * the VMOLR to enable the appropriate modes.  Without this workaround
3608	 * we will have issues with VLAN tag stripping not being done for frames
3609	 * that are only arriving because we are the default pool
3610	 */
3611	if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3612		return;
3613
3614	vmolr |= rd32(E1000_VMOLR(vfn)) &
3615	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3616	wr32(E1000_VMOLR(vfn), vmolr);
3617	igb_restore_vf_multicasts(adapter);
3618}
3619
3620static void igb_check_wvbr(struct igb_adapter *adapter)
3621{
3622	struct e1000_hw *hw = &adapter->hw;
3623	u32 wvbr = 0;
3624
3625	switch (hw->mac.type) {
3626	case e1000_82576:
3627	case e1000_i350:
3628		if (!(wvbr = rd32(E1000_WVBR)))
3629			return;
3630		break;
3631	default:
3632		break;
3633	}
3634
3635	adapter->wvbr |= wvbr;
3636}
3637
3638#define IGB_STAGGERED_QUEUE_OFFSET 8
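/* The Wrong VM Behavior (WVBR) register carries two status bits per VF,
 * IGB_STAGGERED_QUEUE_OFFSET positions apart, so the spoof check below
 * tests and clears both bits for every VF.
 */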
3639
3640static void igb_spoof_check(struct igb_adapter *adapter)
3641{
3642	int j;
3643
3644	if (!adapter->wvbr)
3645		return;
3646
3647	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3648		if (adapter->wvbr & (1 << j) ||
3649		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3650			dev_warn(&adapter->pdev->dev,
3651				"Spoof event(s) detected on VF %d\n", j);
3652			adapter->wvbr &=
3653				~((1 << j) |
3654				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3655		}
3656	}
3657}
3658
3659/* Need to wait a few seconds after link up to get diagnostic information from
3660 * the phy */
3661static void igb_update_phy_info(unsigned long data)
3662{
3663	struct igb_adapter *adapter = (struct igb_adapter *) data;
3664	igb_get_phy_info(&adapter->hw);
3665}
3666
3667/**
3668 * igb_has_link - check shared code for link and determine up/down
3669 * @adapter: pointer to driver private info
3670 **/
3671bool igb_has_link(struct igb_adapter *adapter)
3672{
3673	struct e1000_hw *hw = &adapter->hw;
3674	bool link_active = false;
3675	s32 ret_val = 0;
3676
3677	/* get_link_status is set on LSC (link status) interrupt or
3678	 * rx sequence error interrupt.  It stays set, and link is
3679	 * reported down, until check_for_link establishes link, for
3680	 * copper adapters ONLY
3681	 */
3682	switch (hw->phy.media_type) {
3683	case e1000_media_type_copper:
3684		if (hw->mac.get_link_status) {
3685			ret_val = hw->mac.ops.check_for_link(hw);
3686			link_active = !hw->mac.get_link_status;
3687		} else {
3688			link_active = true;
3689		}
3690		break;
3691	case e1000_media_type_internal_serdes:
3692		ret_val = hw->mac.ops.check_for_link(hw);
3693		link_active = hw->mac.serdes_has_link;
3694		break;
3695	default:
3696	case e1000_media_type_unknown:
3697		break;
3698	}
3699
3700	return link_active;
3701}
3702
3703static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3704{
3705	bool ret = false;
3706	u32 ctrl_ext, thstat;
3707
3708	/* check for thermal sensor event on i350 copper only */
3709	if (hw->mac.type == e1000_i350) {
3710		thstat = rd32(E1000_THSTAT);
3711		ctrl_ext = rd32(E1000_CTRL_EXT);
3712
3713		if ((hw->phy.media_type == e1000_media_type_copper) &&
3714		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3715			ret = !!(thstat & event);
3716		}
3717	}
3718
3719	return ret;
3720}
3721
3722/**
3723 * igb_watchdog - Timer Call-back
3724 * @data: pointer to adapter cast into an unsigned long
3725 **/
3726static void igb_watchdog(unsigned long data)
3727{
3728	struct igb_adapter *adapter = (struct igb_adapter *)data;
3729	/* Do the rest outside of interrupt context */
3730	schedule_work(&adapter->watchdog_task);
3731}
3732
3733static void igb_watchdog_task(struct work_struct *work)
3734{
3735	struct igb_adapter *adapter = container_of(work,
3736	                                           struct igb_adapter,
3737                                                   watchdog_task);
3738	struct e1000_hw *hw = &adapter->hw;
3739	struct net_device *netdev = adapter->netdev;
3740	u32 link;
3741	int i;
3742
3743	link = igb_has_link(adapter);
3744	if (link) {
3745		/* Cancel scheduled suspend requests. */
3746		pm_runtime_resume(netdev->dev.parent);
3747
3748		if (!netif_carrier_ok(netdev)) {
3749			u32 ctrl;
3750			hw->mac.ops.get_speed_and_duplex(hw,
3751			                                 &adapter->link_speed,
3752			                                 &adapter->link_duplex);
3753
3754			ctrl = rd32(E1000_CTRL);
3755			/* Link status message must follow this format */
3756			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3757			       "Duplex, Flow Control: %s\n",
3758			       netdev->name,
3759			       adapter->link_speed,
3760			       adapter->link_duplex == FULL_DUPLEX ?
3761			       "Full" : "Half",
3762			       (ctrl & E1000_CTRL_TFCE) &&
3763			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3764			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3765			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3766
3767			/* check for thermal sensor event */
3768			if (igb_thermal_sensor_event(hw,
3769			    E1000_THSTAT_LINK_THROTTLE)) {
3770				netdev_info(netdev, "The network adapter link "
3771					    "speed was downshifted because it "
3772					    "overheated\n");
3773			}
3774
3775			/* adjust timeout factor according to speed/duplex */
3776			adapter->tx_timeout_factor = 1;
3777			switch (adapter->link_speed) {
3778			case SPEED_10:
3779				adapter->tx_timeout_factor = 14;
3780				break;
3781			case SPEED_100:
3782				/* maybe add some timeout factor ? */
3783				break;
3784			}
3785
3786			netif_carrier_on(netdev);
3787
3788			igb_ping_all_vfs(adapter);
3789			igb_check_vf_rate_limit(adapter);
3790
3791			/* link state has changed, schedule phy info update */
3792			if (!test_bit(__IGB_DOWN, &adapter->state))
3793				mod_timer(&adapter->phy_info_timer,
3794					  round_jiffies(jiffies + 2 * HZ));
3795		}
3796	} else {
3797		if (netif_carrier_ok(netdev)) {
3798			adapter->link_speed = 0;
3799			adapter->link_duplex = 0;
3800
3801			/* check for thermal sensor event */
3802			if (igb_thermal_sensor_event(hw,
3803			    E1000_THSTAT_PWR_DOWN)) {
3804				netdev_err(netdev, "The network adapter was "
3805					   "stopped because it overheated\n");
3806			}
3807
3808			/* Link status message must follow this format */
3809			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3810			       netdev->name);
3811			netif_carrier_off(netdev);
3812
3813			igb_ping_all_vfs(adapter);
3814
3815			/* link state has changed, schedule phy info update */
3816			if (!test_bit(__IGB_DOWN, &adapter->state))
3817				mod_timer(&adapter->phy_info_timer,
3818					  round_jiffies(jiffies + 2 * HZ));
3819
3820			pm_schedule_suspend(netdev->dev.parent,
3821					    MSEC_PER_SEC * 5);
3822		}
3823	}
3824
3825	spin_lock(&adapter->stats64_lock);
3826	igb_update_stats(adapter, &adapter->stats64);
3827	spin_unlock(&adapter->stats64_lock);
3828
3829	for (i = 0; i < adapter->num_tx_queues; i++) {
3830		struct igb_ring *tx_ring = adapter->tx_ring[i];
3831		if (!netif_carrier_ok(netdev)) {
3832			/* We've lost link, so the controller stops DMA,
3833			 * but we've got queued Tx work that's never going
3834			 * to get done, so reset controller to flush Tx.
3835			 * (Do the reset outside of interrupt context). */
3836			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3837				adapter->tx_timeout_count++;
3838				schedule_work(&adapter->reset_task);
3839				/* return immediately since reset is imminent */
3840				return;
3841			}
3842		}
3843
3844		/* Force detection of hung controller every watchdog period */
3845		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3846	}
3847
3848	/* Cause software interrupt to ensure rx ring is cleaned */
3849	if (adapter->msix_entries) {
3850		u32 eics = 0;
3851		for (i = 0; i < adapter->num_q_vectors; i++)
3852			eics |= adapter->q_vector[i]->eims_value;
3853		wr32(E1000_EICS, eics);
3854	} else {
3855		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3856	}
3857
3858	igb_spoof_check(adapter);
3859
3860	/* Reset the timer */
3861	if (!test_bit(__IGB_DOWN, &adapter->state))
3862		mod_timer(&adapter->watchdog_timer,
3863			  round_jiffies(jiffies + 2 * HZ));
3864}
3865
3866enum latency_range {
3867	lowest_latency = 0,
3868	low_latency = 1,
3869	bulk_latency = 2,
3870	latency_invalid = 255
3871};
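/* igb_set_itr() below maps these classes onto target interrupt rates:
 * lowest_latency -> IGB_70K_ITR, low_latency -> IGB_20K_ITR and
 * bulk_latency -> IGB_4K_ITR.
 */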
3872
3873/**
3874 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3875 *
3876 *      Stores a new ITR value based strictly on packet size.  This
3877 *      algorithm is less sophisticated than that used in igb_update_itr,
3878 *      due to the difficulty of synchronizing statistics across multiple
3879 *      receive rings.  The divisors and thresholds used by this function
3880 *      were determined based on theoretical maximum wire speed and testing
3881 *      data, in order to minimize response time while increasing bulk
3882 *      throughput.
3883 *      This functionality is controlled by the InterruptThrottleRate module
3884 *      parameter (see igb_param.c)
3885 *      NOTE:  This function is called only when operating in a multiqueue
3886 *             receive environment.
3887 * @q_vector: pointer to q_vector
3888 **/
3889static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3890{
3891	int new_val = q_vector->itr_val;
3892	int avg_wire_size = 0;
3893	struct igb_adapter *adapter = q_vector->adapter;
3894	unsigned int packets;
3895
3896	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3897	 * ints/sec (IGB_4K_ITR).
3898	 */
3899	if (adapter->link_speed != SPEED_1000) {
3900		new_val = IGB_4K_ITR;
3901		goto set_itr_val;
3902	}
3903
3904	packets = q_vector->rx.total_packets;
3905	if (packets)
3906		avg_wire_size = q_vector->rx.total_bytes / packets;
3907
3908	packets = q_vector->tx.total_packets;
3909	if (packets)
3910		avg_wire_size = max_t(u32, avg_wire_size,
3911				      q_vector->tx.total_bytes / packets);
3912
3913	/* if avg_wire_size isn't set no work was done */
3914	if (!avg_wire_size)
3915		goto clear_counts;
3916
3917	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3918	avg_wire_size += 24;
3919
3920	/* Don't starve jumbo frames */
3921	avg_wire_size = min(avg_wire_size, 3000);
3922
3923	/* Give a little boost to mid-size frames */
3924	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3925		new_val = avg_wire_size / 3;
3926	else
3927		new_val = avg_wire_size / 2;
3928
3929	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3930	if (new_val < IGB_20K_ITR &&
3931	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3932	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3933		new_val = IGB_20K_ITR;
3934
3935set_itr_val:
3936	if (new_val != q_vector->itr_val) {
3937		q_vector->itr_val = new_val;
3938		q_vector->set_itr = 1;
3939	}
3940clear_counts:
3941	q_vector->rx.total_bytes = 0;
3942	q_vector->rx.total_packets = 0;
3943	q_vector->tx.total_bytes = 0;
3944	q_vector->tx.total_packets = 0;
3945}
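
/* Worked example for the heuristic above (illustrative): if the rx ring
 * handled 100 packets totalling 60000 bytes since the last update and tx
 * was idle, avg_wire_size is 600, becomes 624 after the 24-byte overhead
 * and falls in the mid-size bucket, so new_val = 624 / 3 = 208.  That is
 * not below IGB_20K_ITR (assuming its usual value of 196), so the
 * conservative-mode floor would leave it untouched.
 */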
3946
3947/**
3948 * igb_update_itr - update the dynamic ITR value based on statistics
3949 *      Stores a new ITR value based on packets and byte
3950 *      counts during the last interrupt.  The advantage of per interrupt
3951 *      computation is faster updates and more accurate ITR for the current
3952 *      traffic pattern.  Constants in this function were computed
3953 *      based on theoretical maximum wire speed and thresholds were set based
3954 *      on testing data as well as attempting to minimize response time
3955 *      while increasing bulk throughput.
3956 *      This functionality is controlled by the InterruptThrottleRate module
3957 *      parameter (see igb_param.c)
3958 *      NOTE:  These calculations are only valid when operating in a single-
3959 *             queue environment.
3960 * @q_vector: pointer to q_vector
3961 * @ring_container: ring info to update the itr for
3962 **/
3963static void igb_update_itr(struct igb_q_vector *q_vector,
3964			   struct igb_ring_container *ring_container)
3965{
3966	unsigned int packets = ring_container->total_packets;
3967	unsigned int bytes = ring_container->total_bytes;
3968	u8 itrval = ring_container->itr;
3969
3970	/* no packets, exit with status unchanged */
3971	if (packets == 0)
3972		return;
3973
3974	switch (itrval) {
3975	case lowest_latency:
3976		/* handle TSO and jumbo frames */
3977		if (bytes/packets > 8000)
3978			itrval = bulk_latency;
3979		else if ((packets < 5) && (bytes > 512))
3980			itrval = low_latency;
3981		break;
3982	case low_latency:  /* 50 usec aka 20000 ints/s */
3983		if (bytes > 10000) {
3984			/* this if handles the TSO accounting */
3985			if (bytes/packets > 8000) {
3986				itrval = bulk_latency;
3987			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3988				itrval = bulk_latency;
3989			} else if ((packets > 35)) {
3990				itrval = lowest_latency;
3991			}
3992		} else if (bytes/packets > 2000) {
3993			itrval = bulk_latency;
3994		} else if (packets <= 2 && bytes < 512) {
3995			itrval = lowest_latency;
3996		}
3997		break;
3998	case bulk_latency: /* 250 usec aka 4000 ints/s */
3999		if (bytes > 25000) {
4000			if (packets > 35)
4001				itrval = low_latency;
4002		} else if (bytes < 1500) {
4003			itrval = low_latency;
4004		}
4005		break;
4006	}
4007
4008	/* clear work counters since we have the values we need */
4009	ring_container->total_bytes = 0;
4010	ring_container->total_packets = 0;
4011
4012	/* write updated itr to ring container */
4013	ring_container->itr = itrval;
4014}
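
/* Example trace (illustrative): a ring container currently at low_latency
 * that saw 60 packets / 12000 bytes in the last interrupt takes the
 * bytes > 10000 branch; 12000/60 = 200 bytes per packet is neither
 * TSO-sized nor tiny, but packets > 35, so itrval drops to lowest_latency,
 * which igb_set_itr() maps to IGB_70K_ITR (subject to the max() with the
 * paired ring and the conservative-mode check).
 */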
4015
4016static void igb_set_itr(struct igb_q_vector *q_vector)
4017{
4018	struct igb_adapter *adapter = q_vector->adapter;
4019	u32 new_itr = q_vector->itr_val;
4020	u8 current_itr = 0;
4021
4022	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4023	if (adapter->link_speed != SPEED_1000) {
4024		current_itr = 0;
4025		new_itr = IGB_4K_ITR;
4026		goto set_itr_now;
4027	}
4028
4029	igb_update_itr(q_vector, &q_vector->tx);
4030	igb_update_itr(q_vector, &q_vector->rx);
4031
4032	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4033
4034	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4035	if (current_itr == lowest_latency &&
4036	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4037	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4038		current_itr = low_latency;
4039
4040	switch (current_itr) {
4041	/* counts and packets in update_itr are dependent on these numbers */
4042	case lowest_latency:
4043		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4044		break;
4045	case low_latency:
4046		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4047		break;
4048	case bulk_latency:
4049		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4050		break;
4051	default:
4052		break;
4053	}
4054
4055set_itr_now:
4056	if (new_itr != q_vector->itr_val) {
4057		/* this attempts to bias the interrupt rate towards Bulk
4058		 * by adding intermediate steps when interrupt rate is
4059		 * increasing */
4060		new_itr = new_itr > q_vector->itr_val ?
4061		             max((new_itr * q_vector->itr_val) /
4062		                 (new_itr + (q_vector->itr_val >> 2)),
4063				 new_itr) :
4064			     new_itr;
4065		/* Don't write the value here; it resets the adapter's
4066		 * internal timer, and causes us to delay far longer than
4067		 * we should between interrupts.  Instead, we write the ITR
4068		 * value at the beginning of the next interrupt so the timing
4069		 * ends up being correct.
4070		 */
4071		q_vector->itr_val = new_itr;
4072		q_vector->set_itr = 1;
4073	}
4074}
4075
4076static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4077			    u32 type_tucmd, u32 mss_l4len_idx)
4078{
4079	struct e1000_adv_tx_context_desc *context_desc;
4080	u16 i = tx_ring->next_to_use;
4081
4082	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4083
4084	i++;
4085	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4086
4087	/* set bits to identify this as an advanced context descriptor */
4088	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4089
4090	/* For 82575, context index must be unique per ring. */
4091	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4092		mss_l4len_idx |= tx_ring->reg_idx << 4;
4093
4094	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4095	context_desc->seqnum_seed	= 0;
4096	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4097	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4098}
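
/* Note that the context descriptor written above occupies a regular slot in
 * the tx ring, which is why igb_xmit_frame_ring() budgets one extra
 * descriptor for it when calling igb_maybe_stop_tx().
 */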
4099
4100static int igb_tso(struct igb_ring *tx_ring,
4101		   struct igb_tx_buffer *first,
4102		   u8 *hdr_len)
4103{
4104	struct sk_buff *skb = first->skb;
4105	u32 vlan_macip_lens, type_tucmd;
4106	u32 mss_l4len_idx, l4len;
4107
4108	if (!skb_is_gso(skb))
4109		return 0;
4110
4111	if (skb_header_cloned(skb)) {
4112		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4113		if (err)
4114			return err;
4115	}
4116
4117	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4118	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4119
4120	if (first->protocol == __constant_htons(ETH_P_IP)) {
4121		struct iphdr *iph = ip_hdr(skb);
4122		iph->tot_len = 0;
4123		iph->check = 0;
4124		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4125							 iph->daddr, 0,
4126							 IPPROTO_TCP,
4127							 0);
4128		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4129		first->tx_flags |= IGB_TX_FLAGS_TSO |
4130				   IGB_TX_FLAGS_CSUM |
4131				   IGB_TX_FLAGS_IPV4;
4132	} else if (skb_is_gso_v6(skb)) {
4133		ipv6_hdr(skb)->payload_len = 0;
4134		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4135						       &ipv6_hdr(skb)->daddr,
4136						       0, IPPROTO_TCP, 0);
4137		first->tx_flags |= IGB_TX_FLAGS_TSO |
4138				   IGB_TX_FLAGS_CSUM;
4139	}
4140
4141	/* compute header lengths */
4142	l4len = tcp_hdrlen(skb);
4143	*hdr_len = skb_transport_offset(skb) + l4len;
4144
4145	/* update gso size and bytecount with header size */
4146	first->gso_segs = skb_shinfo(skb)->gso_segs;
4147	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4148
4149	/* MSS L4LEN IDX */
4150	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4151	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4152
4153	/* VLAN MACLEN IPLEN */
4154	vlan_macip_lens = skb_network_header_len(skb);
4155	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4156	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4157
4158	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4159
4160	return 1;
4161}
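
/* Field-packing example (illustrative, assuming the usual shift widths of
 * 9 bits for MACLEN, 8 for L4LEN and 16 for MSS): an untagged IPv4 TCP skb
 * with a 14-byte Ethernet header, 20-byte IP header, 32-byte TCP header
 * and gso_size 1448 yields *hdr_len = 66,
 * mss_l4len_idx = (1448 << 16) | (32 << 8) and
 * vlan_macip_lens = (14 << 9) | 20.
 */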
4162
4163static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4164{
4165	struct sk_buff *skb = first->skb;
4166	u32 vlan_macip_lens = 0;
4167	u32 mss_l4len_idx = 0;
4168	u32 type_tucmd = 0;
4169
4170	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4171		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4172			return;
4173	} else {
4174		u8 l4_hdr = 0;
4175		switch (first->protocol) {
4176		case __constant_htons(ETH_P_IP):
4177			vlan_macip_lens |= skb_network_header_len(skb);
4178			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4179			l4_hdr = ip_hdr(skb)->protocol;
4180			break;
4181		case __constant_htons(ETH_P_IPV6):
4182			vlan_macip_lens |= skb_network_header_len(skb);
4183			l4_hdr = ipv6_hdr(skb)->nexthdr;
4184			break;
4185		default:
4186			if (unlikely(net_ratelimit())) {
4187				dev_warn(tx_ring->dev,
4188				 "partial checksum but proto=%x!\n",
4189				 first->protocol);
4190			}
4191			break;
4192		}
4193
4194		switch (l4_hdr) {
4195		case IPPROTO_TCP:
4196			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4197			mss_l4len_idx = tcp_hdrlen(skb) <<
4198					E1000_ADVTXD_L4LEN_SHIFT;
4199			break;
4200		case IPPROTO_SCTP:
4201			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4202			mss_l4len_idx = sizeof(struct sctphdr) <<
4203					E1000_ADVTXD_L4LEN_SHIFT;
4204			break;
4205		case IPPROTO_UDP:
4206			mss_l4len_idx = sizeof(struct udphdr) <<
4207					E1000_ADVTXD_L4LEN_SHIFT;
4208			break;
4209		default:
4210			if (unlikely(net_ratelimit())) {
4211				dev_warn(tx_ring->dev,
4212				 "partial checksum but l4 proto=%x!\n",
4213				 l4_hdr);
4214			}
4215			break;
4216		}
4217
4218		/* update TX checksum flag */
4219		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4220	}
4221
4222	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4223	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4224
4225	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4226}
4227
4228static __le32 igb_tx_cmd_type(u32 tx_flags)
4229{
4230	/* set type for advanced descriptor with frame checksum insertion */
4231	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4232				      E1000_ADVTXD_DCMD_IFCS |
4233				      E1000_ADVTXD_DCMD_DEXT);
4234
4235	/* set HW vlan bit if vlan is present */
4236	if (tx_flags & IGB_TX_FLAGS_VLAN)
4237		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4238
4239#ifdef CONFIG_IGB_PTP
4240	/* set timestamp bit if present */
4241	if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP))
4242		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4243#endif /* CONFIG_IGB_PTP */
4244
4245	/* set segmentation bits for TSO */
4246	if (tx_flags & IGB_TX_FLAGS_TSO)
4247		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4248
4249	return cmd_type;
4250}
4251
4252static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4253				 union e1000_adv_tx_desc *tx_desc,
4254				 u32 tx_flags, unsigned int paylen)
4255{
4256	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4257
4258	/* 82575 requires a unique index per ring if any offload is enabled */
4259	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4260	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4261		olinfo_status |= tx_ring->reg_idx << 4;
4262
4263	/* insert L4 checksum */
4264	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4265		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4266
4267		/* insert IPv4 checksum */
4268		if (tx_flags & IGB_TX_FLAGS_IPV4)
4269			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4270	}
4271
4272	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4273}
4274
4275/*
4276 * The largest size we can write to the descriptor is 65535.  In order to
4277 * maintain a power of two alignment we have to limit ourselves to 32K.
4278 */
4279#define IGB_MAX_TXD_PWR	15
4280#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
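/* 1 << 15 is 32768 bytes, so a 45000-byte contiguous region, for example,
 * is emitted by the loop in igb_tx_map() as one 32768-byte descriptor
 * followed by a 12232-byte one.
 */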
4281
4282static void igb_tx_map(struct igb_ring *tx_ring,
4283		       struct igb_tx_buffer *first,
4284		       const u8 hdr_len)
4285{
4286	struct sk_buff *skb = first->skb;
4287	struct igb_tx_buffer *tx_buffer_info;
4288	union e1000_adv_tx_desc *tx_desc;
4289	dma_addr_t dma;
4290	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4291	unsigned int data_len = skb->data_len;
4292	unsigned int size = skb_headlen(skb);
4293	unsigned int paylen = skb->len - hdr_len;
4294	__le32 cmd_type;
4295	u32 tx_flags = first->tx_flags;
4296	u16 i = tx_ring->next_to_use;
4297
4298	tx_desc = IGB_TX_DESC(tx_ring, i);
4299
4300	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4301	cmd_type = igb_tx_cmd_type(tx_flags);
4302
4303	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4304	if (dma_mapping_error(tx_ring->dev, dma))
4305		goto dma_error;
4306
4307	/* record length, and DMA address */
4308	first->length = size;
4309	first->dma = dma;
4310	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4311
4312	for (;;) {
4313		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4314			tx_desc->read.cmd_type_len =
4315				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4316
4317			i++;
4318			tx_desc++;
4319			if (i == tx_ring->count) {
4320				tx_desc = IGB_TX_DESC(tx_ring, 0);
4321				i = 0;
4322			}
4323
4324			dma += IGB_MAX_DATA_PER_TXD;
4325			size -= IGB_MAX_DATA_PER_TXD;
4326
4327			tx_desc->read.olinfo_status = 0;
4328			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4329		}
4330
4331		if (likely(!data_len))
4332			break;
4333
4334		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4335
4336		i++;
4337		tx_desc++;
4338		if (i == tx_ring->count) {
4339			tx_desc = IGB_TX_DESC(tx_ring, 0);
4340			i = 0;
4341		}
4342
4343		size = skb_frag_size(frag);
4344		data_len -= size;
4345
4346		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4347				   size, DMA_TO_DEVICE);
4348		if (dma_mapping_error(tx_ring->dev, dma))
4349			goto dma_error;
4350
4351		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4352		tx_buffer_info->length = size;
4353		tx_buffer_info->dma = dma;
4354
4355		tx_desc->read.olinfo_status = 0;
4356		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4357
4358		frag++;
4359	}
4360
4361	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4362
4363	/* write last descriptor with RS and EOP bits */
4364	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4365	if (unlikely(skb->no_fcs))
4366		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4367	tx_desc->read.cmd_type_len = cmd_type;
4368
4369	/* set the timestamp */
4370	first->time_stamp = jiffies;
4371
4372	/*
4373	 * Force memory writes to complete before letting h/w know there
4374	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4375	 * memory model archs, such as IA-64).
4376	 *
4377	 * We also need this memory barrier to make certain all of the
4378	 * status bits have been updated before next_to_watch is written.
4379	 */
4380	wmb();
4381
4382	/* set next_to_watch value indicating a packet is present */
4383	first->next_to_watch = tx_desc;
4384
4385	i++;
4386	if (i == tx_ring->count)
4387		i = 0;
4388
4389	tx_ring->next_to_use = i;
4390
4391	writel(i, tx_ring->tail);
4392
4393	/* we need this if more than one processor can write to our tail
4394	 * at a time; it synchronizes IO on IA64/Altix systems */
4395	mmiowb();
4396
4397	return;
4398
4399dma_error:
4400	dev_err(tx_ring->dev, "TX DMA map failed\n");
4401
4402	/* clear dma mappings for failed tx_buffer_info map */
4403	for (;;) {
4404		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4405		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4406		if (tx_buffer_info == first)
4407			break;
4408		if (i == 0)
4409			i = tx_ring->count;
4410		i--;
4411	}
4412
4413	tx_ring->next_to_use = i;
4414}
4415
4416static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4417{
4418	struct net_device *netdev = tx_ring->netdev;
4419
4420	netif_stop_subqueue(netdev, tx_ring->queue_index);
4421
4422	/* Herbert's original patch had:
4423	 *  smp_mb__after_netif_stop_queue();
4424	 * but since that doesn't exist yet, just open code it. */
4425	smp_mb();
4426
4427	/* We need to check again in case another CPU has just
4428	 * made room available. */
4429	if (igb_desc_unused(tx_ring) < size)
4430		return -EBUSY;
4431
4432	/* A reprieve! */
4433	netif_wake_subqueue(netdev, tx_ring->queue_index);
4434
4435	u64_stats_update_begin(&tx_ring->tx_syncp2);
4436	tx_ring->tx_stats.restart_queue2++;
4437	u64_stats_update_end(&tx_ring->tx_syncp2);
4438
4439	return 0;
4440}
4441
4442static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4443{
4444	if (igb_desc_unused(tx_ring) >= size)
4445		return 0;
4446	return __igb_maybe_stop_tx(tx_ring, size);
4447}
4448
4449netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4450				struct igb_ring *tx_ring)
4451{
4452#ifdef CONFIG_IGB_PTP
4453	struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4454#endif /* CONFIG_IGB_PTP */
4455	struct igb_tx_buffer *first;
4456	int tso;
4457	u32 tx_flags = 0;
4458	__be16 protocol = vlan_get_protocol(skb);
4459	u8 hdr_len = 0;
4460
4461	/* need: 1 descriptor per page,
4462	 *       + 2 desc gap to keep tail from touching head,
4463	 *       + 1 desc for skb->data,
4464	 *       + 1 desc for context descriptor,
4465	 * otherwise try next time */
4466	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4467		/* this is a hard error */
4468		return NETDEV_TX_BUSY;
4469	}
4470
4471	/* record the location of the first descriptor for this packet */
4472	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4473	first->skb = skb;
4474	first->bytecount = skb->len;
4475	first->gso_segs = 1;
4476
4477#ifdef CONFIG_IGB_PTP
4478	if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
4479		     !(adapter->ptp_tx_skb))) {
4480		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4481		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4482
4483		adapter->ptp_tx_skb = skb_get(skb);
4484		if (adapter->hw.mac.type == e1000_82576)
4485			schedule_work(&adapter->ptp_tx_work);
4486	}
4487#endif /* CONFIG_IGB_PTP */
4488
4489	if (vlan_tx_tag_present(skb)) {
4490		tx_flags |= IGB_TX_FLAGS_VLAN;
4491		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4492	}
4493
4494	/* record initial flags and protocol */
4495	first->tx_flags = tx_flags;
4496	first->protocol = protocol;
4497
4498	tso = igb_tso(tx_ring, first, &hdr_len);
4499	if (tso < 0)
4500		goto out_drop;
4501	else if (!tso)
4502		igb_tx_csum(tx_ring, first);
4503
4504	igb_tx_map(tx_ring, first, hdr_len);
4505
4506	/* Make sure there is space in the ring for the next send. */
4507	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4508
4509	return NETDEV_TX_OK;
4510
4511out_drop:
4512	igb_unmap_and_free_tx_resource(tx_ring, first);
4513
4514	return NETDEV_TX_OK;
4515}
4516
4517static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4518						    struct sk_buff *skb)
4519{
4520	unsigned int r_idx = skb->queue_mapping;
4521
4522	if (r_idx >= adapter->num_tx_queues)
4523		r_idx = r_idx % adapter->num_tx_queues;
4524
4525	return adapter->tx_ring[r_idx];
4526}
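
/* The queue_mapping chosen by the stack can exceed the number of tx rings
 * this adapter actually allocated (for example when there are more CPUs
 * than rings), hence the modulo fold in igb_tx_queue_mapping().
 */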
4527
4528static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4529				  struct net_device *netdev)
4530{
4531	struct igb_adapter *adapter = netdev_priv(netdev);
4532
4533	if (test_bit(__IGB_DOWN, &adapter->state)) {
4534		dev_kfree_skb_any(skb);
4535		return NETDEV_TX_OK;
4536	}
4537
4538	if (skb->len <= 0) {
4539		dev_kfree_skb_any(skb);
4540		return NETDEV_TX_OK;
4541	}
4542
4543	/*
4544	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4545	 * in order to meet this minimum size requirement.
4546	 */
4547	if (skb->len < 17) {
4548		if (skb_padto(skb, 17))
4549			return NETDEV_TX_OK;
4550		skb->len = 17;
4551	}
4552
4553	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4554}
4555
4556/**
4557 * igb_tx_timeout - Respond to a Tx Hang
4558 * @netdev: network interface device structure
4559 **/
4560static void igb_tx_timeout(struct net_device *netdev)
4561{
4562	struct igb_adapter *adapter = netdev_priv(netdev);
4563	struct e1000_hw *hw = &adapter->hw;
4564
4565	/* Do the reset outside of interrupt context */
4566	adapter->tx_timeout_count++;
4567
4568	if (hw->mac.type >= e1000_82580)
4569		hw->dev_spec._82575.global_device_reset = true;
4570
4571	schedule_work(&adapter->reset_task);
4572	wr32(E1000_EICS,
4573	     (adapter->eims_enable_mask & ~adapter->eims_other));
4574}
4575
4576static void igb_reset_task(struct work_struct *work)
4577{
4578	struct igb_adapter *adapter;
4579	adapter = container_of(work, struct igb_adapter, reset_task);
4580
4581	igb_dump(adapter);
4582	netdev_err(adapter->netdev, "Reset adapter\n");
4583	igb_reinit_locked(adapter);
4584}
4585
4586/**
4587 * igb_get_stats64 - Get System Network Statistics
4588 * @netdev: network interface device structure
4589 * @stats: rtnl_link_stats64 pointer
4590 *
4591 **/
4592static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4593						 struct rtnl_link_stats64 *stats)
4594{
4595	struct igb_adapter *adapter = netdev_priv(netdev);
4596
4597	spin_lock(&adapter->stats64_lock);
4598	igb_update_stats(adapter, &adapter->stats64);
4599	memcpy(stats, &adapter->stats64, sizeof(*stats));
4600	spin_unlock(&adapter->stats64_lock);
4601
4602	return stats;
4603}
4604
4605/**
4606 * igb_change_mtu - Change the Maximum Transfer Unit
4607 * @netdev: network interface device structure
4608 * @new_mtu: new value for maximum frame size
4609 *
4610 * Returns 0 on success, negative on failure
4611 **/
4612static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4613{
4614	struct igb_adapter *adapter = netdev_priv(netdev);
4615	struct pci_dev *pdev = adapter->pdev;
4616	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4617
4618	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4619		dev_err(&pdev->dev, "Invalid MTU setting\n");
4620		return -EINVAL;
4621	}
4622
4623#define MAX_STD_JUMBO_FRAME_SIZE 9238
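	/* 9238 = 9216 bytes of MTU + ETH_HLEN (14) + ETH_FCS_LEN (4) +
	 * VLAN_HLEN (4), which is why the message below reports a
	 * 9216-byte MTU limit.
	 */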
4624	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4625		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4626		return -EINVAL;
4627	}
4628
4629	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4630		msleep(1);
4631
4632	/* igb_down has a dependency on max_frame_size */
4633	adapter->max_frame_size = max_frame;
4634
4635	if (netif_running(netdev))
4636		igb_down(adapter);
4637
4638	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4639		 netdev->mtu, new_mtu);
4640	netdev->mtu = new_mtu;
4641
4642	if (netif_running(netdev))
4643		igb_up(adapter);
4644	else
4645		igb_reset(adapter);
4646
4647	clear_bit(__IGB_RESETTING, &adapter->state);
4648
4649	return 0;
4650}
4651
4652/**
4653 * igb_update_stats - Update the board statistics counters
4654 * @adapter: board private structure
4655 **/
4656
4657void igb_update_stats(struct igb_adapter *adapter,
4658		      struct rtnl_link_stats64 *net_stats)
4659{
4660	struct e1000_hw *hw = &adapter->hw;
4661	struct pci_dev *pdev = adapter->pdev;
4662	u32 reg, mpc;
4663	u16 phy_tmp;
4664	int i;
4665	u64 bytes, packets;
4666	unsigned int start;
4667	u64 _bytes, _packets;
4668
4669#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
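	/* The low byte of PHY_1000T_STATUS holds the 1000BASE-T idle error
	 * counter that is accumulated into phy_stats.idle_errors further
	 * down.
	 */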
4670
4671	/*
4672	 * Prevent stats update while adapter is being reset, or if the pci
4673	 * connection is down.
4674	 */
4675	if (adapter->link_speed == 0)
4676		return;
4677	if (pci_channel_offline(pdev))
4678		return;
4679
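	/* Aggregate the per-ring byte/packet counters.  Each ring is sampled
	 * inside a u64_stats_fetch_begin_bh/retry loop so that a consistent
	 * 64-bit snapshot is obtained even on 32-bit hosts.
	 */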
4680	bytes = 0;
4681	packets = 0;
4682	for (i = 0; i < adapter->num_rx_queues; i++) {
4683		u32 rqdpc = rd32(E1000_RQDPC(i));
4684		struct igb_ring *ring = adapter->rx_ring[i];
4685
4686		if (rqdpc) {
4687			ring->rx_stats.drops += rqdpc;
4688			net_stats->rx_fifo_errors += rqdpc;
4689		}
4690
4691		do {
4692			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4693			_bytes = ring->rx_stats.bytes;
4694			_packets = ring->rx_stats.packets;
4695		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4696		bytes += _bytes;
4697		packets += _packets;
4698	}
4699
4700	net_stats->rx_bytes = bytes;
4701	net_stats->rx_packets = packets;
4702
4703	bytes = 0;
4704	packets = 0;
4705	for (i = 0; i < adapter->num_tx_queues; i++) {
4706		struct igb_ring *ring = adapter->tx_ring[i];
4707		do {
4708			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4709			_bytes = ring->tx_stats.bytes;
4710			_packets = ring->tx_stats.packets;
4711		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4712		bytes += _bytes;
4713		packets += _packets;
4714	}
4715	net_stats->tx_bytes = bytes;
4716	net_stats->tx_packets = packets;
4717
4718	/* read stats registers */
4719	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4720	adapter->stats.gprc += rd32(E1000_GPRC);
4721	adapter->stats.gorc += rd32(E1000_GORCL);
4722	rd32(E1000_GORCH); /* clear GORCL */
4723	adapter->stats.bprc += rd32(E1000_BPRC);
4724	adapter->stats.mprc += rd32(E1000_MPRC);
4725	adapter->stats.roc += rd32(E1000_ROC);
4726
4727	adapter->stats.prc64 += rd32(E1000_PRC64);
4728	adapter->stats.prc127 += rd32(E1000_PRC127);
4729	adapter->stats.prc255 += rd32(E1000_PRC255);
4730	adapter->stats.prc511 += rd32(E1000_PRC511);
4731	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4732	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4733	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4734	adapter->stats.sec += rd32(E1000_SEC);
4735
4736	mpc = rd32(E1000_MPC);
4737	adapter->stats.mpc += mpc;
4738	net_stats->rx_fifo_errors += mpc;
4739	adapter->stats.scc += rd32(E1000_SCC);
4740	adapter->stats.ecol += rd32(E1000_ECOL);
4741	adapter->stats.mcc += rd32(E1000_MCC);
4742	adapter->stats.latecol += rd32(E1000_LATECOL);
4743	adapter->stats.dc += rd32(E1000_DC);
4744	adapter->stats.rlec += rd32(E1000_RLEC);
4745	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4746	adapter->stats.xontxc += rd32(E1000_XONTXC);
4747	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4748	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4749	adapter->stats.fcruc += rd32(E1000_FCRUC);
4750	adapter->stats.gptc += rd32(E1000_GPTC);
4751	adapter->stats.gotc += rd32(E1000_GOTCL);
4752	rd32(E1000_GOTCH); /* clear GOTCL */
4753	adapter->stats.rnbc += rd32(E1000_RNBC);
4754	adapter->stats.ruc += rd32(E1000_RUC);
4755	adapter->stats.rfc += rd32(E1000_RFC);
4756	adapter->stats.rjc += rd32(E1000_RJC);
4757	adapter->stats.tor += rd32(E1000_TORH);
4758	adapter->stats.tot += rd32(E1000_TOTH);
4759	adapter->stats.tpr += rd32(E1000_TPR);
4760
4761	adapter->stats.ptc64 += rd32(E1000_PTC64);
4762	adapter->stats.ptc127 += rd32(E1000_PTC127);
4763	adapter->stats.ptc255 += rd32(E1000_PTC255);
4764	adapter->stats.ptc511 += rd32(E1000_PTC511);
4765	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4766	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4767
4768	adapter->stats.mptc += rd32(E1000_MPTC);
4769	adapter->stats.bptc += rd32(E1000_BPTC);
4770
4771	adapter->stats.tpt += rd32(E1000_TPT);
4772	adapter->stats.colc += rd32(E1000_COLC);
4773
4774	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4775	/* read internal phy specific stats */
4776	reg = rd32(E1000_CTRL_EXT);
4777	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4778		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4779
4780		/* this stat has invalid values on i210/i211 */
4781		if ((hw->mac.type != e1000_i210) &&
4782		    (hw->mac.type != e1000_i211))
4783			adapter->stats.tncrs += rd32(E1000_TNCRS);
4784	}
4785
4786	adapter->stats.tsctc += rd32(E1000_TSCTC);
4787	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4788
4789	adapter->stats.iac += rd32(E1000_IAC);
4790	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4791	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4792	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4793	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4794	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4795	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4796	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4797	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4798
4799	/* Fill out the OS statistics structure */
4800	net_stats->multicast = adapter->stats.mprc;
4801	net_stats->collisions = adapter->stats.colc;
4802
4803	/* Rx Errors */
4804
4805	/* RLEC on some newer hardware can be incorrect so build
4806	 * our own version based on RUC and ROC */
4807	net_stats->rx_errors = adapter->stats.rxerrc +
4808		adapter->stats.crcerrs + adapter->stats.algnerrc +
4809		adapter->stats.ruc + adapter->stats.roc +
4810		adapter->stats.cexterr;
4811	net_stats->rx_length_errors = adapter->stats.ruc +
4812				      adapter->stats.roc;
4813	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4814	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4815	net_stats->rx_missed_errors = adapter->stats.mpc;
4816
4817	/* Tx Errors */
4818	net_stats->tx_errors = adapter->stats.ecol +
4819			       adapter->stats.latecol;
4820	net_stats->tx_aborted_errors = adapter->stats.ecol;
4821	net_stats->tx_window_errors = adapter->stats.latecol;
4822	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4823
4824	/* Tx Dropped needs to be maintained elsewhere */
4825
4826	/* Phy Stats */
4827	if (hw->phy.media_type == e1000_media_type_copper) {
4828		if ((adapter->link_speed == SPEED_1000) &&
4829		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4830			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4831			adapter->phy_stats.idle_errors += phy_tmp;
4832		}
4833	}
4834
4835	/* Management Stats */
4836	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4837	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4838	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4839
4840	/* OS2BMC Stats */
4841	reg = rd32(E1000_MANC);
4842	if (reg & E1000_MANC_EN_BMC2OS) {
4843		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4844		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4845		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4846		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4847	}
4848}
4849
4850static irqreturn_t igb_msix_other(int irq, void *data)
4851{
4852	struct igb_adapter *adapter = data;
4853	struct e1000_hw *hw = &adapter->hw;
4854	u32 icr = rd32(E1000_ICR);
4855	/* reading ICR causes bit 31 of EICR to be cleared */
4856
4857	if (icr & E1000_ICR_DRSTA)
4858		schedule_work(&adapter->reset_task);
4859
4860	if (icr & E1000_ICR_DOUTSYNC) {
4861		/* HW is reporting DMA is out of sync */
4862		adapter->stats.doosync++;
4863		/* The DMA Out of Sync is also an indication of a spoof event
4864		 * in IOV mode. Check the Wrong VM Behavior register to
4865		 * see if it is really a spoof event. */
4866		igb_check_wvbr(adapter);
4867	}
4868
4869	/* Check for a mailbox event */
4870	if (icr & E1000_ICR_VMMB)
4871		igb_msg_task(adapter);
4872
4873	if (icr & E1000_ICR_LSC) {
4874		hw->mac.get_link_status = 1;
4875		/* guard against interrupt when we're going down */
4876		if (!test_bit(__IGB_DOWN, &adapter->state))
4877			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4878	}
4879
4880#ifdef CONFIG_IGB_PTP
4881	if (icr & E1000_ICR_TS) {
4882		u32 tsicr = rd32(E1000_TSICR);
4883
4884		if (tsicr & E1000_TSICR_TXTS) {
4885			/* acknowledge the interrupt */
4886			wr32(E1000_TSICR, E1000_TSICR_TXTS);
4887			/* retrieve hardware timestamp */
4888			schedule_work(&adapter->ptp_tx_work);
4889		}
4890	}
4891#endif /* CONFIG_IGB_PTP */
4892
4893	wr32(E1000_EIMS, adapter->eims_other);
4894
4895	return IRQ_HANDLED;
4896}
4897
4898static void igb_write_itr(struct igb_q_vector *q_vector)
4899{
4900	struct igb_adapter *adapter = q_vector->adapter;
4901	u32 itr_val = q_vector->itr_val & 0x7FFC;
4902
4903	if (!q_vector->set_itr)
4904		return;
4905
4906	if (!itr_val)
4907		itr_val = 0x4;
4908
4909	if (adapter->hw.mac.type == e1000_82575)
4910		itr_val |= itr_val << 16;
4911	else
4912		itr_val |= E1000_EITR_CNT_IGNR;
4913
4914	writel(itr_val, q_vector->itr_register);
4915	q_vector->set_itr = 0;
4916}
4917
4918static irqreturn_t igb_msix_ring(int irq, void *data)
4919{
4920	struct igb_q_vector *q_vector = data;
4921
4922	/* Write the ITR value calculated from the previous interrupt. */
4923	igb_write_itr(q_vector);
4924
4925	napi_schedule(&q_vector->napi);
4926
4927	return IRQ_HANDLED;
4928}
4929
4930#ifdef CONFIG_IGB_DCA
4931static void igb_update_dca(struct igb_q_vector *q_vector)
4932{
4933	struct igb_adapter *adapter = q_vector->adapter;
4934	struct e1000_hw *hw = &adapter->hw;
4935	int cpu = get_cpu();
4936
4937	if (q_vector->cpu == cpu)
4938		goto out_no_update;
4939
4940	if (q_vector->tx.ring) {
4941		int q = q_vector->tx.ring->reg_idx;
4942		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4943		if (hw->mac.type == e1000_82575) {
4944			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4945			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4946		} else {
4947			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4948			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4949			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4950		}
4951		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4952		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4953	}
4954	if (q_vector->rx.ring) {
4955		int q = q_vector->rx.ring->reg_idx;
4956		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4957		if (hw->mac.type == e1000_82575) {
4958			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4959			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4960		} else {
4961			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4962			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4963			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4964		}
4965		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4966		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4967		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4968		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4969	}
4970	q_vector->cpu = cpu;
4971out_no_update:
4972	put_cpu();
4973}
4974
4975static void igb_setup_dca(struct igb_adapter *adapter)
4976{
4977	struct e1000_hw *hw = &adapter->hw;
4978	int i;
4979
4980	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4981		return;
4982
4983	/* Always use CB2 mode, difference is masked in the CB driver. */
4984	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4985
4986	for (i = 0; i < adapter->num_q_vectors; i++) {
4987		adapter->q_vector[i]->cpu = -1;
4988		igb_update_dca(adapter->q_vector[i]);
4989	}
4990}
4991
4992static int __igb_notify_dca(struct device *dev, void *data)
4993{
4994	struct net_device *netdev = dev_get_drvdata(dev);
4995	struct igb_adapter *adapter = netdev_priv(netdev);
4996	struct pci_dev *pdev = adapter->pdev;
4997	struct e1000_hw *hw = &adapter->hw;
4998	unsigned long event = *(unsigned long *)data;
4999
5000	switch (event) {
5001	case DCA_PROVIDER_ADD:
5002		/* if already enabled, don't do it again */
5003		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
5004			break;
5005		if (dca_add_requester(dev) == 0) {
5006			adapter->flags |= IGB_FLAG_DCA_ENABLED;
5007			dev_info(&pdev->dev, "DCA enabled\n");
5008			igb_setup_dca(adapter);
5009			break;
5010		}
5011		/* Fall Through since DCA is disabled. */
5012	case DCA_PROVIDER_REMOVE:
5013		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
5014			/* without this a class_device is left
5015			 * hanging around in the sysfs model */
5016			dca_remove_requester(dev);
5017			dev_info(&pdev->dev, "DCA disabled\n");
5018			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
5019			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
5020		}
5021		break;
5022	}
5023
5024	return 0;
5025}
5026
5027static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5028                          void *p)
5029{
5030	int ret_val;
5031
5032	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5033	                                 __igb_notify_dca);
5034
5035	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
5036}
5037#endif /* CONFIG_IGB_DCA */
5038
5039#ifdef CONFIG_PCI_IOV
5040static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5041{
5042	unsigned char mac_addr[ETH_ALEN];
5043
5044	eth_random_addr(mac_addr);
5045	igb_set_vf_mac(adapter, vf, mac_addr);
5046
5047	return 0;
5048}
5049
5050static bool igb_vfs_are_assigned(struct igb_adapter *adapter)
5051{
5052	struct pci_dev *pdev = adapter->pdev;
5053	struct pci_dev *vfdev;
5054	int dev_id;
5055
5056	switch (adapter->hw.mac.type) {
5057	case e1000_82576:
5058		dev_id = IGB_82576_VF_DEV_ID;
5059		break;
5060	case e1000_i350:
5061		dev_id = IGB_I350_VF_DEV_ID;
5062		break;
5063	default:
5064		return false;
5065	}
5066
5067	/* loop through all the VFs to see if we own any that are assigned */
5068	vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
5069	while (vfdev) {
5070		/* if we don't own it we don't care */
5071		if (vfdev->is_virtfn && vfdev->physfn == pdev) {
5072			/* if it is assigned we cannot release it */
5073			if (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)
5074				return true;
5075		}
5076
5077		vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, vfdev);
5078	}
5079
5080	return false;
5081}
5082
5083#endif
5084static void igb_ping_all_vfs(struct igb_adapter *adapter)
5085{
5086	struct e1000_hw *hw = &adapter->hw;
5087	u32 ping;
5088	int i;
5089
5090	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5091		ping = E1000_PF_CONTROL_MSG;
5092		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5093			ping |= E1000_VT_MSGTYPE_CTS;
5094		igb_write_mbx(hw, &ping, 1, i);
5095	}
5096}
5097
5098static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5099{
5100	struct e1000_hw *hw = &adapter->hw;
5101	u32 vmolr = rd32(E1000_VMOLR(vf));
5102	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5103
5104	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5105	                    IGB_VF_FLAG_MULTI_PROMISC);
5106	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5107
5108	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5109		vmolr |= E1000_VMOLR_MPME;
5110		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5111		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5112	} else {
5113		/*
5114		 * if we have hashes and we are clearing a multicast promisc
5115		 * flag, we need to write the hashes to the MTA, as this step
5116		 * was previously skipped
5117		 */
5118		if (vf_data->num_vf_mc_hashes > 30) {
5119			vmolr |= E1000_VMOLR_MPME;
5120		} else if (vf_data->num_vf_mc_hashes) {
5121			int j;
5122			vmolr |= E1000_VMOLR_ROMPE;
5123			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5124				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5125		}
5126	}
5127
5128	wr32(E1000_VMOLR(vf), vmolr);
5129
5130	/* there are flags left unprocessed, likely not supported */
5131	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5132		return -EINVAL;
5133
5134	return 0;
5135
5136}
5137
5138static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5139				  u32 *msgbuf, u32 vf)
5140{
5141	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5142	u16 *hash_list = (u16 *)&msgbuf[1];
5143	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5144	int i;
5145
5146	/* salt away the number of multicast addresses assigned
5147	 * to this VF for later use, to restore when the PF multicast
5148	 * list changes
5149	 */
5150	vf_data->num_vf_mc_hashes = n;
5151
5152	/* only up to 30 hash values supported */
5153	if (n > 30)
5154		n = 30;
5155
5156	/* store the hashes for later use */
5157	for (i = 0; i < n; i++)
5158		vf_data->vf_mc_hashes[i] = hash_list[i];
5159
5160	/* Flush and reset the mta with the new values */
5161	igb_set_rx_mode(adapter->netdev);
5162
5163	return 0;
5164}
5165
5166static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5167{
5168	struct e1000_hw *hw = &adapter->hw;
5169	struct vf_data_storage *vf_data;
5170	int i, j;
5171
5172	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5173		u32 vmolr = rd32(E1000_VMOLR(i));
5174		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5175
5176		vf_data = &adapter->vf_data[i];
5177
5178		if ((vf_data->num_vf_mc_hashes > 30) ||
5179		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5180			vmolr |= E1000_VMOLR_MPME;
5181		} else if (vf_data->num_vf_mc_hashes) {
5182			vmolr |= E1000_VMOLR_ROMPE;
5183			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5184				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5185		}
5186		wr32(E1000_VMOLR(i), vmolr);
5187	}
5188}
5189
5190static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5191{
5192	struct e1000_hw *hw = &adapter->hw;
5193	u32 pool_mask, reg, vid;
5194	int i;
5195
5196	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5197
5198	/* Find the vlan filter for this id */
5199	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5200		reg = rd32(E1000_VLVF(i));
5201
5202		/* remove the vf from the pool */
5203		reg &= ~pool_mask;
5204
5205		/* if pool is empty then remove entry from vfta */
5206		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5207		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5208			vid = reg & E1000_VLVF_VLANID_MASK;
5209			reg = 0;
5210			igb_vfta_set(hw, vid, false);
5211		}
5212
5213		wr32(E1000_VLVF(i), reg);
5214	}
5215
5216	adapter->vf_data[vf].vlans_enabled = 0;
5217}
5218
5219static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5220{
5221	struct e1000_hw *hw = &adapter->hw;
5222	u32 reg, i;
5223
5224	/* The vlvf table only exists on 82576 hardware and newer */
5225	if (hw->mac.type < e1000_82576)
5226		return -1;
5227
5228	/* we only need to do this if VMDq is enabled */
5229	if (!adapter->vfs_allocated_count)
5230		return -1;
5231
5232	/* Find the vlan filter for this id */
5233	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5234		reg = rd32(E1000_VLVF(i));
5235		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5236		    vid == (reg & E1000_VLVF_VLANID_MASK))
5237			break;
5238	}
5239
5240	if (add) {
5241		if (i == E1000_VLVF_ARRAY_SIZE) {
5242			/* Did not find a matching VLAN ID entry that was
5243			 * enabled.  Search for a free filter entry, i.e.
5244			 * one without the enable bit set
5245			 */
5246			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5247				reg = rd32(E1000_VLVF(i));
5248				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5249					break;
5250			}
5251		}
5252		if (i < E1000_VLVF_ARRAY_SIZE) {
5253			/* Found an enabled/available entry */
5254			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5255
5256			/* if !enabled we need to set this up in vfta */
5257			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5258				/* add VID to filter table */
5259				igb_vfta_set(hw, vid, true);
5260				reg |= E1000_VLVF_VLANID_ENABLE;
5261			}
5262			reg &= ~E1000_VLVF_VLANID_MASK;
5263			reg |= vid;
5264			wr32(E1000_VLVF(i), reg);
5265
5266			/* do not modify RLPML for PF devices */
5267			if (vf >= adapter->vfs_allocated_count)
5268				return 0;
5269
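			/* the first VLAN enabled on this pool grows its rx
			 * packet size limit (RLPML) by 4 bytes to make room
			 * for the VLAN tag; the last VLAN removed shrinks it
			 * again below
			 */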
5270			if (!adapter->vf_data[vf].vlans_enabled) {
5271				u32 size;
5272				reg = rd32(E1000_VMOLR(vf));
5273				size = reg & E1000_VMOLR_RLPML_MASK;
5274				size += 4;
5275				reg &= ~E1000_VMOLR_RLPML_MASK;
5276				reg |= size;
5277				wr32(E1000_VMOLR(vf), reg);
5278			}
5279
5280			adapter->vf_data[vf].vlans_enabled++;
5281		}
5282	} else {
5283		if (i < E1000_VLVF_ARRAY_SIZE) {
5284			/* remove vf from the pool */
5285			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5286			/* if pool is empty then remove entry from vfta */
5287			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5288				reg = 0;
5289				igb_vfta_set(hw, vid, false);
5290			}
5291			wr32(E1000_VLVF(i), reg);
5292
5293			/* do not modify RLPML for PF devices */
5294			if (vf >= adapter->vfs_allocated_count)
5295				return 0;
5296
5297			adapter->vf_data[vf].vlans_enabled--;
5298			if (!adapter->vf_data[vf].vlans_enabled) {
5299				u32 size;
5300				reg = rd32(E1000_VMOLR(vf));
5301				size = reg & E1000_VMOLR_RLPML_MASK;
5302				size -= 4;
5303				reg &= ~E1000_VMOLR_RLPML_MASK;
5304				reg |= size;
5305				wr32(E1000_VMOLR(vf), reg);
5306			}
5307		}
5308	}
5309	return 0;
5310}
5311
5312static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5313{
5314	struct e1000_hw *hw = &adapter->hw;
5315
5316	if (vid)
5317		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5318	else
5319		wr32(E1000_VMVIR(vf), 0);
5320}
5321
5322static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5323			       int vf, u16 vlan, u8 qos)
5324{
5325	int err = 0;
5326	struct igb_adapter *adapter = netdev_priv(netdev);
5327
5328	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5329		return -EINVAL;
5330	if (vlan || qos) {
5331		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5332		if (err)
5333			goto out;
5334		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5335		igb_set_vmolr(adapter, vf, !vlan);
5336		adapter->vf_data[vf].pf_vlan = vlan;
5337		adapter->vf_data[vf].pf_qos = qos;
5338		dev_info(&adapter->pdev->dev,
5339			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5340		if (test_bit(__IGB_DOWN, &adapter->state)) {
5341			dev_warn(&adapter->pdev->dev,
5342				 "The VF VLAN has been set,"
5343				 " but the PF device is not up.\n");
5344			dev_warn(&adapter->pdev->dev,
5345				 "Bring the PF device up before"
5346				 " attempting to use the VF device.\n");
5347		}
5348	} else {
5349		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5350				   false, vf);
5351		igb_set_vmvir(adapter, vlan, vf);
5352		igb_set_vmolr(adapter, vf, true);
5353		adapter->vf_data[vf].pf_vlan = 0;
5354		adapter->vf_data[vf].pf_qos = 0;
5355	}
5356out:
5357	return err;
5358}
5359
5360static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5361{
5362	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5363	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5364
5365	return igb_vlvf_set(adapter, vid, add, vf);
5366}
5367
5368static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5369{
5370	/* clear flags - except flag that indicates PF has set the MAC */
5371	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5372	adapter->vf_data[vf].last_nack = jiffies;
5373
5374	/* reset offloads to defaults */
5375	igb_set_vmolr(adapter, vf, true);
5376
5377	/* reset vlans for device */
5378	igb_clear_vf_vfta(adapter, vf);
5379	if (adapter->vf_data[vf].pf_vlan)
5380		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5381				    adapter->vf_data[vf].pf_vlan,
5382				    adapter->vf_data[vf].pf_qos);
5383	else
5384		igb_clear_vf_vfta(adapter, vf);
5385
5386	/* reset multicast table array for vf */
5387	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5388
5389	/* Flush and reset the mta with the new values */
5390	igb_set_rx_mode(adapter->netdev);
5391}
5392
5393static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5394{
5395	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5396
5397	/* generate a new mac address as we were hotplug removed/added */
5398	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5399		eth_random_addr(vf_mac);
5400
5401	/* process remaining reset events */
5402	igb_vf_reset(adapter, vf);
5403}
5404
5405static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5406{
5407	struct e1000_hw *hw = &adapter->hw;
5408	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5409	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5410	u32 reg, msgbuf[3];
5411	u8 *addr = (u8 *)(&msgbuf[1]);
5412
5413	/* process all the same items cleared in a function level reset */
5414	igb_vf_reset(adapter, vf);
5415
5416	/* set vf mac address */
5417	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5418
5419	/* enable transmit and receive for vf */
5420	reg = rd32(E1000_VFTE);
5421	wr32(E1000_VFTE, reg | (1 << vf));
5422	reg = rd32(E1000_VFRE);
5423	wr32(E1000_VFRE, reg | (1 << vf));
5424
5425	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5426
5427	/* reply to reset with ack and vf mac address */
5428	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5429	memcpy(addr, vf_mac, 6);
5430	igb_write_mbx(hw, msgbuf, 3, vf);
5431}
5432
5433static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5434{
5435	/*
5436	 * The VF MAC Address is stored in a packed array of bytes
5437	 * starting at the second 32 bit word of the msg array
5438	 */
5439	unsigned char *addr = (char *)&msg[1];
5440	int err = -1;
5441
5442	if (is_valid_ether_addr(addr))
5443		err = igb_set_vf_mac(adapter, vf, addr);
5444
5445	return err;
5446}
5447
5448static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5449{
5450	struct e1000_hw *hw = &adapter->hw;
5451	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5452	u32 msg = E1000_VT_MSGTYPE_NACK;
5453
5454	/* if device isn't clear to send it shouldn't be reading either */
5455	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5456	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5457		igb_write_mbx(hw, &msg, 1, vf);
5458		vf_data->last_nack = jiffies;
5459	}
5460}
5461
5462static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5463{
5464	struct pci_dev *pdev = adapter->pdev;
5465	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5466	struct e1000_hw *hw = &adapter->hw;
5467	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5468	s32 retval;
5469
5470	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5471
5472	if (retval) {
5473		/* if receive failed, revoke VF CTS status and restart init */
5474		dev_err(&pdev->dev, "Error receiving message from VF\n");
5475		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5476		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5477			return;
5478		goto out;
5479	}
5480
5481	/* this is a message we already processed, do nothing */
5482	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5483		return;
5484
5485	/*
5486	 * until the vf completes a reset it should not be
5487	 * allowed to start any configuration.
5488	 */
5489
5490	if (msgbuf[0] == E1000_VF_RESET) {
5491		igb_vf_reset_msg(adapter, vf);
5492		return;
5493	}
5494
5495	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5496		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5497			return;
5498		retval = -1;
5499		goto out;
5500	}
5501
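	/* the low 16 bits of msgbuf[0] identify the VF request; the ACK,
	 * NACK and CTS flags are OR'ed into the upper bits when replying
	 */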
5502	switch ((msgbuf[0] & 0xFFFF)) {
5503	case E1000_VF_SET_MAC_ADDR:
5504		retval = -EINVAL;
5505		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5506			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5507		else
5508			dev_warn(&pdev->dev,
5509				 "VF %d attempted to override administratively "
5510				 "set MAC address\nReload the VF driver to "
5511				 "resume operations\n", vf);
5512		break;
5513	case E1000_VF_SET_PROMISC:
5514		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5515		break;
5516	case E1000_VF_SET_MULTICAST:
5517		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5518		break;
5519	case E1000_VF_SET_LPE:
5520		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5521		break;
5522	case E1000_VF_SET_VLAN:
5523		retval = -1;
5524		if (vf_data->pf_vlan)
5525			dev_warn(&pdev->dev,
5526				 "VF %d attempted to override administratively "
5527				 "set VLAN tag\nReload the VF driver to "
5528				 "resume operations\n", vf);
5529		else
5530			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5531		break;
5532	default:
5533		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5534		retval = -1;
5535		break;
5536	}
5537
5538	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5539out:
5540	/* notify the VF of the results of what it sent us */
5541	if (retval)
5542		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5543	else
5544		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5545
5546	igb_write_mbx(hw, msgbuf, 1, vf);
5547}
5548
5549static void igb_msg_task(struct igb_adapter *adapter)
5550{
5551	struct e1000_hw *hw = &adapter->hw;
5552	u32 vf;
5553
5554	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5555		/* process any reset requests */
5556		if (!igb_check_for_rst(hw, vf))
5557			igb_vf_reset_event(adapter, vf);
5558
5559		/* process any messages pending */
5560		if (!igb_check_for_msg(hw, vf))
5561			igb_rcv_msg_from_vf(adapter, vf);
5562
5563		/* process any acks */
5564		if (!igb_check_for_ack(hw, vf))
5565			igb_rcv_ack_from_vf(adapter, vf);
5566	}
5567}
5568
5569/**
5570 *  igb_set_uta - Set unicast filter table address
5571 *  @adapter: board private structure
5572 *
5573 *  The unicast table address is a register array of 32-bit registers.
5574 *  The table is meant to be used in a way similar to how the MTA is used;
5575 *  however, due to certain limitations in the hardware it is necessary to
5576 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5577 *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
5578 **/
5579static void igb_set_uta(struct igb_adapter *adapter)
5580{
5581	struct e1000_hw *hw = &adapter->hw;
5582	int i;
5583
5584	/* The UTA table only exists on 82576 hardware and newer */
5585	if (hw->mac.type < e1000_82576)
5586		return;
5587
5588	/* we only need to do this if VMDq is enabled */
5589	if (!adapter->vfs_allocated_count)
5590		return;
5591
5592	for (i = 0; i < hw->mac.uta_reg_count; i++)
5593		array_wr32(E1000_UTA, i, ~0);
5594}
5595
5596/**
5597 * igb_intr_msi - Interrupt Handler
5598 * @irq: interrupt number
5599 * @data: pointer to a network interface device structure
5600 **/
5601static irqreturn_t igb_intr_msi(int irq, void *data)
5602{
5603	struct igb_adapter *adapter = data;
5604	struct igb_q_vector *q_vector = adapter->q_vector[0];
5605	struct e1000_hw *hw = &adapter->hw;
5606	/* read ICR disables interrupts using IAM */
5607	u32 icr = rd32(E1000_ICR);
5608
5609	igb_write_itr(q_vector);
5610
5611	if (icr & E1000_ICR_DRSTA)
5612		schedule_work(&adapter->reset_task);
5613
5614	if (icr & E1000_ICR_DOUTSYNC) {
5615		/* HW is reporting DMA is out of sync */
5616		adapter->stats.doosync++;
5617	}
5618
5619	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5620		hw->mac.get_link_status = 1;
5621		if (!test_bit(__IGB_DOWN, &adapter->state))
5622			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5623	}
5624
5625#ifdef CONFIG_IGB_PTP
5626	if (icr & E1000_ICR_TS) {
5627		u32 tsicr = rd32(E1000_TSICR);
5628
5629		if (tsicr & E1000_TSICR_TXTS) {
5630			/* acknowledge the interrupt */
5631			wr32(E1000_TSICR, E1000_TSICR_TXTS);
5632			/* retrieve hardware timestamp */
5633			schedule_work(&adapter->ptp_tx_work);
5634		}
5635	}
5636#endif /* CONFIG_IGB_PTP */
5637
5638	napi_schedule(&q_vector->napi);
5639
5640	return IRQ_HANDLED;
5641}
5642
5643/**
5644 * igb_intr - Legacy Interrupt Handler
5645 * @irq: interrupt number
5646 * @data: pointer to a network interface device structure
5647 **/
5648static irqreturn_t igb_intr(int irq, void *data)
5649{
5650	struct igb_adapter *adapter = data;
5651	struct igb_q_vector *q_vector = adapter->q_vector[0];
5652	struct e1000_hw *hw = &adapter->hw;
5653	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5654	 * need for the IMC write */
5655	u32 icr = rd32(E1000_ICR);
5656
5657	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5658	 * not set, then the adapter didn't send an interrupt */
5659	if (!(icr & E1000_ICR_INT_ASSERTED))
5660		return IRQ_NONE;
5661
5662	igb_write_itr(q_vector);
5663
5664	if (icr & E1000_ICR_DRSTA)
5665		schedule_work(&adapter->reset_task);
5666
5667	if (icr & E1000_ICR_DOUTSYNC) {
5668		/* HW is reporting DMA is out of sync */
5669		adapter->stats.doosync++;
5670	}
5671
5672	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5673		hw->mac.get_link_status = 1;
5674		/* guard against interrupt when we're going down */
5675		if (!test_bit(__IGB_DOWN, &adapter->state))
5676			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5677	}
5678
5679#ifdef CONFIG_IGB_PTP
5680	if (icr & E1000_ICR_TS) {
5681		u32 tsicr = rd32(E1000_TSICR);
5682
5683		if (tsicr & E1000_TSICR_TXTS) {
5684			/* acknowledge the interrupt */
5685			wr32(E1000_TSICR, E1000_TSICR_TXTS);
5686			/* retrieve hardware timestamp */
5687			schedule_work(&adapter->ptp_tx_work);
5688		}
5689	}
5690#endif /* CONFIG_IGB_PTP */
5691
5692	napi_schedule(&q_vector->napi);
5693
5694	return IRQ_HANDLED;
5695}
5696
5697static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5698{
5699	struct igb_adapter *adapter = q_vector->adapter;
5700	struct e1000_hw *hw = &adapter->hw;
5701
5702	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5703	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5704		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5705			igb_set_itr(q_vector);
5706		else
5707			igb_update_ring_itr(q_vector);
5708	}
5709
5710	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5711		if (adapter->msix_entries)
5712			wr32(E1000_EIMS, q_vector->eims_value);
5713		else
5714			igb_irq_enable(adapter);
5715	}
5716}
5717
5718/**
5719 * igb_poll - NAPI Rx polling callback
5720 * @napi: napi polling structure
5721 * @budget: count of how many packets we should handle
5722 **/
5723static int igb_poll(struct napi_struct *napi, int budget)
5724{
5725	struct igb_q_vector *q_vector = container_of(napi,
5726	                                             struct igb_q_vector,
5727	                                             napi);
5728	bool clean_complete = true;
5729
5730#ifdef CONFIG_IGB_DCA
5731	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5732		igb_update_dca(q_vector);
5733#endif
5734	if (q_vector->tx.ring)
5735		clean_complete = igb_clean_tx_irq(q_vector);
5736
5737	if (q_vector->rx.ring)
5738		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5739
5740	/* If all work not completed, return budget and keep polling */
5741	if (!clean_complete)
5742		return budget;
5743
5744	/* If not enough Rx work done, exit the polling mode */
5745	napi_complete(napi);
5746	igb_ring_irq_enable(q_vector);
5747
5748	return 0;
5749}
5750
5751/**
5752 * igb_clean_tx_irq - Reclaim resources after transmit completes
5753 * @q_vector: pointer to q_vector containing needed info
5754 *
5755 * returns true if ring is completely cleaned
5756 **/
5757static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5758{
5759	struct igb_adapter *adapter = q_vector->adapter;
5760	struct igb_ring *tx_ring = q_vector->tx.ring;
5761	struct igb_tx_buffer *tx_buffer;
5762	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5763	unsigned int total_bytes = 0, total_packets = 0;
5764	unsigned int budget = q_vector->tx.work_limit;
5765	unsigned int i = tx_ring->next_to_clean;
5766
5767	if (test_bit(__IGB_DOWN, &adapter->state))
5768		return true;
5769
5770	tx_buffer = &tx_ring->tx_buffer_info[i];
5771	tx_desc = IGB_TX_DESC(tx_ring, i);
5772	i -= tx_ring->count;
5773
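	/* i is biased by -count so that it reaches zero exactly when the
	 * ring wraps; the wrap checks below then re-bias it
	 */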
5774	for (; budget; budget--) {
5775		eop_desc = tx_buffer->next_to_watch;
5776
5777		/* prevent any other reads prior to eop_desc */
5778		rmb();
5779
5780		/* if next_to_watch is not set then there is no work pending */
5781		if (!eop_desc)
5782			break;
5783
5784		/* if DD is not set pending work has not been completed */
5785		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5786			break;
5787
5788		/* clear next_to_watch to prevent false hangs */
5789		tx_buffer->next_to_watch = NULL;
5790
5791		/* update the statistics for this packet */
5792		total_bytes += tx_buffer->bytecount;
5793		total_packets += tx_buffer->gso_segs;
5794
5795		/* free the skb */
5796		dev_kfree_skb_any(tx_buffer->skb);
5797		tx_buffer->skb = NULL;
5798
5799		/* unmap skb header data */
5800		dma_unmap_single(tx_ring->dev,
5801				 tx_buffer->dma,
5802				 tx_buffer->length,
5803				 DMA_TO_DEVICE);
5804
5805		/* clear last DMA location and unmap remaining buffers */
5806		while (tx_desc != eop_desc) {
5807			tx_buffer->dma = 0;
5808
5809			tx_buffer++;
5810			tx_desc++;
5811			i++;
5812			if (unlikely(!i)) {
5813				i -= tx_ring->count;
5814				tx_buffer = tx_ring->tx_buffer_info;
5815				tx_desc = IGB_TX_DESC(tx_ring, 0);
5816			}
5817
5818			/* unmap any remaining paged data */
5819			if (tx_buffer->dma) {
5820				dma_unmap_page(tx_ring->dev,
5821					       tx_buffer->dma,
5822					       tx_buffer->length,
5823					       DMA_TO_DEVICE);
5824			}
5825		}
5826
5827		/* clear last DMA location */
5828		tx_buffer->dma = 0;
5829
5830		/* move us one more past the eop_desc for start of next pkt */
5831		tx_buffer++;
5832		tx_desc++;
5833		i++;
5834		if (unlikely(!i)) {
5835			i -= tx_ring->count;
5836			tx_buffer = tx_ring->tx_buffer_info;
5837			tx_desc = IGB_TX_DESC(tx_ring, 0);
5838		}
5839	}
5840
5841	netdev_tx_completed_queue(txring_txq(tx_ring),
5842				  total_packets, total_bytes);
5843	i += tx_ring->count;
5844	tx_ring->next_to_clean = i;
5845	u64_stats_update_begin(&tx_ring->tx_syncp);
5846	tx_ring->tx_stats.bytes += total_bytes;
5847	tx_ring->tx_stats.packets += total_packets;
5848	u64_stats_update_end(&tx_ring->tx_syncp);
5849	q_vector->tx.total_bytes += total_bytes;
5850	q_vector->tx.total_packets += total_packets;
5851
5852	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5853		struct e1000_hw *hw = &adapter->hw;
5854
5855		eop_desc = tx_buffer->next_to_watch;
5856
5857		/* Detect a transmit hang in hardware; this serializes the
5858		 * check with the clearing of time_stamp and movement of i */
5859		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5860		if (eop_desc &&
5861		    time_after(jiffies, tx_buffer->time_stamp +
5862			       (adapter->tx_timeout_factor * HZ)) &&
5863		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5864
5865			/* detected Tx unit hang */
5866			dev_err(tx_ring->dev,
5867				"Detected Tx Unit Hang\n"
5868				"  Tx Queue             <%d>\n"
5869				"  TDH                  <%x>\n"
5870				"  TDT                  <%x>\n"
5871				"  next_to_use          <%x>\n"
5872				"  next_to_clean        <%x>\n"
5873				"buffer_info[next_to_clean]\n"
5874				"  time_stamp           <%lx>\n"
5875				"  next_to_watch        <%p>\n"
5876				"  jiffies              <%lx>\n"
5877				"  desc.status          <%x>\n",
5878				tx_ring->queue_index,
5879				rd32(E1000_TDH(tx_ring->reg_idx)),
5880				readl(tx_ring->tail),
5881				tx_ring->next_to_use,
5882				tx_ring->next_to_clean,
5883				tx_buffer->time_stamp,
5884				eop_desc,
5885				jiffies,
5886				eop_desc->wb.status);
5887			netif_stop_subqueue(tx_ring->netdev,
5888					    tx_ring->queue_index);
5889
5890			/* we are about to reset, no point in enabling stuff */
5891			return true;
5892		}
5893	}
5894
5895	if (unlikely(total_packets &&
5896		     netif_carrier_ok(tx_ring->netdev) &&
5897		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5898		/* Make sure that anybody stopping the queue after this
5899		 * sees the new next_to_clean.
5900		 */
5901		smp_mb();
5902		if (__netif_subqueue_stopped(tx_ring->netdev,
5903					     tx_ring->queue_index) &&
5904		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5905			netif_wake_subqueue(tx_ring->netdev,
5906					    tx_ring->queue_index);
5907
5908			u64_stats_update_begin(&tx_ring->tx_syncp);
5909			tx_ring->tx_stats.restart_queue++;
5910			u64_stats_update_end(&tx_ring->tx_syncp);
5911		}
5912	}
5913
5914	return !!budget;
5915}
5916
5917static inline void igb_rx_checksum(struct igb_ring *ring,
5918				   union e1000_adv_rx_desc *rx_desc,
5919				   struct sk_buff *skb)
5920{
5921	skb_checksum_none_assert(skb);
5922
5923	/* Ignore Checksum bit is set */
5924	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5925		return;
5926
5927	/* Rx checksum disabled via ethtool */
5928	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5929		return;
5930
5931	/* TCP/UDP checksum error bit is set */
5932	if (igb_test_staterr(rx_desc,
5933			     E1000_RXDEXT_STATERR_TCPE |
5934			     E1000_RXDEXT_STATERR_IPE)) {
5935		/*
5936		 * work around an erratum with SCTP packets where the TCPE (aka
5937		 * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5938		 * packets; in that case let the stack verify the CRC32c
5939		 */
5940		if (!((skb->len == 60) &&
5941		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5942			u64_stats_update_begin(&ring->rx_syncp);
5943			ring->rx_stats.csum_err++;
5944			u64_stats_update_end(&ring->rx_syncp);
5945		}
5946		/* let the stack verify checksum errors */
5947		return;
5948	}
5949	/* It must be a TCP or UDP packet with a valid checksum */
5950	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5951				      E1000_RXD_STAT_UDPCS))
5952		skb->ip_summed = CHECKSUM_UNNECESSARY;
5953
5954	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5955		le32_to_cpu(rx_desc->wb.upper.status_error));
5956}
5957
5958static inline void igb_rx_hash(struct igb_ring *ring,
5959			       union e1000_adv_rx_desc *rx_desc,
5960			       struct sk_buff *skb)
5961{
5962	if (ring->netdev->features & NETIF_F_RXHASH)
5963		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5964}
5965
5966static void igb_rx_vlan(struct igb_ring *ring,
5967			union e1000_adv_rx_desc *rx_desc,
5968			struct sk_buff *skb)
5969{
5970	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5971		u16 vid;
5972		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5973		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5974			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5975		else
5976			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5977
5978		__vlan_hwaccel_put_tag(skb, vid);
5979	}
5980}
5981
5982static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5983{
5984	/* HW will not DMA in data larger than the given buffer, even if it
5985	 * parses the (NFS, of course) header to be larger.  In that case, it
5986	 * fills the header buffer and spills the rest into the page.
5987	 */
5988	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5989	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5990	if (hlen > IGB_RX_HDR_LEN)
5991		hlen = IGB_RX_HDR_LEN;
5992	return hlen;
5993}
5994
5995static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5996{
5997	struct igb_ring *rx_ring = q_vector->rx.ring;
5998	union e1000_adv_rx_desc *rx_desc;
5999	const int current_node = numa_node_id();
6000	unsigned int total_bytes = 0, total_packets = 0;
6001	u16 cleaned_count = igb_desc_unused(rx_ring);
6002	u16 i = rx_ring->next_to_clean;
6003
6004	rx_desc = IGB_RX_DESC(rx_ring, i);
6005
6006	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6007		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6008		struct sk_buff *skb = buffer_info->skb;
6009		union e1000_adv_rx_desc *next_rxd;
6010
6011		buffer_info->skb = NULL;
6012		prefetch(skb->data);
6013
6014		i++;
6015		if (i == rx_ring->count)
6016			i = 0;
6017
6018		next_rxd = IGB_RX_DESC(rx_ring, i);
6019		prefetch(next_rxd);
6020
6021		/*
6022		 * This memory barrier is needed to keep us from reading
6023		 * any other fields out of the rx_desc until we know the
6024		 * RXD_STAT_DD bit is set
6025		 */
6026		rmb();
6027
6028		if (!skb_is_nonlinear(skb)) {
6029			__skb_put(skb, igb_get_hlen(rx_desc));
6030			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6031					 IGB_RX_HDR_LEN,
6032					 DMA_FROM_DEVICE);
6033			buffer_info->dma = 0;
6034		}
6035
6036		if (rx_desc->wb.upper.length) {
6037			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6038
6039			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6040						buffer_info->page,
6041						buffer_info->page_offset,
6042						length);
6043
6044			skb->len += length;
6045			skb->data_len += length;
6046			skb->truesize += PAGE_SIZE / 2;
6047
6048			if ((page_count(buffer_info->page) != 1) ||
6049			    (page_to_nid(buffer_info->page) != current_node))
6050				buffer_info->page = NULL;
6051			else
6052				get_page(buffer_info->page);
6053
6054			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6055				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6056			buffer_info->page_dma = 0;
6057		}
6058
6059		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6060			struct igb_rx_buffer *next_buffer;
6061			next_buffer = &rx_ring->rx_buffer_info[i];
6062			buffer_info->skb = next_buffer->skb;
6063			buffer_info->dma = next_buffer->dma;
6064			next_buffer->skb = skb;
6065			next_buffer->dma = 0;
6066			goto next_desc;
6067		}
6068
6069		if (unlikely((igb_test_staterr(rx_desc,
6070					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6071			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6072			dev_kfree_skb_any(skb);
6073			goto next_desc;
6074		}
6075
6076#ifdef CONFIG_IGB_PTP
6077		igb_ptp_rx_hwtstamp(q_vector, rx_desc, skb);
6078#endif /* CONFIG_IGB_PTP */
6079		igb_rx_hash(rx_ring, rx_desc, skb);
6080		igb_rx_checksum(rx_ring, rx_desc, skb);
6081		igb_rx_vlan(rx_ring, rx_desc, skb);
6082
6083		total_bytes += skb->len;
6084		total_packets++;
6085
6086		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6087
6088		napi_gro_receive(&q_vector->napi, skb);
6089
6090		budget--;
6091next_desc:
6092		if (!budget)
6093			break;
6094
6095		cleaned_count++;
6096		/* return some buffers to hardware, one at a time is too slow */
6097		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6098			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6099			cleaned_count = 0;
6100		}
6101
6102		/* use prefetched values */
6103		rx_desc = next_rxd;
6104	}
6105
6106	rx_ring->next_to_clean = i;
6107	u64_stats_update_begin(&rx_ring->rx_syncp);
6108	rx_ring->rx_stats.packets += total_packets;
6109	rx_ring->rx_stats.bytes += total_bytes;
6110	u64_stats_update_end(&rx_ring->rx_syncp);
6111	q_vector->rx.total_packets += total_packets;
6112	q_vector->rx.total_bytes += total_bytes;
6113
6114	if (cleaned_count)
6115		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6116
6117	return !!budget;
6118}
6119
6120static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6121				 struct igb_rx_buffer *bi)
6122{
6123	struct sk_buff *skb = bi->skb;
6124	dma_addr_t dma = bi->dma;
6125
6126	if (dma)
6127		return true;
6128
6129	if (likely(!skb)) {
6130		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6131						IGB_RX_HDR_LEN);
6132		bi->skb = skb;
6133		if (!skb) {
6134			rx_ring->rx_stats.alloc_failed++;
6135			return false;
6136		}
6137
6138		/* initialize skb for ring */
6139		skb_record_rx_queue(skb, rx_ring->queue_index);
6140	}
6141
6142	dma = dma_map_single(rx_ring->dev, skb->data,
6143			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6144
6145	if (dma_mapping_error(rx_ring->dev, dma)) {
6146		rx_ring->rx_stats.alloc_failed++;
6147		return false;
6148	}
6149
6150	bi->dma = dma;
6151	return true;
6152}
6153
6154static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6155				  struct igb_rx_buffer *bi)
6156{
6157	struct page *page = bi->page;
6158	dma_addr_t page_dma = bi->page_dma;
6159	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6160
6161	if (page_dma)
6162		return true;
6163
6164	if (!page) {
6165		page = __skb_alloc_page(GFP_ATOMIC, bi->skb);
6166		bi->page = page;
6167		if (unlikely(!page)) {
6168			rx_ring->rx_stats.alloc_failed++;
6169			return false;
6170		}
6171	}
6172
6173	page_dma = dma_map_page(rx_ring->dev, page,
6174				page_offset, PAGE_SIZE / 2,
6175				DMA_FROM_DEVICE);
6176
6177	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6178		rx_ring->rx_stats.alloc_failed++;
6179		return false;
6180	}
6181
6182	bi->page_dma = page_dma;
6183	bi->page_offset = page_offset;
6184	return true;
6185}
6186
6187/**
6188 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6189 * @adapter: address of board private structure
6190 * @rx_ring: rx descriptor ring to place the buffers on
6191void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6192{
6193	union e1000_adv_rx_desc *rx_desc;
6194	struct igb_rx_buffer *bi;
6195	u16 i = rx_ring->next_to_use;
6196
6197	rx_desc = IGB_RX_DESC(rx_ring, i);
6198	bi = &rx_ring->rx_buffer_info[i];
6199	i -= rx_ring->count;
6200
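	/* same -count bias as the Tx clean path: i hits zero at the wrap */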
6201	while (cleaned_count--) {
6202		if (!igb_alloc_mapped_skb(rx_ring, bi))
6203			break;
6204
6205		/* Refresh the desc even if buffer_addrs didn't change
6206		 * because each write-back erases this info. */
6207		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6208
6209		if (!igb_alloc_mapped_page(rx_ring, bi))
6210			break;
6211
6212		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6213
6214		rx_desc++;
6215		bi++;
6216		i++;
6217		if (unlikely(!i)) {
6218			rx_desc = IGB_RX_DESC(rx_ring, 0);
6219			bi = rx_ring->rx_buffer_info;
6220			i -= rx_ring->count;
6221		}
6222
6223		/* clear the hdr_addr for the next_to_use descriptor */
6224		rx_desc->read.hdr_addr = 0;
6225	}
6226
6227	i += rx_ring->count;
6228
6229	if (rx_ring->next_to_use != i) {
6230		rx_ring->next_to_use = i;
6231
6232		/* Force memory writes to complete before letting h/w
6233		 * know there are new descriptors to fetch.  (Only
6234		 * applicable for weak-ordered memory model archs,
6235		 * such as IA-64). */
6236		wmb();
6237		writel(i, rx_ring->tail);
6238	}
6239}
6240
6241/**
6242 * igb_mii_ioctl - handle MII ioctl requests
6243 * @netdev: network interface device structure
6244 * @ifr: pointer to the interface request structure
6245 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6246 **/
6247static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6248{
6249	struct igb_adapter *adapter = netdev_priv(netdev);
6250	struct mii_ioctl_data *data = if_mii(ifr);
6251
6252	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6253		return -EOPNOTSUPP;
6254
6255	switch (cmd) {
6256	case SIOCGMIIPHY:
6257		data->phy_id = adapter->hw.phy.addr;
6258		break;
6259	case SIOCGMIIREG:
6260		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6261		                     &data->val_out))
6262			return -EIO;
6263		break;
6264	case SIOCSMIIREG:
6265	default:
6266		return -EOPNOTSUPP;
6267	}
6268	return 0;
6269}
6270
6271/**
6272 * igb_ioctl - dispatch device-specific ioctl requests
6273 * @netdev: network interface device structure
6274 * @ifr: pointer to the interface request structure
6275 * @cmd: ioctl command
6276 **/
6277static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6278{
6279	switch (cmd) {
6280	case SIOCGMIIPHY:
6281	case SIOCGMIIREG:
6282	case SIOCSMIIREG:
6283		return igb_mii_ioctl(netdev, ifr, cmd);
6284#ifdef CONFIG_IGB_PTP
6285	case SIOCSHWTSTAMP:
6286		return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd);
6287#endif /* CONFIG_IGB_PTP */
6288	default:
6289		return -EOPNOTSUPP;
6290	}
6291}
6292
6293s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6294{
6295	struct igb_adapter *adapter = hw->back;
6296	u16 cap_offset;
6297
6298	cap_offset = adapter->pdev->pcie_cap;
6299	if (!cap_offset)
6300		return -E1000_ERR_CONFIG;
6301
6302	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6303
6304	return 0;
6305}
6306
6307s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6308{
6309	struct igb_adapter *adapter = hw->back;
6310	u16 cap_offset;
6311
6312	cap_offset = adapter->pdev->pcie_cap;
6313	if (!cap_offset)
6314		return -E1000_ERR_CONFIG;
6315
6316	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6317
6318	return 0;
6319}
6320
6321static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6322{
6323	struct igb_adapter *adapter = netdev_priv(netdev);
6324	struct e1000_hw *hw = &adapter->hw;
6325	u32 ctrl, rctl;
6326	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6327
6328	if (enable) {
6329		/* enable VLAN tag insert/strip */
6330		ctrl = rd32(E1000_CTRL);
6331		ctrl |= E1000_CTRL_VME;
6332		wr32(E1000_CTRL, ctrl);
6333
6334		/* Disable CFI check */
6335		rctl = rd32(E1000_RCTL);
6336		rctl &= ~E1000_RCTL_CFIEN;
6337		wr32(E1000_RCTL, rctl);
6338	} else {
6339		/* disable VLAN tag insert/strip */
6340		ctrl = rd32(E1000_CTRL);
6341		ctrl &= ~E1000_CTRL_VME;
6342		wr32(E1000_CTRL, ctrl);
6343	}
6344
6345	igb_rlpml_set(adapter);
6346}
6347
6348static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6349{
6350	struct igb_adapter *adapter = netdev_priv(netdev);
6351	struct e1000_hw *hw = &adapter->hw;
6352	int pf_id = adapter->vfs_allocated_count;
6353
6354	/* attempt to add filter to vlvf array */
6355	igb_vlvf_set(adapter, vid, true, pf_id);
6356
6357	/* add the filter since PF can receive vlans w/o entry in vlvf */
6358	igb_vfta_set(hw, vid, true);
6359
6360	set_bit(vid, adapter->active_vlans);
6361
6362	return 0;
6363}
6364
6365static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6366{
6367	struct igb_adapter *adapter = netdev_priv(netdev);
6368	struct e1000_hw *hw = &adapter->hw;
6369	int pf_id = adapter->vfs_allocated_count;
6370	s32 err;
6371
6372	/* remove vlan from VLVF table array */
6373	err = igb_vlvf_set(adapter, vid, false, pf_id);
6374
6375	/* if vid was not present in VLVF just remove it from table */
6376	if (err)
6377		igb_vfta_set(hw, vid, false);
6378
6379	clear_bit(vid, adapter->active_vlans);
6380
6381	return 0;
6382}
6383
6384static void igb_restore_vlan(struct igb_adapter *adapter)
6385{
6386	u16 vid;
6387
6388	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6389
6390	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6391		igb_vlan_rx_add_vid(adapter->netdev, vid);
6392}
6393
6394int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6395{
6396	struct pci_dev *pdev = adapter->pdev;
6397	struct e1000_mac_info *mac = &adapter->hw.mac;
6398
6399	mac->autoneg = 0;
6400
6401	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6402	 * for the switch() below to work */
6403	if ((spd & 1) || (dplx & ~1))
6404		goto err_inval;
6405
6406	/* Fiber NICs only allow 1000 Mbps full duplex */
6407	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6408	    (spd != SPEED_1000 ||
6409	     dplx != DUPLEX_FULL))
6410		goto err_inval;
6411
6412	switch (spd + dplx) {
6413	case SPEED_10 + DUPLEX_HALF:
6414		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6415		break;
6416	case SPEED_10 + DUPLEX_FULL:
6417		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6418		break;
6419	case SPEED_100 + DUPLEX_HALF:
6420		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6421		break;
6422	case SPEED_100 + DUPLEX_FULL:
6423		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6424		break;
6425	case SPEED_1000 + DUPLEX_FULL:
6426		mac->autoneg = 1;
6427		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6428		break;
6429	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6430	default:
6431		goto err_inval;
6432	}
6433
6434	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
6435	adapter->hw.phy.mdix = AUTO_ALL_MODES;
6436
6437	return 0;
6438
6439err_inval:
6440	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6441	return -EINVAL;
6442}
6443
6444static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6445			  bool runtime)
6446{
6447	struct net_device *netdev = pci_get_drvdata(pdev);
6448	struct igb_adapter *adapter = netdev_priv(netdev);
6449	struct e1000_hw *hw = &adapter->hw;
6450	u32 ctrl, rctl, status;
6451	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6452#ifdef CONFIG_PM
6453	int retval = 0;
6454#endif
6455
6456	netif_device_detach(netdev);
6457
6458	if (netif_running(netdev))
6459		__igb_close(netdev, true);
6460
6461	igb_clear_interrupt_scheme(adapter);
6462
6463#ifdef CONFIG_PM
6464	retval = pci_save_state(pdev);
6465	if (retval)
6466		return retval;
6467#endif
6468
6469	status = rd32(E1000_STATUS);
6470	if (status & E1000_STATUS_LU)
6471		wufc &= ~E1000_WUFC_LNKC;
6472
6473	if (wufc) {
6474		igb_setup_rctl(adapter);
6475		igb_set_rx_mode(netdev);
6476
6477		/* turn on all-multi mode if wake on multicast is enabled */
6478		if (wufc & E1000_WUFC_MC) {
6479			rctl = rd32(E1000_RCTL);
6480			rctl |= E1000_RCTL_MPE;
6481			wr32(E1000_RCTL, rctl);
6482		}
6483
6484		ctrl = rd32(E1000_CTRL);
6485		/* advertise wake from D3Cold */
6486		#define E1000_CTRL_ADVD3WUC 0x00100000
6487		/* phy power management enable */
6488		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6489		ctrl |= E1000_CTRL_ADVD3WUC;
6490		wr32(E1000_CTRL, ctrl);
6491
6492		/* Allow time for pending master requests to run */
6493		igb_disable_pcie_master(hw);
6494
6495		wr32(E1000_WUC, E1000_WUC_PME_EN);
6496		wr32(E1000_WUFC, wufc);
6497	} else {
6498		wr32(E1000_WUC, 0);
6499		wr32(E1000_WUFC, 0);
6500	}
6501
6502	*enable_wake = wufc || adapter->en_mng_pt;
6503	if (!*enable_wake)
6504		igb_power_down_link(adapter);
6505	else
6506		igb_power_up_link(adapter);
6507
6508	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6509	 * would have already happened in close and is redundant. */
6510	igb_release_hw_control(adapter);
6511
6512	pci_disable_device(pdev);
6513
6514	return 0;
6515}
6516
6517#ifdef CONFIG_PM
6518#ifdef CONFIG_PM_SLEEP
6519static int igb_suspend(struct device *dev)
6520{
6521	int retval;
6522	bool wake;
6523	struct pci_dev *pdev = to_pci_dev(dev);
6524
6525	retval = __igb_shutdown(pdev, &wake, 0);
6526	if (retval)
6527		return retval;
6528
6529	if (wake) {
6530		pci_prepare_to_sleep(pdev);
6531	} else {
6532		pci_wake_from_d3(pdev, false);
6533		pci_set_power_state(pdev, PCI_D3hot);
6534	}
6535
6536	return 0;
6537}
6538#endif /* CONFIG_PM_SLEEP */
6539
6540static int igb_resume(struct device *dev)
6541{
6542	struct pci_dev *pdev = to_pci_dev(dev);
6543	struct net_device *netdev = pci_get_drvdata(pdev);
6544	struct igb_adapter *adapter = netdev_priv(netdev);
6545	struct e1000_hw *hw = &adapter->hw;
6546	u32 err;
6547
6548	pci_set_power_state(pdev, PCI_D0);
6549	pci_restore_state(pdev);
6550	pci_save_state(pdev);
6551
6552	err = pci_enable_device_mem(pdev);
6553	if (err) {
6554		dev_err(&pdev->dev,
6555			"igb: Cannot enable PCI device from suspend\n");
6556		return err;
6557	}
6558	pci_set_master(pdev);
6559
6560	pci_enable_wake(pdev, PCI_D3hot, 0);
6561	pci_enable_wake(pdev, PCI_D3cold, 0);
6562
6563	if (igb_init_interrupt_scheme(adapter)) {
6564		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6565		return -ENOMEM;
6566	}
6567
6568	igb_reset(adapter);
6569
6570	/* let the f/w know that the h/w is now under the control of the
6571	 * driver. */
6572	igb_get_hw_control(adapter);
6573
6574	wr32(E1000_WUS, ~0);
6575
6576	if (netdev->flags & IFF_UP) {
6577		err = __igb_open(netdev, true);
6578		if (err)
6579			return err;
6580	}
6581
6582	netif_device_attach(netdev);
6583	return 0;
6584}
6585
6586#ifdef CONFIG_PM_RUNTIME
6587static int igb_runtime_idle(struct device *dev)
6588{
6589	struct pci_dev *pdev = to_pci_dev(dev);
6590	struct net_device *netdev = pci_get_drvdata(pdev);
6591	struct igb_adapter *adapter = netdev_priv(netdev);
6592
6593	if (!igb_has_link(adapter))
6594		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6595
6596	return -EBUSY;
6597}
6598
6599static int igb_runtime_suspend(struct device *dev)
6600{
6601	struct pci_dev *pdev = to_pci_dev(dev);
6602	int retval;
6603	bool wake;
6604
6605	retval = __igb_shutdown(pdev, &wake, 1);
6606	if (retval)
6607		return retval;
6608
6609	if (wake) {
6610		pci_prepare_to_sleep(pdev);
6611	} else {
6612		pci_wake_from_d3(pdev, false);
6613		pci_set_power_state(pdev, PCI_D3hot);
6614	}
6615
6616	return 0;
6617}
6618
6619static int igb_runtime_resume(struct device *dev)
6620{
6621	return igb_resume(dev);
6622}
6623#endif /* CONFIG_PM_RUNTIME */
6624#endif
6625
6626static void igb_shutdown(struct pci_dev *pdev)
6627{
6628	bool wake;
6629
6630	__igb_shutdown(pdev, &wake, 0);
6631
6632	if (system_state == SYSTEM_POWER_OFF) {
6633		pci_wake_from_d3(pdev, wake);
6634		pci_set_power_state(pdev, PCI_D3hot);
6635	}
6636}
6637
6638#ifdef CONFIG_NET_POLL_CONTROLLER
6639/*
6640 * Polling 'interrupt' - used by things like netconsole to send skbs
6641 * without having to re-enable interrupts. It's not called while
6642 * the interrupt routine is executing.
6643 */
6644static void igb_netpoll(struct net_device *netdev)
6645{
6646	struct igb_adapter *adapter = netdev_priv(netdev);
6647	struct e1000_hw *hw = &adapter->hw;
6648	struct igb_q_vector *q_vector;
6649	int i;
6650
6651	for (i = 0; i < adapter->num_q_vectors; i++) {
6652		q_vector = adapter->q_vector[i];
6653		if (adapter->msix_entries)
6654			wr32(E1000_EIMC, q_vector->eims_value);
6655		else
6656			igb_irq_disable(adapter);
6657		napi_schedule(&q_vector->napi);
6658	}
6659}
6660#endif /* CONFIG_NET_POLL_CONTROLLER */
6661
6662/**
6663 * igb_io_error_detected - called when PCI error is detected
6664 * @pdev: Pointer to PCI device
6665 * @state: The current pci connection state
6666 *
6667 * This function is called after a PCI bus error affecting
6668 * this device has been detected.
6669 */
6670static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6671					      pci_channel_state_t state)
6672{
6673	struct net_device *netdev = pci_get_drvdata(pdev);
6674	struct igb_adapter *adapter = netdev_priv(netdev);
6675
6676	netif_device_detach(netdev);
6677
6678	if (state == pci_channel_io_perm_failure)
6679		return PCI_ERS_RESULT_DISCONNECT;
6680
6681	if (netif_running(netdev))
6682		igb_down(adapter);
6683	pci_disable_device(pdev);
6684
6685	/* Request a slot reset. */
6686	return PCI_ERS_RESULT_NEED_RESET;
6687}
6688
6689/**
6690 * igb_io_slot_reset - called after the pci bus has been reset.
6691 * @pdev: Pointer to PCI device
6692 *
6693 * Restart the card from scratch, as if from a cold-boot. Implementation
6694 * resembles the first-half of the igb_resume routine.
6695 */
6696static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6697{
6698	struct net_device *netdev = pci_get_drvdata(pdev);
6699	struct igb_adapter *adapter = netdev_priv(netdev);
6700	struct e1000_hw *hw = &adapter->hw;
6701	pci_ers_result_t result;
6702	int err;
6703
6704	if (pci_enable_device_mem(pdev)) {
6705		dev_err(&pdev->dev,
6706			"Cannot re-enable PCI device after reset.\n");
6707		result = PCI_ERS_RESULT_DISCONNECT;
6708	} else {
6709		pci_set_master(pdev);
6710		pci_restore_state(pdev);
6711		pci_save_state(pdev);
6712
6713		pci_enable_wake(pdev, PCI_D3hot, 0);
6714		pci_enable_wake(pdev, PCI_D3cold, 0);
6715
6716		igb_reset(adapter);
6717		wr32(E1000_WUS, ~0);
6718		result = PCI_ERS_RESULT_RECOVERED;
6719	}
6720
6721	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6722	if (err) {
6723		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6724		        "failed 0x%0x\n", err);
6725		/* non-fatal, continue */
6726	}
6727
6728	return result;
6729}
6730
6731/**
6732 * igb_io_resume - called when traffic can start flowing again.
6733 * @pdev: Pointer to PCI device
6734 *
6735 * This callback is called when the error recovery driver tells us that
6736 * its OK to resume normal operation. Implementation resembles the
6737 * second-half of the igb_resume routine.
6738 */
6739static void igb_io_resume(struct pci_dev *pdev)
6740{
6741	struct net_device *netdev = pci_get_drvdata(pdev);
6742	struct igb_adapter *adapter = netdev_priv(netdev);
6743
6744	if (netif_running(netdev)) {
6745		if (igb_up(adapter)) {
6746			dev_err(&pdev->dev, "igb_up failed after reset\n");
6747			return;
6748		}
6749	}
6750
6751	netif_device_attach(netdev);
6752
6753	/* let the f/w know that the h/w is now under the control of the
6754	 * driver. */
6755	igb_get_hw_control(adapter);
6756}
6757
6758static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6759                             u8 qsel)
6760{
6761	u32 rar_low, rar_high;
6762	struct e1000_hw *hw = &adapter->hw;
6763
6764	/* HW expects these in little endian so we reverse the byte order
6765	 * from network order (big endian) to little endian
6766	 */
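	/* illustrative: for the made-up address 00:1b:21:aa:bb:cc this
	 * yields rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the
	 * valid and pool-select bits are added
	 */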
6767	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6768	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6769	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6770
6771	/* Indicate to hardware the Address is Valid. */
6772	rar_high |= E1000_RAH_AV;
6773
6774	if (hw->mac.type == e1000_82575)
6775		rar_high |= E1000_RAH_POOL_1 * qsel;
6776	else
6777		rar_high |= E1000_RAH_POOL_1 << qsel;
6778
6779	wr32(E1000_RAL(index), rar_low);
6780	wrfl();
6781	wr32(E1000_RAH(index), rar_high);
6782	wrfl();
6783}
6784
6785static int igb_set_vf_mac(struct igb_adapter *adapter,
6786                          int vf, unsigned char *mac_addr)
6787{
6788	struct e1000_hw *hw = &adapter->hw;
6789	/* VF MAC addresses start at the end of the receive addresses and move
6790	 * towards the first, so a collision should not be possible */
6791	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6792
6793	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6794
6795	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6796
6797	return 0;
6798}
6799
6800static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6801{
6802	struct igb_adapter *adapter = netdev_priv(netdev);
6803	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6804		return -EINVAL;
6805	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6806	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6807	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6808				      " change effective.");
6809	if (test_bit(__IGB_DOWN, &adapter->state)) {
6810		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6811			 " but the PF device is not up.\n");
6812		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6813			 " attempting to use the VF device.\n");
6814	}
6815	return igb_set_vf_mac(adapter, vf, mac);
6816}
6817
6818static int igb_link_mbps(int internal_link_speed)
6819{
6820	switch (internal_link_speed) {
6821	case SPEED_100:
6822		return 100;
6823	case SPEED_1000:
6824		return 1000;
6825	default:
6826		return 0;
6827	}
6828}
6829
6830static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6831				  int link_speed)
6832{
6833	int rf_dec, rf_int;
6834	u32 bcnrc_val;
6835
6836	if (tx_rate != 0) {
6837		/* Calculate the rate factor values to set */
6838		rf_int = link_speed / tx_rate;
6839		rf_dec = (link_speed - (rf_int * tx_rate));
6840		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
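		/* illustrative: with link_speed 1000 and tx_rate 300 this
		 * gives rf_int = 3, and rf_dec encodes the remaining 100/300
		 * as a fixed-point fraction, i.e. a rate factor of ~3.33
		 */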
6841
6842		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6843		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6844		               E1000_RTTBCNRC_RF_INT_MASK);
6845		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6846	} else {
6847		bcnrc_val = 0;
6848	}
6849
6850	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6851	/*
6852	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
6853	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
6854	 */
6855	wr32(E1000_RTTBCNRM, 0x14);
6856	wr32(E1000_RTTBCNRC, bcnrc_val);
6857}
6858
6859static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6860{
6861	int actual_link_speed, i;
6862	bool reset_rate = false;
6863
6864	/* VF TX rate limit was not set or not supported */
6865	if ((adapter->vf_rate_link_speed == 0) ||
6866	    (adapter->hw.mac.type != e1000_82576))
6867		return;
6868
6869	actual_link_speed = igb_link_mbps(adapter->link_speed);
6870	if (actual_link_speed != adapter->vf_rate_link_speed) {
6871		reset_rate = true;
6872		adapter->vf_rate_link_speed = 0;
6873		dev_info(&adapter->pdev->dev,
6874		         "Link speed has been changed. VF Transmit "
6875		         "rate is disabled\n");
6876	}
6877
6878	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6879		if (reset_rate)
6880			adapter->vf_data[i].tx_rate = 0;
6881
6882		igb_set_vf_rate_limit(&adapter->hw, i,
6883		                      adapter->vf_data[i].tx_rate,
6884		                      actual_link_speed);
6885	}
6886}
6887
6888static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6889{
6890	struct igb_adapter *adapter = netdev_priv(netdev);
6891	struct e1000_hw *hw = &adapter->hw;
6892	int actual_link_speed;
6893
6894	if (hw->mac.type != e1000_82576)
6895		return -EOPNOTSUPP;
6896
6897	actual_link_speed = igb_link_mbps(adapter->link_speed);
6898	if ((vf >= adapter->vfs_allocated_count) ||
6899	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6900	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6901		return -EINVAL;
6902
6903	adapter->vf_rate_link_speed = actual_link_speed;
6904	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6905	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6906
6907	return 0;
6908}
6909
6910static int igb_ndo_get_vf_config(struct net_device *netdev,
6911				 int vf, struct ifla_vf_info *ivi)
6912{
6913	struct igb_adapter *adapter = netdev_priv(netdev);
6914	if (vf >= adapter->vfs_allocated_count)
6915		return -EINVAL;
6916	ivi->vf = vf;
6917	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6918	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6919	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6920	ivi->qos = adapter->vf_data[vf].pf_qos;
6921	return 0;
6922}
6923
6924static void igb_vmm_control(struct igb_adapter *adapter)
6925{
6926	struct e1000_hw *hw = &adapter->hw;
6927	u32 reg;
6928
6929	switch (hw->mac.type) {
6930	case e1000_82575:
6931	case e1000_i210:
6932	case e1000_i211:
6933	default:
6934		/* replication is not supported on these devices */
6935		return;
6936	case e1000_82576:
6937		/* notify HW that the MAC is adding vlan tags */
6938		reg = rd32(E1000_DTXCTL);
6939		reg |= E1000_DTXCTL_VLAN_ADDED;
6940		wr32(E1000_DTXCTL, reg);
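		/* fall through - 82576 also needs the VLAN strip setting below */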
6941	case e1000_82580:
6942		/* enable replication vlan tag stripping */
6943		reg = rd32(E1000_RPLOLR);
6944		reg |= E1000_RPLOLR_STRVLAN;
6945		wr32(E1000_RPLOLR, reg);
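		/* fall through */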
6946	case e1000_i350:
6947		/* none of the above registers are supported by i350 */
6948		break;
6949	}
6950
6951	if (adapter->vfs_allocated_count) {
6952		igb_vmdq_set_loopback_pf(hw, true);
6953		igb_vmdq_set_replication_pf(hw, true);
6954		igb_vmdq_set_anti_spoofing_pf(hw, true,
6955						adapter->vfs_allocated_count);
6956	} else {
6957		igb_vmdq_set_loopback_pf(hw, false);
6958		igb_vmdq_set_replication_pf(hw, false);
6959	}
6960}
6961
6962static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
6963{
6964	struct e1000_hw *hw = &adapter->hw;
6965	u32 dmac_thr;
6966	u16 hwm;
6967
6968	if (hw->mac.type > e1000_82580) {
6969		if (adapter->flags & IGB_FLAG_DMAC) {
6970			u32 reg;
6971
6972			/* force threshold to 0. */
6973			wr32(E1000_DMCTXTH, 0);
6974
6975			/*
6976			 * DMA Coalescing high water mark needs to be greater
6977			 * than the Rx threshold. Set hwm to PBA - max frame
6978			 * size in 16B units, capping it at PBA - 6KB.
6979			 */
6980			hwm = 64 * pba - adapter->max_frame_size / 16;
6981			if (hwm < 64 * (pba - 6))
6982				hwm = 64 * (pba - 6);
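			/* illustrative: with pba = 34 (KB) and a 1522 byte
			 * max frame, hwm = 64 * 34 - 1522 / 16 = 2081
			 * sixteen-byte units, above the 64 * (34 - 6) = 1792
			 * floor
			 */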
6983			reg = rd32(E1000_FCRTC);
6984			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
6985			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
6986				& E1000_FCRTC_RTH_COAL_MASK);
6987			wr32(E1000_FCRTC, reg);
6988
6989			/*
6990			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
6991			 * frame size, capping it at PBA - 10KB.
6992			 */
6993			dmac_thr = pba - adapter->max_frame_size / 512;
6994			if (dmac_thr < pba - 10)
6995				dmac_thr = pba - 10;
6996			reg = rd32(E1000_DMACR);
6997			reg &= ~E1000_DMACR_DMACTHR_MASK;
6998			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
6999				& E1000_DMACR_DMACTHR_MASK);
7000
7001			/* transition to L0s or L1 if available */
7002			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7003
7004			/* watchdog timer= +-1000 usec in 32usec intervals */
7005			/* watchdog timer = ~1000 usec, in units of 32 usec */
7006
7007			/* Disable BMC-to-OS Watchdog Enable */
7008			reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7009			wr32(E1000_DMACR, reg);
7010
7011			/*
7012			 * no lower threshold to disable
7013			 * coalescing (smart FIFO) - UTRESH=0
7014			 */
7015			wr32(E1000_DMCRTRH, 0);
7016
7017			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7018
7019			wr32(E1000_DMCTLX, reg);
7020
7021			/*
7022			 * free space in tx packet buffer to wake from
7023			 * DMA coal
7024			 */
7025			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7026			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7027
7028			/*
7029			 * make low power state decision controlled
7030			 * by DMA coal
7031			 */
7032			reg = rd32(E1000_PCIEMISC);
7033			reg &= ~E1000_PCIEMISC_LX_DECISION;
7034			wr32(E1000_PCIEMISC, reg);
7035		} /* endif adapter->dmac is not disabled */
7036	} else if (hw->mac.type == e1000_82580) {
7037		u32 reg = rd32(E1000_PCIEMISC);
7038		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7039		wr32(E1000_DMACR, 0);
7040	}
7041}
7042
7043/* igb_main.c */
7044