igb_main.c revision ae1c07a6b7ced6c0c94c99e3b53f4e7856fa8bff
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 4
63#define MIN 0
64#define BUILD 1
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
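/* Illustrative expansion: with the MAJ/MIN/BUILD values above, DRV_VERSION
 * evaluates to the string "4.0.1-k".
 */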
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
103	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
104	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
105	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
106	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
107	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
108	/* required last entry */
109	{0, }
110};
111
112MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
113
114void igb_reset(struct igb_adapter *);
115static int igb_setup_all_tx_resources(struct igb_adapter *);
116static int igb_setup_all_rx_resources(struct igb_adapter *);
117static void igb_free_all_tx_resources(struct igb_adapter *);
118static void igb_free_all_rx_resources(struct igb_adapter *);
119static void igb_setup_mrqc(struct igb_adapter *);
120static int igb_probe(struct pci_dev *, const struct pci_device_id *);
121static void __devexit igb_remove(struct pci_dev *pdev);
122static int igb_sw_init(struct igb_adapter *);
123static int igb_open(struct net_device *);
124static int igb_close(struct net_device *);
125static void igb_configure_tx(struct igb_adapter *);
126static void igb_configure_rx(struct igb_adapter *);
127static void igb_clean_all_tx_rings(struct igb_adapter *);
128static void igb_clean_all_rx_rings(struct igb_adapter *);
129static void igb_clean_tx_ring(struct igb_ring *);
130static void igb_clean_rx_ring(struct igb_ring *);
131static void igb_set_rx_mode(struct net_device *);
132static void igb_update_phy_info(unsigned long);
133static void igb_watchdog(unsigned long);
134static void igb_watchdog_task(struct work_struct *);
135static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
136static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
137						 struct rtnl_link_stats64 *stats);
138static int igb_change_mtu(struct net_device *, int);
139static int igb_set_mac(struct net_device *, void *);
140static void igb_set_uta(struct igb_adapter *adapter);
141static irqreturn_t igb_intr(int irq, void *);
142static irqreturn_t igb_intr_msi(int irq, void *);
143static irqreturn_t igb_msix_other(int irq, void *);
144static irqreturn_t igb_msix_ring(int irq, void *);
145#ifdef CONFIG_IGB_DCA
146static void igb_update_dca(struct igb_q_vector *);
147static void igb_setup_dca(struct igb_adapter *);
148#endif /* CONFIG_IGB_DCA */
149static int igb_poll(struct napi_struct *, int);
150static bool igb_clean_tx_irq(struct igb_q_vector *);
151static bool igb_clean_rx_irq(struct igb_q_vector *, int);
152static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
153static void igb_tx_timeout(struct net_device *);
154static void igb_reset_task(struct work_struct *);
155static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
156static int igb_vlan_rx_add_vid(struct net_device *, u16);
157static int igb_vlan_rx_kill_vid(struct net_device *, u16);
158static void igb_restore_vlan(struct igb_adapter *);
159static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
160static void igb_ping_all_vfs(struct igb_adapter *);
161static void igb_msg_task(struct igb_adapter *);
162static void igb_vmm_control(struct igb_adapter *);
163static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
164static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
165static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
166static int igb_ndo_set_vf_vlan(struct net_device *netdev,
167			       int vf, u16 vlan, u8 qos);
168static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
169static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
170				 struct ifla_vf_info *ivi);
171static void igb_check_vf_rate_limit(struct igb_adapter *);
172
173#ifdef CONFIG_PCI_IOV
174static int igb_vf_configure(struct igb_adapter *adapter, int vf);
175static int igb_find_enabled_vfs(struct igb_adapter *adapter);
176static int igb_check_vf_assignment(struct igb_adapter *adapter);
177#endif
178
179#ifdef CONFIG_PM
180#ifdef CONFIG_PM_SLEEP
181static int igb_suspend(struct device *);
182#endif
183static int igb_resume(struct device *);
184#ifdef CONFIG_PM_RUNTIME
185static int igb_runtime_suspend(struct device *dev);
186static int igb_runtime_resume(struct device *dev);
187static int igb_runtime_idle(struct device *dev);
188#endif
189static const struct dev_pm_ops igb_pm_ops = {
190	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
191	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
192			igb_runtime_idle)
193};
194#endif
195static void igb_shutdown(struct pci_dev *);
196#ifdef CONFIG_IGB_DCA
197static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
198static struct notifier_block dca_notifier = {
199	.notifier_call	= igb_notify_dca,
200	.next		= NULL,
201	.priority	= 0
202};
203#endif
204#ifdef CONFIG_NET_POLL_CONTROLLER
205/* for netdump / net console */
206static void igb_netpoll(struct net_device *);
207#endif
208#ifdef CONFIG_PCI_IOV
209static unsigned int max_vfs = 0;
210module_param(max_vfs, uint, 0);
211MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
212                 "per physical function");
213#endif /* CONFIG_PCI_IOV */
214
215static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
216		     pci_channel_state_t);
217static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
218static void igb_io_resume(struct pci_dev *);
219
220static struct pci_error_handlers igb_err_handler = {
221	.error_detected = igb_io_error_detected,
222	.slot_reset = igb_io_slot_reset,
223	.resume = igb_io_resume,
224};
225
226static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
227
228static struct pci_driver igb_driver = {
229	.name     = igb_driver_name,
230	.id_table = igb_pci_tbl,
231	.probe    = igb_probe,
232	.remove   = __devexit_p(igb_remove),
233#ifdef CONFIG_PM
234	.driver.pm = &igb_pm_ops,
235#endif
236	.shutdown = igb_shutdown,
237	.err_handler = &igb_err_handler
238};
239
240MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
241MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
242MODULE_LICENSE("GPL");
243MODULE_VERSION(DRV_VERSION);
244
245#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
246static int debug = -1;
247module_param(debug, int, 0);
248MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
249
250struct igb_reg_info {
251	u32 ofs;
252	char *name;
253};
254
255static const struct igb_reg_info igb_reg_info_tbl[] = {
256
257	/* General Registers */
258	{E1000_CTRL, "CTRL"},
259	{E1000_STATUS, "STATUS"},
260	{E1000_CTRL_EXT, "CTRL_EXT"},
261
262	/* Interrupt Registers */
263	{E1000_ICR, "ICR"},
264
265	/* RX Registers */
266	{E1000_RCTL, "RCTL"},
267	{E1000_RDLEN(0), "RDLEN"},
268	{E1000_RDH(0), "RDH"},
269	{E1000_RDT(0), "RDT"},
270	{E1000_RXDCTL(0), "RXDCTL"},
271	{E1000_RDBAL(0), "RDBAL"},
272	{E1000_RDBAH(0), "RDBAH"},
273
274	/* TX Registers */
275	{E1000_TCTL, "TCTL"},
276	{E1000_TDBAL(0), "TDBAL"},
277	{E1000_TDBAH(0), "TDBAH"},
278	{E1000_TDLEN(0), "TDLEN"},
279	{E1000_TDH(0), "TDH"},
280	{E1000_TDT(0), "TDT"},
281	{E1000_TXDCTL(0), "TXDCTL"},
282	{E1000_TDFH, "TDFH"},
283	{E1000_TDFT, "TDFT"},
284	{E1000_TDFHS, "TDFHS"},
285	{E1000_TDFPC, "TDFPC"},
286
287	/* List Terminator */
288	{}
289};
290
291/*
292 * igb_regdump - register printout routine
293 */
294static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
295{
296	int n = 0;
297	char rname[16];
298	u32 regs[8];
299
300	switch (reginfo->ofs) {
301	case E1000_RDLEN(0):
302		for (n = 0; n < 4; n++)
303			regs[n] = rd32(E1000_RDLEN(n));
304		break;
305	case E1000_RDH(0):
306		for (n = 0; n < 4; n++)
307			regs[n] = rd32(E1000_RDH(n));
308		break;
309	case E1000_RDT(0):
310		for (n = 0; n < 4; n++)
311			regs[n] = rd32(E1000_RDT(n));
312		break;
313	case E1000_RXDCTL(0):
314		for (n = 0; n < 4; n++)
315			regs[n] = rd32(E1000_RXDCTL(n));
316		break;
317	case E1000_RDBAL(0):
318		for (n = 0; n < 4; n++)
319			regs[n] = rd32(E1000_RDBAL(n));
320		break;
321	case E1000_RDBAH(0):
322		for (n = 0; n < 4; n++)
323			regs[n] = rd32(E1000_RDBAH(n));
324		break;
325	case E1000_TDBAL(0):
326		for (n = 0; n < 4; n++)
327			regs[n] = rd32(E1000_TDBAL(n));
328		break;
329	case E1000_TDBAH(0):
330		for (n = 0; n < 4; n++)
331			regs[n] = rd32(E1000_TDBAH(n));
332		break;
333	case E1000_TDLEN(0):
334		for (n = 0; n < 4; n++)
335			regs[n] = rd32(E1000_TDLEN(n));
336		break;
337	case E1000_TDH(0):
338		for (n = 0; n < 4; n++)
339			regs[n] = rd32(E1000_TDH(n));
340		break;
341	case E1000_TDT(0):
342		for (n = 0; n < 4; n++)
343			regs[n] = rd32(E1000_TDT(n));
344		break;
345	case E1000_TXDCTL(0):
346		for (n = 0; n < 4; n++)
347			regs[n] = rd32(E1000_TXDCTL(n));
348		break;
349	default:
350		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
351		return;
352	}
353
354	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
355	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
356		regs[2], regs[3]);
357}
358
359/*
360 * igb_dump - Print registers, tx-rings and rx-rings
361 */
362static void igb_dump(struct igb_adapter *adapter)
363{
364	struct net_device *netdev = adapter->netdev;
365	struct e1000_hw *hw = &adapter->hw;
366	struct igb_reg_info *reginfo;
367	struct igb_ring *tx_ring;
368	union e1000_adv_tx_desc *tx_desc;
369	struct my_u0 { u64 a; u64 b; } *u0;
370	struct igb_ring *rx_ring;
371	union e1000_adv_rx_desc *rx_desc;
372	u32 staterr;
373	u16 i, n;
374
375	if (!netif_msg_hw(adapter))
376		return;
377
378	/* Print netdevice Info */
379	if (netdev) {
380		dev_info(&adapter->pdev->dev, "Net device Info\n");
381		pr_info("Device Name     state            trans_start      "
382			"last_rx\n");
383		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
384			netdev->state, netdev->trans_start, netdev->last_rx);
385	}
386
387	/* Print Registers */
388	dev_info(&adapter->pdev->dev, "Register Dump\n");
389	pr_info(" Register Name   Value\n");
390	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
391	     reginfo->name; reginfo++) {
392		igb_regdump(hw, reginfo);
393	}
394
395	/* Print TX Ring Summary */
396	if (!netdev || !netif_running(netdev))
397		goto exit;
398
399	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
400	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
401	for (n = 0; n < adapter->num_tx_queues; n++) {
402		struct igb_tx_buffer *buffer_info;
403		tx_ring = adapter->tx_ring[n];
404		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
405		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
406			n, tx_ring->next_to_use, tx_ring->next_to_clean,
407			(u64)buffer_info->dma,
408			buffer_info->length,
409			buffer_info->next_to_watch,
410			(u64)buffer_info->time_stamp);
411	}
412
413	/* Print TX Rings */
414	if (!netif_msg_tx_done(adapter))
415		goto rx_ring_summary;
416
417	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
418
419	/* Transmit Descriptor Formats
420	 *
421	 * Advanced Transmit Descriptor
422	 *   +--------------------------------------------------------------+
423	 * 0 |         Buffer Address [63:0]                                |
424	 *   +--------------------------------------------------------------+
425	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
426	 *   +--------------------------------------------------------------+
427	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
428	 */
429
430	for (n = 0; n < adapter->num_tx_queues; n++) {
431		tx_ring = adapter->tx_ring[n];
432		pr_info("------------------------------------\n");
433		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
434		pr_info("------------------------------------\n");
435		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
436			"[bi->dma       ] leng  ntw timestamp        "
437			"bi->skb\n");
438
439		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
440			const char *next_desc;
441			struct igb_tx_buffer *buffer_info;
442			tx_desc = IGB_TX_DESC(tx_ring, i);
443			buffer_info = &tx_ring->tx_buffer_info[i];
444			u0 = (struct my_u0 *)tx_desc;
445			if (i == tx_ring->next_to_use &&
446			    i == tx_ring->next_to_clean)
447				next_desc = " NTC/U";
448			else if (i == tx_ring->next_to_use)
449				next_desc = " NTU";
450			else if (i == tx_ring->next_to_clean)
451				next_desc = " NTC";
452			else
453				next_desc = "";
454
455			pr_info("T [0x%03X]    %016llX %016llX %016llX"
456				" %04X  %p %016llX %p%s\n", i,
457				le64_to_cpu(u0->a),
458				le64_to_cpu(u0->b),
459				(u64)buffer_info->dma,
460				buffer_info->length,
461				buffer_info->next_to_watch,
462				(u64)buffer_info->time_stamp,
463				buffer_info->skb, next_desc);
464
465			if (netif_msg_pktdata(adapter) && buffer_info->skb)
466				print_hex_dump(KERN_INFO, "",
467					DUMP_PREFIX_ADDRESS,
468					16, 1, buffer_info->skb->data,
469					buffer_info->length, true);
470		}
471	}
472
473	/* Print RX Rings Summary */
474rx_ring_summary:
475	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
476	pr_info("Queue [NTU] [NTC]\n");
477	for (n = 0; n < adapter->num_rx_queues; n++) {
478		rx_ring = adapter->rx_ring[n];
479		pr_info(" %5d %5X %5X\n",
480			n, rx_ring->next_to_use, rx_ring->next_to_clean);
481	}
482
483	/* Print RX Rings */
484	if (!netif_msg_rx_status(adapter))
485		goto exit;
486
487	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
488
489	/* Advanced Receive Descriptor (Read) Format
490	 *    63                                           1        0
491	 *    +-----------------------------------------------------+
492	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
493	 *    +----------------------------------------------+------+
494	 *  8 |       Header Buffer Address [63:1]           |  DD  |
495	 *    +-----------------------------------------------------+
496	 *
497	 *
498	 * Advanced Receive Descriptor (Write-Back) Format
499	 *
500	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
501	 *   +------------------------------------------------------+
502	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
503	 *   | Checksum   Ident  |   |           |    | Type | Type |
504	 *   +------------------------------------------------------+
505	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
506	 *   +------------------------------------------------------+
507	 *   63       48 47    32 31            20 19               0
508	 */
509
510	for (n = 0; n < adapter->num_rx_queues; n++) {
511		rx_ring = adapter->rx_ring[n];
512		pr_info("------------------------------------\n");
513		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
514		pr_info("------------------------------------\n");
515		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
516			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
517		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
518			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
519
520		for (i = 0; i < rx_ring->count; i++) {
521			const char *next_desc;
522			struct igb_rx_buffer *buffer_info;
523			buffer_info = &rx_ring->rx_buffer_info[i];
524			rx_desc = IGB_RX_DESC(rx_ring, i);
525			u0 = (struct my_u0 *)rx_desc;
526			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
527
528			if (i == rx_ring->next_to_use)
529				next_desc = " NTU";
530			else if (i == rx_ring->next_to_clean)
531				next_desc = " NTC";
532			else
533				next_desc = "";
534
535			if (staterr & E1000_RXD_STAT_DD) {
536				/* Descriptor Done */
537				pr_info("%s[0x%03X]     %016llX %016llX -------"
538					"--------- %p%s\n", "RWB", i,
539					le64_to_cpu(u0->a),
540					le64_to_cpu(u0->b),
541					buffer_info->skb, next_desc);
542			} else {
543				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
544					" %p%s\n", "R  ", i,
545					le64_to_cpu(u0->a),
546					le64_to_cpu(u0->b),
547					(u64)buffer_info->dma,
548					buffer_info->skb, next_desc);
549
550				if (netif_msg_pktdata(adapter) &&
551				    buffer_info->dma && buffer_info->skb) {
552					print_hex_dump(KERN_INFO, "",
553						  DUMP_PREFIX_ADDRESS,
554						  16, 1, buffer_info->skb->data,
555						  IGB_RX_HDR_LEN, true);
556					print_hex_dump(KERN_INFO, "",
557					  DUMP_PREFIX_ADDRESS,
558					  16, 1,
559					  page_address(buffer_info->page) +
560						      buffer_info->page_offset,
561					  PAGE_SIZE/2, true);
562				}
563			}
564		}
565	}
566
567exit:
568	return;
569}
570
571/**
572 * igb_get_hw_dev - return device
573 * used by hardware layer to print debugging information
574 **/
575struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
576{
577	struct igb_adapter *adapter = hw->back;
578	return adapter->netdev;
579}
580
581/**
582 * igb_init_module - Driver Registration Routine
583 *
584 * igb_init_module is the first routine called when the driver is
585 * loaded. All it does is register with the PCI subsystem.
586 **/
587static int __init igb_init_module(void)
588{
589	int ret;
590	pr_info("%s - version %s\n",
591	       igb_driver_string, igb_driver_version);
592
593	pr_info("%s\n", igb_copyright);
594
595#ifdef CONFIG_IGB_DCA
596	dca_register_notify(&dca_notifier);
597#endif
598	ret = pci_register_driver(&igb_driver);
599	return ret;
600}
601
602module_init(igb_init_module);
603
604/**
605 * igb_exit_module - Driver Exit Cleanup Routine
606 *
607 * igb_exit_module is called just before the driver is removed
608 * from memory.
609 **/
610static void __exit igb_exit_module(void)
611{
612#ifdef CONFIG_IGB_DCA
613	dca_unregister_notify(&dca_notifier);
614#endif
615	pci_unregister_driver(&igb_driver);
616}
617
618module_exit(igb_exit_module);
619
620#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
621/**
622 * igb_cache_ring_register - Descriptor ring to register mapping
623 * @adapter: board private structure to initialize
624 *
625 * Once we know the feature-set enabled for the device, we'll cache
626 * the register offset the descriptor ring is assigned to.
627 **/
628static void igb_cache_ring_register(struct igb_adapter *adapter)
629{
630	int i = 0, j = 0;
631	u32 rbase_offset = adapter->vfs_allocated_count;
632
633	switch (adapter->hw.mac.type) {
634	case e1000_82576:
635		/* The queues are allocated for virtualization such that VF 0
636		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
637		 * In order to avoid collision we start at the first free queue
638		 * and continue consuming queues in the same sequence
639		 */
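		/* Illustrative example (values assumed, not from this file):
		 * with max_vfs = 7, rbase_offset is 7 and the PF's Rx queue 0
		 * maps to hardware queue 7 + Q_IDX_82576(0) = 7, while Rx
		 * queue 1 maps to 7 + Q_IDX_82576(1) = 15, i.e. the PF picks
		 * up the interleaved pair left after VFs 0-6 take (0,8)
		 * through (6,14).
		 */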
640		if (adapter->vfs_allocated_count) {
641			for (; i < adapter->rss_queues; i++)
642				adapter->rx_ring[i]->reg_idx = rbase_offset +
643				                               Q_IDX_82576(i);
644		}
645	case e1000_82575:
646	case e1000_82580:
647	case e1000_i350:
648	case e1000_i210:
649	case e1000_i211:
650	default:
651		for (; i < adapter->num_rx_queues; i++)
652			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
653		for (; j < adapter->num_tx_queues; j++)
654			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
655		break;
656	}
657}
658
659static void igb_free_queues(struct igb_adapter *adapter)
660{
661	int i;
662
663	for (i = 0; i < adapter->num_tx_queues; i++) {
664		kfree(adapter->tx_ring[i]);
665		adapter->tx_ring[i] = NULL;
666	}
667	for (i = 0; i < adapter->num_rx_queues; i++) {
668		kfree(adapter->rx_ring[i]);
669		adapter->rx_ring[i] = NULL;
670	}
671	adapter->num_rx_queues = 0;
672	adapter->num_tx_queues = 0;
673}
674
675/**
676 * igb_alloc_queues - Allocate memory for all rings
677 * @adapter: board private structure to initialize
678 *
679 * We allocate one ring per queue at run-time since we don't know the
680 * number of queues at compile-time.
681 **/
682static int igb_alloc_queues(struct igb_adapter *adapter)
683{
684	struct igb_ring *ring;
685	int i;
686	int orig_node = adapter->node;
687
688	for (i = 0; i < adapter->num_tx_queues; i++) {
689		if (orig_node == -1) {
690			int cur_node = next_online_node(adapter->node);
691			if (cur_node == MAX_NUMNODES)
692				cur_node = first_online_node;
693			adapter->node = cur_node;
694		}
695		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
696				    adapter->node);
697		if (!ring)
698			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
699		if (!ring)
700			goto err;
701		ring->count = adapter->tx_ring_count;
702		ring->queue_index = i;
703		ring->dev = &adapter->pdev->dev;
704		ring->netdev = adapter->netdev;
705		ring->numa_node = adapter->node;
706		/* For 82575, context index must be unique per ring. */
707		if (adapter->hw.mac.type == e1000_82575)
708			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
709		adapter->tx_ring[i] = ring;
710	}
711	/* Restore the adapter's original node */
712	adapter->node = orig_node;
713
714	for (i = 0; i < adapter->num_rx_queues; i++) {
715		if (orig_node == -1) {
716			int cur_node = next_online_node(adapter->node);
717			if (cur_node == MAX_NUMNODES)
718				cur_node = first_online_node;
719			adapter->node = cur_node;
720		}
721		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
722				    adapter->node);
723		if (!ring)
724			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
725		if (!ring)
726			goto err;
727		ring->count = adapter->rx_ring_count;
728		ring->queue_index = i;
729		ring->dev = &adapter->pdev->dev;
730		ring->netdev = adapter->netdev;
731		ring->numa_node = adapter->node;
732		/* set flag indicating ring supports SCTP checksum offload */
733		if (adapter->hw.mac.type >= e1000_82576)
734			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
735
736		/*
737		 * On i350, i210, and i211, loopback VLAN packets
738		 * have the tag byte-swapped.
739		 */
740		if (adapter->hw.mac.type >= e1000_i350)
741			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
742
743		adapter->rx_ring[i] = ring;
744	}
745	/* Restore the adapter's original node */
746	adapter->node = orig_node;
747
748	igb_cache_ring_register(adapter);
749
750	return 0;
751
752err:
753	/* Restore the adapter's original node */
754	adapter->node = orig_node;
755	igb_free_queues(adapter);
756
757	return -ENOMEM;
758}
759
760/**
761 *  igb_write_ivar - configure ivar for given MSI-X vector
762 *  @hw: pointer to the HW structure
763 *  @msix_vector: vector number we are allocating to a given ring
764 *  @index: row index of IVAR register to write within IVAR table
765 *  @offset: column offset within IVAR, should be a multiple of 8
766 *
767 *  This function is intended to handle the writing of the IVAR register
768 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
769 *  each containing a cause allocation for an Rx and Tx ring, and a
770 *  variable number of rows depending on the number of queues supported.
771 **/
772static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
773			   int index, int offset)
774{
775	u32 ivar = array_rd32(E1000_IVAR0, index);
776
777	/* clear any bits that are currently set */
778	ivar &= ~((u32)0xFF << offset);
779
780	/* write vector and valid bit */
781	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
782
783	array_wr32(E1000_IVAR0, index, ivar);
784}
785
786#define IGB_N0_QUEUE -1
787static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
788{
789	struct igb_adapter *adapter = q_vector->adapter;
790	struct e1000_hw *hw = &adapter->hw;
791	int rx_queue = IGB_N0_QUEUE;
792	int tx_queue = IGB_N0_QUEUE;
793	u32 msixbm = 0;
794
795	if (q_vector->rx.ring)
796		rx_queue = q_vector->rx.ring->reg_idx;
797	if (q_vector->tx.ring)
798		tx_queue = q_vector->tx.ring->reg_idx;
799
800	switch (hw->mac.type) {
801	case e1000_82575:
802		/* The 82575 assigns vectors using a bitmask, which matches the
803		 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
804		 * or more queues to a vector, we write the appropriate bits
805		 * into the MSIXBM register for that vector. */
806		if (rx_queue > IGB_N0_QUEUE)
807			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
808		if (tx_queue > IGB_N0_QUEUE)
809			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
810		if (!adapter->msix_entries && msix_vector == 0)
811			msixbm |= E1000_EIMS_OTHER;
812		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
813		q_vector->eims_value = msixbm;
814		break;
815	case e1000_82576:
816		/*
817		 * 82576 uses a table that essentially consists of 2 columns
818		 * with 8 rows.  The ordering is column-major so we use the
819		 * lower 3 bits as the row index, and the 4th bit as the
820		 * column offset.
821		 */
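		/* Illustrative example: Rx queue 9 selects IVAR row
		 * (9 & 0x7) = 1 at bit offset ((9 & 0x8) << 1) = 16, and Tx
		 * queue 9 uses the same row at offset 16 + 8 = 24; Rx/Tx
		 * queue 3 land in row 3 at offsets 0 and 8.
		 */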
822		if (rx_queue > IGB_N0_QUEUE)
823			igb_write_ivar(hw, msix_vector,
824				       rx_queue & 0x7,
825				       (rx_queue & 0x8) << 1);
826		if (tx_queue > IGB_N0_QUEUE)
827			igb_write_ivar(hw, msix_vector,
828				       tx_queue & 0x7,
829				       ((tx_queue & 0x8) << 1) + 8);
830		q_vector->eims_value = 1 << msix_vector;
831		break;
832	case e1000_82580:
833	case e1000_i350:
834	case e1000_i210:
835	case e1000_i211:
836		/*
837		 * On 82580 and newer adapters the scheme is similar to 82576
838		 * however instead of ordering column-major we have things
839		 * ordered row-major.  So we traverse the table by using
840		 * bit 0 as the column offset, and the remaining bits as the
841		 * row index.
842		 */
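		/* Illustrative example: Rx queue 5 selects IVAR row
		 * (5 >> 1) = 2 at bit offset ((5 & 0x1) << 4) = 16, and Tx
		 * queue 5 uses offset 16 + 8 = 24, so each IVAR row carries
		 * the Rx/Tx causes for one even/odd queue pair.
		 */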
843		if (rx_queue > IGB_N0_QUEUE)
844			igb_write_ivar(hw, msix_vector,
845				       rx_queue >> 1,
846				       (rx_queue & 0x1) << 4);
847		if (tx_queue > IGB_N0_QUEUE)
848			igb_write_ivar(hw, msix_vector,
849				       tx_queue >> 1,
850				       ((tx_queue & 0x1) << 4) + 8);
851		q_vector->eims_value = 1 << msix_vector;
852		break;
853	default:
854		BUG();
855		break;
856	}
857
858	/* add q_vector eims value to global eims_enable_mask */
859	adapter->eims_enable_mask |= q_vector->eims_value;
860
861	/* configure q_vector to set itr on first interrupt */
862	q_vector->set_itr = 1;
863}
864
865/**
866 * igb_configure_msix - Configure MSI-X hardware
867 *
868 * igb_configure_msix sets up the hardware to properly
869 * generate MSI-X interrupts.
870 **/
871static void igb_configure_msix(struct igb_adapter *adapter)
872{
873	u32 tmp;
874	int i, vector = 0;
875	struct e1000_hw *hw = &adapter->hw;
876
877	adapter->eims_enable_mask = 0;
878
879	/* set vector for other causes, i.e. link changes */
880	switch (hw->mac.type) {
881	case e1000_82575:
882		tmp = rd32(E1000_CTRL_EXT);
883		/* enable MSI-X PBA support*/
884		tmp |= E1000_CTRL_EXT_PBA_CLR;
885
886		/* Auto-Mask interrupts upon ICR read. */
887		tmp |= E1000_CTRL_EXT_EIAME;
888		tmp |= E1000_CTRL_EXT_IRCA;
889
890		wr32(E1000_CTRL_EXT, tmp);
891
892		/* enable msix_other interrupt */
893		array_wr32(E1000_MSIXBM(0), vector++,
894		                      E1000_EIMS_OTHER);
895		adapter->eims_other = E1000_EIMS_OTHER;
896
897		break;
898
899	case e1000_82576:
900	case e1000_82580:
901	case e1000_i350:
902	case e1000_i210:
903	case e1000_i211:
904		/* Turn on MSI-X capability first, or our settings
905		 * won't stick.  And it will take days to debug. */
906		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
907		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
908		                E1000_GPIE_NSICR);
909
910		/* enable msix_other interrupt */
911		adapter->eims_other = 1 << vector;
912		tmp = (vector++ | E1000_IVAR_VALID) << 8;
913
914		wr32(E1000_IVAR_MISC, tmp);
915		break;
916	default:
917		/* do nothing, since nothing else supports MSI-X */
918		break;
919	} /* switch (hw->mac.type) */
920
921	adapter->eims_enable_mask |= adapter->eims_other;
922
923	for (i = 0; i < adapter->num_q_vectors; i++)
924		igb_assign_vector(adapter->q_vector[i], vector++);
925
926	wrfl();
927}
928
929/**
930 * igb_request_msix - Initialize MSI-X interrupts
931 *
932 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
933 * kernel.
934 **/
935static int igb_request_msix(struct igb_adapter *adapter)
936{
937	struct net_device *netdev = adapter->netdev;
938	struct e1000_hw *hw = &adapter->hw;
939	int i, err = 0, vector = 0;
940
941	err = request_irq(adapter->msix_entries[vector].vector,
942	                  igb_msix_other, 0, netdev->name, adapter);
943	if (err)
944		goto out;
945	vector++;
946
947	for (i = 0; i < adapter->num_q_vectors; i++) {
948		struct igb_q_vector *q_vector = adapter->q_vector[i];
949
950		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
951
952		if (q_vector->rx.ring && q_vector->tx.ring)
953			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
954				q_vector->rx.ring->queue_index);
955		else if (q_vector->tx.ring)
956			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
957				q_vector->tx.ring->queue_index);
958		else if (q_vector->rx.ring)
959			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
960				q_vector->rx.ring->queue_index);
961		else
962			sprintf(q_vector->name, "%s-unused", netdev->name);
963
964		err = request_irq(adapter->msix_entries[vector].vector,
965		                  igb_msix_ring, 0, q_vector->name,
966		                  q_vector);
967		if (err)
968			goto out;
969		vector++;
970	}
971
972	igb_configure_msix(adapter);
973	return 0;
974out:
975	return err;
976}
977
978static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
979{
980	if (adapter->msix_entries) {
981		pci_disable_msix(adapter->pdev);
982		kfree(adapter->msix_entries);
983		adapter->msix_entries = NULL;
984	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
985		pci_disable_msi(adapter->pdev);
986	}
987}
988
989/**
990 * igb_free_q_vectors - Free memory allocated for interrupt vectors
991 * @adapter: board private structure to initialize
992 *
993 * This function frees the memory allocated to the q_vectors.  In addition if
994 * NAPI is enabled it will delete any references to the NAPI struct prior
995 * to freeing the q_vector.
996 **/
997static void igb_free_q_vectors(struct igb_adapter *adapter)
998{
999	int v_idx;
1000
1001	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1002		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1003		adapter->q_vector[v_idx] = NULL;
1004		if (!q_vector)
1005			continue;
1006		netif_napi_del(&q_vector->napi);
1007		kfree(q_vector);
1008	}
1009	adapter->num_q_vectors = 0;
1010}
1011
1012/**
1013 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1014 *
1015 * This function resets the device so that it has 0 rx queues, tx queues, and
1016 * MSI-X interrupts allocated.
1017 */
1018static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1019{
1020	igb_free_queues(adapter);
1021	igb_free_q_vectors(adapter);
1022	igb_reset_interrupt_capability(adapter);
1023}
1024
1025/**
1026 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1027 *
1028 * Attempt to configure interrupts using the best available
1029 * capabilities of the hardware and kernel.
1030 **/
1031static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1032{
1033	int err;
1034	int numvecs, i;
1035
1036	/* Number of supported queues. */
1037	adapter->num_rx_queues = adapter->rss_queues;
1038	if (adapter->vfs_allocated_count)
1039		adapter->num_tx_queues = 1;
1040	else
1041		adapter->num_tx_queues = adapter->rss_queues;
1042
1043	/* start with one vector for every rx queue */
1044	numvecs = adapter->num_rx_queues;
1045
1046	/* if tx handler is separate add 1 for every tx queue */
1047	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1048		numvecs += adapter->num_tx_queues;
1049
1050	/* store the number of vectors reserved for queues */
1051	adapter->num_q_vectors = numvecs;
1052
1053	/* add 1 vector for link status interrupts */
1054	numvecs++;
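	/* Illustrative sizing (values assumed): with rss_queues = 4, no VFs
	 * and IGB_FLAG_QUEUE_PAIRS clear, this requests 4 Rx + 4 Tx queue
	 * vectors plus 1 link vector, i.e. 9 MSI-X entries; with queue
	 * pairing enabled it would be 4 + 1 = 5.
	 */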
1055	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1056					GFP_KERNEL);
1057
1058	if (!adapter->msix_entries)
1059		goto msi_only;
1060
1061	for (i = 0; i < numvecs; i++)
1062		adapter->msix_entries[i].entry = i;
1063
1064	err = pci_enable_msix(adapter->pdev,
1065			      adapter->msix_entries,
1066			      numvecs);
1067	if (err == 0)
1068		goto out;
1069
1070	igb_reset_interrupt_capability(adapter);
1071
1072	/* If we can't do MSI-X, try MSI */
1073msi_only:
1074#ifdef CONFIG_PCI_IOV
1075	/* disable SR-IOV for non MSI-X configurations */
1076	if (adapter->vf_data) {
1077		struct e1000_hw *hw = &adapter->hw;
1078		/* disable iov and allow time for transactions to clear */
1079		pci_disable_sriov(adapter->pdev);
1080		msleep(500);
1081
1082		kfree(adapter->vf_data);
1083		adapter->vf_data = NULL;
1084		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1085		wrfl();
1086		msleep(100);
1087		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1088	}
1089#endif
1090	adapter->vfs_allocated_count = 0;
1091	adapter->rss_queues = 1;
1092	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1093	adapter->num_rx_queues = 1;
1094	adapter->num_tx_queues = 1;
1095	adapter->num_q_vectors = 1;
1096	if (!pci_enable_msi(adapter->pdev))
1097		adapter->flags |= IGB_FLAG_HAS_MSI;
1098out:
1099	/* Notify the stack of the (possibly) reduced queue counts. */
1100	rtnl_lock();
1101	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1102	err = netif_set_real_num_rx_queues(adapter->netdev,
1103		adapter->num_rx_queues);
1104	rtnl_unlock();
1105	return err;
1106}
1107
1108/**
1109 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1110 * @adapter: board private structure to initialize
1111 *
1112 * We allocate one q_vector per queue interrupt.  If allocation fails we
1113 * return -ENOMEM.
1114 **/
1115static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1116{
1117	struct igb_q_vector *q_vector;
1118	struct e1000_hw *hw = &adapter->hw;
1119	int v_idx;
1120	int orig_node = adapter->node;
1121
1122	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1123		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1124						adapter->num_tx_queues)) &&
1125		    (adapter->num_rx_queues == v_idx))
1126			adapter->node = orig_node;
1127		if (orig_node == -1) {
1128			int cur_node = next_online_node(adapter->node);
1129			if (cur_node == MAX_NUMNODES)
1130				cur_node = first_online_node;
1131			adapter->node = cur_node;
1132		}
1133		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1134					adapter->node);
1135		if (!q_vector)
1136			q_vector = kzalloc(sizeof(struct igb_q_vector),
1137					   GFP_KERNEL);
1138		if (!q_vector)
1139			goto err_out;
1140		q_vector->adapter = adapter;
1141		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1142		q_vector->itr_val = IGB_START_ITR;
1143		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1144		adapter->q_vector[v_idx] = q_vector;
1145	}
1146	/* Restore the adapter's original node */
1147	adapter->node = orig_node;
1148
1149	return 0;
1150
1151err_out:
1152	/* Restore the adapter's original node */
1153	adapter->node = orig_node;
1154	igb_free_q_vectors(adapter);
1155	return -ENOMEM;
1156}
1157
1158static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1159                                      int ring_idx, int v_idx)
1160{
1161	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1162
1163	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1164	q_vector->rx.ring->q_vector = q_vector;
1165	q_vector->rx.count++;
1166	q_vector->itr_val = adapter->rx_itr_setting;
1167	if (q_vector->itr_val && q_vector->itr_val <= 3)
1168		q_vector->itr_val = IGB_START_ITR;
1169}
1170
1171static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1172                                      int ring_idx, int v_idx)
1173{
1174	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1175
1176	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1177	q_vector->tx.ring->q_vector = q_vector;
1178	q_vector->tx.count++;
1179	q_vector->itr_val = adapter->tx_itr_setting;
1180	q_vector->tx.work_limit = adapter->tx_work_limit;
1181	if (q_vector->itr_val && q_vector->itr_val <= 3)
1182		q_vector->itr_val = IGB_START_ITR;
1183}
1184
1185/**
1186 * igb_map_ring_to_vector - maps allocated queues to vectors
1187 *
1188 * This function maps the recently allocated queues to vectors.
1189 **/
1190static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1191{
1192	int i;
1193	int v_idx = 0;
1194
1195	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1196	    (adapter->num_q_vectors < adapter->num_tx_queues))
1197		return -ENOMEM;
1198
1199	if (adapter->num_q_vectors >=
1200	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1201		for (i = 0; i < adapter->num_rx_queues; i++)
1202			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1203		for (i = 0; i < adapter->num_tx_queues; i++)
1204			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1205	} else {
1206		for (i = 0; i < adapter->num_rx_queues; i++) {
1207			if (i < adapter->num_tx_queues)
1208				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1209			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1210		}
1211		for (; i < adapter->num_tx_queues; i++)
1212			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1213	}
1214	return 0;
1215}
1216
1217/**
1218 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1219 *
1220 * This function initializes the interrupts and allocates all of the queues.
1221 **/
1222static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1223{
1224	struct pci_dev *pdev = adapter->pdev;
1225	int err;
1226
1227	err = igb_set_interrupt_capability(adapter);
1228	if (err)
1229		return err;
1230
1231	err = igb_alloc_q_vectors(adapter);
1232	if (err) {
1233		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1234		goto err_alloc_q_vectors;
1235	}
1236
1237	err = igb_alloc_queues(adapter);
1238	if (err) {
1239		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1240		goto err_alloc_queues;
1241	}
1242
1243	err = igb_map_ring_to_vector(adapter);
1244	if (err) {
1245		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1246		goto err_map_queues;
1247	}
1248
1249
1250	return 0;
1251err_map_queues:
1252	igb_free_queues(adapter);
1253err_alloc_queues:
1254	igb_free_q_vectors(adapter);
1255err_alloc_q_vectors:
1256	igb_reset_interrupt_capability(adapter);
1257	return err;
1258}
1259
1260/**
1261 * igb_request_irq - initialize interrupts
1262 *
1263 * Attempts to configure interrupts using the best available
1264 * capabilities of the hardware and kernel.
1265 **/
1266static int igb_request_irq(struct igb_adapter *adapter)
1267{
1268	struct net_device *netdev = adapter->netdev;
1269	struct pci_dev *pdev = adapter->pdev;
1270	int err = 0;
1271
1272	if (adapter->msix_entries) {
1273		err = igb_request_msix(adapter);
1274		if (!err)
1275			goto request_done;
1276		/* fall back to MSI */
1277		igb_clear_interrupt_scheme(adapter);
1278		if (!pci_enable_msi(pdev))
1279			adapter->flags |= IGB_FLAG_HAS_MSI;
1280		igb_free_all_tx_resources(adapter);
1281		igb_free_all_rx_resources(adapter);
1282		adapter->num_tx_queues = 1;
1283		adapter->num_rx_queues = 1;
1284		adapter->num_q_vectors = 1;
1285		err = igb_alloc_q_vectors(adapter);
1286		if (err) {
1287			dev_err(&pdev->dev,
1288			        "Unable to allocate memory for vectors\n");
1289			goto request_done;
1290		}
1291		err = igb_alloc_queues(adapter);
1292		if (err) {
1293			dev_err(&pdev->dev,
1294			        "Unable to allocate memory for queues\n");
1295			igb_free_q_vectors(adapter);
1296			goto request_done;
1297		}
1298		igb_setup_all_tx_resources(adapter);
1299		igb_setup_all_rx_resources(adapter);
1300	}
1301
1302	igb_assign_vector(adapter->q_vector[0], 0);
1303
1304	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1305		err = request_irq(pdev->irq, igb_intr_msi, 0,
1306				  netdev->name, adapter);
1307		if (!err)
1308			goto request_done;
1309
1310		/* fall back to legacy interrupts */
1311		igb_reset_interrupt_capability(adapter);
1312		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1313	}
1314
1315	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1316			  netdev->name, adapter);
1317
1318	if (err)
1319		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1320			err);
1321
1322request_done:
1323	return err;
1324}
1325
1326static void igb_free_irq(struct igb_adapter *adapter)
1327{
1328	if (adapter->msix_entries) {
1329		int vector = 0, i;
1330
1331		free_irq(adapter->msix_entries[vector++].vector, adapter);
1332
1333		for (i = 0; i < adapter->num_q_vectors; i++)
1334			free_irq(adapter->msix_entries[vector++].vector,
1335				 adapter->q_vector[i]);
1336	} else {
1337		free_irq(adapter->pdev->irq, adapter);
1338	}
1339}
1340
1341/**
1342 * igb_irq_disable - Mask off interrupt generation on the NIC
1343 * @adapter: board private structure
1344 **/
1345static void igb_irq_disable(struct igb_adapter *adapter)
1346{
1347	struct e1000_hw *hw = &adapter->hw;
1348
1349	/*
1350	 * We need to be careful when disabling interrupts.  The VFs are also
1351	 * mapped into these registers, so clearing the bits can cause
1352	 * issues for the VF drivers; we therefore only clear the bits we set.
1353	 */
1354	if (adapter->msix_entries) {
1355		u32 regval = rd32(E1000_EIAM);
1356		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1357		wr32(E1000_EIMC, adapter->eims_enable_mask);
1358		regval = rd32(E1000_EIAC);
1359		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1360	}
1361
1362	wr32(E1000_IAM, 0);
1363	wr32(E1000_IMC, ~0);
1364	wrfl();
1365	if (adapter->msix_entries) {
1366		int i;
1367		for (i = 0; i < adapter->num_q_vectors; i++)
1368			synchronize_irq(adapter->msix_entries[i].vector);
1369	} else {
1370		synchronize_irq(adapter->pdev->irq);
1371	}
1372}
1373
1374/**
1375 * igb_irq_enable - Enable default interrupt generation settings
1376 * @adapter: board private structure
1377 **/
1378static void igb_irq_enable(struct igb_adapter *adapter)
1379{
1380	struct e1000_hw *hw = &adapter->hw;
1381
1382	if (adapter->msix_entries) {
1383		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1384		u32 regval = rd32(E1000_EIAC);
1385		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1386		regval = rd32(E1000_EIAM);
1387		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1388		wr32(E1000_EIMS, adapter->eims_enable_mask);
1389		if (adapter->vfs_allocated_count) {
1390			wr32(E1000_MBVFIMR, 0xFF);
1391			ims |= E1000_IMS_VMMB;
1392		}
1393		wr32(E1000_IMS, ims);
1394	} else {
1395		wr32(E1000_IMS, IMS_ENABLE_MASK |
1396				E1000_IMS_DRSTA);
1397		wr32(E1000_IAM, IMS_ENABLE_MASK |
1398				E1000_IMS_DRSTA);
1399	}
1400}
1401
1402static void igb_update_mng_vlan(struct igb_adapter *adapter)
1403{
1404	struct e1000_hw *hw = &adapter->hw;
1405	u16 vid = adapter->hw.mng_cookie.vlan_id;
1406	u16 old_vid = adapter->mng_vlan_id;
1407
1408	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1409		/* add VID to filter table */
1410		igb_vfta_set(hw, vid, true);
1411		adapter->mng_vlan_id = vid;
1412	} else {
1413		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1414	}
1415
1416	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1417	    (vid != old_vid) &&
1418	    !test_bit(old_vid, adapter->active_vlans)) {
1419		/* remove VID from filter table */
1420		igb_vfta_set(hw, old_vid, false);
1421	}
1422}
1423
1424/**
1425 * igb_release_hw_control - release control of the h/w to f/w
1426 * @adapter: address of board private structure
1427 *
1428 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1429 * For ASF and Pass Through versions of f/w this means that the
1430 * driver is no longer loaded.
1431 *
1432 **/
1433static void igb_release_hw_control(struct igb_adapter *adapter)
1434{
1435	struct e1000_hw *hw = &adapter->hw;
1436	u32 ctrl_ext;
1437
1438	/* Let firmware take over control of h/w */
1439	ctrl_ext = rd32(E1000_CTRL_EXT);
1440	wr32(E1000_CTRL_EXT,
1441			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1442}
1443
1444/**
1445 * igb_get_hw_control - get control of the h/w from f/w
1446 * @adapter: address of board private structure
1447 *
1448 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1449 * For ASF and Pass Through versions of f/w this means that
1450 * the driver is loaded.
1451 *
1452 **/
1453static void igb_get_hw_control(struct igb_adapter *adapter)
1454{
1455	struct e1000_hw *hw = &adapter->hw;
1456	u32 ctrl_ext;
1457
1458	/* Let firmware know the driver has taken over */
1459	ctrl_ext = rd32(E1000_CTRL_EXT);
1460	wr32(E1000_CTRL_EXT,
1461			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1462}
1463
1464/**
1465 * igb_configure - configure the hardware for RX and TX
1466 * @adapter: private board structure
1467 **/
1468static void igb_configure(struct igb_adapter *adapter)
1469{
1470	struct net_device *netdev = adapter->netdev;
1471	int i;
1472
1473	igb_get_hw_control(adapter);
1474	igb_set_rx_mode(netdev);
1475
1476	igb_restore_vlan(adapter);
1477
1478	igb_setup_tctl(adapter);
1479	igb_setup_mrqc(adapter);
1480	igb_setup_rctl(adapter);
1481
1482	igb_configure_tx(adapter);
1483	igb_configure_rx(adapter);
1484
1485	igb_rx_fifo_flush_82575(&adapter->hw);
1486
1487	/* call igb_desc_unused which always leaves
1488	 * at least 1 descriptor unused to make sure
1489	 * next_to_use != next_to_clean */
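	/* Keeping one descriptor unused means next_to_use == next_to_clean
	 * can only mean "ring empty"; e.g. with an (illustrative) count of
	 * 256, at most 255 buffers are ever posted.
	 */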
1490	for (i = 0; i < adapter->num_rx_queues; i++) {
1491		struct igb_ring *ring = adapter->rx_ring[i];
1492		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1493	}
1494}
1495
1496/**
1497 * igb_power_up_link - Power up the phy/serdes link
1498 * @adapter: address of board private structure
1499 **/
1500void igb_power_up_link(struct igb_adapter *adapter)
1501{
1502	igb_reset_phy(&adapter->hw);
1503
1504	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1505		igb_power_up_phy_copper(&adapter->hw);
1506	else
1507		igb_power_up_serdes_link_82575(&adapter->hw);
1508}
1509
1510/**
1511 * igb_power_down_link - Power down the phy/serdes link
1512 * @adapter: address of board private structure
1513 */
1514static void igb_power_down_link(struct igb_adapter *adapter)
1515{
1516	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1517		igb_power_down_phy_copper_82575(&adapter->hw);
1518	else
1519		igb_shutdown_serdes_link_82575(&adapter->hw);
1520}
1521
1522/**
1523 * igb_up - Open the interface and prepare it to handle traffic
1524 * @adapter: board private structure
1525 **/
1526int igb_up(struct igb_adapter *adapter)
1527{
1528	struct e1000_hw *hw = &adapter->hw;
1529	int i;
1530
1531	/* hardware has been reset, we need to reload some things */
1532	igb_configure(adapter);
1533
1534	clear_bit(__IGB_DOWN, &adapter->state);
1535
1536	for (i = 0; i < adapter->num_q_vectors; i++)
1537		napi_enable(&(adapter->q_vector[i]->napi));
1538
1539	if (adapter->msix_entries)
1540		igb_configure_msix(adapter);
1541	else
1542		igb_assign_vector(adapter->q_vector[0], 0);
1543
1544	/* Clear any pending interrupts. */
1545	rd32(E1000_ICR);
1546	igb_irq_enable(adapter);
1547
1548	/* notify VFs that reset has been completed */
1549	if (adapter->vfs_allocated_count) {
1550		u32 reg_data = rd32(E1000_CTRL_EXT);
1551		reg_data |= E1000_CTRL_EXT_PFRSTD;
1552		wr32(E1000_CTRL_EXT, reg_data);
1553	}
1554
1555	netif_tx_start_all_queues(adapter->netdev);
1556
1557	/* start the watchdog. */
1558	hw->mac.get_link_status = 1;
1559	schedule_work(&adapter->watchdog_task);
1560
1561	return 0;
1562}
1563
1564void igb_down(struct igb_adapter *adapter)
1565{
1566	struct net_device *netdev = adapter->netdev;
1567	struct e1000_hw *hw = &adapter->hw;
1568	u32 tctl, rctl;
1569	int i;
1570
1571	/* signal that we're down so the interrupt handler does not
1572	 * reschedule our watchdog timer */
1573	set_bit(__IGB_DOWN, &adapter->state);
1574
1575	/* disable receives in the hardware */
1576	rctl = rd32(E1000_RCTL);
1577	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1578	/* flush and sleep below */
1579
1580	netif_tx_stop_all_queues(netdev);
1581
1582	/* disable transmits in the hardware */
1583	tctl = rd32(E1000_TCTL);
1584	tctl &= ~E1000_TCTL_EN;
1585	wr32(E1000_TCTL, tctl);
1586	/* flush both disables and wait for them to finish */
1587	wrfl();
1588	msleep(10);
1589
1590	for (i = 0; i < adapter->num_q_vectors; i++)
1591		napi_disable(&(adapter->q_vector[i]->napi));
1592
1593	igb_irq_disable(adapter);
1594
1595	del_timer_sync(&adapter->watchdog_timer);
1596	del_timer_sync(&adapter->phy_info_timer);
1597
1598	netif_carrier_off(netdev);
1599
1600	/* record the stats before reset */
1601	spin_lock(&adapter->stats64_lock);
1602	igb_update_stats(adapter, &adapter->stats64);
1603	spin_unlock(&adapter->stats64_lock);
1604
1605	adapter->link_speed = 0;
1606	adapter->link_duplex = 0;
1607
1608	if (!pci_channel_offline(adapter->pdev))
1609		igb_reset(adapter);
1610	igb_clean_all_tx_rings(adapter);
1611	igb_clean_all_rx_rings(adapter);
1612#ifdef CONFIG_IGB_DCA
1613
1614	/* since we reset the hardware, DCA settings were cleared */
1615	igb_setup_dca(adapter);
1616#endif
1617}
1618
1619void igb_reinit_locked(struct igb_adapter *adapter)
1620{
1621	WARN_ON(in_interrupt());
1622	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1623		msleep(1);
1624	igb_down(adapter);
1625	igb_up(adapter);
1626	clear_bit(__IGB_RESETTING, &adapter->state);
1627}
1628
1629void igb_reset(struct igb_adapter *adapter)
1630{
1631	struct pci_dev *pdev = adapter->pdev;
1632	struct e1000_hw *hw = &adapter->hw;
1633	struct e1000_mac_info *mac = &hw->mac;
1634	struct e1000_fc_info *fc = &hw->fc;
1635	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1636	u16 hwm;
1637
1638	/* Repartition the PBA for MTUs greater than 9k.
1639	 * CTRL.RST is required for the change to take effect.
1640	 */
1641	switch (mac->type) {
1642	case e1000_i350:
1643	case e1000_82580:
1644		pba = rd32(E1000_RXPBS);
1645		pba = igb_rxpbs_adjust_82580(pba);
1646		break;
1647	case e1000_82576:
1648		pba = rd32(E1000_RXPBS);
1649		pba &= E1000_RXPBS_SIZE_MASK_82576;
1650		break;
1651	case e1000_82575:
1652	case e1000_i210:
1653	case e1000_i211:
1654	default:
1655		pba = E1000_PBA_34K;
1656		break;
1657	}
1658
1659	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1660	    (mac->type < e1000_82576)) {
1661		/* adjust PBA for jumbo frames */
1662		wr32(E1000_PBA, pba);
1663
1664		/* To maintain wire speed transmits, the Tx FIFO should be
1665		 * large enough to accommodate two full transmit packets,
1666		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1667		 * the Rx FIFO should be large enough to accommodate at least
1668		 * one full receive packet and is similarly rounded up and
1669		 * expressed in KB. */
1670		pba = rd32(E1000_PBA);
1671		/* upper 16 bits hold the Tx packet buffer allocation size in KB */
1672		tx_space = pba >> 16;
1673		/* lower 16 bits hold the Rx packet buffer allocation size in KB */
1674		pba &= 0xffff;
1675		/* the Tx FIFO also stores 16 bytes of information about each Tx
1676		 * packet; don't include the Ethernet FCS because hardware appends it */
1677		min_tx_space = (adapter->max_frame_size +
1678				sizeof(union e1000_adv_tx_desc) -
1679				ETH_FCS_LEN) * 2;
1680		min_tx_space = ALIGN(min_tx_space, 1024);
1681		min_tx_space >>= 10;
1682		/* software strips receive CRC, so leave room for it */
1683		min_rx_space = adapter->max_frame_size;
1684		min_rx_space = ALIGN(min_rx_space, 1024);
1685		min_rx_space >>= 10;
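		/* Illustrative arithmetic (assuming a 9018-byte max frame and
		 * the 16-byte advanced descriptor): min_tx_space =
		 * (9018 + 16 - 4) * 2 = 18060, aligned up to 18432 bytes =
		 * 18 KB; min_rx_space = ALIGN(9018, 1024) >> 10 = 9 KB.
		 */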
1686
1687		/* If current Tx allocation is less than the min Tx FIFO size,
1688		 * and the min Tx FIFO size is less than the current Rx FIFO
1689		 * allocation, take space away from current Rx allocation */
1690		if (tx_space < min_tx_space &&
1691		    ((min_tx_space - tx_space) < pba)) {
1692			pba = pba - (min_tx_space - tx_space);
1693
1694			/* if short on rx space, rx wins and must trump tx
1695			 * adjustment */
1696			if (pba < min_rx_space)
1697				pba = min_rx_space;
1698		}
1699		wr32(E1000_PBA, pba);
1700	}
1701
1702	/* flow control settings */
1703	/* The high water mark must be low enough to fit one full frame
1704	 * (or the size used for early receive) above it in the Rx FIFO.
1705	 * Set it to the lower of:
1706	 * - 90% of the Rx FIFO size, or
1707	 * - the full Rx FIFO size minus one full frame */
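	/* Illustrative arithmetic (assuming pba = 34 KB and a 1522-byte max
	 * frame): hwm = min(34816 * 9 / 10, 34816 - 2 * 1522) =
	 * min(31334, 31772) = 31334, giving high_water = 31334 & 0xFFF0 =
	 * 31328 bytes and low_water = 31312 below.
	 */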
1708	hwm = min(((pba << 10) * 9 / 10),
1709			((pba << 10) - 2 * adapter->max_frame_size));
1710
1711	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1712	fc->low_water = fc->high_water - 16;
1713	fc->pause_time = 0xFFFF;
1714	fc->send_xon = 1;
1715	fc->current_mode = fc->requested_mode;
1716
1717	/* disable receive for all VFs and wait one second */
1718	if (adapter->vfs_allocated_count) {
1719		int i;
1720		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1721			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1722
1723		/* ping all the active vfs to let them know we are going down */
1724		igb_ping_all_vfs(adapter);
1725
1726		/* disable transmits and receives */
1727		wr32(E1000_VFRE, 0);
1728		wr32(E1000_VFTE, 0);
1729	}
1730
1731	/* Allow time for pending master requests to run */
1732	hw->mac.ops.reset_hw(hw);
1733	wr32(E1000_WUC, 0);
1734
1735	if (hw->mac.ops.init_hw(hw))
1736		dev_err(&pdev->dev, "Hardware Error\n");
1737
1738	/*
1739	 * Flow control settings reset on hardware reset, so guarantee flow
1740	 * control is off when forcing speed.
1741	 */
1742	if (!hw->mac.autoneg)
1743		igb_force_mac_fc(hw);
1744
1745	igb_init_dmac(adapter, pba);
1746	if (!netif_running(adapter->netdev))
1747		igb_power_down_link(adapter);
1748
1749	igb_update_mng_vlan(adapter);
1750
1751	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1752	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1753
1754#ifdef CONFIG_IGB_PTP
1755	/* Re-enable PTP, where applicable. */
1756	igb_ptp_reset(adapter);
1757#endif /* CONFIG_IGB_PTP */
1758
1759	igb_get_phy_info(hw);
1760}
1761
1762static netdev_features_t igb_fix_features(struct net_device *netdev,
1763	netdev_features_t features)
1764{
1765	/*
1766	 * Since there is no support for separate Rx/Tx VLAN accel
1767	 * enable/disable, keep the Tx flag in the same state as the Rx flag.
1768	 */
1769	if (features & NETIF_F_HW_VLAN_RX)
1770		features |= NETIF_F_HW_VLAN_TX;
1771	else
1772		features &= ~NETIF_F_HW_VLAN_TX;
1773
1774	return features;
1775}
1776
1777static int igb_set_features(struct net_device *netdev,
1778	netdev_features_t features)
1779{
1780	netdev_features_t changed = netdev->features ^ features;
1781	struct igb_adapter *adapter = netdev_priv(netdev);
1782
1783	if (changed & NETIF_F_HW_VLAN_RX)
1784		igb_vlan_mode(netdev, features);
1785
1786	if (!(changed & NETIF_F_RXALL))
1787		return 0;
1788
1789	netdev->features = features;
1790
1791	if (netif_running(netdev))
1792		igb_reinit_locked(adapter);
1793	else
1794		igb_reset(adapter);
1795
1796	return 0;
1797}
1798
1799static const struct net_device_ops igb_netdev_ops = {
1800	.ndo_open		= igb_open,
1801	.ndo_stop		= igb_close,
1802	.ndo_start_xmit		= igb_xmit_frame,
1803	.ndo_get_stats64	= igb_get_stats64,
1804	.ndo_set_rx_mode	= igb_set_rx_mode,
1805	.ndo_set_mac_address	= igb_set_mac,
1806	.ndo_change_mtu		= igb_change_mtu,
1807	.ndo_do_ioctl		= igb_ioctl,
1808	.ndo_tx_timeout		= igb_tx_timeout,
1809	.ndo_validate_addr	= eth_validate_addr,
1810	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1811	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1812	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1813	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1814	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1815	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1816#ifdef CONFIG_NET_POLL_CONTROLLER
1817	.ndo_poll_controller	= igb_netpoll,
1818#endif
1819	.ndo_fix_features	= igb_fix_features,
1820	.ndo_set_features	= igb_set_features,
1821};
1822
1823/**
1824 * igb_set_fw_version - Configure version string for ethtool
1825 * @adapter: adapter struct
1826 *
1827 **/
1828void igb_set_fw_version(struct igb_adapter *adapter)
1829{
1830	struct e1000_hw *hw = &adapter->hw;
1831	u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset;
1832	u16 major, build, patch, fw_version;
1833	u32 etrack_id;
1834
1835	hw->nvm.ops.read(hw, 5, 1, &fw_version);
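	/* NVM word 5 is read as the packed firmware image version; the major,
	 * minor and build fields are extracted from it with the IGB_*_MASK
	 * and IGB_*_SHIFT values used below.
	 */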
1836	if (adapter->hw.mac.type != e1000_i211) {
1837		hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verh);
1838		hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verl);
1839		etrack_id = (eeprom_verh << IGB_ETRACK_SHIFT) | eeprom_verl;
1840
1841		/* combo image version needs to be found */
1842		hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
1843		if ((comb_offset != 0x0) &&
1844		    (comb_offset != IGB_NVM_VER_INVALID)) {
1845			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
1846					 + 1), 1, &comb_verh);
1847			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
1848					 1, &comb_verl);
1849
1850			/* Only display Option ROM if it exists and is valid */
1851			if ((comb_verh && comb_verl) &&
1852			    ((comb_verh != IGB_NVM_VER_INVALID) &&
1853			     (comb_verl != IGB_NVM_VER_INVALID))) {
1854				major = comb_verl >> IGB_COMB_VER_SHFT;
1855				build = (comb_verl << IGB_COMB_VER_SHFT) |
1856					(comb_verh >> IGB_COMB_VER_SHFT);
1857				patch = comb_verh & IGB_COMB_VER_MASK;
1858				snprintf(adapter->fw_version,
1859					 sizeof(adapter->fw_version),
1860					 "%d.%d%d, 0x%08x, %d.%d.%d",
1861					 (fw_version & IGB_MAJOR_MASK) >>
1862					 IGB_MAJOR_SHIFT,
1863					 (fw_version & IGB_MINOR_MASK) >>
1864					 IGB_MINOR_SHIFT,
1865					 (fw_version & IGB_BUILD_MASK),
1866					 etrack_id, major, build, patch);
1867				goto out;
1868			}
1869		}
1870		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1871			 "%d.%d%d, 0x%08x",
1872			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1873			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1874			 (fw_version & IGB_BUILD_MASK), etrack_id);
1875	} else {
1876		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1877			 "%d.%d%d",
1878			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1879			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1880			 (fw_version & IGB_BUILD_MASK));
1881	}
1882out:
1883	return;
1884}
1885
1886/**
1887 * igb_probe - Device Initialization Routine
1888 * @pdev: PCI device information struct
1889 * @ent: entry in igb_pci_tbl
1890 *
1891 * Returns 0 on success, negative on failure
1892 *
1893 * igb_probe initializes an adapter identified by a pci_dev structure.
1894 * The OS initialization, configuring of the adapter private structure,
1895 * and a hardware reset occur.
1896 **/
1897static int __devinit igb_probe(struct pci_dev *pdev,
1898			       const struct pci_device_id *ent)
1899{
1900	struct net_device *netdev;
1901	struct igb_adapter *adapter;
1902	struct e1000_hw *hw;
1903	u16 eeprom_data = 0;
1904	s32 ret_val;
1905	static int global_quad_port_a; /* global quad port a indication */
1906	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1907	unsigned long mmio_start, mmio_len;
1908	int err, pci_using_dac;
1909	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1910	u8 part_str[E1000_PBANUM_LENGTH];
1911
1912	/* Catch broken hardware that put the wrong VF device ID in
1913	 * the PCIe SR-IOV capability.
1914	 */
1915	if (pdev->is_virtfn) {
1916		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1917			pci_name(pdev), pdev->vendor, pdev->device);
1918		return -EINVAL;
1919	}
1920
1921	err = pci_enable_device_mem(pdev);
1922	if (err)
1923		return err;
1924
1925	pci_using_dac = 0;
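	/* Try a 64-bit DMA mask first and fall back to 32 bits if the
	 * platform cannot support it; pci_using_dac records whether 64-bit
	 * addressing is usable so NETIF_F_HIGHDMA can be advertised later.
	 */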
1926	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1927	if (!err) {
1928		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1929		if (!err)
1930			pci_using_dac = 1;
1931	} else {
1932		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1933		if (err) {
1934			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1935			if (err) {
1936				dev_err(&pdev->dev, "No usable DMA "
1937					"configuration, aborting\n");
1938				goto err_dma;
1939			}
1940		}
1941	}
1942
1943	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1944	                                   IORESOURCE_MEM),
1945	                                   igb_driver_name);
1946	if (err)
1947		goto err_pci_reg;
1948
1949	pci_enable_pcie_error_reporting(pdev);
1950
1951	pci_set_master(pdev);
1952	pci_save_state(pdev);
1953
1954	err = -ENOMEM;
1955	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1956				   IGB_MAX_TX_QUEUES);
1957	if (!netdev)
1958		goto err_alloc_etherdev;
1959
1960	SET_NETDEV_DEV(netdev, &pdev->dev);
1961
1962	pci_set_drvdata(pdev, netdev);
1963	adapter = netdev_priv(netdev);
1964	adapter->netdev = netdev;
1965	adapter->pdev = pdev;
1966	hw = &adapter->hw;
1967	hw->back = adapter;
1968	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1969
1970	mmio_start = pci_resource_start(pdev, 0);
1971	mmio_len = pci_resource_len(pdev, 0);
1972
1973	err = -EIO;
1974	hw->hw_addr = ioremap(mmio_start, mmio_len);
1975	if (!hw->hw_addr)
1976		goto err_ioremap;
1977
1978	netdev->netdev_ops = &igb_netdev_ops;
1979	igb_set_ethtool_ops(netdev);
1980	netdev->watchdog_timeo = 5 * HZ;
1981
1982	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1983
1984	netdev->mem_start = mmio_start;
1985	netdev->mem_end = mmio_start + mmio_len;
1986
1987	/* PCI config space info */
1988	hw->vendor_id = pdev->vendor;
1989	hw->device_id = pdev->device;
1990	hw->revision_id = pdev->revision;
1991	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1992	hw->subsystem_device_id = pdev->subsystem_device;
1993
1994	/* Copy the default MAC, PHY and NVM function pointers */
1995	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1996	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1997	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1998	/* Initialize skew-specific constants */
1999	err = ei->get_invariants(hw);
2000	if (err)
2001		goto err_sw_init;
2002
2003	/* setup the private structure */
2004	err = igb_sw_init(adapter);
2005	if (err)
2006		goto err_sw_init;
2007
2008	igb_get_bus_info_pcie(hw);
2009
2010	hw->phy.autoneg_wait_to_complete = false;
2011
2012	/* Copper options */
2013	if (hw->phy.media_type == e1000_media_type_copper) {
2014		hw->phy.mdix = AUTO_ALL_MODES;
2015		hw->phy.disable_polarity_correction = false;
2016		hw->phy.ms_type = e1000_ms_hw_default;
2017	}
2018
2019	if (igb_check_reset_block(hw))
2020		dev_info(&pdev->dev,
2021			"PHY reset is blocked due to SOL/IDER session.\n");
2022
2023	/*
2024	 * features is initialized to 0 at allocation, but it might already
2025	 * have bits set by igb_sw_init, so use an OR instead of an
2026	 * assignment.
2027	 */
2028	netdev->features |= NETIF_F_SG |
2029			    NETIF_F_IP_CSUM |
2030			    NETIF_F_IPV6_CSUM |
2031			    NETIF_F_TSO |
2032			    NETIF_F_TSO6 |
2033			    NETIF_F_RXHASH |
2034			    NETIF_F_RXCSUM |
2035			    NETIF_F_HW_VLAN_RX |
2036			    NETIF_F_HW_VLAN_TX;
2037
2038	/* copy netdev features into list of user selectable features */
2039	netdev->hw_features |= netdev->features;
2040	netdev->hw_features |= NETIF_F_RXALL;
2041
2042	/* set this bit last since it cannot be part of hw_features */
2043	netdev->features |= NETIF_F_HW_VLAN_FILTER;
2044
2045	netdev->vlan_features |= NETIF_F_TSO |
2046				 NETIF_F_TSO6 |
2047				 NETIF_F_IP_CSUM |
2048				 NETIF_F_IPV6_CSUM |
2049				 NETIF_F_SG;
2050
2051	netdev->priv_flags |= IFF_SUPP_NOFCS;
2052
2053	if (pci_using_dac) {
2054		netdev->features |= NETIF_F_HIGHDMA;
2055		netdev->vlan_features |= NETIF_F_HIGHDMA;
2056	}
2057
2058	if (hw->mac.type >= e1000_82576) {
2059		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2060		netdev->features |= NETIF_F_SCTP_CSUM;
2061	}
2062
2063	netdev->priv_flags |= IFF_UNICAST_FLT;
2064
2065	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2066
2067	/* before reading the NVM, reset the controller to put the device in a
2068	 * known good starting state */
2069	hw->mac.ops.reset_hw(hw);
2070
2071	/*
2072	 * make sure the NVM is good; i211 parts have special NVM that
2073	 * doesn't contain a checksum
2074	 */
2075	if (hw->mac.type != e1000_i211) {
2076		if (hw->nvm.ops.validate(hw) < 0) {
2077			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2078			err = -EIO;
2079			goto err_eeprom;
2080		}
2081	}
2082
2083	/* copy the MAC address out of the NVM */
2084	if (hw->mac.ops.read_mac_addr(hw))
2085		dev_err(&pdev->dev, "NVM Read Error\n");
2086
2087	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2088	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2089
2090	if (!is_valid_ether_addr(netdev->perm_addr)) {
2091		dev_err(&pdev->dev, "Invalid MAC Address\n");
2092		err = -EIO;
2093		goto err_eeprom;
2094	}
2095
2096	/* get firmware version for ethtool -i */
2097	igb_set_fw_version(adapter);
2098
2099	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2100	            (unsigned long) adapter);
2101	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2102	            (unsigned long) adapter);
2103
2104	INIT_WORK(&adapter->reset_task, igb_reset_task);
2105	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2106
2107	/* Initialize link properties that are user-changeable */
2108	adapter->fc_autoneg = true;
2109	hw->mac.autoneg = true;
2110	hw->phy.autoneg_advertised = 0x2f;
2111
2112	hw->fc.requested_mode = e1000_fc_default;
2113	hw->fc.current_mode = e1000_fc_default;
2114
2115	igb_validate_mdi_setting(hw);
2116
2117	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2118	 * enable the ACPI Magic Packet filter
2119	 */
2120
2121	if (hw->bus.func == 0)
2122		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2123	else if (hw->mac.type >= e1000_82580)
2124		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2125		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2126		                 &eeprom_data);
2127	else if (hw->bus.func == 1)
2128		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2129
2130	if (eeprom_data & eeprom_apme_mask)
2131		adapter->eeprom_wol |= E1000_WUFC_MAG;
2132
2133	/* now that we have the eeprom settings, apply the special cases where
2134	 * the eeprom may be wrong or the board simply won't support wake on
2135	 * lan on a particular port */
2136	switch (pdev->device) {
2137	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2138		adapter->eeprom_wol = 0;
2139		break;
2140	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2141	case E1000_DEV_ID_82576_FIBER:
2142	case E1000_DEV_ID_82576_SERDES:
2143		/* Wake events are only supported on port A for dual fiber
2144		 * adapters, regardless of the eeprom setting */
2145		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2146			adapter->eeprom_wol = 0;
2147		break;
2148	case E1000_DEV_ID_82576_QUAD_COPPER:
2149	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2150		/* if quad port adapter, disable WoL on all but port A */
2151		if (global_quad_port_a != 0)
2152			adapter->eeprom_wol = 0;
2153		else
2154			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2155		/* Reset for multiple quad port adapters */
2156		if (++global_quad_port_a == 4)
2157			global_quad_port_a = 0;
2158		break;
2159	}
2160
2161	/* initialize the wol settings based on the eeprom settings */
2162	adapter->wol = adapter->eeprom_wol;
2163	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2164
2165	/* reset the hardware with the new settings */
2166	igb_reset(adapter);
2167
2168	/* let the f/w know that the h/w is now under the control of the
2169	 * driver. */
2170	igb_get_hw_control(adapter);
2171
2172	strcpy(netdev->name, "eth%d");
2173	err = register_netdev(netdev);
2174	if (err)
2175		goto err_register;
2176
2177	/* carrier off reporting is important to ethtool even BEFORE open */
2178	netif_carrier_off(netdev);
2179
2180#ifdef CONFIG_IGB_DCA
2181	if (dca_add_requester(&pdev->dev) == 0) {
2182		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2183		dev_info(&pdev->dev, "DCA enabled\n");
2184		igb_setup_dca(adapter);
2185	}
2186
2187#endif
2188
2189#ifdef CONFIG_IGB_PTP
2190	/* do hw tstamp init after resetting */
2191	igb_ptp_init(adapter);
2192#endif /* CONFIG_IGB_PTP */
2193
2194	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2195	/* print bus type/speed/width info */
2196	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2197		 netdev->name,
2198		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2199		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2200		                                            "unknown"),
2201		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2202		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2203		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2204		   "unknown"),
2205		 netdev->dev_addr);
2206
2207	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2208	if (ret_val)
2209		strcpy(part_str, "Unknown");
2210	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2211	dev_info(&pdev->dev,
2212		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2213		adapter->msix_entries ? "MSI-X" :
2214		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2215		adapter->num_rx_queues, adapter->num_tx_queues);
2216	switch (hw->mac.type) {
2217	case e1000_i350:
2218	case e1000_i210:
2219	case e1000_i211:
2220		igb_set_eee_i350(hw);
2221		break;
2222	default:
2223		break;
2224	}
2225
2226	pm_runtime_put_noidle(&pdev->dev);
2227	return 0;
2228
2229err_register:
2230	igb_release_hw_control(adapter);
2231err_eeprom:
2232	if (!igb_check_reset_block(hw))
2233		igb_reset_phy(hw);
2234
2235	if (hw->flash_address)
2236		iounmap(hw->flash_address);
2237err_sw_init:
2238	igb_clear_interrupt_scheme(adapter);
2239	iounmap(hw->hw_addr);
2240err_ioremap:
2241	free_netdev(netdev);
2242err_alloc_etherdev:
2243	pci_release_selected_regions(pdev,
2244	                             pci_select_bars(pdev, IORESOURCE_MEM));
2245err_pci_reg:
2246err_dma:
2247	pci_disable_device(pdev);
2248	return err;
2249}
2250
2251/**
2252 * igb_remove - Device Removal Routine
2253 * @pdev: PCI device information struct
2254 *
2255 * igb_remove is called by the PCI subsystem to alert the driver
2256 * that it should release a PCI device.  This could be caused by a
2257 * Hot-Plug event, or because the driver is going to be removed from
2258 * memory.
2259 **/
2260static void __devexit igb_remove(struct pci_dev *pdev)
2261{
2262	struct net_device *netdev = pci_get_drvdata(pdev);
2263	struct igb_adapter *adapter = netdev_priv(netdev);
2264	struct e1000_hw *hw = &adapter->hw;
2265
2266	pm_runtime_get_noresume(&pdev->dev);
2267#ifdef CONFIG_IGB_PTP
2268	igb_ptp_stop(adapter);
2269#endif /* CONFIG_IGB_PTP */
2270
2271	/*
2272	 * The watchdog timer may be rescheduled, so explicitly
2273	 * disable it from being rescheduled.
2274	 */
2275	set_bit(__IGB_DOWN, &adapter->state);
2276	del_timer_sync(&adapter->watchdog_timer);
2277	del_timer_sync(&adapter->phy_info_timer);
2278
2279	cancel_work_sync(&adapter->reset_task);
2280	cancel_work_sync(&adapter->watchdog_task);
2281
2282#ifdef CONFIG_IGB_DCA
2283	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2284		dev_info(&pdev->dev, "DCA disabled\n");
2285		dca_remove_requester(&pdev->dev);
2286		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2287		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2288	}
2289#endif
2290
2291	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2292	 * would have already happened in close and is redundant. */
2293	igb_release_hw_control(adapter);
2294
2295	unregister_netdev(netdev);
2296
2297	igb_clear_interrupt_scheme(adapter);
2298
2299#ifdef CONFIG_PCI_IOV
2300	/* reclaim resources allocated to VFs */
2301	if (adapter->vf_data) {
2302		/* disable iov and allow time for transactions to clear */
2303		if (!igb_check_vf_assignment(adapter)) {
2304			pci_disable_sriov(pdev);
2305			msleep(500);
2306		} else {
2307			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2308		}
2309
2310		kfree(adapter->vf_data);
2311		adapter->vf_data = NULL;
2312		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2313		wrfl();
2314		msleep(100);
2315		dev_info(&pdev->dev, "IOV Disabled\n");
2316	}
2317#endif
2318
2319	iounmap(hw->hw_addr);
2320	if (hw->flash_address)
2321		iounmap(hw->flash_address);
2322	pci_release_selected_regions(pdev,
2323	                             pci_select_bars(pdev, IORESOURCE_MEM));
2324
2325	kfree(adapter->shadow_vfta);
2326	free_netdev(netdev);
2327
2328	pci_disable_pcie_error_reporting(pdev);
2329
2330	pci_disable_device(pdev);
2331}
2332
2333/**
2334 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2335 * @adapter: board private structure to initialize
2336 *
2337 * This function initializes the VF-specific data storage and then attempts to
2338 * allocate the VFs.  The reason for this ordering is that it is much more
2339 * expensive time-wise to disable SR-IOV than it is to allocate and free the
2340 * memory for the VFs.
2341 **/
2342static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2343{
2344#ifdef CONFIG_PCI_IOV
2345	struct pci_dev *pdev = adapter->pdev;
2346	struct e1000_hw *hw = &adapter->hw;
2347	int old_vfs = igb_find_enabled_vfs(adapter);
2348	int i;
2349
2350	/* Virtualization features not supported on i210 family. */
2351	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2352		return;
2353
2354	if (old_vfs) {
2355		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2356			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2357		adapter->vfs_allocated_count = old_vfs;
2358	}
2359
2360	if (!adapter->vfs_allocated_count)
2361		return;
2362
2363	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2364				sizeof(struct vf_data_storage), GFP_KERNEL);
2365
2366	/* if allocation failed then we do not support SR-IOV */
2367	if (!adapter->vf_data) {
2368		adapter->vfs_allocated_count = 0;
2369		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2370			"Data Storage\n");
2371		goto out;
2372	}
2373
2374	if (!old_vfs) {
2375		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2376			goto err_out;
2377	}
2378	dev_info(&pdev->dev, "%d VFs allocated\n",
2379		 adapter->vfs_allocated_count);
2380	for (i = 0; i < adapter->vfs_allocated_count; i++)
2381		igb_vf_configure(adapter, i);
2382
2383	/* DMA Coalescing is not supported in IOV mode. */
2384	adapter->flags &= ~IGB_FLAG_DMAC;
2385	goto out;
2386err_out:
2387	kfree(adapter->vf_data);
2388	adapter->vf_data = NULL;
2389	adapter->vfs_allocated_count = 0;
2390out:
2391	return;
2392#endif /* CONFIG_PCI_IOV */
2393}
2394
2395/**
2396 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2397 * @adapter: board private structure to initialize
2398 *
2399 * igb_sw_init initializes the Adapter private data structure.
2400 * Fields are initialized based on PCI device information and
2401 * OS network device settings (MTU size).
2402 **/
2403static int __devinit igb_sw_init(struct igb_adapter *adapter)
2404{
2405	struct e1000_hw *hw = &adapter->hw;
2406	struct net_device *netdev = adapter->netdev;
2407	struct pci_dev *pdev = adapter->pdev;
2408	u32 max_rss_queues;
2409
2410	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2411
2412	/* set default ring sizes */
2413	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2414	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2415
2416	/* set default ITR values */
2417	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2418	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2419
2420	/* set default work limits */
2421	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2422
2423	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2424				  VLAN_HLEN;
2425	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
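	/* e.g. the default 1500-byte MTU yields a 1522-byte max frame
	 * (1500 + 14-byte header + 4-byte FCS + 4-byte VLAN tag) and a
	 * 64-byte minimum frame.
	 */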
2426
2427	adapter->node = -1;
2428
2429	spin_lock_init(&adapter->stats64_lock);
2430#ifdef CONFIG_PCI_IOV
2431	switch (hw->mac.type) {
2432	case e1000_82576:
2433	case e1000_i350:
2434		if (max_vfs > 7) {
2435			dev_warn(&pdev->dev,
2436				 "Maximum of 7 VFs per PF, using max\n");
2437			adapter->vfs_allocated_count = 7;
2438		} else
2439			adapter->vfs_allocated_count = max_vfs;
2440		break;
2441	default:
2442		break;
2443	}
2444#endif /* CONFIG_PCI_IOV */
2445
2446	/* Determine the maximum number of RSS queues supported. */
2447	switch (hw->mac.type) {
2448	case e1000_i211:
2449		max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2450		break;
2451	case e1000_82575:
2452	case e1000_i210:
2453		max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2454		break;
2455	case e1000_i350:
2456		/* I350 cannot do RSS and SR-IOV at the same time */
2457		if (!!adapter->vfs_allocated_count) {
2458			max_rss_queues = 1;
2459			break;
2460		}
2461		/* fall through */
2462	case e1000_82576:
2463		if (!!adapter->vfs_allocated_count) {
2464			max_rss_queues = 2;
2465			break;
2466		}
2467		/* fall through */
2468	case e1000_82580:
2469	default:
2470		max_rss_queues = IGB_MAX_RX_QUEUES;
2471		break;
2472	}
2473
2474	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
2475
2476	/* Determine if we need to pair queues. */
2477	switch (hw->mac.type) {
2478	case e1000_82575:
2479	case e1000_i211:
2480		/* Device supports enough interrupts without queue pairing. */
2481		break;
2482	case e1000_82576:
2483		/*
2484		 * If VFs are going to be allocated with RSS queues then we
2485		 * should pair the queues in order to conserve interrupts due
2486		 * to limited supply.
2487		 */
2488		if ((adapter->rss_queues > 1) &&
2489		    (adapter->vfs_allocated_count > 6))
2490			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2491		/* fall through */
2492	case e1000_82580:
2493	case e1000_i350:
2494	case e1000_i210:
2495	default:
2496		/*
2497		 * If rss_queues > half of max_rss_queues, pair the queues in
2498		 * order to conserve interrupts due to limited supply.
2499		 */
2500		if (adapter->rss_queues > (max_rss_queues / 2))
2501			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2502		break;
2503	}
2504
2505	/* Setup and initialize a copy of the hw vlan table array */
2506	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2507				E1000_VLAN_FILTER_TBL_SIZE,
2508				GFP_ATOMIC);
2509
2510	/* This call may decrease the number of queues */
2511	if (igb_init_interrupt_scheme(adapter)) {
2512		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2513		return -ENOMEM;
2514	}
2515
2516	igb_probe_vfs(adapter);
2517
2518	/* Explicitly disable IRQ since the NIC can be in any state. */
2519	igb_irq_disable(adapter);
2520
2521	if (hw->mac.type >= e1000_i350)
2522		adapter->flags &= ~IGB_FLAG_DMAC;
2523
2524	set_bit(__IGB_DOWN, &adapter->state);
2525	return 0;
2526}
2527
2528/**
2529 * igb_open - Called when a network interface is made active
2530 * @netdev: network interface device structure
2531 *
2532 * Returns 0 on success, negative value on failure
2533 *
2534 * The open entry point is called when a network interface is made
2535 * active by the system (IFF_UP).  At this point all resources needed
2536 * for transmit and receive operations are allocated, the interrupt
2537 * handler is registered with the OS, the watchdog timer is started,
2538 * and the stack is notified that the interface is ready.
2539 **/
2540static int __igb_open(struct net_device *netdev, bool resuming)
2541{
2542	struct igb_adapter *adapter = netdev_priv(netdev);
2543	struct e1000_hw *hw = &adapter->hw;
2544	struct pci_dev *pdev = adapter->pdev;
2545	int err;
2546	int i;
2547
2548	/* disallow open during test */
2549	if (test_bit(__IGB_TESTING, &adapter->state)) {
2550		WARN_ON(resuming);
2551		return -EBUSY;
2552	}
2553
2554	if (!resuming)
2555		pm_runtime_get_sync(&pdev->dev);
2556
2557	netif_carrier_off(netdev);
2558
2559	/* allocate transmit descriptors */
2560	err = igb_setup_all_tx_resources(adapter);
2561	if (err)
2562		goto err_setup_tx;
2563
2564	/* allocate receive descriptors */
2565	err = igb_setup_all_rx_resources(adapter);
2566	if (err)
2567		goto err_setup_rx;
2568
2569	igb_power_up_link(adapter);
2570
2571	/* before we allocate an interrupt, we must be ready to handle it.
2572	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2573	 * as soon as we call request_irq, so we have to set up our
2574	 * clean_rx handler before we do so.  */
2575	igb_configure(adapter);
2576
2577	err = igb_request_irq(adapter);
2578	if (err)
2579		goto err_req_irq;
2580
2581	/* From here on the code is the same as igb_up() */
2582	clear_bit(__IGB_DOWN, &adapter->state);
2583
2584	for (i = 0; i < adapter->num_q_vectors; i++)
2585		napi_enable(&(adapter->q_vector[i]->napi));
2586
2587	/* Clear any pending interrupts. */
2588	rd32(E1000_ICR);
2589
2590	igb_irq_enable(adapter);
2591
2592	/* notify VFs that reset has been completed */
2593	if (adapter->vfs_allocated_count) {
2594		u32 reg_data = rd32(E1000_CTRL_EXT);
2595		reg_data |= E1000_CTRL_EXT_PFRSTD;
2596		wr32(E1000_CTRL_EXT, reg_data);
2597	}
2598
2599	netif_tx_start_all_queues(netdev);
2600
2601	if (!resuming)
2602		pm_runtime_put(&pdev->dev);
2603
2604	/* start the watchdog. */
2605	hw->mac.get_link_status = 1;
2606	schedule_work(&adapter->watchdog_task);
2607
2608	return 0;
2609
2610err_req_irq:
2611	igb_release_hw_control(adapter);
2612	igb_power_down_link(adapter);
2613	igb_free_all_rx_resources(adapter);
2614err_setup_rx:
2615	igb_free_all_tx_resources(adapter);
2616err_setup_tx:
2617	igb_reset(adapter);
2618	if (!resuming)
2619		pm_runtime_put(&pdev->dev);
2620
2621	return err;
2622}
2623
2624static int igb_open(struct net_device *netdev)
2625{
2626	return __igb_open(netdev, false);
2627}
2628
2629/**
2630 * igb_close - Disables a network interface
2631 * @netdev: network interface device structure
2632 *
2633 * Returns 0, this is not allowed to fail
2634 *
2635 * The close entry point is called when an interface is de-activated
2636 * by the OS.  The hardware is still under the driver's control, but
2637 * needs to be disabled.  A global MAC reset is issued to stop the
2638 * hardware, and all transmit and receive resources are freed.
2639 **/
2640static int __igb_close(struct net_device *netdev, bool suspending)
2641{
2642	struct igb_adapter *adapter = netdev_priv(netdev);
2643	struct pci_dev *pdev = adapter->pdev;
2644
2645	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2646
2647	if (!suspending)
2648		pm_runtime_get_sync(&pdev->dev);
2649
2650	igb_down(adapter);
2651	igb_free_irq(adapter);
2652
2653	igb_free_all_tx_resources(adapter);
2654	igb_free_all_rx_resources(adapter);
2655
2656	if (!suspending)
2657		pm_runtime_put_sync(&pdev->dev);
2658	return 0;
2659}
2660
2661static int igb_close(struct net_device *netdev)
2662{
2663	return __igb_close(netdev, false);
2664}
2665
2666/**
2667 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2668 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2669 *
2670 * Return 0 on success, negative on failure
2671 **/
2672int igb_setup_tx_resources(struct igb_ring *tx_ring)
2673{
2674	struct device *dev = tx_ring->dev;
2675	int orig_node = dev_to_node(dev);
2676	int size;
2677
2678	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2679	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
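	/* prefer an allocation on the ring's NUMA node, but fall back to
	 * any node rather than fail
	 */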
2680	if (!tx_ring->tx_buffer_info)
2681		tx_ring->tx_buffer_info = vzalloc(size);
2682	if (!tx_ring->tx_buffer_info)
2683		goto err;
2684
2685	/* round up to nearest 4K */
2686	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2687	tx_ring->size = ALIGN(tx_ring->size, 4096);
2688
2689	set_dev_node(dev, tx_ring->numa_node);
2690	tx_ring->desc = dma_alloc_coherent(dev,
2691					   tx_ring->size,
2692					   &tx_ring->dma,
2693					   GFP_KERNEL);
2694	set_dev_node(dev, orig_node);
2695	if (!tx_ring->desc)
2696		tx_ring->desc = dma_alloc_coherent(dev,
2697						   tx_ring->size,
2698						   &tx_ring->dma,
2699						   GFP_KERNEL);
2700
2701	if (!tx_ring->desc)
2702		goto err;
2703
2704	tx_ring->next_to_use = 0;
2705	tx_ring->next_to_clean = 0;
2706
2707	return 0;
2708
2709err:
2710	vfree(tx_ring->tx_buffer_info);
2711	dev_err(dev,
2712		"Unable to allocate memory for the transmit descriptor ring\n");
2713	return -ENOMEM;
2714}
2715
2716/**
2717 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2718 *				  (Descriptors) for all queues
2719 * @adapter: board private structure
2720 *
2721 * Return 0 on success, negative on failure
2722 **/
2723static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2724{
2725	struct pci_dev *pdev = adapter->pdev;
2726	int i, err = 0;
2727
2728	for (i = 0; i < adapter->num_tx_queues; i++) {
2729		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2730		if (err) {
2731			dev_err(&pdev->dev,
2732				"Allocation for Tx Queue %u failed\n", i);
2733			for (i--; i >= 0; i--)
2734				igb_free_tx_resources(adapter->tx_ring[i]);
2735			break;
2736		}
2737	}
2738
2739	return err;
2740}
2741
2742/**
2743 * igb_setup_tctl - configure the transmit control registers
2744 * @adapter: Board private structure
2745 **/
2746void igb_setup_tctl(struct igb_adapter *adapter)
2747{
2748	struct e1000_hw *hw = &adapter->hw;
2749	u32 tctl;
2750
2751	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2752	wr32(E1000_TXDCTL(0), 0);
2753
2754	/* Program the Transmit Control Register */
2755	tctl = rd32(E1000_TCTL);
2756	tctl &= ~E1000_TCTL_CT;
2757	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2758		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2759
2760	igb_config_collision_dist(hw);
2761
2762	/* Enable transmits */
2763	tctl |= E1000_TCTL_EN;
2764
2765	wr32(E1000_TCTL, tctl);
2766}
2767
2768/**
2769 * igb_configure_tx_ring - Configure transmit ring after Reset
2770 * @adapter: board private structure
2771 * @ring: tx ring to configure
2772 *
2773 * Configure a transmit ring after a reset.
2774 **/
2775void igb_configure_tx_ring(struct igb_adapter *adapter,
2776                           struct igb_ring *ring)
2777{
2778	struct e1000_hw *hw = &adapter->hw;
2779	u32 txdctl = 0;
2780	u64 tdba = ring->dma;
2781	int reg_idx = ring->reg_idx;
2782
2783	/* disable the queue */
2784	wr32(E1000_TXDCTL(reg_idx), 0);
2785	wrfl();
2786	mdelay(10);
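	/* the flush and delay above give the hardware time to finish with
	 * the ring before its registers are reprogrammed below
	 */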
2787
2788	wr32(E1000_TDLEN(reg_idx),
2789	                ring->count * sizeof(union e1000_adv_tx_desc));
2790	wr32(E1000_TDBAL(reg_idx),
2791	                tdba & 0x00000000ffffffffULL);
2792	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2793
2794	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2795	wr32(E1000_TDH(reg_idx), 0);
2796	writel(0, ring->tail);
2797
2798	txdctl |= IGB_TX_PTHRESH;
2799	txdctl |= IGB_TX_HTHRESH << 8;
2800	txdctl |= IGB_TX_WTHRESH << 16;
2801
2802	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2803	wr32(E1000_TXDCTL(reg_idx), txdctl);
2804}
2805
2806/**
2807 * igb_configure_tx - Configure transmit Unit after Reset
2808 * @adapter: board private structure
2809 *
2810 * Configure the Tx unit of the MAC after a reset.
2811 **/
2812static void igb_configure_tx(struct igb_adapter *adapter)
2813{
2814	int i;
2815
2816	for (i = 0; i < adapter->num_tx_queues; i++)
2817		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2818}
2819
2820/**
2821 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2822 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2823 *
2824 * Returns 0 on success, negative on failure
2825 **/
2826int igb_setup_rx_resources(struct igb_ring *rx_ring)
2827{
2828	struct device *dev = rx_ring->dev;
2829	int orig_node = dev_to_node(dev);
2830	int size, desc_len;
2831
2832	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2833	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2834	if (!rx_ring->rx_buffer_info)
2835		rx_ring->rx_buffer_info = vzalloc(size);
2836	if (!rx_ring->rx_buffer_info)
2837		goto err;
2838
2839	desc_len = sizeof(union e1000_adv_rx_desc);
2840
2841	/* Round up to nearest 4K */
2842	rx_ring->size = rx_ring->count * desc_len;
2843	rx_ring->size = ALIGN(rx_ring->size, 4096);
2844
2845	set_dev_node(dev, rx_ring->numa_node);
2846	rx_ring->desc = dma_alloc_coherent(dev,
2847					   rx_ring->size,
2848					   &rx_ring->dma,
2849					   GFP_KERNEL);
2850	set_dev_node(dev, orig_node);
2851	if (!rx_ring->desc)
2852		rx_ring->desc = dma_alloc_coherent(dev,
2853						   rx_ring->size,
2854						   &rx_ring->dma,
2855						   GFP_KERNEL);
2856
2857	if (!rx_ring->desc)
2858		goto err;
2859
2860	rx_ring->next_to_clean = 0;
2861	rx_ring->next_to_use = 0;
2862
2863	return 0;
2864
2865err:
2866	vfree(rx_ring->rx_buffer_info);
2867	rx_ring->rx_buffer_info = NULL;
2868	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2869		" ring\n");
2870	return -ENOMEM;
2871}
2872
2873/**
2874 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2875 *				  (Descriptors) for all queues
2876 * @adapter: board private structure
2877 *
2878 * Return 0 on success, negative on failure
2879 **/
2880static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2881{
2882	struct pci_dev *pdev = adapter->pdev;
2883	int i, err = 0;
2884
2885	for (i = 0; i < adapter->num_rx_queues; i++) {
2886		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2887		if (err) {
2888			dev_err(&pdev->dev,
2889				"Allocation for Rx Queue %u failed\n", i);
2890			for (i--; i >= 0; i--)
2891				igb_free_rx_resources(adapter->rx_ring[i]);
2892			break;
2893		}
2894	}
2895
2896	return err;
2897}
2898
2899/**
2900 * igb_setup_mrqc - configure the multiple receive queue control registers
2901 * @adapter: Board private structure
2902 **/
2903static void igb_setup_mrqc(struct igb_adapter *adapter)
2904{
2905	struct e1000_hw *hw = &adapter->hw;
2906	u32 mrqc, rxcsum;
2907	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2908	union e1000_reta {
2909		u32 dword;
2910		u8  bytes[4];
2911	} reta;
2912	static const u8 rsshash[40] = {
2913		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2914		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2915		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2916		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2917
2918	/* Fill out hash function seeds */
2919	for (j = 0; j < 10; j++) {
2920		u32 rsskey = rsshash[(j * 4)];
2921		rsskey |= rsshash[(j * 4) + 1] << 8;
2922		rsskey |= rsshash[(j * 4) + 2] << 16;
2923		rsskey |= rsshash[(j * 4) + 3] << 24;
2924		array_wr32(E1000_RSSRK(0), j, rsskey);
2925	}
2926
2927	num_rx_queues = adapter->rss_queues;
2928
2929	if (adapter->vfs_allocated_count) {
2930		/* 82575 and 82576 support 2 RSS queues for VMDq */
2931		switch (hw->mac.type) {
2932		case e1000_i350:
2933		case e1000_82580:
2934			num_rx_queues = 1;
2935			shift = 0;
2936			break;
2937		case e1000_82576:
2938			shift = 3;
2939			num_rx_queues = 2;
2940			break;
2941		case e1000_82575:
2942			shift = 2;
2943			shift2 = 6;
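			/* fall through */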
2944		default:
2945			break;
2946		}
2947	} else {
2948		if (hw->mac.type == e1000_82575)
2949			shift = 6;
2950	}
2951
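	/* Fill the 128-entry redirection table (RETA), spreading the entries
	 * across the RSS queues round-robin; the per-MAC shift places each
	 * queue index in the bit position the hardware expects, and entries
	 * are written to the RETA registers one dword (4 entries) at a time.
	 */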
2952	for (j = 0; j < (32 * 4); j++) {
2953		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2954		if (shift2)
2955			reta.bytes[j & 3] |= num_rx_queues << shift2;
2956		if ((j & 3) == 3)
2957			wr32(E1000_RETA(j >> 2), reta.dword);
2958	}
2959
2960	/*
2961	 * Disable raw packet checksumming so that RSS hash is placed in
2962	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2963	 * offloads as they are enabled by default
2964	 */
2965	rxcsum = rd32(E1000_RXCSUM);
2966	rxcsum |= E1000_RXCSUM_PCSD;
2967
2968	if (adapter->hw.mac.type >= e1000_82576)
2969		/* Enable Receive Checksum Offload for SCTP */
2970		rxcsum |= E1000_RXCSUM_CRCOFL;
2971
2972	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2973	wr32(E1000_RXCSUM, rxcsum);
2974	/*
2975	 * Generate RSS hash based on TCP port numbers and/or
2976	 * IPv4/v6 src and dst addresses since UDP cannot be
2977	 * hashed reliably due to IP fragmentation
2978	 */
2979
2980	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2981	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
2982	       E1000_MRQC_RSS_FIELD_IPV6 |
2983	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
2984	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2985
2986	/* If VMDq is enabled then we set the appropriate mode for that, else
2987	 * we default to RSS so that an RSS hash is calculated per packet even
2988	 * if we are only using one queue */
2989	if (adapter->vfs_allocated_count) {
2990		if (hw->mac.type > e1000_82575) {
2991			/* Set the default pool for the PF's first queue */
2992			u32 vtctl = rd32(E1000_VT_CTL);
2993			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2994				   E1000_VT_CTL_DISABLE_DEF_POOL);
2995			vtctl |= adapter->vfs_allocated_count <<
2996				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2997			wr32(E1000_VT_CTL, vtctl);
2998		}
2999		if (adapter->rss_queues > 1)
3000			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
3001		else
3002			mrqc |= E1000_MRQC_ENABLE_VMDQ;
3003	} else {
3004		if (hw->mac.type != e1000_i211)
3005			mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
3006	}
3007	igb_vmm_control(adapter);
3008
3009	wr32(E1000_MRQC, mrqc);
3010}
3011
3012/**
3013 * igb_setup_rctl - configure the receive control registers
3014 * @adapter: Board private structure
3015 **/
3016void igb_setup_rctl(struct igb_adapter *adapter)
3017{
3018	struct e1000_hw *hw = &adapter->hw;
3019	u32 rctl;
3020
3021	rctl = rd32(E1000_RCTL);
3022
3023	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3024	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3025
3026	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3027		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3028
3029	/*
3030	 * enable stripping of CRC. It's unlikely this will break BMC
3031	 * redirection as it did with e1000. Newer features require
3032	 * that the HW strips the CRC.
3033	 */
3034	rctl |= E1000_RCTL_SECRC;
3035
3036	/* disable store bad packets and clear size bits. */
3037	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3038
3039	/* enable LPE to prevent packets larger than max_frame_size */
3040	rctl |= E1000_RCTL_LPE;
3041
3042	/* disable queue 0 to prevent tail write w/o re-config */
3043	wr32(E1000_RXDCTL(0), 0);
3044
3045	/* Attention!  For SR-IOV PF driver operations you must enable
3046	 * queue drop for all VF and PF queues to prevent head-of-line blocking
3047	 * if an untrusted VF does not provide descriptors to hardware.
3048	 */
3049	if (adapter->vfs_allocated_count) {
3050		/* set all queue drop enable bits */
3051		wr32(E1000_QDE, ALL_QUEUES);
3052	}
3053
3054	/* This is useful for sniffing bad packets. */
3055	if (adapter->netdev->features & NETIF_F_RXALL) {
3056		/* UPE and MPE will be handled by normal PROMISC logic
3057		 * in igb_set_rx_mode */
3058		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3059			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3060			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3061
3062		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3063			  E1000_RCTL_DPF | /* Allow filtered pause */
3064			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3065		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3066		 * and that breaks VLANs.
3067		 */
3068	}
3069
3070	wr32(E1000_RCTL, rctl);
3071}
3072
3073static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3074                                   int vfn)
3075{
3076	struct e1000_hw *hw = &adapter->hw;
3077	u32 vmolr;
3078
3079	/* if it isn't the PF, check to see if VFs are enabled and
3080	 * increase the size to support VLAN tags */
3081	if (vfn < adapter->vfs_allocated_count &&
3082	    adapter->vf_data[vfn].vlans_enabled)
3083		size += VLAN_TAG_SIZE;
3084
3085	vmolr = rd32(E1000_VMOLR(vfn));
3086	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3087	vmolr |= size | E1000_VMOLR_LPE;
3088	wr32(E1000_VMOLR(vfn), vmolr);
3089
3090	return 0;
3091}
3092
3093/**
3094 * igb_rlpml_set - set maximum receive packet size
3095 * @adapter: board private structure
3096 *
3097 * Configure maximum receivable packet size.
3098 **/
3099static void igb_rlpml_set(struct igb_adapter *adapter)
3100{
3101	u32 max_frame_size = adapter->max_frame_size;
3102	struct e1000_hw *hw = &adapter->hw;
3103	u16 pf_id = adapter->vfs_allocated_count;
3104
3105	if (pf_id) {
3106		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3107		/*
3108		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3109		 * to our max jumbo frame size, in case we need to enable
3110		 * jumbo frames on one of the rings later.
3111		 * This will not pass over-length frames into the default
3112		 * queue because it's gated by the VMOLR.RLPML.
3113		 */
3114		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3115	}
3116
3117	wr32(E1000_RLPML, max_frame_size);
3118}
3119
3120static inline void igb_set_vmolr(struct igb_adapter *adapter,
3121				 int vfn, bool aupe)
3122{
3123	struct e1000_hw *hw = &adapter->hw;
3124	u32 vmolr;
3125
3126	/*
3127	 * This register exists only on 82576 and newer, so on older MACs
3128	 * we should exit and do nothing
3129	 */
3130	if (hw->mac.type < e1000_82576)
3131		return;
3132
3133	vmolr = rd32(E1000_VMOLR(vfn));
3134	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3135	if (aupe)
3136		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3137	else
3138		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3139
3140	/* clear all bits that might not be set */
3141	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3142
3143	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3144		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3145	/*
3146	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3147	 * multicast packets
3148	 */
3149	if (vfn <= adapter->vfs_allocated_count)
3150		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3151
3152	wr32(E1000_VMOLR(vfn), vmolr);
3153}
3154
3155/**
3156 * igb_configure_rx_ring - Configure a receive ring after Reset
3157 * @adapter: board private structure
3158 * @ring: receive ring to be configured
3159 *
3160 * Configure the Rx unit of the MAC after a reset.
3161 **/
3162void igb_configure_rx_ring(struct igb_adapter *adapter,
3163                           struct igb_ring *ring)
3164{
3165	struct e1000_hw *hw = &adapter->hw;
3166	u64 rdba = ring->dma;
3167	int reg_idx = ring->reg_idx;
3168	u32 srrctl = 0, rxdctl = 0;
3169
3170	/* disable the queue */
3171	wr32(E1000_RXDCTL(reg_idx), 0);
3172
3173	/* Set DMA base address registers */
3174	wr32(E1000_RDBAL(reg_idx),
3175	     rdba & 0x00000000ffffffffULL);
3176	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3177	wr32(E1000_RDLEN(reg_idx),
3178	               ring->count * sizeof(union e1000_adv_rx_desc));
3179
3180	/* initialize head and tail */
3181	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3182	wr32(E1000_RDH(reg_idx), 0);
3183	writel(0, ring->tail);
3184
3185	/* set descriptor configuration */
3186	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3187#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3188	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3189#else
3190	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3191#endif
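	/* always use header split: packet headers up to IGB_RX_HDR_LEN go
	 * into the header buffer sized above, while the payload lands in a
	 * half-page buffer (capped at 16 KB).
	 */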
3192	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3193#ifdef CONFIG_IGB_PTP
3194	if (hw->mac.type >= e1000_82580)
3195		srrctl |= E1000_SRRCTL_TIMESTAMP;
3196#endif /* CONFIG_IGB_PTP */
3197	/* Only set Drop Enable if we are supporting multiple queues */
3198	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3199		srrctl |= E1000_SRRCTL_DROP_EN;
3200
3201	wr32(E1000_SRRCTL(reg_idx), srrctl);
3202
3203	/* set filtering for VMDQ pools */
3204	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3205
3206	rxdctl |= IGB_RX_PTHRESH;
3207	rxdctl |= IGB_RX_HTHRESH << 8;
3208	rxdctl |= IGB_RX_WTHRESH << 16;
3209
3210	/* enable receive descriptor fetching */
3211	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3212	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3213}
3214
3215/**
3216 * igb_configure_rx - Configure receive Unit after Reset
3217 * @adapter: board private structure
3218 *
3219 * Configure the Rx unit of the MAC after a reset.
3220 **/
3221static void igb_configure_rx(struct igb_adapter *adapter)
3222{
3223	int i;
3224
3225	/* set UTA to appropriate mode */
3226	igb_set_uta(adapter);
3227
3228	/* set the correct pool for the PF default MAC address in entry 0 */
3229	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3230	                 adapter->vfs_allocated_count);
3231
3232	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3233	 * the Base and Length of the Rx Descriptor Ring */
3234	for (i = 0; i < adapter->num_rx_queues; i++)
3235		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3236}
3237
3238/**
3239 * igb_free_tx_resources - Free Tx Resources per Queue
3240 * @tx_ring: Tx descriptor ring for a specific queue
3241 *
3242 * Free all transmit software resources
3243 **/
3244void igb_free_tx_resources(struct igb_ring *tx_ring)
3245{
3246	igb_clean_tx_ring(tx_ring);
3247
3248	vfree(tx_ring->tx_buffer_info);
3249	tx_ring->tx_buffer_info = NULL;
3250
3251	/* if not set, then don't free */
3252	if (!tx_ring->desc)
3253		return;
3254
3255	dma_free_coherent(tx_ring->dev, tx_ring->size,
3256			  tx_ring->desc, tx_ring->dma);
3257
3258	tx_ring->desc = NULL;
3259}
3260
3261/**
3262 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3263 * @adapter: board private structure
3264 *
3265 * Free all transmit software resources
3266 **/
3267static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3268{
3269	int i;
3270
3271	for (i = 0; i < adapter->num_tx_queues; i++)
3272		igb_free_tx_resources(adapter->tx_ring[i]);
3273}
3274
3275void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3276				    struct igb_tx_buffer *tx_buffer)
3277{
3278	if (tx_buffer->skb) {
3279		dev_kfree_skb_any(tx_buffer->skb);
3280		if (tx_buffer->dma)
3281			dma_unmap_single(ring->dev,
3282					 tx_buffer->dma,
3283					 tx_buffer->length,
3284					 DMA_TO_DEVICE);
3285	} else if (tx_buffer->dma) {
3286		dma_unmap_page(ring->dev,
3287			       tx_buffer->dma,
3288			       tx_buffer->length,
3289			       DMA_TO_DEVICE);
3290	}
3291	tx_buffer->next_to_watch = NULL;
3292	tx_buffer->skb = NULL;
3293	tx_buffer->dma = 0;
3294	/* buffer_info must be completely set up in the transmit path */
3295}
3296
3297/**
3298 * igb_clean_tx_ring - Free Tx Buffers
3299 * @tx_ring: ring to be cleaned
3300 **/
3301static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3302{
3303	struct igb_tx_buffer *buffer_info;
3304	unsigned long size;
3305	u16 i;
3306
3307	if (!tx_ring->tx_buffer_info)
3308		return;
3309	/* Free all the Tx ring sk_buffs */
3310
3311	for (i = 0; i < tx_ring->count; i++) {
3312		buffer_info = &tx_ring->tx_buffer_info[i];
3313		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3314	}
3315
3316	netdev_tx_reset_queue(txring_txq(tx_ring));
3317
3318	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3319	memset(tx_ring->tx_buffer_info, 0, size);
3320
3321	/* Zero out the descriptor ring */
3322	memset(tx_ring->desc, 0, tx_ring->size);
3323
3324	tx_ring->next_to_use = 0;
3325	tx_ring->next_to_clean = 0;
3326}
3327
3328/**
3329 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3330 * @adapter: board private structure
3331 **/
3332static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3333{
3334	int i;
3335
3336	for (i = 0; i < adapter->num_tx_queues; i++)
3337		igb_clean_tx_ring(adapter->tx_ring[i]);
3338}
3339
3340/**
3341 * igb_free_rx_resources - Free Rx Resources
3342 * @rx_ring: ring to clean the resources from
3343 *
3344 * Free all receive software resources
3345 **/
3346void igb_free_rx_resources(struct igb_ring *rx_ring)
3347{
3348	igb_clean_rx_ring(rx_ring);
3349
3350	vfree(rx_ring->rx_buffer_info);
3351	rx_ring->rx_buffer_info = NULL;
3352
3353	/* if not set, then don't free */
3354	if (!rx_ring->desc)
3355		return;
3356
3357	dma_free_coherent(rx_ring->dev, rx_ring->size,
3358			  rx_ring->desc, rx_ring->dma);
3359
3360	rx_ring->desc = NULL;
3361}
3362
3363/**
3364 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3365 * @adapter: board private structure
3366 *
3367 * Free all receive software resources
3368 **/
3369static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3370{
3371	int i;
3372
3373	for (i = 0; i < adapter->num_rx_queues; i++)
3374		igb_free_rx_resources(adapter->rx_ring[i]);
3375}
3376
3377/**
3378 * igb_clean_rx_ring - Free Rx Buffers per Queue
3379 * @rx_ring: ring to free buffers from
3380 **/
3381static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3382{
3383	unsigned long size;
3384	u16 i;
3385
3386	if (!rx_ring->rx_buffer_info)
3387		return;
3388
3389	/* Free all the Rx ring sk_buffs */
3390	for (i = 0; i < rx_ring->count; i++) {
3391		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3392		if (buffer_info->dma) {
3393			dma_unmap_single(rx_ring->dev,
3394			                 buffer_info->dma,
3395					 IGB_RX_HDR_LEN,
3396					 DMA_FROM_DEVICE);
3397			buffer_info->dma = 0;
3398		}
3399
3400		if (buffer_info->skb) {
3401			dev_kfree_skb(buffer_info->skb);
3402			buffer_info->skb = NULL;
3403		}
3404		if (buffer_info->page_dma) {
3405			dma_unmap_page(rx_ring->dev,
3406			               buffer_info->page_dma,
3407				       PAGE_SIZE / 2,
3408				       DMA_FROM_DEVICE);
3409			buffer_info->page_dma = 0;
3410		}
3411		if (buffer_info->page) {
3412			put_page(buffer_info->page);
3413			buffer_info->page = NULL;
3414			buffer_info->page_offset = 0;
3415		}
3416	}
3417
3418	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3419	memset(rx_ring->rx_buffer_info, 0, size);
3420
3421	/* Zero out the descriptor ring */
3422	memset(rx_ring->desc, 0, rx_ring->size);
3423
3424	rx_ring->next_to_clean = 0;
3425	rx_ring->next_to_use = 0;
3426}
3427
3428/**
3429 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3430 * @adapter: board private structure
3431 **/
3432static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3433{
3434	int i;
3435
3436	for (i = 0; i < adapter->num_rx_queues; i++)
3437		igb_clean_rx_ring(adapter->rx_ring[i]);
3438}
3439
3440/**
3441 * igb_set_mac - Change the Ethernet Address of the NIC
3442 * @netdev: network interface device structure
3443 * @p: pointer to an address structure
3444 *
3445 * Returns 0 on success, negative on failure
3446 **/
3447static int igb_set_mac(struct net_device *netdev, void *p)
3448{
3449	struct igb_adapter *adapter = netdev_priv(netdev);
3450	struct e1000_hw *hw = &adapter->hw;
3451	struct sockaddr *addr = p;
3452
3453	if (!is_valid_ether_addr(addr->sa_data))
3454		return -EADDRNOTAVAIL;
3455
3456	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3457	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3458
3459	/* set the correct pool for the new PF MAC address in entry 0 */
3460	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3461	                 adapter->vfs_allocated_count);
3462
3463	return 0;
3464}
3465
3466/**
3467 * igb_write_mc_addr_list - write multicast addresses to MTA
3468 * @netdev: network interface device structure
3469 *
3470 * Writes multicast address list to the MTA hash table.
3471 * Returns: -ENOMEM on failure
3472 *                0 on no addresses written
3473 *                X on writing X addresses to MTA
3474 **/
3475static int igb_write_mc_addr_list(struct net_device *netdev)
3476{
3477	struct igb_adapter *adapter = netdev_priv(netdev);
3478	struct e1000_hw *hw = &adapter->hw;
3479	struct netdev_hw_addr *ha;
3480	u8  *mta_list;
3481	int i;
3482
3483	if (netdev_mc_empty(netdev)) {
3484		/* nothing to program, so clear mc list */
3485		igb_update_mc_addr_list(hw, NULL, 0);
3486		igb_restore_vf_multicasts(adapter);
3487		return 0;
3488	}
3489
3490	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3491	if (!mta_list)
3492		return -ENOMEM;
3493
3494	/* The shared function expects a packed array of only addresses. */
3495	i = 0;
3496	netdev_for_each_mc_addr(ha, netdev)
3497		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3498
3499	igb_update_mc_addr_list(hw, mta_list, i);
3500	kfree(mta_list);
3501
3502	return netdev_mc_count(netdev);
3503}
3504
3505/**
3506 * igb_write_uc_addr_list - write unicast addresses to RAR table
3507 * @netdev: network interface device structure
3508 *
3509 * Writes unicast address list to the RAR table.
3510 * Returns: -ENOMEM on failure/insufficient address space
3511 *                0 on no addresses written
3512 *                X on writing X addresses to the RAR table
3513 **/
3514static int igb_write_uc_addr_list(struct net_device *netdev)
3515{
3516	struct igb_adapter *adapter = netdev_priv(netdev);
3517	struct e1000_hw *hw = &adapter->hw;
3518	unsigned int vfn = adapter->vfs_allocated_count;
3519	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
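	/* entry 0 holds the PF default MAC and the last vfn entries hold the
	 * VF MAC addresses, so only the entries in between are usable here
	 * for additional unicast filters.
	 */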
3520	int count = 0;
3521
3522	/* return ENOMEM indicating insufficient memory for addresses */
3523	if (netdev_uc_count(netdev) > rar_entries)
3524		return -ENOMEM;
3525
3526	if (!netdev_uc_empty(netdev) && rar_entries) {
3527		struct netdev_hw_addr *ha;
3528
3529		netdev_for_each_uc_addr(ha, netdev) {
3530			if (!rar_entries)
3531				break;
3532			igb_rar_set_qsel(adapter, ha->addr,
3533			                 rar_entries--,
3534			                 vfn);
3535			count++;
3536		}
3537	}
3538	/* write the addresses in reverse order to avoid write combining */
3539	for (; rar_entries > 0 ; rar_entries--) {
3540		wr32(E1000_RAH(rar_entries), 0);
3541		wr32(E1000_RAL(rar_entries), 0);
3542	}
3543	wrfl();
3544
3545	return count;
3546}
3547
3548/**
3549 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3550 * @netdev: network interface device structure
3551 *
3552 * The set_rx_mode entry point is called whenever the unicast or multicast
3553 * address lists or the network interface flags are updated.  This routine is
3554 * responsible for configuring the hardware for proper unicast, multicast,
3555 * promiscuous mode, and all-multi behavior.
3556 **/
3557static void igb_set_rx_mode(struct net_device *netdev)
3558{
3559	struct igb_adapter *adapter = netdev_priv(netdev);
3560	struct e1000_hw *hw = &adapter->hw;
3561	unsigned int vfn = adapter->vfs_allocated_count;
3562	u32 rctl, vmolr = 0;
3563	int count;
3564
3565	/* Check for Promiscuous and All Multicast modes */
3566	rctl = rd32(E1000_RCTL);
3567
3568	/* clear the affected bits */
3569	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3570
3571	if (netdev->flags & IFF_PROMISC) {
3572		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3573		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3574	} else {
3575		if (netdev->flags & IFF_ALLMULTI) {
3576			rctl |= E1000_RCTL_MPE;
3577			vmolr |= E1000_VMOLR_MPME;
3578		} else {
3579			/*
3580			 * Write addresses to the MTA, if the attempt fails
3581			 * then we should just turn on promiscuous mode so
3582			 * that we can at least receive multicast traffic
3583			 */
3584			count = igb_write_mc_addr_list(netdev);
3585			if (count < 0) {
3586				rctl |= E1000_RCTL_MPE;
3587				vmolr |= E1000_VMOLR_MPME;
3588			} else if (count) {
3589				vmolr |= E1000_VMOLR_ROMPE;
3590			}
3591		}
3592		/*
3593		 * Write addresses to available RAR registers, if there is not
3594		 * sufficient space to store all the addresses then enable
3595		 * unicast promiscuous mode
3596		 */
3597		count = igb_write_uc_addr_list(netdev);
3598		if (count < 0) {
3599			rctl |= E1000_RCTL_UPE;
3600			vmolr |= E1000_VMOLR_ROPE;
3601		}
3602		rctl |= E1000_RCTL_VFE;
3603	}
3604	wr32(E1000_RCTL, rctl);
3605
3606	/*
3607	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3608	 * the VMOLR to enable the appropriate modes.  Without this workaround
3609	 * we will have issues with VLAN tag stripping not being done for frames
3610	 * that are only arriving because we are the default pool
3611	 */
3612	if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3613		return;
3614
3615	vmolr |= rd32(E1000_VMOLR(vfn)) &
3616	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3617	wr32(E1000_VMOLR(vfn), vmolr);
3618	igb_restore_vf_multicasts(adapter);
3619}
3620
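/* For reference (as understood from the 82576/i350 datasheets): ROPE accepts
 * frames that hit the unicast hash table, ROMPE accepts frames that hit the
 * multicast table array and MPME makes the pool multicast promiscuous.  Only
 * the PF pool (index vfs_allocated_count) is touched here; the per-VF pools
 * are programmed from the mailbox handlers further down in this file.
 */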
3621static void igb_check_wvbr(struct igb_adapter *adapter)
3622{
3623	struct e1000_hw *hw = &adapter->hw;
3624	u32 wvbr = 0;
3625
3626	switch (hw->mac.type) {
3627	case e1000_82576:
3628	case e1000_i350:
3629		wvbr = rd32(E1000_WVBR);
3630		break;
3631	default:
3632		return;
3633	}
3634
3635	if (wvbr)
3636		adapter->wvbr |= wvbr;
3637}
3638
3639#define IGB_STAGGERED_QUEUE_OFFSET 8
3640
3641static void igb_spoof_check(struct igb_adapter *adapter)
3642{
3643	int j;
3644
3645	if (!adapter->wvbr)
3646		return;
3647
3648	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3649		if (adapter->wvbr & (1 << j) ||
3650		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3651			dev_warn(&adapter->pdev->dev,
3652				"Spoof event(s) detected on VF %d\n", j);
3653			adapter->wvbr &=
3654				~((1 << j) |
3655				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3656		}
3657	}
3658}
3659
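/* The WVBR register packs anti-spoof events as one bit per VF plus a second
 * set of bits staggered by IGB_STAGGERED_QUEUE_OFFSET; which set reports MAC
 * and which reports VLAN violations is hardware specific, so both bits are
 * tested and cleared together for every VF above.
 */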
3660/* Need to wait a few seconds after link up to get diagnostic information from
3661 * the phy */
3662static void igb_update_phy_info(unsigned long data)
3663{
3664	struct igb_adapter *adapter = (struct igb_adapter *) data;
3665	igb_get_phy_info(&adapter->hw);
3666}
3667
3668/**
3669 * igb_has_link - check shared code for link and determine up/down
3670 * @adapter: pointer to driver private info
3671 **/
3672bool igb_has_link(struct igb_adapter *adapter)
3673{
3674	struct e1000_hw *hw = &adapter->hw;
3675	bool link_active = false;
3676	s32 ret_val = 0;
3677
3678	/* get_link_status is set on LSC (link status) interrupt or
3679	 * rx sequence error interrupt.  It remains set until
3680	 * e1000_check_for_link establishes link, so link is reported
3681	 * up only after that check, for copper adapters ONLY
3682	 */
3683	switch (hw->phy.media_type) {
3684	case e1000_media_type_copper:
3685		if (hw->mac.get_link_status) {
3686			ret_val = hw->mac.ops.check_for_link(hw);
3687			link_active = !hw->mac.get_link_status;
3688		} else {
3689			link_active = true;
3690		}
3691		break;
3692	case e1000_media_type_internal_serdes:
3693		ret_val = hw->mac.ops.check_for_link(hw);
3694		link_active = hw->mac.serdes_has_link;
3695		break;
3696	default:
3697	case e1000_media_type_unknown:
3698		break;
3699	}
3700
3701	return link_active;
3702}
3703
3704static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3705{
3706	bool ret = false;
3707	u32 ctrl_ext, thstat;
3708
3709	/* check for thermal sensor event on i350 copper only */
3710	if (hw->mac.type == e1000_i350) {
3711		thstat = rd32(E1000_THSTAT);
3712		ctrl_ext = rd32(E1000_CTRL_EXT);
3713
3714		if ((hw->phy.media_type == e1000_media_type_copper) &&
3715		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3716			ret = !!(thstat & event);
3717		}
3718	}
3719
3720	return ret;
3721}
3722
3723/**
3724 * igb_watchdog - Timer Call-back
3725 * @data: pointer to adapter cast into an unsigned long
3726 **/
3727static void igb_watchdog(unsigned long data)
3728{
3729	struct igb_adapter *adapter = (struct igb_adapter *)data;
3730	/* Do the rest outside of interrupt context */
3731	schedule_work(&adapter->watchdog_task);
3732}
3733
3734static void igb_watchdog_task(struct work_struct *work)
3735{
3736	struct igb_adapter *adapter = container_of(work,
3737	                                           struct igb_adapter,
3738                                                   watchdog_task);
3739	struct e1000_hw *hw = &adapter->hw;
3740	struct net_device *netdev = adapter->netdev;
3741	u32 link;
3742	int i;
3743
3744	link = igb_has_link(adapter);
3745	if (link) {
3746		/* Cancel scheduled suspend requests. */
3747		pm_runtime_resume(netdev->dev.parent);
3748
3749		if (!netif_carrier_ok(netdev)) {
3750			u32 ctrl;
3751			hw->mac.ops.get_speed_and_duplex(hw,
3752			                                 &adapter->link_speed,
3753			                                 &adapter->link_duplex);
3754
3755			ctrl = rd32(E1000_CTRL);
3756			/* Link status message must follow this format */
3757			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3758			       "Duplex, Flow Control: %s\n",
3759			       netdev->name,
3760			       adapter->link_speed,
3761			       adapter->link_duplex == FULL_DUPLEX ?
3762			       "Full" : "Half",
3763			       (ctrl & E1000_CTRL_TFCE) &&
3764			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3765			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3766			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3767
3768			/* check for thermal sensor event */
3769			if (igb_thermal_sensor_event(hw,
3770			    E1000_THSTAT_LINK_THROTTLE)) {
3771				netdev_info(netdev, "The network adapter link "
3772					    "speed was downshifted because it "
3773					    "overheated\n");
3774			}
3775
3776			/* adjust timeout factor according to speed/duplex */
3777			adapter->tx_timeout_factor = 1;
3778			switch (adapter->link_speed) {
3779			case SPEED_10:
3780				adapter->tx_timeout_factor = 14;
3781				break;
3782			case SPEED_100:
3783				/* maybe add some timeout factor ? */
3784				break;
3785			}
3786
3787			netif_carrier_on(netdev);
3788
3789			igb_ping_all_vfs(adapter);
3790			igb_check_vf_rate_limit(adapter);
3791
3792			/* link state has changed, schedule phy info update */
3793			if (!test_bit(__IGB_DOWN, &adapter->state))
3794				mod_timer(&adapter->phy_info_timer,
3795					  round_jiffies(jiffies + 2 * HZ));
3796		}
3797	} else {
3798		if (netif_carrier_ok(netdev)) {
3799			adapter->link_speed = 0;
3800			adapter->link_duplex = 0;
3801
3802			/* check for thermal sensor event */
3803			if (igb_thermal_sensor_event(hw,
3804			    E1000_THSTAT_PWR_DOWN)) {
3805				netdev_err(netdev, "The network adapter was "
3806					   "stopped because it overheated\n");
3807			}
3808
3809			/* Link status message must follow this format */
3810			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3811			       netdev->name);
3812			netif_carrier_off(netdev);
3813
3814			igb_ping_all_vfs(adapter);
3815
3816			/* link state has changed, schedule phy info update */
3817			if (!test_bit(__IGB_DOWN, &adapter->state))
3818				mod_timer(&adapter->phy_info_timer,
3819					  round_jiffies(jiffies + 2 * HZ));
3820
3821			pm_schedule_suspend(netdev->dev.parent,
3822					    MSEC_PER_SEC * 5);
3823		}
3824	}
3825
3826	spin_lock(&adapter->stats64_lock);
3827	igb_update_stats(adapter, &adapter->stats64);
3828	spin_unlock(&adapter->stats64_lock);
3829
3830	for (i = 0; i < adapter->num_tx_queues; i++) {
3831		struct igb_ring *tx_ring = adapter->tx_ring[i];
3832		if (!netif_carrier_ok(netdev)) {
3833			/* We've lost link, so the controller stops DMA,
3834			 * but we've got queued Tx work that's never going
3835			 * to get done, so reset controller to flush Tx.
3836			 * (Do the reset outside of interrupt context). */
3837			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3838				adapter->tx_timeout_count++;
3839				schedule_work(&adapter->reset_task);
3840				/* return immediately since reset is imminent */
3841				return;
3842			}
3843		}
3844
3845		/* Force detection of hung controller every watchdog period */
3846		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3847	}
3848
3849	/* Cause software interrupt to ensure rx ring is cleaned */
3850	if (adapter->msix_entries) {
3851		u32 eics = 0;
3852		for (i = 0; i < adapter->num_q_vectors; i++)
3853			eics |= adapter->q_vector[i]->eims_value;
3854		wr32(E1000_EICS, eics);
3855	} else {
3856		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3857	}
3858
3859	igb_spoof_check(adapter);
3860
3861	/* Reset the timer */
3862	if (!test_bit(__IGB_DOWN, &adapter->state))
3863		mod_timer(&adapter->watchdog_timer,
3864			  round_jiffies(jiffies + 2 * HZ));
3865}
3866
3867enum latency_range {
3868	lowest_latency = 0,
3869	low_latency = 1,
3870	bulk_latency = 2,
3871	latency_invalid = 255
3872};
3873
3874/**
3875 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3876 * @q_vector: pointer to q_vector
3877 *
3878 *      Stores a new ITR value based strictly on packet size.  This
3879 *      algorithm is less sophisticated than that used in igb_update_itr,
3880 *      due to the difficulty of synchronizing statistics across multiple
3881 *      receive rings.  The divisors and thresholds used by this function
3882 *      were determined based on theoretical maximum wire speed and testing
3883 *      data, in order to minimize response time while increasing bulk
3884 *      throughput.
3885 *      This functionality is controlled by the InterruptThrottleRate module
3886 *      parameter (see igb_param.c)
3887 *      NOTE:  This function is called only when operating in a multiqueue
3888 *             receive environment.
3889 **/
3890static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3891{
3892	int new_val = q_vector->itr_val;
3893	int avg_wire_size = 0;
3894	struct igb_adapter *adapter = q_vector->adapter;
3895	unsigned int packets;
3896
3897	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3898	 * ints/sec - ITR timer value of 120 ticks.
3899	 */
3900	if (adapter->link_speed != SPEED_1000) {
3901		new_val = IGB_4K_ITR;
3902		goto set_itr_val;
3903	}
3904
3905	packets = q_vector->rx.total_packets;
3906	if (packets)
3907		avg_wire_size = q_vector->rx.total_bytes / packets;
3908
3909	packets = q_vector->tx.total_packets;
3910	if (packets)
3911		avg_wire_size = max_t(u32, avg_wire_size,
3912				      q_vector->tx.total_bytes / packets);
3913
3914	/* if avg_wire_size isn't set no work was done */
3915	if (!avg_wire_size)
3916		goto clear_counts;
3917
3918	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3919	avg_wire_size += 24;
3920
3921	/* Don't starve jumbo frames */
3922	avg_wire_size = min(avg_wire_size, 3000);
3923
3924	/* Give a little boost to mid-size frames */
3925	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3926		new_val = avg_wire_size / 3;
3927	else
3928		new_val = avg_wire_size / 2;
3929
3930	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3931	if (new_val < IGB_20K_ITR &&
3932	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3933	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3934		new_val = IGB_20K_ITR;
3935
3936set_itr_val:
3937	if (new_val != q_vector->itr_val) {
3938		q_vector->itr_val = new_val;
3939		q_vector->set_itr = 1;
3940	}
3941clear_counts:
3942	q_vector->rx.total_bytes = 0;
3943	q_vector->rx.total_packets = 0;
3944	q_vector->tx.total_bytes = 0;
3945	q_vector->tx.total_packets = 0;
3946}
3947
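/* Worked example of the heuristic above: packets averaging 576 bytes on the
 * wire become avg_wire_size = 600 after the 24 byte CRC/preamble/IPG
 * adjustment, land in the 300..1200 "mid-size" band and give
 * new_val = 600 / 3 = 200.  Traffic near 1500 bytes gives 1524 / 2 = 762,
 * i.e. a longer EITR interval and therefore a lower interrupt rate for bulk
 * transfers.
 */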
3948/**
3949 * igb_update_itr - update the dynamic ITR value based on statistics
3950 * @q_vector: pointer to q_vector
3951 * @ring_container: ring info to update the itr for
3952 *      Stores a new ITR value based on packets and byte
3953 *      counts during the last interrupt.  The advantage of per interrupt
3954 *      computation is faster updates and more accurate ITR for the current
3955 *      traffic pattern.  Constants in this function were computed
3956 *      based on theoretical maximum wire speed and thresholds were set based
3957 *      on testing data as well as attempting to minimize response time
3958 *      while increasing bulk throughput.
3959 *      This functionality is controlled by the InterruptThrottleRate module
3960 *      parameter (see igb_param.c)
3961 *      NOTE:  These calculations are only valid when operating in a single-
3962 *             queue environment.
3963 **/
3964static void igb_update_itr(struct igb_q_vector *q_vector,
3965			   struct igb_ring_container *ring_container)
3966{
3967	unsigned int packets = ring_container->total_packets;
3968	unsigned int bytes = ring_container->total_bytes;
3969	u8 itrval = ring_container->itr;
3970
3971	/* no packets, exit with status unchanged */
3972	if (packets == 0)
3973		return;
3974
3975	switch (itrval) {
3976	case lowest_latency:
3977		/* handle TSO and jumbo frames */
3978		if (bytes/packets > 8000)
3979			itrval = bulk_latency;
3980		else if ((packets < 5) && (bytes > 512))
3981			itrval = low_latency;
3982		break;
3983	case low_latency:  /* 50 usec aka 20000 ints/s */
3984		if (bytes > 10000) {
3985			/* this if handles the TSO accounting */
3986			if (bytes/packets > 8000) {
3987				itrval = bulk_latency;
3988			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3989				itrval = bulk_latency;
3990			} else if ((packets > 35)) {
3991				itrval = lowest_latency;
3992			}
3993		} else if (bytes/packets > 2000) {
3994			itrval = bulk_latency;
3995		} else if (packets <= 2 && bytes < 512) {
3996			itrval = lowest_latency;
3997		}
3998		break;
3999	case bulk_latency: /* 250 usec aka 4000 ints/s */
4000		if (bytes > 25000) {
4001			if (packets > 35)
4002				itrval = low_latency;
4003		} else if (bytes < 1500) {
4004			itrval = low_latency;
4005		}
4006		break;
4007	}
4008
4009	/* clear work counters since we have the values we need */
4010	ring_container->total_bytes = 0;
4011	ring_container->total_packets = 0;
4012
4013	/* write updated itr to ring container */
4014	ring_container->itr = itrval;
4015}
4016
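/* Worked example: a queue sitting in low_latency that services 40 packets
 * totalling 60000 bytes in one interrupt sees bytes > 10000 and
 * bytes/packets = 1500 > 1200, so it is demoted to bulk_latency and
 * igb_set_itr() below maps that to IGB_4K_ITR.  A later interrupt with only
 * a couple of small packets (bytes < 1500) promotes it back to low_latency.
 */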
4017static void igb_set_itr(struct igb_q_vector *q_vector)
4018{
4019	struct igb_adapter *adapter = q_vector->adapter;
4020	u32 new_itr = q_vector->itr_val;
4021	u8 current_itr = 0;
4022
4023	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4024	if (adapter->link_speed != SPEED_1000) {
4025		current_itr = 0;
4026		new_itr = IGB_4K_ITR;
4027		goto set_itr_now;
4028	}
4029
4030	igb_update_itr(q_vector, &q_vector->tx);
4031	igb_update_itr(q_vector, &q_vector->rx);
4032
4033	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4034
4035	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4036	if (current_itr == lowest_latency &&
4037	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4038	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4039		current_itr = low_latency;
4040
4041	switch (current_itr) {
4042	/* counts and packets in update_itr are dependent on these numbers */
4043	case lowest_latency:
4044		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4045		break;
4046	case low_latency:
4047		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4048		break;
4049	case bulk_latency:
4050		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4051		break;
4052	default:
4053		break;
4054	}
4055
4056set_itr_now:
4057	if (new_itr != q_vector->itr_val) {
4058		/* this attempts to bias the interrupt rate towards Bulk
4059		 * by adding intermediate steps when interrupt rate is
4060		 * increasing */
4061		new_itr = new_itr > q_vector->itr_val ?
4062		             max((new_itr * q_vector->itr_val) /
4063		                 (new_itr + (q_vector->itr_val >> 2)),
4064				 new_itr) :
4065			     new_itr;
4066		/* Don't write the value here; it resets the adapter's
4067		 * internal timer, and causes us to delay far longer than
4068		 * we should between interrupts.  Instead, we write the ITR
4069		 * value at the beginning of the next interrupt so the timing
4070		 * ends up being correct.
4071		 */
4072		q_vector->itr_val = new_itr;
4073		q_vector->set_itr = 1;
4074	}
4075}
4076
4077static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4078			    u32 type_tucmd, u32 mss_l4len_idx)
4079{
4080	struct e1000_adv_tx_context_desc *context_desc;
4081	u16 i = tx_ring->next_to_use;
4082
4083	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4084
4085	i++;
4086	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4087
4088	/* set bits to identify this as an advanced context descriptor */
4089	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4090
4091	/* For 82575, context index must be unique per ring. */
4092	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4093		mss_l4len_idx |= tx_ring->reg_idx << 4;
4094
4095	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4096	context_desc->seqnum_seed	= 0;
4097	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4098	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4099}
4100
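/* Field packing, per the shift constants used by the callers: vlan_macip_lens
 * carries the VLAN tag in the upper 16 bits, the MAC header length at
 * E1000_ADVTXD_MACLEN_SHIFT and the IP header length in the low bits, while
 * mss_l4len_idx carries the MSS at E1000_ADVTXD_MSS_SHIFT, the L4 header
 * length at E1000_ADVTXD_L4LEN_SHIFT and, when TX_CTX_IDX is set, the
 * per-ring context index in bits 7:4.
 */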
4101static int igb_tso(struct igb_ring *tx_ring,
4102		   struct igb_tx_buffer *first,
4103		   u8 *hdr_len)
4104{
4105	struct sk_buff *skb = first->skb;
4106	u32 vlan_macip_lens, type_tucmd;
4107	u32 mss_l4len_idx, l4len;
4108
4109	if (!skb_is_gso(skb))
4110		return 0;
4111
4112	if (skb_header_cloned(skb)) {
4113		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4114		if (err)
4115			return err;
4116	}
4117
4118	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4119	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4120
4121	if (first->protocol == __constant_htons(ETH_P_IP)) {
4122		struct iphdr *iph = ip_hdr(skb);
4123		iph->tot_len = 0;
4124		iph->check = 0;
4125		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4126							 iph->daddr, 0,
4127							 IPPROTO_TCP,
4128							 0);
4129		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4130		first->tx_flags |= IGB_TX_FLAGS_TSO |
4131				   IGB_TX_FLAGS_CSUM |
4132				   IGB_TX_FLAGS_IPV4;
4133	} else if (skb_is_gso_v6(skb)) {
4134		ipv6_hdr(skb)->payload_len = 0;
4135		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4136						       &ipv6_hdr(skb)->daddr,
4137						       0, IPPROTO_TCP, 0);
4138		first->tx_flags |= IGB_TX_FLAGS_TSO |
4139				   IGB_TX_FLAGS_CSUM;
4140	}
4141
4142	/* compute header lengths */
4143	l4len = tcp_hdrlen(skb);
4144	*hdr_len = skb_transport_offset(skb) + l4len;
4145
4146	/* update gso size and bytecount with header size */
4147	first->gso_segs = skb_shinfo(skb)->gso_segs;
4148	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4149
4150	/* MSS L4LEN IDX */
4151	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4152	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4153
4154	/* VLAN MACLEN IPLEN */
4155	vlan_macip_lens = skb_network_header_len(skb);
4156	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4157	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4158
4159	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4160
4161	return 1;
4162}
4163
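/* Example of the bytecount adjustment above: a 9000 byte TCP payload with an
 * MSS of 1448 and a 66 byte Ethernet/IP/TCP header is reported by the stack
 * as gso_segs = 7, so bytecount grows by (7 - 1) * 66 = 396 and ends up
 * matching the bytes actually placed on the wire, which keeps the BQL
 * accounting done via netdev_tx_sent_queue() in igb_tx_map() accurate.
 */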
4164static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4165{
4166	struct sk_buff *skb = first->skb;
4167	u32 vlan_macip_lens = 0;
4168	u32 mss_l4len_idx = 0;
4169	u32 type_tucmd = 0;
4170
4171	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4172		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4173			return;
4174	} else {
4175		u8 l4_hdr = 0;
4176		switch (first->protocol) {
4177		case __constant_htons(ETH_P_IP):
4178			vlan_macip_lens |= skb_network_header_len(skb);
4179			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4180			l4_hdr = ip_hdr(skb)->protocol;
4181			break;
4182		case __constant_htons(ETH_P_IPV6):
4183			vlan_macip_lens |= skb_network_header_len(skb);
4184			l4_hdr = ipv6_hdr(skb)->nexthdr;
4185			break;
4186		default:
4187			if (unlikely(net_ratelimit())) {
4188				dev_warn(tx_ring->dev,
4189				 "partial checksum but proto=%x!\n",
4190				 first->protocol);
4191			}
4192			break;
4193		}
4194
4195		switch (l4_hdr) {
4196		case IPPROTO_TCP:
4197			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4198			mss_l4len_idx = tcp_hdrlen(skb) <<
4199					E1000_ADVTXD_L4LEN_SHIFT;
4200			break;
4201		case IPPROTO_SCTP:
4202			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4203			mss_l4len_idx = sizeof(struct sctphdr) <<
4204					E1000_ADVTXD_L4LEN_SHIFT;
4205			break;
4206		case IPPROTO_UDP:
4207			mss_l4len_idx = sizeof(struct udphdr) <<
4208					E1000_ADVTXD_L4LEN_SHIFT;
4209			break;
4210		default:
4211			if (unlikely(net_ratelimit())) {
4212				dev_warn(tx_ring->dev,
4213				 "partial checksum but l4 proto=%x!\n",
4214				 l4_hdr);
4215			}
4216			break;
4217		}
4218
4219		/* update TX checksum flag */
4220		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4221	}
4222
4223	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4224	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4225
4226	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4227}
4228
4229static __le32 igb_tx_cmd_type(u32 tx_flags)
4230{
4231	/* set type for advanced descriptor with frame checksum insertion */
4232	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4233				      E1000_ADVTXD_DCMD_IFCS |
4234				      E1000_ADVTXD_DCMD_DEXT);
4235
4236	/* set HW vlan bit if vlan is present */
4237	if (tx_flags & IGB_TX_FLAGS_VLAN)
4238		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4239
4240#ifdef CONFIG_IGB_PTP
4241	/* set timestamp bit if present */
4242	if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP))
4243		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4244#endif /* CONFIG_IGB_PTP */
4245
4246	/* set segmentation bits for TSO */
4247	if (tx_flags & IGB_TX_FLAGS_TSO)
4248		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4249
4250	return cmd_type;
4251}
4252
4253static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4254				 union e1000_adv_tx_desc *tx_desc,
4255				 u32 tx_flags, unsigned int paylen)
4256{
4257	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4258
4259	/* 82575 requires a unique index per ring if any offload is enabled */
4260	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4261	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4262		olinfo_status |= tx_ring->reg_idx << 4;
4263
4264	/* insert L4 checksum */
4265	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4266		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4267
4268		/* insert IPv4 checksum */
4269		if (tx_flags & IGB_TX_FLAGS_IPV4)
4270			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4271	}
4272
4273	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4274}
4275
4276/*
4277 * The largest size we can write to the descriptor is 65535.  In order to
4278 * maintain a power of two alignment we have to limit ourselves to 32K.
4279 */
4280#define IGB_MAX_TXD_PWR	15
4281#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
4282
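/* As an illustration of the limit above, igb_tx_map() splits any buffer
 * larger than IGB_MAX_DATA_PER_TXD across several data descriptors: a 45KB
 * fragment is emitted as one 32KB descriptor followed by a 13KB descriptor
 * at the advanced DMA address.
 */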
4283static void igb_tx_map(struct igb_ring *tx_ring,
4284		       struct igb_tx_buffer *first,
4285		       const u8 hdr_len)
4286{
4287	struct sk_buff *skb = first->skb;
4288	struct igb_tx_buffer *tx_buffer_info;
4289	union e1000_adv_tx_desc *tx_desc;
4290	dma_addr_t dma;
4291	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4292	unsigned int data_len = skb->data_len;
4293	unsigned int size = skb_headlen(skb);
4294	unsigned int paylen = skb->len - hdr_len;
4295	__le32 cmd_type;
4296	u32 tx_flags = first->tx_flags;
4297	u16 i = tx_ring->next_to_use;
4298
4299	tx_desc = IGB_TX_DESC(tx_ring, i);
4300
4301	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4302	cmd_type = igb_tx_cmd_type(tx_flags);
4303
4304	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4305	if (dma_mapping_error(tx_ring->dev, dma))
4306		goto dma_error;
4307
4308	/* record length, and DMA address */
4309	first->length = size;
4310	first->dma = dma;
4311	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4312
4313	for (;;) {
4314		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4315			tx_desc->read.cmd_type_len =
4316				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4317
4318			i++;
4319			tx_desc++;
4320			if (i == tx_ring->count) {
4321				tx_desc = IGB_TX_DESC(tx_ring, 0);
4322				i = 0;
4323			}
4324
4325			dma += IGB_MAX_DATA_PER_TXD;
4326			size -= IGB_MAX_DATA_PER_TXD;
4327
4328			tx_desc->read.olinfo_status = 0;
4329			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4330		}
4331
4332		if (likely(!data_len))
4333			break;
4334
4335		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4336
4337		i++;
4338		tx_desc++;
4339		if (i == tx_ring->count) {
4340			tx_desc = IGB_TX_DESC(tx_ring, 0);
4341			i = 0;
4342		}
4343
4344		size = skb_frag_size(frag);
4345		data_len -= size;
4346
4347		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4348				   size, DMA_TO_DEVICE);
4349		if (dma_mapping_error(tx_ring->dev, dma))
4350			goto dma_error;
4351
4352		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4353		tx_buffer_info->length = size;
4354		tx_buffer_info->dma = dma;
4355
4356		tx_desc->read.olinfo_status = 0;
4357		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4358
4359		frag++;
4360	}
4361
4362	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4363
4364	/* write last descriptor with RS and EOP bits */
4365	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4366	if (unlikely(skb->no_fcs))
4367		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4368	tx_desc->read.cmd_type_len = cmd_type;
4369
4370	/* set the timestamp */
4371	first->time_stamp = jiffies;
4372
4373	/*
4374	 * Force memory writes to complete before letting h/w know there
4375	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4376	 * memory model archs, such as IA-64).
4377	 *
4378	 * We also need this memory barrier to make certain all of the
4379	 * status bits have been updated before next_to_watch is written.
4380	 */
4381	wmb();
4382
4383	/* set next_to_watch value indicating a packet is present */
4384	first->next_to_watch = tx_desc;
4385
4386	i++;
4387	if (i == tx_ring->count)
4388		i = 0;
4389
4390	tx_ring->next_to_use = i;
4391
4392	writel(i, tx_ring->tail);
4393
4394	/* we need this if more than one processor can write to our tail
4395	 * at a time, it synchronizes IO on IA64/Altix systems */
4396	mmiowb();
4397
4398	return;
4399
4400dma_error:
4401	dev_err(tx_ring->dev, "TX DMA map failed\n");
4402
4403	/* clear dma mappings for failed tx_buffer_info map */
4404	for (;;) {
4405		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4406		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4407		if (tx_buffer_info == first)
4408			break;
4409		if (i == 0)
4410			i = tx_ring->count;
4411		i--;
4412	}
4413
4414	tx_ring->next_to_use = i;
4415}
4416
4417static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4418{
4419	struct net_device *netdev = tx_ring->netdev;
4420
4421	netif_stop_subqueue(netdev, tx_ring->queue_index);
4422
4423	/* Herbert's original patch had:
4424	 *  smp_mb__after_netif_stop_queue();
4425	 * but since that doesn't exist yet, just open code it. */
4426	smp_mb();
4427
4428	/* We need to check again in case another CPU has just
4429	 * made room available. */
4430	if (igb_desc_unused(tx_ring) < size)
4431		return -EBUSY;
4432
4433	/* A reprieve! */
4434	netif_wake_subqueue(netdev, tx_ring->queue_index);
4435
4436	u64_stats_update_begin(&tx_ring->tx_syncp2);
4437	tx_ring->tx_stats.restart_queue2++;
4438	u64_stats_update_end(&tx_ring->tx_syncp2);
4439
4440	return 0;
4441}
4442
4443static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4444{
4445	if (igb_desc_unused(tx_ring) >= size)
4446		return 0;
4447	return __igb_maybe_stop_tx(tx_ring, size);
4448}
4449
4450netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4451				struct igb_ring *tx_ring)
4452{
4453#ifdef CONFIG_IGB_PTP
4454	struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4455#endif /* CONFIG_IGB_PTP */
4456	struct igb_tx_buffer *first;
4457	int tso;
4458	u32 tx_flags = 0;
4459	__be16 protocol = vlan_get_protocol(skb);
4460	u8 hdr_len = 0;
4461
4462	/* need: 1 descriptor per page,
4463	 *       + 2 desc gap to keep tail from touching head,
4464	 *       + 1 desc for skb->data,
4465	 *       + 1 desc for context descriptor,
4466	 * otherwise try next time */
4467	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4468		/* this is a hard error */
4469		return NETDEV_TX_BUSY;
4470	}
4471
4472	/* record the location of the first descriptor for this packet */
4473	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4474	first->skb = skb;
4475	first->bytecount = skb->len;
4476	first->gso_segs = 1;
4477
4478#ifdef CONFIG_IGB_PTP
4479	if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
4480		     !(adapter->ptp_tx_skb))) {
4481		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4482		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4483
4484		adapter->ptp_tx_skb = skb_get(skb);
4485		if (adapter->hw.mac.type == e1000_82576)
4486			schedule_work(&adapter->ptp_tx_work);
4487	}
4488#endif /* CONFIG_IGB_PTP */
4489
4490	if (vlan_tx_tag_present(skb)) {
4491		tx_flags |= IGB_TX_FLAGS_VLAN;
4492		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4493	}
4494
4495	/* record initial flags and protocol */
4496	first->tx_flags = tx_flags;
4497	first->protocol = protocol;
4498
4499	tso = igb_tso(tx_ring, first, &hdr_len);
4500	if (tso < 0)
4501		goto out_drop;
4502	else if (!tso)
4503		igb_tx_csum(tx_ring, first);
4504
4505	igb_tx_map(tx_ring, first, hdr_len);
4506
4507	/* Make sure there is space in the ring for the next send. */
4508	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4509
4510	return NETDEV_TX_OK;
4511
4512out_drop:
4513	igb_unmap_and_free_tx_resource(tx_ring, first);
4514
4515	return NETDEV_TX_OK;
4516}
4517
4518static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4519						    struct sk_buff *skb)
4520{
4521	unsigned int r_idx = skb->queue_mapping;
4522
4523	if (r_idx >= adapter->num_tx_queues)
4524		r_idx = r_idx % adapter->num_tx_queues;
4525
4526	return adapter->tx_ring[r_idx];
4527}
4528
4529static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4530				  struct net_device *netdev)
4531{
4532	struct igb_adapter *adapter = netdev_priv(netdev);
4533
4534	if (test_bit(__IGB_DOWN, &adapter->state)) {
4535		dev_kfree_skb_any(skb);
4536		return NETDEV_TX_OK;
4537	}
4538
4539	if (skb->len <= 0) {
4540		dev_kfree_skb_any(skb);
4541		return NETDEV_TX_OK;
4542	}
4543
4544	/*
4545	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4546	 * in order to meet this minimum size requirement.
4547	 */
4548	if (skb->len < 17) {
4549		if (skb_padto(skb, 17))
4550			return NETDEV_TX_OK;
4551		skb->len = 17;
4552	}
4553
4554	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4555}
4556
4557/**
4558 * igb_tx_timeout - Respond to a Tx Hang
4559 * @netdev: network interface device structure
4560 **/
4561static void igb_tx_timeout(struct net_device *netdev)
4562{
4563	struct igb_adapter *adapter = netdev_priv(netdev);
4564	struct e1000_hw *hw = &adapter->hw;
4565
4566	/* Do the reset outside of interrupt context */
4567	adapter->tx_timeout_count++;
4568
4569	if (hw->mac.type >= e1000_82580)
4570		hw->dev_spec._82575.global_device_reset = true;
4571
4572	schedule_work(&adapter->reset_task);
4573	wr32(E1000_EICS,
4574	     (adapter->eims_enable_mask & ~adapter->eims_other));
4575}
4576
4577static void igb_reset_task(struct work_struct *work)
4578{
4579	struct igb_adapter *adapter;
4580	adapter = container_of(work, struct igb_adapter, reset_task);
4581
4582	igb_dump(adapter);
4583	netdev_err(adapter->netdev, "Reset adapter\n");
4584	igb_reinit_locked(adapter);
4585}
4586
4587/**
4588 * igb_get_stats64 - Get System Network Statistics
4589 * @netdev: network interface device structure
4590 * @stats: rtnl_link_stats64 pointer
4591 *
4592 **/
4593static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4594						 struct rtnl_link_stats64 *stats)
4595{
4596	struct igb_adapter *adapter = netdev_priv(netdev);
4597
4598	spin_lock(&adapter->stats64_lock);
4599	igb_update_stats(adapter, &adapter->stats64);
4600	memcpy(stats, &adapter->stats64, sizeof(*stats));
4601	spin_unlock(&adapter->stats64_lock);
4602
4603	return stats;
4604}
4605
4606/**
4607 * igb_change_mtu - Change the Maximum Transfer Unit
4608 * @netdev: network interface device structure
4609 * @new_mtu: new value for maximum frame size
4610 *
4611 * Returns 0 on success, negative on failure
4612 **/
4613static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4614{
4615	struct igb_adapter *adapter = netdev_priv(netdev);
4616	struct pci_dev *pdev = adapter->pdev;
4617	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4618
4619	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4620		dev_err(&pdev->dev, "Invalid MTU setting\n");
4621		return -EINVAL;
4622	}
4623
4624#define MAX_STD_JUMBO_FRAME_SIZE 9238
4625	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4626		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4627		return -EINVAL;
4628	}
4629
4630	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4631		msleep(1);
4632
4633	/* igb_down has a dependency on max_frame_size */
4634	adapter->max_frame_size = max_frame;
4635
4636	if (netif_running(netdev))
4637		igb_down(adapter);
4638
4639	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4640		 netdev->mtu, new_mtu);
4641	netdev->mtu = new_mtu;
4642
4643	if (netif_running(netdev))
4644		igb_up(adapter);
4645	else
4646		igb_reset(adapter);
4647
4648	clear_bit(__IGB_RESETTING, &adapter->state);
4649
4650	return 0;
4651}
4652
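/* max_frame accounting, for illustration: an MTU of 9000 yields
 * max_frame = 9000 + 14 (Ethernet header) + 4 (FCS) + 4 (VLAN) = 9022, and
 * the largest MTU accepted is MAX_STD_JUMBO_FRAME_SIZE - 22 = 9216, which is
 * the figure quoted in the error message above.
 */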
4653/**
4654 * igb_update_stats - Update the board statistics counters
4655 * @adapter: board private structure
4656 **/
4657
4658void igb_update_stats(struct igb_adapter *adapter,
4659		      struct rtnl_link_stats64 *net_stats)
4660{
4661	struct e1000_hw *hw = &adapter->hw;
4662	struct pci_dev *pdev = adapter->pdev;
4663	u32 reg, mpc;
4664	u16 phy_tmp;
4665	int i;
4666	u64 bytes, packets;
4667	unsigned int start;
4668	u64 _bytes, _packets;
4669
4670#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4671
4672	/*
4673	 * Prevent stats update while adapter is being reset, or if the pci
4674	 * connection is down.
4675	 */
4676	if (adapter->link_speed == 0)
4677		return;
4678	if (pci_channel_offline(pdev))
4679		return;
4680
4681	bytes = 0;
4682	packets = 0;
4683	for (i = 0; i < adapter->num_rx_queues; i++) {
4684		u32 rqdpc = rd32(E1000_RQDPC(i));
4685		struct igb_ring *ring = adapter->rx_ring[i];
4686
4687		if (rqdpc) {
4688			ring->rx_stats.drops += rqdpc;
4689			net_stats->rx_fifo_errors += rqdpc;
4690		}
4691
4692		do {
4693			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4694			_bytes = ring->rx_stats.bytes;
4695			_packets = ring->rx_stats.packets;
4696		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4697		bytes += _bytes;
4698		packets += _packets;
4699	}
4700
4701	net_stats->rx_bytes = bytes;
4702	net_stats->rx_packets = packets;
4703
4704	bytes = 0;
4705	packets = 0;
4706	for (i = 0; i < adapter->num_tx_queues; i++) {
4707		struct igb_ring *ring = adapter->tx_ring[i];
4708		do {
4709			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4710			_bytes = ring->tx_stats.bytes;
4711			_packets = ring->tx_stats.packets;
4712		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4713		bytes += _bytes;
4714		packets += _packets;
4715	}
4716	net_stats->tx_bytes = bytes;
4717	net_stats->tx_packets = packets;
4718
4719	/* read stats registers */
4720	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4721	adapter->stats.gprc += rd32(E1000_GPRC);
4722	adapter->stats.gorc += rd32(E1000_GORCL);
4723	rd32(E1000_GORCH); /* clear GORCL */
4724	adapter->stats.bprc += rd32(E1000_BPRC);
4725	adapter->stats.mprc += rd32(E1000_MPRC);
4726	adapter->stats.roc += rd32(E1000_ROC);
4727
4728	adapter->stats.prc64 += rd32(E1000_PRC64);
4729	adapter->stats.prc127 += rd32(E1000_PRC127);
4730	adapter->stats.prc255 += rd32(E1000_PRC255);
4731	adapter->stats.prc511 += rd32(E1000_PRC511);
4732	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4733	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4734	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4735	adapter->stats.sec += rd32(E1000_SEC);
4736
4737	mpc = rd32(E1000_MPC);
4738	adapter->stats.mpc += mpc;
4739	net_stats->rx_fifo_errors += mpc;
4740	adapter->stats.scc += rd32(E1000_SCC);
4741	adapter->stats.ecol += rd32(E1000_ECOL);
4742	adapter->stats.mcc += rd32(E1000_MCC);
4743	adapter->stats.latecol += rd32(E1000_LATECOL);
4744	adapter->stats.dc += rd32(E1000_DC);
4745	adapter->stats.rlec += rd32(E1000_RLEC);
4746	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4747	adapter->stats.xontxc += rd32(E1000_XONTXC);
4748	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4749	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4750	adapter->stats.fcruc += rd32(E1000_FCRUC);
4751	adapter->stats.gptc += rd32(E1000_GPTC);
4752	adapter->stats.gotc += rd32(E1000_GOTCL);
4753	rd32(E1000_GOTCH); /* clear GOTCL */
4754	adapter->stats.rnbc += rd32(E1000_RNBC);
4755	adapter->stats.ruc += rd32(E1000_RUC);
4756	adapter->stats.rfc += rd32(E1000_RFC);
4757	adapter->stats.rjc += rd32(E1000_RJC);
4758	adapter->stats.tor += rd32(E1000_TORH);
4759	adapter->stats.tot += rd32(E1000_TOTH);
4760	adapter->stats.tpr += rd32(E1000_TPR);
4761
4762	adapter->stats.ptc64 += rd32(E1000_PTC64);
4763	adapter->stats.ptc127 += rd32(E1000_PTC127);
4764	adapter->stats.ptc255 += rd32(E1000_PTC255);
4765	adapter->stats.ptc511 += rd32(E1000_PTC511);
4766	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4767	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4768
4769	adapter->stats.mptc += rd32(E1000_MPTC);
4770	adapter->stats.bptc += rd32(E1000_BPTC);
4771
4772	adapter->stats.tpt += rd32(E1000_TPT);
4773	adapter->stats.colc += rd32(E1000_COLC);
4774
4775	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4776	/* read internal phy specific stats */
4777	reg = rd32(E1000_CTRL_EXT);
4778	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4779		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4780		adapter->stats.tncrs += rd32(E1000_TNCRS);
4781	}
4782
4783	adapter->stats.tsctc += rd32(E1000_TSCTC);
4784	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4785
4786	adapter->stats.iac += rd32(E1000_IAC);
4787	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4788	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4789	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4790	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4791	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4792	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4793	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4794	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4795
4796	/* Fill out the OS statistics structure */
4797	net_stats->multicast = adapter->stats.mprc;
4798	net_stats->collisions = adapter->stats.colc;
4799
4800	/* Rx Errors */
4801
4802	/* RLEC on some newer hardware can be incorrect so build
4803	 * our own version based on RUC and ROC */
4804	net_stats->rx_errors = adapter->stats.rxerrc +
4805		adapter->stats.crcerrs + adapter->stats.algnerrc +
4806		adapter->stats.ruc + adapter->stats.roc +
4807		adapter->stats.cexterr;
4808	net_stats->rx_length_errors = adapter->stats.ruc +
4809				      adapter->stats.roc;
4810	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4811	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4812	net_stats->rx_missed_errors = adapter->stats.mpc;
4813
4814	/* Tx Errors */
4815	net_stats->tx_errors = adapter->stats.ecol +
4816			       adapter->stats.latecol;
4817	net_stats->tx_aborted_errors = adapter->stats.ecol;
4818	net_stats->tx_window_errors = adapter->stats.latecol;
4819	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4820
4821	/* Tx Dropped needs to be maintained elsewhere */
4822
4823	/* Phy Stats */
4824	if (hw->phy.media_type == e1000_media_type_copper) {
4825		if ((adapter->link_speed == SPEED_1000) &&
4826		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4827			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4828			adapter->phy_stats.idle_errors += phy_tmp;
4829		}
4830	}
4831
4832	/* Management Stats */
4833	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4834	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4835	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4836
4837	/* OS2BMC Stats */
4838	reg = rd32(E1000_MANC);
4839	if (reg & E1000_MANC_EN_BMC2OS) {
4840		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4841		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4842		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4843		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4844	}
4845}
4846
4847static irqreturn_t igb_msix_other(int irq, void *data)
4848{
4849	struct igb_adapter *adapter = data;
4850	struct e1000_hw *hw = &adapter->hw;
4851	u32 icr = rd32(E1000_ICR);
4852	/* reading ICR causes bit 31 of EICR to be cleared */
4853
4854	if (icr & E1000_ICR_DRSTA)
4855		schedule_work(&adapter->reset_task);
4856
4857	if (icr & E1000_ICR_DOUTSYNC) {
4858		/* HW is reporting DMA is out of sync */
4859		adapter->stats.doosync++;
4860		/* The DMA Out of Sync is also an indication of a spoof event
4861		 * in IOV mode. Check the Wrong VM Behavior register to
4862		 * see if it is really a spoof event. */
4863		igb_check_wvbr(adapter);
4864	}
4865
4866	/* Check for a mailbox event */
4867	if (icr & E1000_ICR_VMMB)
4868		igb_msg_task(adapter);
4869
4870	if (icr & E1000_ICR_LSC) {
4871		hw->mac.get_link_status = 1;
4872		/* guard against interrupt when we're going down */
4873		if (!test_bit(__IGB_DOWN, &adapter->state))
4874			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4875	}
4876
4877#ifdef CONFIG_IGB_PTP
4878	if (icr & E1000_ICR_TS) {
4879		u32 tsicr = rd32(E1000_TSICR);
4880
4881		if (tsicr & E1000_TSICR_TXTS) {
4882			/* acknowledge the interrupt */
4883			wr32(E1000_TSICR, E1000_TSICR_TXTS);
4884			/* retrieve hardware timestamp */
4885			schedule_work(&adapter->ptp_tx_work);
4886		}
4887	}
4888#endif /* CONFIG_IGB_PTP */
4889
4890	wr32(E1000_EIMS, adapter->eims_other);
4891
4892	return IRQ_HANDLED;
4893}
4894
4895static void igb_write_itr(struct igb_q_vector *q_vector)
4896{
4897	struct igb_adapter *adapter = q_vector->adapter;
4898	u32 itr_val = q_vector->itr_val & 0x7FFC;
4899
4900	if (!q_vector->set_itr)
4901		return;
4902
4903	if (!itr_val)
4904		itr_val = 0x4;
4905
4906	if (adapter->hw.mac.type == e1000_82575)
4907		itr_val |= itr_val << 16;
4908	else
4909		itr_val |= E1000_EITR_CNT_IGNR;
4910
4911	writel(itr_val, q_vector->itr_register);
4912	q_vector->set_itr = 0;
4913}
4914
4915static irqreturn_t igb_msix_ring(int irq, void *data)
4916{
4917	struct igb_q_vector *q_vector = data;
4918
4919	/* Write the ITR value calculated from the previous interrupt. */
4920	igb_write_itr(q_vector);
4921
4922	napi_schedule(&q_vector->napi);
4923
4924	return IRQ_HANDLED;
4925}
4926
4927#ifdef CONFIG_IGB_DCA
4928static void igb_update_dca(struct igb_q_vector *q_vector)
4929{
4930	struct igb_adapter *adapter = q_vector->adapter;
4931	struct e1000_hw *hw = &adapter->hw;
4932	int cpu = get_cpu();
4933
4934	if (q_vector->cpu == cpu)
4935		goto out_no_update;
4936
4937	if (q_vector->tx.ring) {
4938		int q = q_vector->tx.ring->reg_idx;
4939		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4940		if (hw->mac.type == e1000_82575) {
4941			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4942			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4943		} else {
4944			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4945			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4946			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4947		}
4948		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4949		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4950	}
4951	if (q_vector->rx.ring) {
4952		int q = q_vector->rx.ring->reg_idx;
4953		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4954		if (hw->mac.type == e1000_82575) {
4955			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4956			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4957		} else {
4958			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4959			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4960			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4961		}
4962		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4963		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4964		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4965		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4966	}
4967	q_vector->cpu = cpu;
4968out_no_update:
4969	put_cpu();
4970}
4971
4972static void igb_setup_dca(struct igb_adapter *adapter)
4973{
4974	struct e1000_hw *hw = &adapter->hw;
4975	int i;
4976
4977	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4978		return;
4979
4980	/* Always use CB2 mode, difference is masked in the CB driver. */
4981	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4982
4983	for (i = 0; i < adapter->num_q_vectors; i++) {
4984		adapter->q_vector[i]->cpu = -1;
4985		igb_update_dca(adapter->q_vector[i]);
4986	}
4987}
4988
4989static int __igb_notify_dca(struct device *dev, void *data)
4990{
4991	struct net_device *netdev = dev_get_drvdata(dev);
4992	struct igb_adapter *adapter = netdev_priv(netdev);
4993	struct pci_dev *pdev = adapter->pdev;
4994	struct e1000_hw *hw = &adapter->hw;
4995	unsigned long event = *(unsigned long *)data;
4996
4997	switch (event) {
4998	case DCA_PROVIDER_ADD:
4999		/* if already enabled, don't do it again */
5000		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
5001			break;
5002		if (dca_add_requester(dev) == 0) {
5003			adapter->flags |= IGB_FLAG_DCA_ENABLED;
5004			dev_info(&pdev->dev, "DCA enabled\n");
5005			igb_setup_dca(adapter);
5006			break;
5007		}
5008		/* Fall Through since DCA is disabled. */
5009	case DCA_PROVIDER_REMOVE:
5010		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
5011			/* without this a class_device is left
5012			 * hanging around in the sysfs model */
5013			dca_remove_requester(dev);
5014			dev_info(&pdev->dev, "DCA disabled\n");
5015			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
5016			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
5017		}
5018		break;
5019	}
5020
5021	return 0;
5022}
5023
5024static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5025                          void *p)
5026{
5027	int ret_val;
5028
5029	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5030	                                 __igb_notify_dca);
5031
5032	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
5033}
5034#endif /* CONFIG_IGB_DCA */
5035
5036#ifdef CONFIG_PCI_IOV
5037static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5038{
5039	unsigned char mac_addr[ETH_ALEN];
5040	struct pci_dev *pdev = adapter->pdev;
5041	struct e1000_hw *hw = &adapter->hw;
5042	struct pci_dev *pvfdev;
5043	unsigned int device_id;
5044	u16 thisvf_devfn;
5045
5046	eth_random_addr(mac_addr);
5047	igb_set_vf_mac(adapter, vf, mac_addr);
5048
5049	switch (adapter->hw.mac.type) {
5050	case e1000_82576:
5051		device_id = IGB_82576_VF_DEV_ID;
5052		/* VF Stride for 82576 is 2 */
5053		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5054			(pdev->devfn & 1);
5055		break;
5056	case e1000_i350:
5057		device_id = IGB_I350_VF_DEV_ID;
5058		/* VF Stride for I350 is 4 */
5059		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5060				(pdev->devfn & 3);
5061		break;
5062	default:
5063		device_id = 0;
5064		thisvf_devfn = 0;
5065		break;
5066	}
5067
5068	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5069	while (pvfdev) {
5070		if (pvfdev->devfn == thisvf_devfn)
5071			break;
5072		pvfdev = pci_get_device(hw->vendor_id,
5073					device_id, pvfdev);
5074	}
5075
5076	if (pvfdev)
5077		adapter->vf_data[vf].vfdev = pvfdev;
5078	else
5079		dev_err(&pdev->dev,
5080			"Couldn't find pci dev ptr for VF %4.4x\n",
5081			thisvf_devfn);
5082	return pvfdev != NULL;
5083}
5084
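/* Routing ID arithmetic, as a sketch: with the 82576 PF at devfn 0x00, VF n
 * is expected at devfn 0x80 + 2n, so VF 3 is looked up at devfn 0x86; the
 * i350 uses a stride of 4 instead.  If no matching pci_dev is found, vfdev
 * stays NULL and igb_check_vf_assignment() simply treats that VF as
 * unassigned.
 */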
5085static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5086{
5087	struct e1000_hw *hw = &adapter->hw;
5088	struct pci_dev *pdev = adapter->pdev;
5089	struct pci_dev *pvfdev;
5090	u16 vf_devfn = 0;
5091	u16 vf_stride;
5092	unsigned int device_id;
5093	int vfs_found = 0;
5094
5095	switch (adapter->hw.mac.type) {
5096	case e1000_82576:
5097		device_id = IGB_82576_VF_DEV_ID;
5098		/* VF Stride for 82576 is 2 */
5099		vf_stride = 2;
5100		break;
5101	case e1000_i350:
5102		device_id = IGB_I350_VF_DEV_ID;
5103		/* VF Stride for I350 is 4 */
5104		vf_stride = 4;
5105		break;
5106	default:
5107		device_id = 0;
5108		vf_stride = 0;
5109		break;
5110	}
5111
5112	vf_devfn = pdev->devfn + 0x80;
5113	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5114	while (pvfdev) {
5115		if (pvfdev->devfn == vf_devfn &&
5116		    (pvfdev->bus->number >= pdev->bus->number))
5117			vfs_found++;
5118		vf_devfn += vf_stride;
5119		pvfdev = pci_get_device(hw->vendor_id,
5120					device_id, pvfdev);
5121	}
5122
5123	return vfs_found;
5124}
5125
5126static int igb_check_vf_assignment(struct igb_adapter *adapter)
5127{
5128	int i;
5129	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5130		if (adapter->vf_data[i].vfdev) {
5131			if (adapter->vf_data[i].vfdev->dev_flags &
5132			    PCI_DEV_FLAGS_ASSIGNED)
5133				return true;
5134		}
5135	}
5136	return false;
5137}
5138
5139#endif
5140static void igb_ping_all_vfs(struct igb_adapter *adapter)
5141{
5142	struct e1000_hw *hw = &adapter->hw;
5143	u32 ping;
5144	int i;
5145
5146	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5147		ping = E1000_PF_CONTROL_MSG;
5148		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5149			ping |= E1000_VT_MSGTYPE_CTS;
5150		igb_write_mbx(hw, &ping, 1, i);
5151	}
5152}
5153
5154static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5155{
5156	struct e1000_hw *hw = &adapter->hw;
5157	u32 vmolr = rd32(E1000_VMOLR(vf));
5158	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5159
5160	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5161	                    IGB_VF_FLAG_MULTI_PROMISC);
5162	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5163
5164	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5165		vmolr |= E1000_VMOLR_MPME;
5166		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5167		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5168	} else {
5169		/*
5170		 * if we have hashes and we are clearing a multicast promisc
5171		 * flag we need to write the hashes to the MTA as this step
5172		 * was previously skipped
5173		 */
5174		if (vf_data->num_vf_mc_hashes > 30) {
5175			vmolr |= E1000_VMOLR_MPME;
5176		} else if (vf_data->num_vf_mc_hashes) {
5177			int j;
5178			vmolr |= E1000_VMOLR_ROMPE;
5179			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5180				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5181		}
5182	}
5183
5184	wr32(E1000_VMOLR(vf), vmolr);
5185
5186	/* there are flags left unprocessed, likely not supported */
5187	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5188		return -EINVAL;
5189
5190	return 0;
5191
5192}
5193
5194static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5195				  u32 *msgbuf, u32 vf)
5196{
5197	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5198	u16 *hash_list = (u16 *)&msgbuf[1];
5199	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5200	int i;
5201
5202	/* salt away the number of multicast addresses assigned
5203	 * to this VF for later use to restore when the PF multicast
5204	 * list changes
5205	 */
5206	vf_data->num_vf_mc_hashes = n;
5207
5208	/* only up to 30 hash values supported */
5209	if (n > 30)
5210		n = 30;
5211
5212	/* store the hashes for later use */
5213	for (i = 0; i < n; i++)
5214		vf_data->vf_mc_hashes[i] = hash_list[i];
5215
5216	/* Flush and reset the mta with the new values */
5217	igb_set_rx_mode(adapter->netdev);
5218
5219	return 0;
5220}
5221
5222static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5223{
5224	struct e1000_hw *hw = &adapter->hw;
5225	struct vf_data_storage *vf_data;
5226	int i, j;
5227
5228	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5229		u32 vmolr = rd32(E1000_VMOLR(i));
5230		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5231
5232		vf_data = &adapter->vf_data[i];
5233
5234		if ((vf_data->num_vf_mc_hashes > 30) ||
5235		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5236			vmolr |= E1000_VMOLR_MPME;
5237		} else if (vf_data->num_vf_mc_hashes) {
5238			vmolr |= E1000_VMOLR_ROMPE;
5239			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5240				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5241		}
5242		wr32(E1000_VMOLR(i), vmolr);
5243	}
5244}
5245
5246static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5247{
5248	struct e1000_hw *hw = &adapter->hw;
5249	u32 pool_mask, reg, vid;
5250	int i;
5251
5252	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5253
5254	/* Find the vlan filter for this id */
5255	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5256		reg = rd32(E1000_VLVF(i));
5257
5258		/* remove the vf from the pool */
5259		reg &= ~pool_mask;
5260
5261		/* if pool is empty then remove entry from vfta */
5262		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5263		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5264			vid = reg & E1000_VLVF_VLANID_MASK;
5265			igb_vfta_set(hw, vid, false);
5266			reg = 0;
5267		}
5268
5269		wr32(E1000_VLVF(i), reg);
5270	}
5271
5272	adapter->vf_data[vf].vlans_enabled = 0;
5273}
5274
5275static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5276{
5277	struct e1000_hw *hw = &adapter->hw;
5278	u32 reg, i;
5279
5280	/* The vlvf table only exists on 82576 hardware and newer */
5281	if (hw->mac.type < e1000_82576)
5282		return -1;
5283
5284	/* we only need to do this if VMDq is enabled */
5285	if (!adapter->vfs_allocated_count)
5286		return -1;
5287
5288	/* Find the vlan filter for this id */
5289	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5290		reg = rd32(E1000_VLVF(i));
5291		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5292		    vid == (reg & E1000_VLVF_VLANID_MASK))
5293			break;
5294	}
5295
5296	if (add) {
5297		if (i == E1000_VLVF_ARRAY_SIZE) {
5298			/* Did not find a matching VLAN ID entry that was
5299			 * enabled.  Search for a free filter entry, i.e.
5300			 * one without the enable bit set
5301			 */
5302			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5303				reg = rd32(E1000_VLVF(i));
5304				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5305					break;
5306			}
5307		}
5308		if (i < E1000_VLVF_ARRAY_SIZE) {
5309			/* Found an enabled/available entry */
5310			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5311
5312			/* if !enabled we need to set this up in vfta */
5313			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5314				/* add VID to filter table */
5315				igb_vfta_set(hw, vid, true);
5316				reg |= E1000_VLVF_VLANID_ENABLE;
5317			}
5318			reg &= ~E1000_VLVF_VLANID_MASK;
5319			reg |= vid;
5320			wr32(E1000_VLVF(i), reg);
5321
5322			/* do not modify RLPML for PF devices */
5323			if (vf >= adapter->vfs_allocated_count)
5324				return 0;
5325
5326			if (!adapter->vf_data[vf].vlans_enabled) {
5327				u32 size;
5328				reg = rd32(E1000_VMOLR(vf));
5329				size = reg & E1000_VMOLR_RLPML_MASK;
5330				size += 4;
5331				reg &= ~E1000_VMOLR_RLPML_MASK;
5332				reg |= size;
5333				wr32(E1000_VMOLR(vf), reg);
5334			}
5335
5336			adapter->vf_data[vf].vlans_enabled++;
5337		}
5338	} else {
5339		if (i < E1000_VLVF_ARRAY_SIZE) {
5340			/* remove vf from the pool */
5341			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5342			/* if pool is empty then remove entry from vfta */
5343			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5344				reg = 0;
5345				igb_vfta_set(hw, vid, false);
5346			}
5347			wr32(E1000_VLVF(i), reg);
5348
5349			/* do not modify RLPML for PF devices */
5350			if (vf >= adapter->vfs_allocated_count)
5351				return 0;
5352
5353			adapter->vf_data[vf].vlans_enabled--;
5354			if (!adapter->vf_data[vf].vlans_enabled) {
5355				u32 size;
5356				reg = rd32(E1000_VMOLR(vf));
5357				size = reg & E1000_VMOLR_RLPML_MASK;
5358				size -= 4;
5359				reg &= ~E1000_VMOLR_RLPML_MASK;
5360				reg |= size;
5361				wr32(E1000_VMOLR(vf), reg);
5362			}
5363		}
5364	}
5365	return 0;
5366}
5367
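/* A note on the RLPML adjustments above: the per-pool long packet maximum in
 * VMOLR grows by 4 bytes (one VLAN tag) when the first VLAN filter for a VF
 * is enabled and shrinks again when the last one is removed, so a full sized
 * tagged frame is not counted as oversized for that pool.  PF pools are
 * skipped because the PF frame size limit is handled separately by the
 * driver.
 */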
5368static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5369{
5370	struct e1000_hw *hw = &adapter->hw;
5371
5372	if (vid)
5373		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5374	else
5375		wr32(E1000_VMVIR(vf), 0);
5376}
5377
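/**
 * igb_ndo_set_vf_vlan - assign or clear an administrative VLAN for a VF
 * @netdev: network interface device structure
 * @vf: VF index
 * @vlan: VLAN id to assign, 0 to clear the port VLAN
 * @qos: priority bits to place in the VLAN tag
 **/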
5378static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5379			       int vf, u16 vlan, u8 qos)
5380{
5381	int err = 0;
5382	struct igb_adapter *adapter = netdev_priv(netdev);
5383
5384	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5385		return -EINVAL;
5386	if (vlan || qos) {
5387		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5388		if (err)
5389			goto out;
5390		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5391		igb_set_vmolr(adapter, vf, !vlan);
5392		adapter->vf_data[vf].pf_vlan = vlan;
5393		adapter->vf_data[vf].pf_qos = qos;
5394		dev_info(&adapter->pdev->dev,
5395			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5396		if (test_bit(__IGB_DOWN, &adapter->state)) {
5397			dev_warn(&adapter->pdev->dev,
5398				 "The VF VLAN has been set,"
5399				 " but the PF device is not up.\n");
5400			dev_warn(&adapter->pdev->dev,
5401				 "Bring the PF device up before"
5402				 " attempting to use the VF device.\n");
5403		}
5404	} else {
5405		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5406				   false, vf);
5407		igb_set_vmvir(adapter, vlan, vf);
5408		igb_set_vmolr(adapter, vf, true);
5409		adapter->vf_data[vf].pf_vlan = 0;
5410		adapter->vf_data[vf].pf_qos = 0;
5411	}
5412out:
5413	return err;
5414}
5415
5416static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5417{
5418	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5419	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5420
5421	return igb_vlvf_set(adapter, vid, add, vf);
5422}
5423
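/**
 * igb_vf_reset - restore a VF's configuration to its defaults
 * @adapter: board private structure
 * @vf: VF index to reset
 *
 * Clears the VF flags (except the PF-set-MAC flag), restores the default
 * VMOLR settings, reprograms any administratively set VLAN and flushes
 * the VF's multicast table.
 **/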
5424static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5425{
5426	/* clear flags - except flag that indicates PF has set the MAC */
5427	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5428	adapter->vf_data[vf].last_nack = jiffies;
5429
5430	/* reset offloads to defaults */
5431	igb_set_vmolr(adapter, vf, true);
5432
5433	/* reset vlans for device */
5434	igb_clear_vf_vfta(adapter, vf);
5435	if (adapter->vf_data[vf].pf_vlan)
5436		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5437				    adapter->vf_data[vf].pf_vlan,
5438				    adapter->vf_data[vf].pf_qos);
5439	else
5440		igb_clear_vf_vfta(adapter, vf);
5441
5442	/* reset multicast table array for vf */
5443	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5444
5445	/* Flush and reset the mta with the new values */
5446	igb_set_rx_mode(adapter->netdev);
5447}
5448
5449static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5450{
5451	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5452
5453	/* generate a new mac address as we were hotplug removed/added */
5454	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5455		eth_random_addr(vf_mac);
5456
5457	/* process remaining reset events */
5458	igb_vf_reset(adapter, vf);
5459}
5460
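/**
 * igb_vf_reset_msg - handle a reset request received from a VF
 * @adapter: board private structure
 * @vf: VF index that requested the reset
 *
 * Resets the VF state, programs its MAC address into a receive address
 * register, re-enables its transmit and receive queues, and replies over
 * the mailbox with an ACK carrying the VF MAC address.
 **/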
5461static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5462{
5463	struct e1000_hw *hw = &adapter->hw;
5464	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5465	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5466	u32 reg, msgbuf[3];
5467	u8 *addr = (u8 *)(&msgbuf[1]);
5468
5469	/* process all the same items cleared in a function level reset */
5470	igb_vf_reset(adapter, vf);
5471
5472	/* set vf mac address */
5473	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5474
5475	/* enable transmit and receive for vf */
5476	reg = rd32(E1000_VFTE);
5477	wr32(E1000_VFTE, reg | (1 << vf));
5478	reg = rd32(E1000_VFRE);
5479	wr32(E1000_VFRE, reg | (1 << vf));
5480
5481	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5482
5483	/* reply to reset with ack and vf mac address */
5484	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5485	memcpy(addr, vf_mac, 6);
5486	igb_write_mbx(hw, msgbuf, 3, vf);
5487}
5488
5489static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5490{
5491	/*
5492	 * The VF MAC Address is stored in a packed array of bytes
5493	 * starting at the second 32 bit word of the msg array
5494	 */
5495	unsigned char *addr = (unsigned char *)&msg[1];
5496	int err = -1;
5497
5498	if (is_valid_ether_addr(addr))
5499		err = igb_set_vf_mac(adapter, vf, addr);
5500
5501	return err;
5502}
5503
5504static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5505{
5506	struct e1000_hw *hw = &adapter->hw;
5507	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5508	u32 msg = E1000_VT_MSGTYPE_NACK;
5509
5510	/* if device isn't clear to send it shouldn't be reading either */
5511	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5512	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5513		igb_write_mbx(hw, &msg, 1, vf);
5514		vf_data->last_nack = jiffies;
5515	}
5516}
5517
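/**
 * igb_rcv_msg_from_vf - read and dispatch a mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index the message came from
 *
 * Reads the mailbox, handles reset requests and, once the VF is clear to
 * send, dispatches MAC, promiscuous, multicast, LPE and VLAN requests.
 * The result is reported back to the VF as an ACK or a NACK.
 **/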
5518static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5519{
5520	struct pci_dev *pdev = adapter->pdev;
5521	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5522	struct e1000_hw *hw = &adapter->hw;
5523	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5524	s32 retval;
5525
5526	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5527
5528	if (retval) {
5529		/* if receive failed, revoke VF CTS status and restart init */
5530		dev_err(&pdev->dev, "Error receiving message from VF\n");
5531		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5532		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5533			return;
5534		goto out;
5535	}
5536
5537	/* this is a message we already processed, do nothing */
5538	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5539		return;
5540
5541	/*
5542	 * until the vf completes a reset it should not be
5543	 * allowed to start any configuration.
5544	 */
5545
5546	if (msgbuf[0] == E1000_VF_RESET) {
5547		igb_vf_reset_msg(adapter, vf);
5548		return;
5549	}
5550
5551	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5552		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5553			return;
5554		retval = -1;
5555		goto out;
5556	}
5557
5558	switch ((msgbuf[0] & 0xFFFF)) {
5559	case E1000_VF_SET_MAC_ADDR:
5560		retval = -EINVAL;
5561		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5562			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5563		else
5564			dev_warn(&pdev->dev,
5565				 "VF %d attempted to override administratively "
5566				 "set MAC address\nReload the VF driver to "
5567				 "resume operations\n", vf);
5568		break;
5569	case E1000_VF_SET_PROMISC:
5570		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5571		break;
5572	case E1000_VF_SET_MULTICAST:
5573		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5574		break;
5575	case E1000_VF_SET_LPE:
5576		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5577		break;
5578	case E1000_VF_SET_VLAN:
5579		retval = -1;
5580		if (vf_data->pf_vlan)
5581			dev_warn(&pdev->dev,
5582				 "VF %d attempted to override administratively "
5583				 "set VLAN tag\nReload the VF driver to "
5584				 "resume operations\n", vf);
5585		else
5586			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5587		break;
5588	default:
5589		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5590		retval = -1;
5591		break;
5592	}
5593
5594	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5595out:
5596	/* notify the VF of the results of what it sent us */
5597	if (retval)
5598		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5599	else
5600		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5601
5602	igb_write_mbx(hw, msgbuf, 1, vf);
5603}
5604
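/**
 * igb_msg_task - service mailbox events for all allocated VFs
 * @adapter: board private structure
 *
 * Polls each VF for pending reset requests, messages and acknowledgements
 * and handles whatever is found.
 **/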
5605static void igb_msg_task(struct igb_adapter *adapter)
5606{
5607	struct e1000_hw *hw = &adapter->hw;
5608	u32 vf;
5609
5610	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5611		/* process any reset requests */
5612		if (!igb_check_for_rst(hw, vf))
5613			igb_vf_reset_event(adapter, vf);
5614
5615		/* process any messages pending */
5616		if (!igb_check_for_msg(hw, vf))
5617			igb_rcv_msg_from_vf(adapter, vf);
5618
5619		/* process any acks */
5620		if (!igb_check_for_ack(hw, vf))
5621			igb_rcv_ack_from_vf(adapter, vf);
5622	}
5623}
5624
5625/**
5626 *  igb_set_uta - Set unicast filter table address
5627 *  @adapter: board private structure
5628 *
5629 *  The unicast table address is a register array of 32-bit registers.
5630 *  The table is meant to be used in a way similar to how the MTA is used;
5631 *  however, due to certain limitations in the hardware it is necessary to
5632 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5633 *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
5634 **/
5635static void igb_set_uta(struct igb_adapter *adapter)
5636{
5637	struct e1000_hw *hw = &adapter->hw;
5638	int i;
5639
5640	/* The UTA table only exists on 82576 hardware and newer */
5641	if (hw->mac.type < e1000_82576)
5642		return;
5643
5644	/* we only need to do this if VMDq is enabled */
5645	if (!adapter->vfs_allocated_count)
5646		return;
5647
5648	for (i = 0; i < hw->mac.uta_reg_count; i++)
5649		array_wr32(E1000_UTA, i, ~0);
5650}
5651
5652/**
5653 * igb_intr_msi - Interrupt Handler
5654 * @irq: interrupt number
5655 * @data: pointer to a network interface device structure
5656 **/
5657static irqreturn_t igb_intr_msi(int irq, void *data)
5658{
5659	struct igb_adapter *adapter = data;
5660	struct igb_q_vector *q_vector = adapter->q_vector[0];
5661	struct e1000_hw *hw = &adapter->hw;
5662	/* reading ICR disables interrupts using IAM */
5663	u32 icr = rd32(E1000_ICR);
5664
5665	igb_write_itr(q_vector);
5666
5667	if (icr & E1000_ICR_DRSTA)
5668		schedule_work(&adapter->reset_task);
5669
5670	if (icr & E1000_ICR_DOUTSYNC) {
5671		/* HW is reporting DMA is out of sync */
5672		adapter->stats.doosync++;
5673	}
5674
5675	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5676		hw->mac.get_link_status = 1;
5677		if (!test_bit(__IGB_DOWN, &adapter->state))
5678			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5679	}
5680
5681#ifdef CONFIG_IGB_PTP
5682	if (icr & E1000_ICR_TS) {
5683		u32 tsicr = rd32(E1000_TSICR);
5684
5685		if (tsicr & E1000_TSICR_TXTS) {
5686			/* acknowledge the interrupt */
5687			wr32(E1000_TSICR, E1000_TSICR_TXTS);
5688			/* retrieve hardware timestamp */
5689			schedule_work(&adapter->ptp_tx_work);
5690		}
5691	}
5692#endif /* CONFIG_IGB_PTP */
5693
5694	napi_schedule(&q_vector->napi);
5695
5696	return IRQ_HANDLED;
5697}
5698
5699/**
5700 * igb_intr - Legacy Interrupt Handler
5701 * @irq: interrupt number
5702 * @data: pointer to a network interface device structure
5703 **/
5704static irqreturn_t igb_intr(int irq, void *data)
5705{
5706	struct igb_adapter *adapter = data;
5707	struct igb_q_vector *q_vector = adapter->q_vector[0];
5708	struct e1000_hw *hw = &adapter->hw;
5709	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5710	 * need for the IMC write */
5711	u32 icr = rd32(E1000_ICR);
5712
5713	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5714	 * not set, then the adapter didn't send an interrupt */
5715	if (!(icr & E1000_ICR_INT_ASSERTED))
5716		return IRQ_NONE;
5717
5718	igb_write_itr(q_vector);
5719
5720	if (icr & E1000_ICR_DRSTA)
5721		schedule_work(&adapter->reset_task);
5722
5723	if (icr & E1000_ICR_DOUTSYNC) {
5724		/* HW is reporting DMA is out of sync */
5725		adapter->stats.doosync++;
5726	}
5727
5728	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5729		hw->mac.get_link_status = 1;
5730		/* guard against interrupt when we're going down */
5731		if (!test_bit(__IGB_DOWN, &adapter->state))
5732			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5733	}
5734
5735#ifdef CONFIG_IGB_PTP
5736	if (icr & E1000_ICR_TS) {
5737		u32 tsicr = rd32(E1000_TSICR);
5738
5739		if (tsicr & E1000_TSICR_TXTS) {
5740			/* acknowledge the interrupt */
5741			wr32(E1000_TSICR, E1000_TSICR_TXTS);
5742			/* retrieve hardware timestamp */
5743			schedule_work(&adapter->ptp_tx_work);
5744		}
5745	}
5746#endif /* CONFIG_IGB_PTP */
5747
5748	napi_schedule(&q_vector->napi);
5749
5750	return IRQ_HANDLED;
5751}
5752
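/**
 * igb_ring_irq_enable - re-enable interrupts for a q_vector after polling
 * @q_vector: q_vector whose interrupt should be re-armed
 *
 * Updates the ITR value when dynamic moderation is configured and, if the
 * adapter is up, re-arms either the vector's EIMS bit (MSI-X) or the
 * global interrupt mask.
 **/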
5753static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5754{
5755	struct igb_adapter *adapter = q_vector->adapter;
5756	struct e1000_hw *hw = &adapter->hw;
5757
5758	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5759	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5760		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5761			igb_set_itr(q_vector);
5762		else
5763			igb_update_ring_itr(q_vector);
5764	}
5765
5766	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5767		if (adapter->msix_entries)
5768			wr32(E1000_EIMS, q_vector->eims_value);
5769		else
5770			igb_irq_enable(adapter);
5771	}
5772}
5773
5774/**
5775 * igb_poll - NAPI Rx polling callback
5776 * @napi: napi polling structure
5777 * @budget: count of how many packets we should handle
5778 **/
5779static int igb_poll(struct napi_struct *napi, int budget)
5780{
5781	struct igb_q_vector *q_vector = container_of(napi,
5782	                                             struct igb_q_vector,
5783	                                             napi);
5784	bool clean_complete = true;
5785
5786#ifdef CONFIG_IGB_DCA
5787	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5788		igb_update_dca(q_vector);
5789#endif
5790	if (q_vector->tx.ring)
5791		clean_complete = igb_clean_tx_irq(q_vector);
5792
5793	if (q_vector->rx.ring)
5794		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5795
5796	/* If all work not completed, return budget and keep polling */
5797	if (!clean_complete)
5798		return budget;
5799
5800	/* If not enough Rx work done, exit the polling mode */
5801	napi_complete(napi);
5802	igb_ring_irq_enable(q_vector);
5803
5804	return 0;
5805}
5806
5807/**
5808 * igb_clean_tx_irq - Reclaim resources after transmit completes
5809 * @q_vector: pointer to q_vector containing needed info
5810 *
5811 * returns true if ring is completely cleaned
5812 **/
5813static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5814{
5815	struct igb_adapter *adapter = q_vector->adapter;
5816	struct igb_ring *tx_ring = q_vector->tx.ring;
5817	struct igb_tx_buffer *tx_buffer;
5818	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5819	unsigned int total_bytes = 0, total_packets = 0;
5820	unsigned int budget = q_vector->tx.work_limit;
5821	unsigned int i = tx_ring->next_to_clean;
5822
5823	if (test_bit(__IGB_DOWN, &adapter->state))
5824		return true;
5825
5826	tx_buffer = &tx_ring->tx_buffer_info[i];
5827	tx_desc = IGB_TX_DESC(tx_ring, i);
5828	i -= tx_ring->count;
5829
5830	for (; budget; budget--) {
5831		eop_desc = tx_buffer->next_to_watch;
5832
5833		/* prevent any other reads prior to eop_desc */
5834		rmb();
5835
5836		/* if next_to_watch is not set then there is no work pending */
5837		if (!eop_desc)
5838			break;
5839
5840		/* if DD is not set pending work has not been completed */
5841		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5842			break;
5843
5844		/* clear next_to_watch to prevent false hangs */
5845		tx_buffer->next_to_watch = NULL;
5846
5847		/* update the statistics for this packet */
5848		total_bytes += tx_buffer->bytecount;
5849		total_packets += tx_buffer->gso_segs;
5850
5851		/* free the skb */
5852		dev_kfree_skb_any(tx_buffer->skb);
5853		tx_buffer->skb = NULL;
5854
5855		/* unmap skb header data */
5856		dma_unmap_single(tx_ring->dev,
5857				 tx_buffer->dma,
5858				 tx_buffer->length,
5859				 DMA_TO_DEVICE);
5860
5861		/* clear last DMA location and unmap remaining buffers */
5862		while (tx_desc != eop_desc) {
5863			tx_buffer->dma = 0;
5864
5865			tx_buffer++;
5866			tx_desc++;
5867			i++;
5868			if (unlikely(!i)) {
5869				i -= tx_ring->count;
5870				tx_buffer = tx_ring->tx_buffer_info;
5871				tx_desc = IGB_TX_DESC(tx_ring, 0);
5872			}
5873
5874			/* unmap any remaining paged data */
5875			if (tx_buffer->dma) {
5876				dma_unmap_page(tx_ring->dev,
5877					       tx_buffer->dma,
5878					       tx_buffer->length,
5879					       DMA_TO_DEVICE);
5880			}
5881		}
5882
5883		/* clear last DMA location */
5884		tx_buffer->dma = 0;
5885
5886		/* move us one more past the eop_desc for start of next pkt */
5887		tx_buffer++;
5888		tx_desc++;
5889		i++;
5890		if (unlikely(!i)) {
5891			i -= tx_ring->count;
5892			tx_buffer = tx_ring->tx_buffer_info;
5893			tx_desc = IGB_TX_DESC(tx_ring, 0);
5894		}
5895	}
5896
5897	netdev_tx_completed_queue(txring_txq(tx_ring),
5898				  total_packets, total_bytes);
5899	i += tx_ring->count;
5900	tx_ring->next_to_clean = i;
5901	u64_stats_update_begin(&tx_ring->tx_syncp);
5902	tx_ring->tx_stats.bytes += total_bytes;
5903	tx_ring->tx_stats.packets += total_packets;
5904	u64_stats_update_end(&tx_ring->tx_syncp);
5905	q_vector->tx.total_bytes += total_bytes;
5906	q_vector->tx.total_packets += total_packets;
5907
5908	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5909		struct e1000_hw *hw = &adapter->hw;
5910
5911		eop_desc = tx_buffer->next_to_watch;
5912
5913		/* Detect a transmit hang in hardware, this serializes the
5914		 * check with the clearing of time_stamp and movement of i */
5915		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5916		if (eop_desc &&
5917		    time_after(jiffies, tx_buffer->time_stamp +
5918			       (adapter->tx_timeout_factor * HZ)) &&
5919		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5920
5921			/* detected Tx unit hang */
5922			dev_err(tx_ring->dev,
5923				"Detected Tx Unit Hang\n"
5924				"  Tx Queue             <%d>\n"
5925				"  TDH                  <%x>\n"
5926				"  TDT                  <%x>\n"
5927				"  next_to_use          <%x>\n"
5928				"  next_to_clean        <%x>\n"
5929				"buffer_info[next_to_clean]\n"
5930				"  time_stamp           <%lx>\n"
5931				"  next_to_watch        <%p>\n"
5932				"  jiffies              <%lx>\n"
5933				"  desc.status          <%x>\n",
5934				tx_ring->queue_index,
5935				rd32(E1000_TDH(tx_ring->reg_idx)),
5936				readl(tx_ring->tail),
5937				tx_ring->next_to_use,
5938				tx_ring->next_to_clean,
5939				tx_buffer->time_stamp,
5940				eop_desc,
5941				jiffies,
5942				eop_desc->wb.status);
5943			netif_stop_subqueue(tx_ring->netdev,
5944					    tx_ring->queue_index);
5945
5946			/* we are about to reset, no point in enabling stuff */
5947			return true;
5948		}
5949	}
5950
5951	if (unlikely(total_packets &&
5952		     netif_carrier_ok(tx_ring->netdev) &&
5953		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5954		/* Make sure that anybody stopping the queue after this
5955		 * sees the new next_to_clean.
5956		 */
5957		smp_mb();
5958		if (__netif_subqueue_stopped(tx_ring->netdev,
5959					     tx_ring->queue_index) &&
5960		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5961			netif_wake_subqueue(tx_ring->netdev,
5962					    tx_ring->queue_index);
5963
5964			u64_stats_update_begin(&tx_ring->tx_syncp);
5965			tx_ring->tx_stats.restart_queue++;
5966			u64_stats_update_end(&tx_ring->tx_syncp);
5967		}
5968	}
5969
5970	return !!budget;
5971}
5972
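/**
 * igb_rx_checksum - apply hardware receive checksum results to an skb
 * @ring: rx ring the descriptor came from
 * @rx_desc: advanced rx descriptor written back by hardware
 * @skb: skb being populated for this packet
 *
 * Marks the skb CHECKSUM_UNNECESSARY when hardware reports a good TCP or
 * UDP checksum and counts checksum errors otherwise, working around an
 * SCTP errata on short frames.
 **/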
5973static inline void igb_rx_checksum(struct igb_ring *ring,
5974				   union e1000_adv_rx_desc *rx_desc,
5975				   struct sk_buff *skb)
5976{
5977	skb_checksum_none_assert(skb);
5978
5979	/* Ignore Checksum bit is set */
5980	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5981		return;
5982
5983	/* Rx checksum disabled via ethtool */
5984	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5985		return;
5986
5987	/* TCP/UDP checksum error bit is set */
5988	if (igb_test_staterr(rx_desc,
5989			     E1000_RXDEXT_STATERR_TCPE |
5990			     E1000_RXDEXT_STATERR_IPE)) {
5991		/*
5992		 * work around errata with sctp packets where the TCPE aka
5993		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5994		 * packets, (aka let the stack check the crc32c)
5995		 */
5996		if (!((skb->len == 60) &&
5997		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5998			u64_stats_update_begin(&ring->rx_syncp);
5999			ring->rx_stats.csum_err++;
6000			u64_stats_update_end(&ring->rx_syncp);
6001		}
6002		/* let the stack verify checksum errors */
6003		return;
6004	}
6005	/* It must be a TCP or UDP packet with a valid checksum */
6006	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
6007				      E1000_RXD_STAT_UDPCS))
6008		skb->ip_summed = CHECKSUM_UNNECESSARY;
6009
6010	dev_dbg(ring->dev, "cksum success: bits %08X\n",
6011		le32_to_cpu(rx_desc->wb.upper.status_error));
6012}
6013
6014static inline void igb_rx_hash(struct igb_ring *ring,
6015			       union e1000_adv_rx_desc *rx_desc,
6016			       struct sk_buff *skb)
6017{
6018	if (ring->netdev->features & NETIF_F_RXHASH)
6019		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
6020}
6021
6022static void igb_rx_vlan(struct igb_ring *ring,
6023			union e1000_adv_rx_desc *rx_desc,
6024			struct sk_buff *skb)
6025{
6026	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6027		u16 vid;
6028		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6029		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6030			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6031		else
6032			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6033
6034		__vlan_hwaccel_put_tag(skb, vid);
6035	}
6036}
6037
6038static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6039{
6040	/* HW will not DMA in data larger than the given buffer, even if it
6041	 * parses the (NFS, of course) header to be larger.  In that case, it
6042	 * fills the header buffer and spills the rest into the page.
6043	 */
6044	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6045	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6046	if (hlen > IGB_RX_HDR_LEN)
6047		hlen = IGB_RX_HDR_LEN;
6048	return hlen;
6049}
6050
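/**
 * igb_clean_rx_irq - clean completed descriptors from an rx ring
 * @q_vector: q_vector owning the rx ring
 * @budget: maximum number of packets to process
 *
 * Unmaps completed buffers, assembles skbs from the header buffer and page
 * fragments, runs hash/checksum/VLAN offload processing and hands packets
 * to the stack via GRO.  Returns true if the budget was not exhausted.
 **/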
6051static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6052{
6053	struct igb_ring *rx_ring = q_vector->rx.ring;
6054	union e1000_adv_rx_desc *rx_desc;
6055	const int current_node = numa_node_id();
6056	unsigned int total_bytes = 0, total_packets = 0;
6057	u16 cleaned_count = igb_desc_unused(rx_ring);
6058	u16 i = rx_ring->next_to_clean;
6059
6060	rx_desc = IGB_RX_DESC(rx_ring, i);
6061
6062	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6063		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6064		struct sk_buff *skb = buffer_info->skb;
6065		union e1000_adv_rx_desc *next_rxd;
6066
6067		buffer_info->skb = NULL;
6068		prefetch(skb->data);
6069
6070		i++;
6071		if (i == rx_ring->count)
6072			i = 0;
6073
6074		next_rxd = IGB_RX_DESC(rx_ring, i);
6075		prefetch(next_rxd);
6076
6077		/*
6078		 * This memory barrier is needed to keep us from reading
6079		 * any other fields out of the rx_desc until we know the
6080		 * RXD_STAT_DD bit is set
6081		 */
6082		rmb();
6083
6084		if (!skb_is_nonlinear(skb)) {
6085			__skb_put(skb, igb_get_hlen(rx_desc));
6086			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6087					 IGB_RX_HDR_LEN,
6088					 DMA_FROM_DEVICE);
6089			buffer_info->dma = 0;
6090		}
6091
6092		if (rx_desc->wb.upper.length) {
6093			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6094
6095			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6096						buffer_info->page,
6097						buffer_info->page_offset,
6098						length);
6099
6100			skb->len += length;
6101			skb->data_len += length;
6102			skb->truesize += PAGE_SIZE / 2;
6103
6104			if ((page_count(buffer_info->page) != 1) ||
6105			    (page_to_nid(buffer_info->page) != current_node))
6106				buffer_info->page = NULL;
6107			else
6108				get_page(buffer_info->page);
6109
6110			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6111				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6112			buffer_info->page_dma = 0;
6113		}
6114
6115		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6116			struct igb_rx_buffer *next_buffer;
6117			next_buffer = &rx_ring->rx_buffer_info[i];
6118			buffer_info->skb = next_buffer->skb;
6119			buffer_info->dma = next_buffer->dma;
6120			next_buffer->skb = skb;
6121			next_buffer->dma = 0;
6122			goto next_desc;
6123		}
6124
6125		if (unlikely((igb_test_staterr(rx_desc,
6126					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6127			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6128			dev_kfree_skb_any(skb);
6129			goto next_desc;
6130		}
6131
6132#ifdef CONFIG_IGB_PTP
6133		igb_ptp_rx_hwtstamp(q_vector, rx_desc, skb);
6134#endif /* CONFIG_IGB_PTP */
6135		igb_rx_hash(rx_ring, rx_desc, skb);
6136		igb_rx_checksum(rx_ring, rx_desc, skb);
6137		igb_rx_vlan(rx_ring, rx_desc, skb);
6138
6139		total_bytes += skb->len;
6140		total_packets++;
6141
6142		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6143
6144		napi_gro_receive(&q_vector->napi, skb);
6145
6146		budget--;
6147next_desc:
6148		if (!budget)
6149			break;
6150
6151		cleaned_count++;
6152		/* return some buffers to hardware, one at a time is too slow */
6153		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6154			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6155			cleaned_count = 0;
6156		}
6157
6158		/* use prefetched values */
6159		rx_desc = next_rxd;
6160	}
6161
6162	rx_ring->next_to_clean = i;
6163	u64_stats_update_begin(&rx_ring->rx_syncp);
6164	rx_ring->rx_stats.packets += total_packets;
6165	rx_ring->rx_stats.bytes += total_bytes;
6166	u64_stats_update_end(&rx_ring->rx_syncp);
6167	q_vector->rx.total_packets += total_packets;
6168	q_vector->rx.total_bytes += total_bytes;
6169
6170	if (cleaned_count)
6171		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6172
6173	return !!budget;
6174}
6175
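/**
 * igb_alloc_mapped_skb - ensure an rx buffer has a DMA-mapped header skb
 * @rx_ring: rx ring the buffer belongs to
 * @bi: rx buffer info entry to populate
 *
 * Allocates a header skb if needed and maps it for DMA.  Returns false and
 * bumps the alloc_failed counter if allocation or mapping fails.
 **/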
6176static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6177				 struct igb_rx_buffer *bi)
6178{
6179	struct sk_buff *skb = bi->skb;
6180	dma_addr_t dma = bi->dma;
6181
6182	if (dma)
6183		return true;
6184
6185	if (likely(!skb)) {
6186		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6187						IGB_RX_HDR_LEN);
6188		bi->skb = skb;
6189		if (!skb) {
6190			rx_ring->rx_stats.alloc_failed++;
6191			return false;
6192		}
6193
6194		/* initialize skb for ring */
6195		skb_record_rx_queue(skb, rx_ring->queue_index);
6196	}
6197
6198	dma = dma_map_single(rx_ring->dev, skb->data,
6199			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6200
6201	if (dma_mapping_error(rx_ring->dev, dma)) {
6202		rx_ring->rx_stats.alloc_failed++;
6203		return false;
6204	}
6205
6206	bi->dma = dma;
6207	return true;
6208}
6209
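/**
 * igb_alloc_mapped_page - ensure an rx buffer has a DMA-mapped half page
 * @rx_ring: rx ring the buffer belongs to
 * @bi: rx buffer info entry to populate
 *
 * Allocates a page if needed and maps alternating half pages for packet
 * data.  Returns false and bumps the alloc_failed counter on failure.
 **/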
6210static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6211				  struct igb_rx_buffer *bi)
6212{
6213	struct page *page = bi->page;
6214	dma_addr_t page_dma = bi->page_dma;
6215	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6216
6217	if (page_dma)
6218		return true;
6219
6220	if (!page) {
6221		page = __skb_alloc_page(GFP_ATOMIC, bi->skb);
6222		bi->page = page;
6223		if (unlikely(!page)) {
6224			rx_ring->rx_stats.alloc_failed++;
6225			return false;
6226		}
6227	}
6228
6229	page_dma = dma_map_page(rx_ring->dev, page,
6230				page_offset, PAGE_SIZE / 2,
6231				DMA_FROM_DEVICE);
6232
6233	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6234		rx_ring->rx_stats.alloc_failed++;
6235		return false;
6236	}
6237
6238	bi->page_dma = page_dma;
6239	bi->page_offset = page_offset;
6240	return true;
6241}
6242
6243/**
6244 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6245 * @rx_ring: rx descriptor ring to place buffers on
 * @cleaned_count: number of buffers to replace
6246 **/
6247void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6248{
6249	union e1000_adv_rx_desc *rx_desc;
6250	struct igb_rx_buffer *bi;
6251	u16 i = rx_ring->next_to_use;
6252
6253	rx_desc = IGB_RX_DESC(rx_ring, i);
6254	bi = &rx_ring->rx_buffer_info[i];
6255	i -= rx_ring->count;
6256
6257	while (cleaned_count--) {
6258		if (!igb_alloc_mapped_skb(rx_ring, bi))
6259			break;
6260
6261		/* Refresh the desc even if buffer_addrs didn't change
6262		 * because each write-back erases this info. */
6263		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6264
6265		if (!igb_alloc_mapped_page(rx_ring, bi))
6266			break;
6267
6268		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6269
6270		rx_desc++;
6271		bi++;
6272		i++;
6273		if (unlikely(!i)) {
6274			rx_desc = IGB_RX_DESC(rx_ring, 0);
6275			bi = rx_ring->rx_buffer_info;
6276			i -= rx_ring->count;
6277		}
6278
6279		/* clear the hdr_addr for the next_to_use descriptor */
6280		rx_desc->read.hdr_addr = 0;
6281	}
6282
6283	i += rx_ring->count;
6284
6285	if (rx_ring->next_to_use != i) {
6286		rx_ring->next_to_use = i;
6287
6288		/* Force memory writes to complete before letting h/w
6289		 * know there are new descriptors to fetch.  (Only
6290		 * applicable for weak-ordered memory model archs,
6291		 * such as IA-64). */
6292		wmb();
6293		writel(i, rx_ring->tail);
6294	}
6295}
6296
6297/**
6298 * igb_mii_ioctl - handle MII ioctls for copper PHYs
6299 * @netdev: network interface device structure
6300 * @ifr: pointer to the interface request structure
6301 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6302 **/
6303static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6304{
6305	struct igb_adapter *adapter = netdev_priv(netdev);
6306	struct mii_ioctl_data *data = if_mii(ifr);
6307
6308	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6309		return -EOPNOTSUPP;
6310
6311	switch (cmd) {
6312	case SIOCGMIIPHY:
6313		data->phy_id = adapter->hw.phy.addr;
6314		break;
6315	case SIOCGMIIREG:
6316		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6317		                     &data->val_out))
6318			return -EIO;
6319		break;
6320	case SIOCSMIIREG:
6321	default:
6322		return -EOPNOTSUPP;
6323	}
6324	return 0;
6325}
6326
6327/**
6328 * igb_ioctl - dispatch device-specific ioctls
6329 * @netdev: network interface device structure
6330 * @ifr: pointer to the interface request structure
6331 * @cmd: ioctl command to execute
6332 **/
6333static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6334{
6335	switch (cmd) {
6336	case SIOCGMIIPHY:
6337	case SIOCGMIIREG:
6338	case SIOCSMIIREG:
6339		return igb_mii_ioctl(netdev, ifr, cmd);
6340#ifdef CONFIG_IGB_PTP
6341	case SIOCSHWTSTAMP:
6342		return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd);
6343#endif /* CONFIG_IGB_PTP */
6344	default:
6345		return -EOPNOTSUPP;
6346	}
6347}
6348
6349s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6350{
6351	struct igb_adapter *adapter = hw->back;
6352	u16 cap_offset;
6353
6354	cap_offset = adapter->pdev->pcie_cap;
6355	if (!cap_offset)
6356		return -E1000_ERR_CONFIG;
6357
6358	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6359
6360	return 0;
6361}
6362
6363s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6364{
6365	struct igb_adapter *adapter = hw->back;
6366	u16 cap_offset;
6367
6368	cap_offset = adapter->pdev->pcie_cap;
6369	if (!cap_offset)
6370		return -E1000_ERR_CONFIG;
6371
6372	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6373
6374	return 0;
6375}
6376
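/**
 * igb_vlan_mode - enable or disable hardware VLAN tag stripping
 * @netdev: network interface device structure
 * @features: netdev feature flags; NETIF_F_HW_VLAN_RX selects stripping
 **/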
6377static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6378{
6379	struct igb_adapter *adapter = netdev_priv(netdev);
6380	struct e1000_hw *hw = &adapter->hw;
6381	u32 ctrl, rctl;
6382	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6383
6384	if (enable) {
6385		/* enable VLAN tag insert/strip */
6386		ctrl = rd32(E1000_CTRL);
6387		ctrl |= E1000_CTRL_VME;
6388		wr32(E1000_CTRL, ctrl);
6389
6390		/* Disable CFI check */
6391		rctl = rd32(E1000_RCTL);
6392		rctl &= ~E1000_RCTL_CFIEN;
6393		wr32(E1000_RCTL, rctl);
6394	} else {
6395		/* disable VLAN tag insert/strip */
6396		ctrl = rd32(E1000_CTRL);
6397		ctrl &= ~E1000_CTRL_VME;
6398		wr32(E1000_CTRL, ctrl);
6399	}
6400
6401	igb_rlpml_set(adapter);
6402}
6403
6404static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6405{
6406	struct igb_adapter *adapter = netdev_priv(netdev);
6407	struct e1000_hw *hw = &adapter->hw;
6408	int pf_id = adapter->vfs_allocated_count;
6409
6410	/* attempt to add filter to vlvf array */
6411	igb_vlvf_set(adapter, vid, true, pf_id);
6412
6413	/* add the filter since PF can receive vlans w/o entry in vlvf */
6414	igb_vfta_set(hw, vid, true);
6415
6416	set_bit(vid, adapter->active_vlans);
6417
6418	return 0;
6419}
6420
6421static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6422{
6423	struct igb_adapter *adapter = netdev_priv(netdev);
6424	struct e1000_hw *hw = &adapter->hw;
6425	int pf_id = adapter->vfs_allocated_count;
6426	s32 err;
6427
6428	/* remove vlan from VLVF table array */
6429	err = igb_vlvf_set(adapter, vid, false, pf_id);
6430
6431	/* if vid was not present in VLVF just remove it from table */
6432	if (err)
6433		igb_vfta_set(hw, vid, false);
6434
6435	clear_bit(vid, adapter->active_vlans);
6436
6437	return 0;
6438}
6439
6440static void igb_restore_vlan(struct igb_adapter *adapter)
6441{
6442	u16 vid;
6443
6444	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6445
6446	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6447		igb_vlan_rx_add_vid(adapter->netdev, vid);
6448}
6449
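/**
 * igb_set_spd_dplx - force link speed and duplex
 * @adapter: board private structure
 * @spd: requested speed (SPEED_10, SPEED_100 or SPEED_1000)
 * @dplx: requested duplex (DUPLEX_HALF or DUPLEX_FULL)
 *
 * Returns 0 on success or -EINVAL for unsupported combinations.
 **/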
6450int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6451{
6452	struct pci_dev *pdev = adapter->pdev;
6453	struct e1000_mac_info *mac = &adapter->hw.mac;
6454
6455	mac->autoneg = 0;
6456
6457	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6458	 * for the switch() below to work */
6459	if ((spd & 1) || (dplx & ~1))
6460		goto err_inval;
6461
6462	/* Fiber NICs only allow 1000 Mbps Full duplex */
6463	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6464	    spd != SPEED_1000 &&
6465	    dplx != DUPLEX_FULL)
6466		goto err_inval;
6467
6468	switch (spd + dplx) {
6469	case SPEED_10 + DUPLEX_HALF:
6470		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6471		break;
6472	case SPEED_10 + DUPLEX_FULL:
6473		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6474		break;
6475	case SPEED_100 + DUPLEX_HALF:
6476		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6477		break;
6478	case SPEED_100 + DUPLEX_FULL:
6479		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6480		break;
6481	case SPEED_1000 + DUPLEX_FULL:
6482		mac->autoneg = 1;
6483		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6484		break;
6485	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6486	default:
6487		goto err_inval;
6488	}
6489
6490	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
6491	adapter->hw.phy.mdix = AUTO_ALL_MODES;
6492
6493	return 0;
6494
6495err_inval:
6496	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6497	return -EINVAL;
6498}
6499
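/**
 * __igb_shutdown - common suspend/shutdown handling
 * @pdev: PCI device being suspended or shut down
 * @enable_wake: set on return to indicate whether wake should be enabled
 * @runtime: true for runtime suspend, which arms only link-change wake
 **/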
6500static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6501			  bool runtime)
6502{
6503	struct net_device *netdev = pci_get_drvdata(pdev);
6504	struct igb_adapter *adapter = netdev_priv(netdev);
6505	struct e1000_hw *hw = &adapter->hw;
6506	u32 ctrl, rctl, status;
6507	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6508#ifdef CONFIG_PM
6509	int retval = 0;
6510#endif
6511
6512	netif_device_detach(netdev);
6513
6514	if (netif_running(netdev))
6515		__igb_close(netdev, true);
6516
6517	igb_clear_interrupt_scheme(adapter);
6518
6519#ifdef CONFIG_PM
6520	retval = pci_save_state(pdev);
6521	if (retval)
6522		return retval;
6523#endif
6524
6525	status = rd32(E1000_STATUS);
6526	if (status & E1000_STATUS_LU)
6527		wufc &= ~E1000_WUFC_LNKC;
6528
6529	if (wufc) {
6530		igb_setup_rctl(adapter);
6531		igb_set_rx_mode(netdev);
6532
6533		/* turn on all-multi mode if wake on multicast is enabled */
6534		if (wufc & E1000_WUFC_MC) {
6535			rctl = rd32(E1000_RCTL);
6536			rctl |= E1000_RCTL_MPE;
6537			wr32(E1000_RCTL, rctl);
6538		}
6539
6540		ctrl = rd32(E1000_CTRL);
6541		/* advertise wake from D3Cold */
6542		#define E1000_CTRL_ADVD3WUC 0x00100000
6543		/* phy power management enable */
6544		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6545		ctrl |= E1000_CTRL_ADVD3WUC;
6546		wr32(E1000_CTRL, ctrl);
6547
6548		/* Allow time for pending master requests to run */
6549		igb_disable_pcie_master(hw);
6550
6551		wr32(E1000_WUC, E1000_WUC_PME_EN);
6552		wr32(E1000_WUFC, wufc);
6553	} else {
6554		wr32(E1000_WUC, 0);
6555		wr32(E1000_WUFC, 0);
6556	}
6557
6558	*enable_wake = wufc || adapter->en_mng_pt;
6559	if (!*enable_wake)
6560		igb_power_down_link(adapter);
6561	else
6562		igb_power_up_link(adapter);
6563
6564	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6565	 * would have already happened in close and is redundant. */
6566	igb_release_hw_control(adapter);
6567
6568	pci_disable_device(pdev);
6569
6570	return 0;
6571}
6572
6573#ifdef CONFIG_PM
6574#ifdef CONFIG_PM_SLEEP
6575static int igb_suspend(struct device *dev)
6576{
6577	int retval;
6578	bool wake;
6579	struct pci_dev *pdev = to_pci_dev(dev);
6580
6581	retval = __igb_shutdown(pdev, &wake, 0);
6582	if (retval)
6583		return retval;
6584
6585	if (wake) {
6586		pci_prepare_to_sleep(pdev);
6587	} else {
6588		pci_wake_from_d3(pdev, false);
6589		pci_set_power_state(pdev, PCI_D3hot);
6590	}
6591
6592	return 0;
6593}
6594#endif /* CONFIG_PM_SLEEP */
6595
6596static int igb_resume(struct device *dev)
6597{
6598	struct pci_dev *pdev = to_pci_dev(dev);
6599	struct net_device *netdev = pci_get_drvdata(pdev);
6600	struct igb_adapter *adapter = netdev_priv(netdev);
6601	struct e1000_hw *hw = &adapter->hw;
6602	u32 err;
6603
6604	pci_set_power_state(pdev, PCI_D0);
6605	pci_restore_state(pdev);
6606	pci_save_state(pdev);
6607
6608	err = pci_enable_device_mem(pdev);
6609	if (err) {
6610		dev_err(&pdev->dev,
6611			"igb: Cannot enable PCI device from suspend\n");
6612		return err;
6613	}
6614	pci_set_master(pdev);
6615
6616	pci_enable_wake(pdev, PCI_D3hot, 0);
6617	pci_enable_wake(pdev, PCI_D3cold, 0);
6618
6619	if (igb_init_interrupt_scheme(adapter)) {
6620		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6621		return -ENOMEM;
6622	}
6623
6624	igb_reset(adapter);
6625
6626	/* let the f/w know that the h/w is now under the control of the
6627	 * driver. */
6628	igb_get_hw_control(adapter);
6629
6630	wr32(E1000_WUS, ~0);
6631
6632	if (netdev->flags & IFF_UP) {
6633		err = __igb_open(netdev, true);
6634		if (err)
6635			return err;
6636	}
6637
6638	netif_device_attach(netdev);
6639	return 0;
6640}
6641
6642#ifdef CONFIG_PM_RUNTIME
6643static int igb_runtime_idle(struct device *dev)
6644{
6645	struct pci_dev *pdev = to_pci_dev(dev);
6646	struct net_device *netdev = pci_get_drvdata(pdev);
6647	struct igb_adapter *adapter = netdev_priv(netdev);
6648
6649	if (!igb_has_link(adapter))
6650		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6651
6652	return -EBUSY;
6653}
6654
6655static int igb_runtime_suspend(struct device *dev)
6656{
6657	struct pci_dev *pdev = to_pci_dev(dev);
6658	int retval;
6659	bool wake;
6660
6661	retval = __igb_shutdown(pdev, &wake, 1);
6662	if (retval)
6663		return retval;
6664
6665	if (wake) {
6666		pci_prepare_to_sleep(pdev);
6667	} else {
6668		pci_wake_from_d3(pdev, false);
6669		pci_set_power_state(pdev, PCI_D3hot);
6670	}
6671
6672	return 0;
6673}
6674
6675static int igb_runtime_resume(struct device *dev)
6676{
6677	return igb_resume(dev);
6678}
6679#endif /* CONFIG_PM_RUNTIME */
6680#endif
6681
6682static void igb_shutdown(struct pci_dev *pdev)
6683{
6684	bool wake;
6685
6686	__igb_shutdown(pdev, &wake, 0);
6687
6688	if (system_state == SYSTEM_POWER_OFF) {
6689		pci_wake_from_d3(pdev, wake);
6690		pci_set_power_state(pdev, PCI_D3hot);
6691	}
6692}
6693
6694#ifdef CONFIG_NET_POLL_CONTROLLER
6695/*
6696 * Polling 'interrupt' - used by things like netconsole to send skbs
6697 * without having to re-enable interrupts. It's not called while
6698 * the interrupt routine is executing.
6699 */
6700static void igb_netpoll(struct net_device *netdev)
6701{
6702	struct igb_adapter *adapter = netdev_priv(netdev);
6703	struct e1000_hw *hw = &adapter->hw;
6704	struct igb_q_vector *q_vector;
6705	int i;
6706
6707	for (i = 0; i < adapter->num_q_vectors; i++) {
6708		q_vector = adapter->q_vector[i];
6709		if (adapter->msix_entries)
6710			wr32(E1000_EIMC, q_vector->eims_value);
6711		else
6712			igb_irq_disable(adapter);
6713		napi_schedule(&q_vector->napi);
6714	}
6715}
6716#endif /* CONFIG_NET_POLL_CONTROLLER */
6717
6718/**
6719 * igb_io_error_detected - called when PCI error is detected
6720 * @pdev: Pointer to PCI device
6721 * @state: The current pci connection state
6722 *
6723 * This function is called after a PCI bus error affecting
6724 * this device has been detected.
6725 */
6726static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6727					      pci_channel_state_t state)
6728{
6729	struct net_device *netdev = pci_get_drvdata(pdev);
6730	struct igb_adapter *adapter = netdev_priv(netdev);
6731
6732	netif_device_detach(netdev);
6733
6734	if (state == pci_channel_io_perm_failure)
6735		return PCI_ERS_RESULT_DISCONNECT;
6736
6737	if (netif_running(netdev))
6738		igb_down(adapter);
6739	pci_disable_device(pdev);
6740
6741	/* Request a slot reset. */
6742	return PCI_ERS_RESULT_NEED_RESET;
6743}
6744
6745/**
6746 * igb_io_slot_reset - called after the pci bus has been reset.
6747 * @pdev: Pointer to PCI device
6748 *
6749 * Restart the card from scratch, as if from a cold-boot. Implementation
6750 * resembles the first-half of the igb_resume routine.
6751 */
6752static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6753{
6754	struct net_device *netdev = pci_get_drvdata(pdev);
6755	struct igb_adapter *adapter = netdev_priv(netdev);
6756	struct e1000_hw *hw = &adapter->hw;
6757	pci_ers_result_t result;
6758	int err;
6759
6760	if (pci_enable_device_mem(pdev)) {
6761		dev_err(&pdev->dev,
6762			"Cannot re-enable PCI device after reset.\n");
6763		result = PCI_ERS_RESULT_DISCONNECT;
6764	} else {
6765		pci_set_master(pdev);
6766		pci_restore_state(pdev);
6767		pci_save_state(pdev);
6768
6769		pci_enable_wake(pdev, PCI_D3hot, 0);
6770		pci_enable_wake(pdev, PCI_D3cold, 0);
6771
6772		igb_reset(adapter);
6773		wr32(E1000_WUS, ~0);
6774		result = PCI_ERS_RESULT_RECOVERED;
6775	}
6776
6777	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6778	if (err) {
6779		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6780		        "failed 0x%0x\n", err);
6781		/* non-fatal, continue */
6782	}
6783
6784	return result;
6785}
6786
6787/**
6788 * igb_io_resume - called when traffic can start flowing again.
6789 * @pdev: Pointer to PCI device
6790 *
6791 * This callback is called when the error recovery driver tells us that
6792 * its OK to resume normal operation. Implementation resembles the
6793 * second-half of the igb_resume routine.
6794 */
6795static void igb_io_resume(struct pci_dev *pdev)
6796{
6797	struct net_device *netdev = pci_get_drvdata(pdev);
6798	struct igb_adapter *adapter = netdev_priv(netdev);
6799
6800	if (netif_running(netdev)) {
6801		if (igb_up(adapter)) {
6802			dev_err(&pdev->dev, "igb_up failed after reset\n");
6803			return;
6804		}
6805	}
6806
6807	netif_device_attach(netdev);
6808
6809	/* let the f/w know that the h/w is now under the control of the
6810	 * driver. */
6811	igb_get_hw_control(adapter);
6812}
6813
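/**
 * igb_rar_set_qsel - program a MAC address and pool select into a RAR entry
 * @adapter: board private structure
 * @addr: MAC address to program
 * @index: receive address register index to write
 * @qsel: pool/queue to associate with the address
 **/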
6814static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6815                             u8 qsel)
6816{
6817	u32 rar_low, rar_high;
6818	struct e1000_hw *hw = &adapter->hw;
6819
6820	/* HW expects these in little endian so we reverse the byte order
6821	 * from network order (big endian) to little endian
6822	 */
6823	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6824	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6825	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6826
6827	/* Indicate to hardware the Address is Valid. */
6828	rar_high |= E1000_RAH_AV;
6829
6830	if (hw->mac.type == e1000_82575)
6831		rar_high |= E1000_RAH_POOL_1 * qsel;
6832	else
6833		rar_high |= E1000_RAH_POOL_1 << qsel;
6834
6835	wr32(E1000_RAL(index), rar_low);
6836	wrfl();
6837	wr32(E1000_RAH(index), rar_high);
6838	wrfl();
6839}
6840
6841static int igb_set_vf_mac(struct igb_adapter *adapter,
6842                          int vf, unsigned char *mac_addr)
6843{
6844	struct e1000_hw *hw = &adapter->hw;
6845	/* VF MAC addresses start at the end of the receive addresses and move
6846	 * towards the first, so a collision should not be possible */
6847	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6848
6849	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6850
6851	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6852
6853	return 0;
6854}
6855
6856static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6857{
6858	struct igb_adapter *adapter = netdev_priv(netdev);
6859	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6860		return -EINVAL;
6861	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6862	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6863	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6864				      " change effective.");
6865	if (test_bit(__IGB_DOWN, &adapter->state)) {
6866		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6867			 " but the PF device is not up.\n");
6868		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6869			 " attempting to use the VF device.\n");
6870	}
6871	return igb_set_vf_mac(adapter, vf, mac);
6872}
6873
6874static int igb_link_mbps(int internal_link_speed)
6875{
6876	switch (internal_link_speed) {
6877	case SPEED_100:
6878		return 100;
6879	case SPEED_1000:
6880		return 1000;
6881	default:
6882		return 0;
6883	}
6884}
6885
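/**
 * igb_set_vf_rate_limit - program the transmit rate limiter for a VF
 * @hw: pointer to the HW structure
 * @vf: VF (and therefore tx queue) index to limit
 * @tx_rate: rate limit in Mbps, 0 to disable limiting
 * @link_speed: current link speed in Mbps, used to compute the rate factor
 **/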
6886static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6887				  int link_speed)
6888{
6889	int rf_dec, rf_int;
6890	u32 bcnrc_val;
6891
6892	if (tx_rate != 0) {
6893		/* Calculate the rate factor values to set */
6894		rf_int = link_speed / tx_rate;
6895		rf_dec = (link_speed - (rf_int * tx_rate));
6896		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6897
6898		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6899		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6900		               E1000_RTTBCNRC_RF_INT_MASK);
6901		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6902	} else {
6903		bcnrc_val = 0;
6904	}
6905
6906	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6907	/*
6908	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
6909	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
6910	 */
6911	wr32(E1000_RTTBCNRM, 0x14);
6912	wr32(E1000_RTTBCNRC, bcnrc_val);
6913}
6914
6915static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6916{
6917	int actual_link_speed, i;
6918	bool reset_rate = false;
6919
6920	/* VF TX rate limit was not set or not supported */
6921	if ((adapter->vf_rate_link_speed == 0) ||
6922	    (adapter->hw.mac.type != e1000_82576))
6923		return;
6924
6925	actual_link_speed = igb_link_mbps(adapter->link_speed);
6926	if (actual_link_speed != adapter->vf_rate_link_speed) {
6927		reset_rate = true;
6928		adapter->vf_rate_link_speed = 0;
6929		dev_info(&adapter->pdev->dev,
6930		         "Link speed has been changed. VF Transmit "
6931		         "rate is disabled\n");
6932	}
6933
6934	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6935		if (reset_rate)
6936			adapter->vf_data[i].tx_rate = 0;
6937
6938		igb_set_vf_rate_limit(&adapter->hw, i,
6939		                      adapter->vf_data[i].tx_rate,
6940		                      actual_link_speed);
6941	}
6942}
6943
6944static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6945{
6946	struct igb_adapter *adapter = netdev_priv(netdev);
6947	struct e1000_hw *hw = &adapter->hw;
6948	int actual_link_speed;
6949
6950	if (hw->mac.type != e1000_82576)
6951		return -EOPNOTSUPP;
6952
6953	actual_link_speed = igb_link_mbps(adapter->link_speed);
6954	if ((vf >= adapter->vfs_allocated_count) ||
6955	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6956	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6957		return -EINVAL;
6958
6959	adapter->vf_rate_link_speed = actual_link_speed;
6960	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6961	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6962
6963	return 0;
6964}
6965
6966static int igb_ndo_get_vf_config(struct net_device *netdev,
6967				 int vf, struct ifla_vf_info *ivi)
6968{
6969	struct igb_adapter *adapter = netdev_priv(netdev);
6970	if (vf >= adapter->vfs_allocated_count)
6971		return -EINVAL;
6972	ivi->vf = vf;
6973	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6974	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6975	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6976	ivi->qos = adapter->vf_data[vf].pf_qos;
6977	return 0;
6978}
6979
6980static void igb_vmm_control(struct igb_adapter *adapter)
6981{
6982	struct e1000_hw *hw = &adapter->hw;
6983	u32 reg;
6984
6985	switch (hw->mac.type) {
6986	case e1000_82575:
6987	case e1000_i210:
6988	case e1000_i211:
6989	default:
6990		/* replication is not supported for 82575 */
6991		return;
6992	case e1000_82576:
6993		/* notify HW that the MAC is adding vlan tags */
6994		reg = rd32(E1000_DTXCTL);
6995		reg |= E1000_DTXCTL_VLAN_ADDED;
6996		wr32(E1000_DTXCTL, reg);
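		/* fall through */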
6997	case e1000_82580:
6998		/* enable replication vlan tag stripping */
6999		reg = rd32(E1000_RPLOLR);
7000		reg |= E1000_RPLOLR_STRVLAN;
7001		wr32(E1000_RPLOLR, reg);
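		/* fall through */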
7002	case e1000_i350:
7003		/* none of the above registers are supported by i350 */
7004		break;
7005	}
7006
7007	if (adapter->vfs_allocated_count) {
7008		igb_vmdq_set_loopback_pf(hw, true);
7009		igb_vmdq_set_replication_pf(hw, true);
7010		igb_vmdq_set_anti_spoofing_pf(hw, true,
7011						adapter->vfs_allocated_count);
7012	} else {
7013		igb_vmdq_set_loopback_pf(hw, false);
7014		igb_vmdq_set_replication_pf(hw, false);
7015	}
7016}
7017
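/**
 * igb_init_dmac - configure DMA coalescing
 * @adapter: board private structure
 * @pba: packet buffer allocation used to derive the coalescing thresholds
 *
 * Enables DMA coalescing on hardware newer than the 82580 when the
 * IGB_FLAG_DMAC flag is set, and explicitly disables it on the 82580.
 **/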
7018static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7019{
7020	struct e1000_hw *hw = &adapter->hw;
7021	u32 dmac_thr;
7022	u16 hwm;
7023
7024	if (hw->mac.type > e1000_82580) {
7025		if (adapter->flags & IGB_FLAG_DMAC) {
7026			u32 reg;
7027
7028			/* force threshold to 0. */
7029			wr32(E1000_DMCTXTH, 0);
7030
7031			/*
7032			 * DMA Coalescing high water mark needs to be greater
7033			 * than the Rx threshold. Set hwm to PBA - max frame
7034			 * size in 16B units, capping it at PBA - 6KB.
7035			 */
7036			hwm = 64 * pba - adapter->max_frame_size / 16;
7037			if (hwm < 64 * (pba - 6))
7038				hwm = 64 * (pba - 6);
7039			reg = rd32(E1000_FCRTC);
7040			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7041			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7042				& E1000_FCRTC_RTH_COAL_MASK);
7043			wr32(E1000_FCRTC, reg);
7044
7045			/*
7046			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7047			 * frame size, capping it at PBA - 10KB.
7048			 */
7049			dmac_thr = pba - adapter->max_frame_size / 512;
7050			if (dmac_thr < pba - 10)
7051				dmac_thr = pba - 10;
7052			reg = rd32(E1000_DMACR);
7053			reg &= ~E1000_DMACR_DMACTHR_MASK;
7054			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7055				& E1000_DMACR_DMACTHR_MASK);
7056
7057			/* transition to L0s or L1 if available */
7058			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7059
7060			/* watchdog timer = 1000 usec, expressed in 32 usec units */
7061			reg |= (1000 >> 5);
7062
7063			/* Disable BMC-to-OS Watchdog Enable */
7064			reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7065			wr32(E1000_DMACR, reg);
7066
7067			/*
7068			 * no lower threshold to disable
7069			 * coalescing (smart FIFO) - UTHRESH=0
7070			 */
7071			wr32(E1000_DMCRTRH, 0);
7072
7073			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7074
7075			wr32(E1000_DMCTLX, reg);
7076
7077			/*
7078			 * free space in tx packet buffer to wake from
7079			 * DMA coal
7080			 */
7081			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7082			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7083
7084			/*
7085			 * make low power state decision controlled
7086			 * by DMA coal
7087			 */
7088			reg = rd32(E1000_PCIEMISC);
7089			reg &= ~E1000_PCIEMISC_LX_DECISION;
7090			wr32(E1000_PCIEMISC, reg);
7091		} /* endif adapter->dmac is not disabled */
7092	} else if (hw->mac.type == e1000_82580) {
7093		u32 reg = rd32(E1000_PCIEMISC);
7094		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7095		wr32(E1000_DMACR, 0);
7096	}
7097}
7098
7099/* igb_main.c */
7100