igb_main.c revision 76886596921dd0e058f7f0a16de6151629390d15
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 4
63#define MIN 0
64#define BUILD 1
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
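/* With the values above, DRV_VERSION expands to the string "4.0.1-k". */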
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
103	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
104	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
105	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
106	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
107	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
108	/* required last entry */
109	{0, }
110};
111
112MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
113
114void igb_reset(struct igb_adapter *);
115static int igb_setup_all_tx_resources(struct igb_adapter *);
116static int igb_setup_all_rx_resources(struct igb_adapter *);
117static void igb_free_all_tx_resources(struct igb_adapter *);
118static void igb_free_all_rx_resources(struct igb_adapter *);
119static void igb_setup_mrqc(struct igb_adapter *);
120static int igb_probe(struct pci_dev *, const struct pci_device_id *);
121static void __devexit igb_remove(struct pci_dev *pdev);
122static int igb_sw_init(struct igb_adapter *);
123static int igb_open(struct net_device *);
124static int igb_close(struct net_device *);
125static void igb_configure_tx(struct igb_adapter *);
126static void igb_configure_rx(struct igb_adapter *);
127static void igb_clean_all_tx_rings(struct igb_adapter *);
128static void igb_clean_all_rx_rings(struct igb_adapter *);
129static void igb_clean_tx_ring(struct igb_ring *);
130static void igb_clean_rx_ring(struct igb_ring *);
131static void igb_set_rx_mode(struct net_device *);
132static void igb_update_phy_info(unsigned long);
133static void igb_watchdog(unsigned long);
134static void igb_watchdog_task(struct work_struct *);
135static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
136static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
137						 struct rtnl_link_stats64 *stats);
138static int igb_change_mtu(struct net_device *, int);
139static int igb_set_mac(struct net_device *, void *);
140static void igb_set_uta(struct igb_adapter *adapter);
141static irqreturn_t igb_intr(int irq, void *);
142static irqreturn_t igb_intr_msi(int irq, void *);
143static irqreturn_t igb_msix_other(int irq, void *);
144static irqreturn_t igb_msix_ring(int irq, void *);
145#ifdef CONFIG_IGB_DCA
146static void igb_update_dca(struct igb_q_vector *);
147static void igb_setup_dca(struct igb_adapter *);
148#endif /* CONFIG_IGB_DCA */
149static int igb_poll(struct napi_struct *, int);
150static bool igb_clean_tx_irq(struct igb_q_vector *);
151static bool igb_clean_rx_irq(struct igb_q_vector *, int);
152static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
153static void igb_tx_timeout(struct net_device *);
154static void igb_reset_task(struct work_struct *);
155static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
156static int igb_vlan_rx_add_vid(struct net_device *, u16);
157static int igb_vlan_rx_kill_vid(struct net_device *, u16);
158static void igb_restore_vlan(struct igb_adapter *);
159static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
160static void igb_ping_all_vfs(struct igb_adapter *);
161static void igb_msg_task(struct igb_adapter *);
162static void igb_vmm_control(struct igb_adapter *);
163static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
164static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
165static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
166static int igb_ndo_set_vf_vlan(struct net_device *netdev,
167			       int vf, u16 vlan, u8 qos);
168static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
169static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
170				 struct ifla_vf_info *ivi);
171static void igb_check_vf_rate_limit(struct igb_adapter *);
172
173#ifdef CONFIG_PCI_IOV
174static int igb_vf_configure(struct igb_adapter *adapter, int vf);
175static int igb_find_enabled_vfs(struct igb_adapter *adapter);
176static int igb_check_vf_assignment(struct igb_adapter *adapter);
177#endif
178
179#ifdef CONFIG_PM
180#ifdef CONFIG_PM_SLEEP
181static int igb_suspend(struct device *);
182#endif
183static int igb_resume(struct device *);
184#ifdef CONFIG_PM_RUNTIME
185static int igb_runtime_suspend(struct device *dev);
186static int igb_runtime_resume(struct device *dev);
187static int igb_runtime_idle(struct device *dev);
188#endif
189static const struct dev_pm_ops igb_pm_ops = {
190	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
191	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
192			igb_runtime_idle)
193};
194#endif
195static void igb_shutdown(struct pci_dev *);
196#ifdef CONFIG_IGB_DCA
197static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
198static struct notifier_block dca_notifier = {
199	.notifier_call	= igb_notify_dca,
200	.next		= NULL,
201	.priority	= 0
202};
203#endif
204#ifdef CONFIG_NET_POLL_CONTROLLER
205/* for netdump / net console */
206static void igb_netpoll(struct net_device *);
207#endif
208#ifdef CONFIG_PCI_IOV
209static unsigned int max_vfs = 0;
210module_param(max_vfs, uint, 0);
211MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
212                 "per physical function");
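/*
 * Illustrative usage only (not taken from this file): loading the module
 * with "modprobe igb max_vfs=2" requests two VFs per port; the value is
 * still subject to the per-device limit (e.g. 7 VFs per PF on 82576).
 */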
213#endif /* CONFIG_PCI_IOV */
214
215static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
216		     pci_channel_state_t);
217static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
218static void igb_io_resume(struct pci_dev *);
219
220static struct pci_error_handlers igb_err_handler = {
221	.error_detected = igb_io_error_detected,
222	.slot_reset = igb_io_slot_reset,
223	.resume = igb_io_resume,
224};
225
226static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
227
228static struct pci_driver igb_driver = {
229	.name     = igb_driver_name,
230	.id_table = igb_pci_tbl,
231	.probe    = igb_probe,
232	.remove   = __devexit_p(igb_remove),
233#ifdef CONFIG_PM
234	.driver.pm = &igb_pm_ops,
235#endif
236	.shutdown = igb_shutdown,
237	.err_handler = &igb_err_handler
238};
239
240MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
241MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
242MODULE_LICENSE("GPL");
243MODULE_VERSION(DRV_VERSION);
244
245#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
246static int debug = -1;
247module_param(debug, int, 0);
248MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
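/*
 * Illustrative usage only: the debug value is normally handed to
 * netif_msg_init() at probe time, so "modprobe igb debug=16" enables all
 * message categories while the default of -1 falls back to
 * DEFAULT_MSG_ENABLE above.
 */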
249
250struct igb_reg_info {
251	u32 ofs;
252	char *name;
253};
254
255static const struct igb_reg_info igb_reg_info_tbl[] = {
256
257	/* General Registers */
258	{E1000_CTRL, "CTRL"},
259	{E1000_STATUS, "STATUS"},
260	{E1000_CTRL_EXT, "CTRL_EXT"},
261
262	/* Interrupt Registers */
263	{E1000_ICR, "ICR"},
264
265	/* RX Registers */
266	{E1000_RCTL, "RCTL"},
267	{E1000_RDLEN(0), "RDLEN"},
268	{E1000_RDH(0), "RDH"},
269	{E1000_RDT(0), "RDT"},
270	{E1000_RXDCTL(0), "RXDCTL"},
271	{E1000_RDBAL(0), "RDBAL"},
272	{E1000_RDBAH(0), "RDBAH"},
273
274	/* TX Registers */
275	{E1000_TCTL, "TCTL"},
276	{E1000_TDBAL(0), "TDBAL"},
277	{E1000_TDBAH(0), "TDBAH"},
278	{E1000_TDLEN(0), "TDLEN"},
279	{E1000_TDH(0), "TDH"},
280	{E1000_TDT(0), "TDT"},
281	{E1000_TXDCTL(0), "TXDCTL"},
282	{E1000_TDFH, "TDFH"},
283	{E1000_TDFT, "TDFT"},
284	{E1000_TDFHS, "TDFHS"},
285	{E1000_TDFPC, "TDFPC"},
286
287	/* List Terminator */
288	{}
289};
290
291/*
292 * igb_regdump - register printout routine
293 */
294static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
295{
296	int n = 0;
297	char rname[16];
298	u32 regs[8];
299
300	switch (reginfo->ofs) {
301	case E1000_RDLEN(0):
302		for (n = 0; n < 4; n++)
303			regs[n] = rd32(E1000_RDLEN(n));
304		break;
305	case E1000_RDH(0):
306		for (n = 0; n < 4; n++)
307			regs[n] = rd32(E1000_RDH(n));
308		break;
309	case E1000_RDT(0):
310		for (n = 0; n < 4; n++)
311			regs[n] = rd32(E1000_RDT(n));
312		break;
313	case E1000_RXDCTL(0):
314		for (n = 0; n < 4; n++)
315			regs[n] = rd32(E1000_RXDCTL(n));
316		break;
317	case E1000_RDBAL(0):
318		for (n = 0; n < 4; n++)
319			regs[n] = rd32(E1000_RDBAL(n));
320		break;
321	case E1000_RDBAH(0):
322		for (n = 0; n < 4; n++)
323			regs[n] = rd32(E1000_RDBAH(n));
324		break;
325	case E1000_TDBAL(0):
326		for (n = 0; n < 4; n++)
327			regs[n] = rd32(E1000_TDBAL(n));
328		break;
329	case E1000_TDBAH(0):
330		for (n = 0; n < 4; n++)
331			regs[n] = rd32(E1000_TDBAH(n));
332		break;
333	case E1000_TDLEN(0):
334		for (n = 0; n < 4; n++)
335			regs[n] = rd32(E1000_TDLEN(n));
336		break;
337	case E1000_TDH(0):
338		for (n = 0; n < 4; n++)
339			regs[n] = rd32(E1000_TDH(n));
340		break;
341	case E1000_TDT(0):
342		for (n = 0; n < 4; n++)
343			regs[n] = rd32(E1000_TDT(n));
344		break;
345	case E1000_TXDCTL(0):
346		for (n = 0; n < 4; n++)
347			regs[n] = rd32(E1000_TXDCTL(n));
348		break;
349	default:
350		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
351		return;
352	}
353
354	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
355	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
356		regs[2], regs[3]);
357}
358
359/*
360 * igb_dump - Print registers, tx-rings and rx-rings
361 */
362static void igb_dump(struct igb_adapter *adapter)
363{
364	struct net_device *netdev = adapter->netdev;
365	struct e1000_hw *hw = &adapter->hw;
366	struct igb_reg_info *reginfo;
367	struct igb_ring *tx_ring;
368	union e1000_adv_tx_desc *tx_desc;
369	struct my_u0 { u64 a; u64 b; } *u0;
370	struct igb_ring *rx_ring;
371	union e1000_adv_rx_desc *rx_desc;
372	u32 staterr;
373	u16 i, n;
374
375	if (!netif_msg_hw(adapter))
376		return;
377
378	/* Print netdevice Info */
379	if (netdev) {
380		dev_info(&adapter->pdev->dev, "Net device Info\n");
381		pr_info("Device Name     state            trans_start      "
382			"last_rx\n");
383		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
384			netdev->state, netdev->trans_start, netdev->last_rx);
385	}
386
387	/* Print Registers */
388	dev_info(&adapter->pdev->dev, "Register Dump\n");
389	pr_info(" Register Name   Value\n");
390	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
391	     reginfo->name; reginfo++) {
392		igb_regdump(hw, reginfo);
393	}
394
395	/* Print TX Ring Summary */
396	if (!netdev || !netif_running(netdev))
397		goto exit;
398
399	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
400	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
401	for (n = 0; n < adapter->num_tx_queues; n++) {
402		struct igb_tx_buffer *buffer_info;
403		tx_ring = adapter->tx_ring[n];
404		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
405		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
406			n, tx_ring->next_to_use, tx_ring->next_to_clean,
407			(u64)buffer_info->dma,
408			buffer_info->length,
409			buffer_info->next_to_watch,
410			(u64)buffer_info->time_stamp);
411	}
412
413	/* Print TX Rings */
414	if (!netif_msg_tx_done(adapter))
415		goto rx_ring_summary;
416
417	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
418
419	/* Transmit Descriptor Formats
420	 *
421	 * Advanced Transmit Descriptor
422	 *   +--------------------------------------------------------------+
423	 * 0 |         Buffer Address [63:0]                                |
424	 *   +--------------------------------------------------------------+
425	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
426	 *   +--------------------------------------------------------------+
427	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
428	 */
429
430	for (n = 0; n < adapter->num_tx_queues; n++) {
431		tx_ring = adapter->tx_ring[n];
432		pr_info("------------------------------------\n");
433		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
434		pr_info("------------------------------------\n");
435		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
436			"[bi->dma       ] leng  ntw timestamp        "
437			"bi->skb\n");
438
439		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
440			const char *next_desc;
441			struct igb_tx_buffer *buffer_info;
442			tx_desc = IGB_TX_DESC(tx_ring, i);
443			buffer_info = &tx_ring->tx_buffer_info[i];
444			u0 = (struct my_u0 *)tx_desc;
445			if (i == tx_ring->next_to_use &&
446			    i == tx_ring->next_to_clean)
447				next_desc = " NTC/U";
448			else if (i == tx_ring->next_to_use)
449				next_desc = " NTU";
450			else if (i == tx_ring->next_to_clean)
451				next_desc = " NTC";
452			else
453				next_desc = "";
454
455			pr_info("T [0x%03X]    %016llX %016llX %016llX"
456				" %04X  %p %016llX %p%s\n", i,
457				le64_to_cpu(u0->a),
458				le64_to_cpu(u0->b),
459				(u64)buffer_info->dma,
460				buffer_info->length,
461				buffer_info->next_to_watch,
462				(u64)buffer_info->time_stamp,
463				buffer_info->skb, next_desc);
464
465			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
466				print_hex_dump(KERN_INFO, "",
467					DUMP_PREFIX_ADDRESS,
468					16, 1, phys_to_virt(buffer_info->dma),
469					buffer_info->length, true);
470		}
471	}
472
473	/* Print RX Rings Summary */
474rx_ring_summary:
475	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
476	pr_info("Queue [NTU] [NTC]\n");
477	for (n = 0; n < adapter->num_rx_queues; n++) {
478		rx_ring = adapter->rx_ring[n];
479		pr_info(" %5d %5X %5X\n",
480			n, rx_ring->next_to_use, rx_ring->next_to_clean);
481	}
482
483	/* Print RX Rings */
484	if (!netif_msg_rx_status(adapter))
485		goto exit;
486
487	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
488
489	/* Advanced Receive Descriptor (Read) Format
490	 *    63                                           1        0
491	 *    +-----------------------------------------------------+
492	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
493	 *    +----------------------------------------------+------+
494	 *  8 |       Header Buffer Address [63:1]           |  DD  |
495	 *    +-----------------------------------------------------+
496	 *
497	 *
498	 * Advanced Receive Descriptor (Write-Back) Format
499	 *
500	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
501	 *   +------------------------------------------------------+
502	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
503	 *   | Checksum   Ident  |   |           |    | Type | Type |
504	 *   +------------------------------------------------------+
505	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
506	 *   +------------------------------------------------------+
507	 *   63       48 47    32 31            20 19               0
508	 */
509
510	for (n = 0; n < adapter->num_rx_queues; n++) {
511		rx_ring = adapter->rx_ring[n];
512		pr_info("------------------------------------\n");
513		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
514		pr_info("------------------------------------\n");
515		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
516			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
517		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
518			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
519
520		for (i = 0; i < rx_ring->count; i++) {
521			const char *next_desc;
522			struct igb_rx_buffer *buffer_info;
523			buffer_info = &rx_ring->rx_buffer_info[i];
524			rx_desc = IGB_RX_DESC(rx_ring, i);
525			u0 = (struct my_u0 *)rx_desc;
526			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
527
528			if (i == rx_ring->next_to_use)
529				next_desc = " NTU";
530			else if (i == rx_ring->next_to_clean)
531				next_desc = " NTC";
532			else
533				next_desc = "";
534
535			if (staterr & E1000_RXD_STAT_DD) {
536				/* Descriptor Done */
537				pr_info("%s[0x%03X]     %016llX %016llX -------"
538					"--------- %p%s\n", "RWB", i,
539					le64_to_cpu(u0->a),
540					le64_to_cpu(u0->b),
541					buffer_info->skb, next_desc);
542			} else {
543				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
544					" %p%s\n", "R  ", i,
545					le64_to_cpu(u0->a),
546					le64_to_cpu(u0->b),
547					(u64)buffer_info->dma,
548					buffer_info->skb, next_desc);
549
550				if (netif_msg_pktdata(adapter)) {
551					print_hex_dump(KERN_INFO, "",
552						DUMP_PREFIX_ADDRESS,
553						16, 1,
554						phys_to_virt(buffer_info->dma),
555						IGB_RX_HDR_LEN, true);
556					print_hex_dump(KERN_INFO, "",
557					  DUMP_PREFIX_ADDRESS,
558					  16, 1,
559					  phys_to_virt(
560					    buffer_info->page_dma +
561					    buffer_info->page_offset),
562					  PAGE_SIZE/2, true);
563				}
564			}
565		}
566	}
567
568exit:
569	return;
570}
571
572/**
573 * igb_get_hw_dev - return device
574 * used by hardware layer to print debugging information
575 **/
576struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577{
578	struct igb_adapter *adapter = hw->back;
579	return adapter->netdev;
580}
581
582/**
583 * igb_init_module - Driver Registration Routine
584 *
585 * igb_init_module is the first routine called when the driver is
586 * loaded. All it does is register with the PCI subsystem.
587 **/
588static int __init igb_init_module(void)
589{
590	int ret;
591	pr_info("%s - version %s\n",
592	       igb_driver_string, igb_driver_version);
593
594	pr_info("%s\n", igb_copyright);
595
596#ifdef CONFIG_IGB_DCA
597	dca_register_notify(&dca_notifier);
598#endif
599	ret = pci_register_driver(&igb_driver);
600	return ret;
601}
602
603module_init(igb_init_module);
604
605/**
606 * igb_exit_module - Driver Exit Cleanup Routine
607 *
608 * igb_exit_module is called just before the driver is removed
609 * from memory.
610 **/
611static void __exit igb_exit_module(void)
612{
613#ifdef CONFIG_IGB_DCA
614	dca_unregister_notify(&dca_notifier);
615#endif
616	pci_unregister_driver(&igb_driver);
617}
618
619module_exit(igb_exit_module);
620
621#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
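/*
 * Illustrative expansion of the macro above for consecutive indices:
 *   Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1,
 *   Q_IDX_82576(3) = 9, ...
 * i.e. rings alternate between the lower (0-7) and upper (8-15) halves
 * of the 82576 queue register space.
 */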
622/**
623 * igb_cache_ring_register - Descriptor ring to register mapping
624 * @adapter: board private structure to initialize
625 *
626 * Once we know the feature-set enabled for the device, we'll cache
627 * the register offset the descriptor ring is assigned to.
628 **/
629static void igb_cache_ring_register(struct igb_adapter *adapter)
630{
631	int i = 0, j = 0;
632	u32 rbase_offset = adapter->vfs_allocated_count;
633
634	switch (adapter->hw.mac.type) {
635	case e1000_82576:
636		/* The queues are allocated for virtualization such that VF 0
637		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638		 * In order to avoid collision we start at the first free queue
639		 * and continue consuming queues in the same sequence
640		 */
641		if (adapter->vfs_allocated_count) {
642			for (; i < adapter->rss_queues; i++)
643				adapter->rx_ring[i]->reg_idx = rbase_offset +
644				                               Q_IDX_82576(i);
645		}
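		/* fall through - remaining rings use the default mapping below */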
646	case e1000_82575:
647	case e1000_82580:
648	case e1000_i350:
649	case e1000_i210:
650	case e1000_i211:
651	default:
652		for (; i < adapter->num_rx_queues; i++)
653			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654		for (; j < adapter->num_tx_queues; j++)
655			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
656		break;
657	}
658}
659
660static void igb_free_queues(struct igb_adapter *adapter)
661{
662	int i;
663
664	for (i = 0; i < adapter->num_tx_queues; i++) {
665		kfree(adapter->tx_ring[i]);
666		adapter->tx_ring[i] = NULL;
667	}
668	for (i = 0; i < adapter->num_rx_queues; i++) {
669		kfree(adapter->rx_ring[i]);
670		adapter->rx_ring[i] = NULL;
671	}
672	adapter->num_rx_queues = 0;
673	adapter->num_tx_queues = 0;
674}
675
676/**
677 * igb_alloc_queues - Allocate memory for all rings
678 * @adapter: board private structure to initialize
679 *
680 * We allocate one ring per queue at run-time since we don't know the
681 * number of queues at compile-time.
682 **/
683static int igb_alloc_queues(struct igb_adapter *adapter)
684{
685	struct igb_ring *ring;
686	int i;
687	int orig_node = adapter->node;
688
689	for (i = 0; i < adapter->num_tx_queues; i++) {
690		if (orig_node == -1) {
691			int cur_node = next_online_node(adapter->node);
692			if (cur_node == MAX_NUMNODES)
693				cur_node = first_online_node;
694			adapter->node = cur_node;
695		}
696		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
697				    adapter->node);
698		if (!ring)
699			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
700		if (!ring)
701			goto err;
702		ring->count = adapter->tx_ring_count;
703		ring->queue_index = i;
704		ring->dev = &adapter->pdev->dev;
705		ring->netdev = adapter->netdev;
706		ring->numa_node = adapter->node;
707		/* For 82575, context index must be unique per ring. */
708		if (adapter->hw.mac.type == e1000_82575)
709			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
710		adapter->tx_ring[i] = ring;
711	}
712	/* Restore the adapter's original node */
713	adapter->node = orig_node;
714
715	for (i = 0; i < adapter->num_rx_queues; i++) {
716		if (orig_node == -1) {
717			int cur_node = next_online_node(adapter->node);
718			if (cur_node == MAX_NUMNODES)
719				cur_node = first_online_node;
720			adapter->node = cur_node;
721		}
722		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
723				    adapter->node);
724		if (!ring)
725			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
726		if (!ring)
727			goto err;
728		ring->count = adapter->rx_ring_count;
729		ring->queue_index = i;
730		ring->dev = &adapter->pdev->dev;
731		ring->netdev = adapter->netdev;
732		ring->numa_node = adapter->node;
733		/* set flag indicating ring supports SCTP checksum offload */
734		if (adapter->hw.mac.type >= e1000_82576)
735			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
736
737		/*
738		 * On i350, i210, and i211, loopback VLAN packets
739		 * have the tag byte-swapped.
740		 */
741		if (adapter->hw.mac.type >= e1000_i350)
742			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
743
744		adapter->rx_ring[i] = ring;
745	}
746	/* Restore the adapter's original node */
747	adapter->node = orig_node;
748
749	igb_cache_ring_register(adapter);
750
751	return 0;
752
753err:
754	/* Restore the adapter's original node */
755	adapter->node = orig_node;
756	igb_free_queues(adapter);
757
758	return -ENOMEM;
759}
760
761/**
762 *  igb_write_ivar - configure ivar for given MSI-X vector
763 *  @hw: pointer to the HW structure
764 *  @msix_vector: vector number we are allocating to a given ring
765 *  @index: row index of IVAR register to write within IVAR table
766 *  @offset: column offset in IVAR, should be a multiple of 8
767 *
768 *  This function is intended to handle the writing of the IVAR register
769 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
770 *  each containing a cause allocation for an Rx and Tx ring, and a
771 *  variable number of rows depending on the number of queues supported.
772 **/
773static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
774			   int index, int offset)
775{
776	u32 ivar = array_rd32(E1000_IVAR0, index);
777
778	/* clear any bits that are currently set */
779	ivar &= ~((u32)0xFF << offset);
780
781	/* write vector and valid bit */
782	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
783
784	array_wr32(E1000_IVAR0, index, ivar);
785}
786
787#define IGB_N0_QUEUE -1
788static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
789{
790	struct igb_adapter *adapter = q_vector->adapter;
791	struct e1000_hw *hw = &adapter->hw;
792	int rx_queue = IGB_N0_QUEUE;
793	int tx_queue = IGB_N0_QUEUE;
794	u32 msixbm = 0;
795
796	if (q_vector->rx.ring)
797		rx_queue = q_vector->rx.ring->reg_idx;
798	if (q_vector->tx.ring)
799		tx_queue = q_vector->tx.ring->reg_idx;
800
801	switch (hw->mac.type) {
802	case e1000_82575:
803		/* The 82575 assigns vectors using a bitmask, which matches the
804		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
805		   or more queues to a vector, we write the appropriate bits
806		   into the MSIXBM register for that vector. */
807		if (rx_queue > IGB_N0_QUEUE)
808			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
809		if (tx_queue > IGB_N0_QUEUE)
810			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
811		if (!adapter->msix_entries && msix_vector == 0)
812			msixbm |= E1000_EIMS_OTHER;
813		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
814		q_vector->eims_value = msixbm;
815		break;
816	case e1000_82576:
817		/*
818		 * 82576 uses a table that essentially consists of 2 columns
819		 * with 8 rows.  The ordering is column-major so we use the
820		 * lower 3 bits as the row index, and the 4th bit as the
821		 * column offset.
822		 */
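		/*
		 * Worked example (illustration only): rx_queue 10 maps to
		 * IVAR row 10 & 0x7 = 2 with column offset (10 & 0x8) << 1 = 16,
		 * i.e. the third byte of IVAR(2).
		 */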
823		if (rx_queue > IGB_N0_QUEUE)
824			igb_write_ivar(hw, msix_vector,
825				       rx_queue & 0x7,
826				       (rx_queue & 0x8) << 1);
827		if (tx_queue > IGB_N0_QUEUE)
828			igb_write_ivar(hw, msix_vector,
829				       tx_queue & 0x7,
830				       ((tx_queue & 0x8) << 1) + 8);
831		q_vector->eims_value = 1 << msix_vector;
832		break;
833	case e1000_82580:
834	case e1000_i350:
835	case e1000_i210:
836	case e1000_i211:
837		/*
838		 * On 82580 and newer adapters the scheme is similar to 82576
839		 * however the table is ordered row-major rather than
840		 * column-major.  So we traverse the table by using
841		 * bit 0 as the column offset, and the remaining bits as the
842		 * row index.
843		 */
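		/*
		 * Worked example (illustration only): rx_queue 5 maps to
		 * IVAR row 5 >> 1 = 2 with column offset (5 & 0x1) << 4 = 16;
		 * the matching tx_queue 5 lands at offset 16 + 8 = 24.
		 */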
844		if (rx_queue > IGB_N0_QUEUE)
845			igb_write_ivar(hw, msix_vector,
846				       rx_queue >> 1,
847				       (rx_queue & 0x1) << 4);
848		if (tx_queue > IGB_N0_QUEUE)
849			igb_write_ivar(hw, msix_vector,
850				       tx_queue >> 1,
851				       ((tx_queue & 0x1) << 4) + 8);
852		q_vector->eims_value = 1 << msix_vector;
853		break;
854	default:
855		BUG();
856		break;
857	}
858
859	/* add q_vector eims value to global eims_enable_mask */
860	adapter->eims_enable_mask |= q_vector->eims_value;
861
862	/* configure q_vector to set itr on first interrupt */
863	q_vector->set_itr = 1;
864}
865
866/**
867 * igb_configure_msix - Configure MSI-X hardware
868 *
869 * igb_configure_msix sets up the hardware to properly
870 * generate MSI-X interrupts.
871 **/
872static void igb_configure_msix(struct igb_adapter *adapter)
873{
874	u32 tmp;
875	int i, vector = 0;
876	struct e1000_hw *hw = &adapter->hw;
877
878	adapter->eims_enable_mask = 0;
879
880	/* set vector for other causes, i.e. link changes */
881	switch (hw->mac.type) {
882	case e1000_82575:
883		tmp = rd32(E1000_CTRL_EXT);
884		/* enable MSI-X PBA support*/
885		tmp |= E1000_CTRL_EXT_PBA_CLR;
886
887		/* Auto-Mask interrupts upon ICR read. */
888		tmp |= E1000_CTRL_EXT_EIAME;
889		tmp |= E1000_CTRL_EXT_IRCA;
890
891		wr32(E1000_CTRL_EXT, tmp);
892
893		/* enable msix_other interrupt */
894		array_wr32(E1000_MSIXBM(0), vector++,
895		                      E1000_EIMS_OTHER);
896		adapter->eims_other = E1000_EIMS_OTHER;
897
898		break;
899
900	case e1000_82576:
901	case e1000_82580:
902	case e1000_i350:
903	case e1000_i210:
904	case e1000_i211:
905		/* Turn on MSI-X capability first, or our settings
906		 * won't stick.  And it will take days to debug. */
907		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
908		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
909		                E1000_GPIE_NSICR);
910
911		/* enable msix_other interrupt */
912		adapter->eims_other = 1 << vector;
913		tmp = (vector++ | E1000_IVAR_VALID) << 8;
914
915		wr32(E1000_IVAR_MISC, tmp);
916		break;
917	default:
918		/* do nothing, since nothing else supports MSI-X */
919		break;
920	} /* switch (hw->mac.type) */
921
922	adapter->eims_enable_mask |= adapter->eims_other;
923
924	for (i = 0; i < adapter->num_q_vectors; i++)
925		igb_assign_vector(adapter->q_vector[i], vector++);
926
927	wrfl();
928}
929
930/**
931 * igb_request_msix - Initialize MSI-X interrupts
932 *
933 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
934 * kernel.
935 **/
936static int igb_request_msix(struct igb_adapter *adapter)
937{
938	struct net_device *netdev = adapter->netdev;
939	struct e1000_hw *hw = &adapter->hw;
940	int i, err = 0, vector = 0;
941
942	err = request_irq(adapter->msix_entries[vector].vector,
943	                  igb_msix_other, 0, netdev->name, adapter);
944	if (err)
945		goto out;
946	vector++;
947
948	for (i = 0; i < adapter->num_q_vectors; i++) {
949		struct igb_q_vector *q_vector = adapter->q_vector[i];
950
951		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
952
953		if (q_vector->rx.ring && q_vector->tx.ring)
954			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
955				q_vector->rx.ring->queue_index);
956		else if (q_vector->tx.ring)
957			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
958				q_vector->tx.ring->queue_index);
959		else if (q_vector->rx.ring)
960			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
961				q_vector->rx.ring->queue_index);
962		else
963			sprintf(q_vector->name, "%s-unused", netdev->name);
964
965		err = request_irq(adapter->msix_entries[vector].vector,
966		                  igb_msix_ring, 0, q_vector->name,
967		                  q_vector);
968		if (err)
969			goto out;
970		vector++;
971	}
972
973	igb_configure_msix(adapter);
974	return 0;
975out:
976	return err;
977}
978
979static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
980{
981	if (adapter->msix_entries) {
982		pci_disable_msix(adapter->pdev);
983		kfree(adapter->msix_entries);
984		adapter->msix_entries = NULL;
985	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
986		pci_disable_msi(adapter->pdev);
987	}
988}
989
990/**
991 * igb_free_q_vectors - Free memory allocated for interrupt vectors
992 * @adapter: board private structure to initialize
993 *
994 * This function frees the memory allocated to the q_vectors.  In addition if
995 * NAPI is enabled it will delete any references to the NAPI struct prior
996 * to freeing the q_vector.
997 **/
998static void igb_free_q_vectors(struct igb_adapter *adapter)
999{
1000	int v_idx;
1001
1002	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1003		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1004		adapter->q_vector[v_idx] = NULL;
1005		if (!q_vector)
1006			continue;
1007		netif_napi_del(&q_vector->napi);
1008		kfree(q_vector);
1009	}
1010	adapter->num_q_vectors = 0;
1011}
1012
1013/**
1014 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1015 *
1016 * This function resets the device so that it has 0 rx queues, tx queues, and
1017 * MSI-X interrupts allocated.
1018 */
1019static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1020{
1021	igb_free_queues(adapter);
1022	igb_free_q_vectors(adapter);
1023	igb_reset_interrupt_capability(adapter);
1024}
1025
1026/**
1027 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1028 *
1029 * Attempt to configure interrupts using the best available
1030 * capabilities of the hardware and kernel.
1031 **/
1032static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1033{
1034	int err;
1035	int numvecs, i;
1036
1037	/* Number of supported queues. */
1038	adapter->num_rx_queues = adapter->rss_queues;
1039	if (adapter->vfs_allocated_count)
1040		adapter->num_tx_queues = 1;
1041	else
1042		adapter->num_tx_queues = adapter->rss_queues;
1043
1044	/* start with one vector for every rx queue */
1045	numvecs = adapter->num_rx_queues;
1046
1047	/* if tx handler is separate add 1 for every tx queue */
1048	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1049		numvecs += adapter->num_tx_queues;
1050
1051	/* store the number of vectors reserved for queues */
1052	adapter->num_q_vectors = numvecs;
1053
1054	/* add 1 vector for link status interrupts */
1055	numvecs++;
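	/*
	 * Example (illustration only): with rss_queues = 4 and queue pairs
	 * enabled this requests 4 queue vectors plus the link vector
	 * (numvecs = 5); with pairs disabled it would be 4 + 4 + 1 = 9.
	 */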
1056	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1057					GFP_KERNEL);
1058
1059	if (!adapter->msix_entries)
1060		goto msi_only;
1061
1062	for (i = 0; i < numvecs; i++)
1063		adapter->msix_entries[i].entry = i;
1064
1065	err = pci_enable_msix(adapter->pdev,
1066			      adapter->msix_entries,
1067			      numvecs);
1068	if (err == 0)
1069		goto out;
1070
1071	igb_reset_interrupt_capability(adapter);
1072
1073	/* If we can't do MSI-X, try MSI */
1074msi_only:
1075#ifdef CONFIG_PCI_IOV
1076	/* disable SR-IOV for non MSI-X configurations */
1077	if (adapter->vf_data) {
1078		struct e1000_hw *hw = &adapter->hw;
1079		/* disable iov and allow time for transactions to clear */
1080		pci_disable_sriov(adapter->pdev);
1081		msleep(500);
1082
1083		kfree(adapter->vf_data);
1084		adapter->vf_data = NULL;
1085		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1086		wrfl();
1087		msleep(100);
1088		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1089	}
1090#endif
1091	adapter->vfs_allocated_count = 0;
1092	adapter->rss_queues = 1;
1093	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1094	adapter->num_rx_queues = 1;
1095	adapter->num_tx_queues = 1;
1096	adapter->num_q_vectors = 1;
1097	if (!pci_enable_msi(adapter->pdev))
1098		adapter->flags |= IGB_FLAG_HAS_MSI;
1099out:
1100	/* Notify the stack of the (possibly) reduced queue counts. */
1101	rtnl_lock();
1102	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1103	err = netif_set_real_num_rx_queues(adapter->netdev,
1104		adapter->num_rx_queues);
1105	rtnl_unlock();
1106	return err;
1107}
1108
1109/**
1110 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1111 * @adapter: board private structure to initialize
1112 *
1113 * We allocate one q_vector per queue interrupt.  If allocation fails we
1114 * return -ENOMEM.
1115 **/
1116static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1117{
1118	struct igb_q_vector *q_vector;
1119	struct e1000_hw *hw = &adapter->hw;
1120	int v_idx;
1121	int orig_node = adapter->node;
1122
1123	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1124		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1125						adapter->num_tx_queues)) &&
1126		    (adapter->num_rx_queues == v_idx))
1127			adapter->node = orig_node;
1128		if (orig_node == -1) {
1129			int cur_node = next_online_node(adapter->node);
1130			if (cur_node == MAX_NUMNODES)
1131				cur_node = first_online_node;
1132			adapter->node = cur_node;
1133		}
1134		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1135					adapter->node);
1136		if (!q_vector)
1137			q_vector = kzalloc(sizeof(struct igb_q_vector),
1138					   GFP_KERNEL);
1139		if (!q_vector)
1140			goto err_out;
1141		q_vector->adapter = adapter;
1142		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1143		q_vector->itr_val = IGB_START_ITR;
1144		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1145		adapter->q_vector[v_idx] = q_vector;
1146	}
1147	/* Restore the adapter's original node */
1148	adapter->node = orig_node;
1149
1150	return 0;
1151
1152err_out:
1153	/* Restore the adapter's original node */
1154	adapter->node = orig_node;
1155	igb_free_q_vectors(adapter);
1156	return -ENOMEM;
1157}
1158
1159static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1160                                      int ring_idx, int v_idx)
1161{
1162	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1163
1164	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1165	q_vector->rx.ring->q_vector = q_vector;
1166	q_vector->rx.count++;
1167	q_vector->itr_val = adapter->rx_itr_setting;
1168	if (q_vector->itr_val && q_vector->itr_val <= 3)
1169		q_vector->itr_val = IGB_START_ITR;
1170}
1171
1172static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1173                                      int ring_idx, int v_idx)
1174{
1175	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1176
1177	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1178	q_vector->tx.ring->q_vector = q_vector;
1179	q_vector->tx.count++;
1180	q_vector->itr_val = adapter->tx_itr_setting;
1181	q_vector->tx.work_limit = adapter->tx_work_limit;
1182	if (q_vector->itr_val && q_vector->itr_val <= 3)
1183		q_vector->itr_val = IGB_START_ITR;
1184}
1185
1186/**
1187 * igb_map_ring_to_vector - maps allocated queues to vectors
1188 *
1189 * This function maps the recently allocated queues to vectors.
1190 **/
1191static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1192{
1193	int i;
1194	int v_idx = 0;
1195
1196	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1197	    (adapter->num_q_vectors < adapter->num_tx_queues))
1198		return -ENOMEM;
1199
1200	if (adapter->num_q_vectors >=
1201	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1202		for (i = 0; i < adapter->num_rx_queues; i++)
1203			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1204		for (i = 0; i < adapter->num_tx_queues; i++)
1205			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1206	} else {
1207		for (i = 0; i < adapter->num_rx_queues; i++) {
1208			if (i < adapter->num_tx_queues)
1209				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1210			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1211		}
1212		for (; i < adapter->num_tx_queues; i++)
1213			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1214	}
1215	return 0;
1216}
1217
1218/**
1219 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1220 *
1221 * This function initializes the interrupts and allocates all of the queues.
1222 **/
1223static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1224{
1225	struct pci_dev *pdev = adapter->pdev;
1226	int err;
1227
1228	err = igb_set_interrupt_capability(adapter);
1229	if (err)
1230		return err;
1231
1232	err = igb_alloc_q_vectors(adapter);
1233	if (err) {
1234		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1235		goto err_alloc_q_vectors;
1236	}
1237
1238	err = igb_alloc_queues(adapter);
1239	if (err) {
1240		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1241		goto err_alloc_queues;
1242	}
1243
1244	err = igb_map_ring_to_vector(adapter);
1245	if (err) {
1246		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1247		goto err_map_queues;
1248	}
1249
1250
1251	return 0;
1252err_map_queues:
1253	igb_free_queues(adapter);
1254err_alloc_queues:
1255	igb_free_q_vectors(adapter);
1256err_alloc_q_vectors:
1257	igb_reset_interrupt_capability(adapter);
1258	return err;
1259}
1260
1261/**
1262 * igb_request_irq - initialize interrupts
1263 *
1264 * Attempts to configure interrupts using the best available
1265 * capabilities of the hardware and kernel.
1266 **/
1267static int igb_request_irq(struct igb_adapter *adapter)
1268{
1269	struct net_device *netdev = adapter->netdev;
1270	struct pci_dev *pdev = adapter->pdev;
1271	int err = 0;
1272
1273	if (adapter->msix_entries) {
1274		err = igb_request_msix(adapter);
1275		if (!err)
1276			goto request_done;
1277		/* fall back to MSI */
1278		igb_clear_interrupt_scheme(adapter);
1279		if (!pci_enable_msi(pdev))
1280			adapter->flags |= IGB_FLAG_HAS_MSI;
1281		igb_free_all_tx_resources(adapter);
1282		igb_free_all_rx_resources(adapter);
1283		adapter->num_tx_queues = 1;
1284		adapter->num_rx_queues = 1;
1285		adapter->num_q_vectors = 1;
1286		err = igb_alloc_q_vectors(adapter);
1287		if (err) {
1288			dev_err(&pdev->dev,
1289			        "Unable to allocate memory for vectors\n");
1290			goto request_done;
1291		}
1292		err = igb_alloc_queues(adapter);
1293		if (err) {
1294			dev_err(&pdev->dev,
1295			        "Unable to allocate memory for queues\n");
1296			igb_free_q_vectors(adapter);
1297			goto request_done;
1298		}
1299		igb_setup_all_tx_resources(adapter);
1300		igb_setup_all_rx_resources(adapter);
1301	}
1302
1303	igb_assign_vector(adapter->q_vector[0], 0);
1304
1305	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1306		err = request_irq(pdev->irq, igb_intr_msi, 0,
1307				  netdev->name, adapter);
1308		if (!err)
1309			goto request_done;
1310
1311		/* fall back to legacy interrupts */
1312		igb_reset_interrupt_capability(adapter);
1313		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1314	}
1315
1316	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1317			  netdev->name, adapter);
1318
1319	if (err)
1320		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1321			err);
1322
1323request_done:
1324	return err;
1325}
1326
1327static void igb_free_irq(struct igb_adapter *adapter)
1328{
1329	if (adapter->msix_entries) {
1330		int vector = 0, i;
1331
1332		free_irq(adapter->msix_entries[vector++].vector, adapter);
1333
1334		for (i = 0; i < adapter->num_q_vectors; i++)
1335			free_irq(adapter->msix_entries[vector++].vector,
1336				 adapter->q_vector[i]);
1337	} else {
1338		free_irq(adapter->pdev->irq, adapter);
1339	}
1340}
1341
1342/**
1343 * igb_irq_disable - Mask off interrupt generation on the NIC
1344 * @adapter: board private structure
1345 **/
1346static void igb_irq_disable(struct igb_adapter *adapter)
1347{
1348	struct e1000_hw *hw = &adapter->hw;
1349
1350	/*
1351	 * we need to be careful when disabling interrupts.  The VFs are also
1352	 * mapped into these registers, and clearing the bits can cause
1353	 * issues for the VF drivers, so we only clear what we set
1354	 */
1355	if (adapter->msix_entries) {
1356		u32 regval = rd32(E1000_EIAM);
1357		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1358		wr32(E1000_EIMC, adapter->eims_enable_mask);
1359		regval = rd32(E1000_EIAC);
1360		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1361	}
1362
1363	wr32(E1000_IAM, 0);
1364	wr32(E1000_IMC, ~0);
1365	wrfl();
1366	if (adapter->msix_entries) {
1367		int i;
1368		for (i = 0; i < adapter->num_q_vectors; i++)
1369			synchronize_irq(adapter->msix_entries[i].vector);
1370	} else {
1371		synchronize_irq(adapter->pdev->irq);
1372	}
1373}
1374
1375/**
1376 * igb_irq_enable - Enable default interrupt generation settings
1377 * @adapter: board private structure
1378 **/
1379static void igb_irq_enable(struct igb_adapter *adapter)
1380{
1381	struct e1000_hw *hw = &adapter->hw;
1382
1383	if (adapter->msix_entries) {
1384		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1385		u32 regval = rd32(E1000_EIAC);
1386		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1387		regval = rd32(E1000_EIAM);
1388		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1389		wr32(E1000_EIMS, adapter->eims_enable_mask);
1390		if (adapter->vfs_allocated_count) {
1391			wr32(E1000_MBVFIMR, 0xFF);
1392			ims |= E1000_IMS_VMMB;
1393		}
1394		wr32(E1000_IMS, ims);
1395	} else {
1396		wr32(E1000_IMS, IMS_ENABLE_MASK |
1397				E1000_IMS_DRSTA);
1398		wr32(E1000_IAM, IMS_ENABLE_MASK |
1399				E1000_IMS_DRSTA);
1400	}
1401}
1402
1403static void igb_update_mng_vlan(struct igb_adapter *adapter)
1404{
1405	struct e1000_hw *hw = &adapter->hw;
1406	u16 vid = adapter->hw.mng_cookie.vlan_id;
1407	u16 old_vid = adapter->mng_vlan_id;
1408
1409	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1410		/* add VID to filter table */
1411		igb_vfta_set(hw, vid, true);
1412		adapter->mng_vlan_id = vid;
1413	} else {
1414		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1415	}
1416
1417	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1418	    (vid != old_vid) &&
1419	    !test_bit(old_vid, adapter->active_vlans)) {
1420		/* remove VID from filter table */
1421		igb_vfta_set(hw, old_vid, false);
1422	}
1423}
1424
1425/**
1426 * igb_release_hw_control - release control of the h/w to f/w
1427 * @adapter: address of board private structure
1428 *
1429 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1430 * For ASF and Pass Through versions of f/w this means that the
1431 * driver is no longer loaded.
1432 *
1433 **/
1434static void igb_release_hw_control(struct igb_adapter *adapter)
1435{
1436	struct e1000_hw *hw = &adapter->hw;
1437	u32 ctrl_ext;
1438
1439	/* Let firmware take over control of h/w */
1440	ctrl_ext = rd32(E1000_CTRL_EXT);
1441	wr32(E1000_CTRL_EXT,
1442			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1443}
1444
1445/**
1446 * igb_get_hw_control - get control of the h/w from f/w
1447 * @adapter: address of board private structure
1448 *
1449 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1450 * For ASF and Pass Through versions of f/w this means that
1451 * the driver is loaded.
1452 *
1453 **/
1454static void igb_get_hw_control(struct igb_adapter *adapter)
1455{
1456	struct e1000_hw *hw = &adapter->hw;
1457	u32 ctrl_ext;
1458
1459	/* Let firmware know the driver has taken over */
1460	ctrl_ext = rd32(E1000_CTRL_EXT);
1461	wr32(E1000_CTRL_EXT,
1462			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1463}
1464
1465/**
1466 * igb_configure - configure the hardware for RX and TX
1467 * @adapter: private board structure
1468 **/
1469static void igb_configure(struct igb_adapter *adapter)
1470{
1471	struct net_device *netdev = adapter->netdev;
1472	int i;
1473
1474	igb_get_hw_control(adapter);
1475	igb_set_rx_mode(netdev);
1476
1477	igb_restore_vlan(adapter);
1478
1479	igb_setup_tctl(adapter);
1480	igb_setup_mrqc(adapter);
1481	igb_setup_rctl(adapter);
1482
1483	igb_configure_tx(adapter);
1484	igb_configure_rx(adapter);
1485
1486	igb_rx_fifo_flush_82575(&adapter->hw);
1487
1488	/* call igb_desc_unused which always leaves
1489	 * at least 1 descriptor unused to make sure
1490	 * next_to_use != next_to_clean */
1491	for (i = 0; i < adapter->num_rx_queues; i++) {
1492		struct igb_ring *ring = adapter->rx_ring[i];
1493		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1494	}
1495}
1496
1497/**
1498 * igb_power_up_link - Power up the phy/serdes link
1499 * @adapter: address of board private structure
1500 **/
1501void igb_power_up_link(struct igb_adapter *adapter)
1502{
1503	igb_reset_phy(&adapter->hw);
1504
1505	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1506		igb_power_up_phy_copper(&adapter->hw);
1507	else
1508		igb_power_up_serdes_link_82575(&adapter->hw);
1509}
1510
1511/**
1512 * igb_power_down_link - Power down the phy/serdes link
1513 * @adapter: address of board private structure
1514 */
1515static void igb_power_down_link(struct igb_adapter *adapter)
1516{
1517	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1518		igb_power_down_phy_copper_82575(&adapter->hw);
1519	else
1520		igb_shutdown_serdes_link_82575(&adapter->hw);
1521}
1522
1523/**
1524 * igb_up - Open the interface and prepare it to handle traffic
1525 * @adapter: board private structure
1526 **/
1527int igb_up(struct igb_adapter *adapter)
1528{
1529	struct e1000_hw *hw = &adapter->hw;
1530	int i;
1531
1532	/* hardware has been reset, we need to reload some things */
1533	igb_configure(adapter);
1534
1535	clear_bit(__IGB_DOWN, &adapter->state);
1536
1537	for (i = 0; i < adapter->num_q_vectors; i++)
1538		napi_enable(&(adapter->q_vector[i]->napi));
1539
1540	if (adapter->msix_entries)
1541		igb_configure_msix(adapter);
1542	else
1543		igb_assign_vector(adapter->q_vector[0], 0);
1544
1545	/* Clear any pending interrupts. */
1546	rd32(E1000_ICR);
1547	igb_irq_enable(adapter);
1548
1549	/* notify VFs that reset has been completed */
1550	if (adapter->vfs_allocated_count) {
1551		u32 reg_data = rd32(E1000_CTRL_EXT);
1552		reg_data |= E1000_CTRL_EXT_PFRSTD;
1553		wr32(E1000_CTRL_EXT, reg_data);
1554	}
1555
1556	netif_tx_start_all_queues(adapter->netdev);
1557
1558	/* start the watchdog. */
1559	hw->mac.get_link_status = 1;
1560	schedule_work(&adapter->watchdog_task);
1561
1562	return 0;
1563}
1564
1565void igb_down(struct igb_adapter *adapter)
1566{
1567	struct net_device *netdev = adapter->netdev;
1568	struct e1000_hw *hw = &adapter->hw;
1569	u32 tctl, rctl;
1570	int i;
1571
1572	/* signal that we're down so the interrupt handler does not
1573	 * reschedule our watchdog timer */
1574	set_bit(__IGB_DOWN, &adapter->state);
1575
1576	/* disable receives in the hardware */
1577	rctl = rd32(E1000_RCTL);
1578	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1579	/* flush and sleep below */
1580
1581	netif_tx_stop_all_queues(netdev);
1582
1583	/* disable transmits in the hardware */
1584	tctl = rd32(E1000_TCTL);
1585	tctl &= ~E1000_TCTL_EN;
1586	wr32(E1000_TCTL, tctl);
1587	/* flush both disables and wait for them to finish */
1588	wrfl();
1589	msleep(10);
1590
1591	for (i = 0; i < adapter->num_q_vectors; i++)
1592		napi_disable(&(adapter->q_vector[i]->napi));
1593
1594	igb_irq_disable(adapter);
1595
1596	del_timer_sync(&adapter->watchdog_timer);
1597	del_timer_sync(&adapter->phy_info_timer);
1598
1599	netif_carrier_off(netdev);
1600
1601	/* record the stats before reset */
1602	spin_lock(&adapter->stats64_lock);
1603	igb_update_stats(adapter, &adapter->stats64);
1604	spin_unlock(&adapter->stats64_lock);
1605
1606	adapter->link_speed = 0;
1607	adapter->link_duplex = 0;
1608
1609	if (!pci_channel_offline(adapter->pdev))
1610		igb_reset(adapter);
1611	igb_clean_all_tx_rings(adapter);
1612	igb_clean_all_rx_rings(adapter);
1613#ifdef CONFIG_IGB_DCA
1614
1615	/* since we reset the hardware DCA settings were cleared */
1616	igb_setup_dca(adapter);
1617#endif
1618}
1619
1620void igb_reinit_locked(struct igb_adapter *adapter)
1621{
1622	WARN_ON(in_interrupt());
1623	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1624		msleep(1);
1625	igb_down(adapter);
1626	igb_up(adapter);
1627	clear_bit(__IGB_RESETTING, &adapter->state);
1628}
1629
1630void igb_reset(struct igb_adapter *adapter)
1631{
1632	struct pci_dev *pdev = adapter->pdev;
1633	struct e1000_hw *hw = &adapter->hw;
1634	struct e1000_mac_info *mac = &hw->mac;
1635	struct e1000_fc_info *fc = &hw->fc;
1636	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1637	u16 hwm;
1638
1639	/* Repartition PBA for MTUs greater than 9k.
1640	 * To take effect, CTRL.RST is required.
1641	 */
1642	switch (mac->type) {
1643	case e1000_i350:
1644	case e1000_82580:
1645		pba = rd32(E1000_RXPBS);
1646		pba = igb_rxpbs_adjust_82580(pba);
1647		break;
1648	case e1000_82576:
1649		pba = rd32(E1000_RXPBS);
1650		pba &= E1000_RXPBS_SIZE_MASK_82576;
1651		break;
1652	case e1000_82575:
1653	case e1000_i210:
1654	case e1000_i211:
1655	default:
1656		pba = E1000_PBA_34K;
1657		break;
1658	}
1659
1660	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1661	    (mac->type < e1000_82576)) {
1662		/* adjust PBA for jumbo frames */
1663		wr32(E1000_PBA, pba);
1664
1665		/* To maintain wire speed transmits, the Tx FIFO should be
1666		 * large enough to accommodate two full transmit packets,
1667		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1668		 * the Rx FIFO should be large enough to accommodate at least
1669		 * one full receive packet and is similarly rounded up and
1670		 * expressed in KB. */
1671		pba = rd32(E1000_PBA);
1672		/* upper 16 bits has Tx packet buffer allocation size in KB */
1673		tx_space = pba >> 16;
1674		/* lower 16 bits has Rx packet buffer allocation size in KB */
1675		pba &= 0xffff;
1676		/* the Tx FIFO also stores 16 bytes of information per packet,
1677		 * but don't include the Ethernet FCS because hardware appends it */
1678		min_tx_space = (adapter->max_frame_size +
1679				sizeof(union e1000_adv_tx_desc) -
1680				ETH_FCS_LEN) * 2;
1681		min_tx_space = ALIGN(min_tx_space, 1024);
1682		min_tx_space >>= 10;
1683		/* software strips receive CRC, so leave room for it */
1684		min_rx_space = adapter->max_frame_size;
1685		min_rx_space = ALIGN(min_rx_space, 1024);
1686		min_rx_space >>= 10;
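		/*
		 * Worked example (illustrative numbers only): for a 9018-byte
		 * max frame, min_tx_space = (9018 + 16 - 4) * 2 = 18060 bytes,
		 * which rounds up and shifts down to 18 KB, while min_rx_space
		 * rounds 9018 up to 9216 bytes, i.e. 9 KB.
		 */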
1687
1688		/* If current Tx allocation is less than the min Tx FIFO size,
1689		 * and the min Tx FIFO size is less than the current Rx FIFO
1690		 * allocation, take space away from current Rx allocation */
1691		if (tx_space < min_tx_space &&
1692		    ((min_tx_space - tx_space) < pba)) {
1693			pba = pba - (min_tx_space - tx_space);
1694
1695			/* if short on rx space, rx wins and must trump tx
1696			 * adjustment */
1697			if (pba < min_rx_space)
1698				pba = min_rx_space;
1699		}
1700		wr32(E1000_PBA, pba);
1701	}
1702
1703	/* flow control settings */
1704	/* The high water mark must be low enough to fit one full frame
1705	 * (or the size used for early receive) above it in the Rx FIFO.
1706	 * Set it to the lower of:
1707	 * - 90% of the Rx FIFO size, or
1708	 * - the full Rx FIFO size minus two full frames */
1709	hwm = min(((pba << 10) * 9 / 10),
1710			((pba << 10) - 2 * adapter->max_frame_size));
1711
1712	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1713	fc->low_water = fc->high_water - 16;
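	/*
	 * Worked example (illustrative numbers only): with pba = 34 KB and a
	 * 1522-byte max frame, hwm = min(34816 * 9 / 10, 34816 - 2 * 1522) =
	 * min(31334, 31772) = 31334; masking to 16-byte granularity gives
	 * high_water = 31328 and low_water = 31312.
	 */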
1714	fc->pause_time = 0xFFFF;
1715	fc->send_xon = 1;
1716	fc->current_mode = fc->requested_mode;
1717
1718	/* disable receive for all VFs and wait one second */
1719	if (adapter->vfs_allocated_count) {
1720		int i;
1721		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1722			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1723
1724		/* ping all the active vfs to let them know we are going down */
1725		igb_ping_all_vfs(adapter);
1726
1727		/* disable transmits and receives */
1728		wr32(E1000_VFRE, 0);
1729		wr32(E1000_VFTE, 0);
1730	}
1731
1732	/* Allow time for pending master requests to run */
1733	hw->mac.ops.reset_hw(hw);
1734	wr32(E1000_WUC, 0);
1735
1736	if (hw->mac.ops.init_hw(hw))
1737		dev_err(&pdev->dev, "Hardware Error\n");
1738
1739	/*
1740	 * Flow control settings reset on hardware reset, so guarantee flow
1741	 * control is off when forcing speed.
1742	 */
1743	if (!hw->mac.autoneg)
1744		igb_force_mac_fc(hw);
1745
1746	igb_init_dmac(adapter, pba);
1747	if (!netif_running(adapter->netdev))
1748		igb_power_down_link(adapter);
1749
1750	igb_update_mng_vlan(adapter);
1751
1752	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1753	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1754
1755	igb_get_phy_info(hw);
1756}
1757
1758static netdev_features_t igb_fix_features(struct net_device *netdev,
1759	netdev_features_t features)
1760{
1761	/*
1762	 * Since there is no support for separate Rx/Tx VLAN accel
1763	 * enable/disable, make sure the Tx flag always matches the Rx flag.
1764	 */
1765	if (features & NETIF_F_HW_VLAN_RX)
1766		features |= NETIF_F_HW_VLAN_TX;
1767	else
1768		features &= ~NETIF_F_HW_VLAN_TX;
1769
1770	return features;
1771}
1772
1773static int igb_set_features(struct net_device *netdev,
1774	netdev_features_t features)
1775{
1776	netdev_features_t changed = netdev->features ^ features;
1777	struct igb_adapter *adapter = netdev_priv(netdev);
1778
1779	if (changed & NETIF_F_HW_VLAN_RX)
1780		igb_vlan_mode(netdev, features);
1781
1782	if (!(changed & NETIF_F_RXALL))
1783		return 0;
1784
1785	netdev->features = features;
1786
1787	if (netif_running(netdev))
1788		igb_reinit_locked(adapter);
1789	else
1790		igb_reset(adapter);
1791
1792	return 0;
1793}
1794
1795static const struct net_device_ops igb_netdev_ops = {
1796	.ndo_open		= igb_open,
1797	.ndo_stop		= igb_close,
1798	.ndo_start_xmit		= igb_xmit_frame,
1799	.ndo_get_stats64	= igb_get_stats64,
1800	.ndo_set_rx_mode	= igb_set_rx_mode,
1801	.ndo_set_mac_address	= igb_set_mac,
1802	.ndo_change_mtu		= igb_change_mtu,
1803	.ndo_do_ioctl		= igb_ioctl,
1804	.ndo_tx_timeout		= igb_tx_timeout,
1805	.ndo_validate_addr	= eth_validate_addr,
1806	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1807	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1808	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1809	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1810	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1811	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1812#ifdef CONFIG_NET_POLL_CONTROLLER
1813	.ndo_poll_controller	= igb_netpoll,
1814#endif
1815	.ndo_fix_features	= igb_fix_features,
1816	.ndo_set_features	= igb_set_features,
1817};
1818
1819/**
1820 * igb_set_fw_version - Configure version string for ethtool
1821 * @adapter: adapter struct
1822 *
1823 **/
1824void igb_set_fw_version(struct igb_adapter *adapter)
1825{
1826	struct e1000_hw *hw = &adapter->hw;
1827	u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset;
1828	u16 major, build, patch, fw_version;
1829	u32 etrack_id;
1830
1831	hw->nvm.ops.read(hw, 5, 1, &fw_version);
1832	if (adapter->hw.mac.type != e1000_i211) {
1833		hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verh);
1834		hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verl);
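		/* combine the two NVM words into the eTrack ID, high word in the upper bits */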
1835		etrack_id = (eeprom_verh << IGB_ETRACK_SHIFT) | eeprom_verl;
1836
1837		/* combo image version needs to be found */
1838		hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
1839		if ((comb_offset != 0x0) &&
1840		    (comb_offset != IGB_NVM_VER_INVALID)) {
1841			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
1842					 + 1), 1, &comb_verh);
1843			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
1844					 1, &comb_verl);
1845
1846			/* Only display Option ROM if it exists and is valid */
1847			if ((comb_verh && comb_verl) &&
1848			    ((comb_verh != IGB_NVM_VER_INVALID) &&
1849			     (comb_verl != IGB_NVM_VER_INVALID))) {
1850				major = comb_verl >> IGB_COMB_VER_SHFT;
1851				build = (comb_verl << IGB_COMB_VER_SHFT) |
1852					(comb_verh >> IGB_COMB_VER_SHFT);
1853				patch = comb_verh & IGB_COMB_VER_MASK;
1854				snprintf(adapter->fw_version,
1855					 sizeof(adapter->fw_version),
1856					 "%d.%d%d, 0x%08x, %d.%d.%d",
1857					 (fw_version & IGB_MAJOR_MASK) >>
1858					 IGB_MAJOR_SHIFT,
1859					 (fw_version & IGB_MINOR_MASK) >>
1860					 IGB_MINOR_SHIFT,
1861					 (fw_version & IGB_BUILD_MASK),
1862					 etrack_id, major, build, patch);
1863				goto out;
1864			}
1865		}
1866		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1867			 "%d.%d%d, 0x%08x",
1868			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1869			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1870			 (fw_version & IGB_BUILD_MASK), etrack_id);
1871	} else {
1872		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1873			 "%d.%d%d",
1874			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1875			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1876			 (fw_version & IGB_BUILD_MASK));
1877	}
1878out:
1879	return;
1880}
1881
1882/**
1883 * igb_probe - Device Initialization Routine
1884 * @pdev: PCI device information struct
1885 * @ent: entry in igb_pci_tbl
1886 *
1887 * Returns 0 on success, negative on failure
1888 *
1889 * igb_probe initializes an adapter identified by a pci_dev structure.
1890 * The OS initialization, configuring of the adapter private structure,
1891 * and a hardware reset occur.
1892 **/
1893static int __devinit igb_probe(struct pci_dev *pdev,
1894			       const struct pci_device_id *ent)
1895{
1896	struct net_device *netdev;
1897	struct igb_adapter *adapter;
1898	struct e1000_hw *hw;
1899	u16 eeprom_data = 0;
1900	s32 ret_val;
1901	static int global_quad_port_a; /* global quad port a indication */
1902	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1903	unsigned long mmio_start, mmio_len;
1904	int err, pci_using_dac;
1905	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1906	u8 part_str[E1000_PBANUM_LENGTH];
1907
1908	/* Catch broken hardware that put the wrong VF device ID in
1909	 * the PCIe SR-IOV capability.
1910	 */
1911	if (pdev->is_virtfn) {
1912		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1913			pci_name(pdev), pdev->vendor, pdev->device);
1914		return -EINVAL;
1915	}
1916
1917	err = pci_enable_device_mem(pdev);
1918	if (err)
1919		return err;
1920
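	/* try 64-bit DMA first and fall back to a 32-bit mask if the platform can't support it */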
1921	pci_using_dac = 0;
1922	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1923	if (!err) {
1924		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1925		if (!err)
1926			pci_using_dac = 1;
1927	} else {
1928		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1929		if (err) {
1930			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1931			if (err) {
1932				dev_err(&pdev->dev, "No usable DMA "
1933					"configuration, aborting\n");
1934				goto err_dma;
1935			}
1936		}
1937	}
1938
1939	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1940	                                   IORESOURCE_MEM),
1941	                                   igb_driver_name);
1942	if (err)
1943		goto err_pci_reg;
1944
1945	pci_enable_pcie_error_reporting(pdev);
1946
1947	pci_set_master(pdev);
1948	pci_save_state(pdev);
1949
1950	err = -ENOMEM;
1951	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1952				   IGB_MAX_TX_QUEUES);
1953	if (!netdev)
1954		goto err_alloc_etherdev;
1955
1956	SET_NETDEV_DEV(netdev, &pdev->dev);
1957
1958	pci_set_drvdata(pdev, netdev);
1959	adapter = netdev_priv(netdev);
1960	adapter->netdev = netdev;
1961	adapter->pdev = pdev;
1962	hw = &adapter->hw;
1963	hw->back = adapter;
1964	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1965
1966	mmio_start = pci_resource_start(pdev, 0);
1967	mmio_len = pci_resource_len(pdev, 0);
1968
1969	err = -EIO;
1970	hw->hw_addr = ioremap(mmio_start, mmio_len);
1971	if (!hw->hw_addr)
1972		goto err_ioremap;
1973
1974	netdev->netdev_ops = &igb_netdev_ops;
1975	igb_set_ethtool_ops(netdev);
1976	netdev->watchdog_timeo = 5 * HZ;
1977
1978	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1979
1980	netdev->mem_start = mmio_start;
1981	netdev->mem_end = mmio_start + mmio_len;
1982
1983	/* PCI config space info */
1984	hw->vendor_id = pdev->vendor;
1985	hw->device_id = pdev->device;
1986	hw->revision_id = pdev->revision;
1987	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1988	hw->subsystem_device_id = pdev->subsystem_device;
1989
1990	/* Copy the default MAC, PHY and NVM function pointers */
1991	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1992	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1993	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1994	/* Initialize skew-specific constants */
1995	err = ei->get_invariants(hw);
1996	if (err)
1997		goto err_sw_init;
1998
1999	/* setup the private structure */
2000	err = igb_sw_init(adapter);
2001	if (err)
2002		goto err_sw_init;
2003
2004	igb_get_bus_info_pcie(hw);
2005
2006	hw->phy.autoneg_wait_to_complete = false;
2007
2008	/* Copper options */
2009	if (hw->phy.media_type == e1000_media_type_copper) {
2010		hw->phy.mdix = AUTO_ALL_MODES;
2011		hw->phy.disable_polarity_correction = false;
2012		hw->phy.ms_type = e1000_ms_hw_default;
2013	}
2014
2015	if (igb_check_reset_block(hw))
2016		dev_info(&pdev->dev,
2017			"PHY reset is blocked due to SOL/IDER session.\n");
2018
2019	/*
2020	 * features is initialized to 0 on allocation; it might have bits
2021	 * set by igb_sw_init, so we should use an OR instead of an
2022	 * assignment.
2023	 */
2024	netdev->features |= NETIF_F_SG |
2025			    NETIF_F_IP_CSUM |
2026			    NETIF_F_IPV6_CSUM |
2027			    NETIF_F_TSO |
2028			    NETIF_F_TSO6 |
2029			    NETIF_F_RXHASH |
2030			    NETIF_F_RXCSUM |
2031			    NETIF_F_HW_VLAN_RX |
2032			    NETIF_F_HW_VLAN_TX;
2033
2034	/* copy netdev features into list of user selectable features */
2035	netdev->hw_features |= netdev->features;
2036	netdev->hw_features |= NETIF_F_RXALL;
2037
2038	/* set this bit last since it cannot be part of hw_features */
2039	netdev->features |= NETIF_F_HW_VLAN_FILTER;
2040
2041	netdev->vlan_features |= NETIF_F_TSO |
2042				 NETIF_F_TSO6 |
2043				 NETIF_F_IP_CSUM |
2044				 NETIF_F_IPV6_CSUM |
2045				 NETIF_F_SG;
2046
2047	netdev->priv_flags |= IFF_SUPP_NOFCS;
2048
2049	if (pci_using_dac) {
2050		netdev->features |= NETIF_F_HIGHDMA;
2051		netdev->vlan_features |= NETIF_F_HIGHDMA;
2052	}
2053
2054	if (hw->mac.type >= e1000_82576) {
2055		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2056		netdev->features |= NETIF_F_SCTP_CSUM;
2057	}
2058
2059	netdev->priv_flags |= IFF_UNICAST_FLT;
2060
2061	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2062
2063	/* before reading the NVM, reset the controller to put the device in a
2064	 * known good starting state */
2065	hw->mac.ops.reset_hw(hw);
2066
2067	/*
2068	 * make sure the NVM is good; i211 parts have special NVM that
2069	 * doesn't contain a checksum
2070	 */
2071	if (hw->mac.type != e1000_i211) {
2072		if (hw->nvm.ops.validate(hw) < 0) {
2073			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2074			err = -EIO;
2075			goto err_eeprom;
2076		}
2077	}
2078
2079	/* copy the MAC address out of the NVM */
2080	if (hw->mac.ops.read_mac_addr(hw))
2081		dev_err(&pdev->dev, "NVM Read Error\n");
2082
2083	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2084	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2085
2086	if (!is_valid_ether_addr(netdev->perm_addr)) {
2087		dev_err(&pdev->dev, "Invalid MAC Address\n");
2088		err = -EIO;
2089		goto err_eeprom;
2090	}
2091
2092	/* get firmware version for ethtool -i */
2093	igb_set_fw_version(adapter);
2094
2095	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2096	            (unsigned long) adapter);
2097	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2098	            (unsigned long) adapter);
2099
2100	INIT_WORK(&adapter->reset_task, igb_reset_task);
2101	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2102
2103	/* Initialize link properties that are user-changeable */
2104	adapter->fc_autoneg = true;
2105	hw->mac.autoneg = true;
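	/* 0x2f advertises 10/100 half and full duplex plus 1000 Mb/s full duplex */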
2106	hw->phy.autoneg_advertised = 0x2f;
2107
2108	hw->fc.requested_mode = e1000_fc_default;
2109	hw->fc.current_mode = e1000_fc_default;
2110
2111	igb_validate_mdi_setting(hw);
2112
2113	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2114	 * enable the ACPI Magic Packet filter
2115	 */
2116
2117	if (hw->bus.func == 0)
2118		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2119	else if (hw->mac.type >= e1000_82580)
2120		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2121		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2122		                 &eeprom_data);
2123	else if (hw->bus.func == 1)
2124		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2125
2126	if (eeprom_data & eeprom_apme_mask)
2127		adapter->eeprom_wol |= E1000_WUFC_MAG;
2128
2129	/* now that we have the eeprom settings, apply the special cases where
2130	 * the eeprom may be wrong or the board simply won't support wake on
2131	 * lan on a particular port */
2132	switch (pdev->device) {
2133	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2134		adapter->eeprom_wol = 0;
2135		break;
2136	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2137	case E1000_DEV_ID_82576_FIBER:
2138	case E1000_DEV_ID_82576_SERDES:
2139		/* Wake events are only supported on port A for dual fiber
2140		 * parts, regardless of the EEPROM setting */
2141		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2142			adapter->eeprom_wol = 0;
2143		break;
2144	case E1000_DEV_ID_82576_QUAD_COPPER:
2145	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2146		/* if quad port adapter, disable WoL on all but port A */
2147		if (global_quad_port_a != 0)
2148			adapter->eeprom_wol = 0;
2149		else
2150			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2151		/* Reset for multiple quad port adapters */
2152		if (++global_quad_port_a == 4)
2153			global_quad_port_a = 0;
2154		break;
2155	}
2156
2157	/* initialize the wol settings based on the eeprom settings */
2158	adapter->wol = adapter->eeprom_wol;
2159	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2160
2161	/* reset the hardware with the new settings */
2162	igb_reset(adapter);
2163
2164	/* let the f/w know that the h/w is now under the control of the
2165	 * driver. */
2166	igb_get_hw_control(adapter);
2167
2168	strcpy(netdev->name, "eth%d");
2169	err = register_netdev(netdev);
2170	if (err)
2171		goto err_register;
2172
2173	/* carrier off reporting is important to ethtool even BEFORE open */
2174	netif_carrier_off(netdev);
2175
2176#ifdef CONFIG_IGB_DCA
2177	if (dca_add_requester(&pdev->dev) == 0) {
2178		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2179		dev_info(&pdev->dev, "DCA enabled\n");
2180		igb_setup_dca(adapter);
2181	}
2182
2183#endif
2184#ifdef CONFIG_IGB_PTP
2185	/* do hw tstamp init after resetting */
2186	igb_ptp_init(adapter);
2187
2188#endif
2189	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2190	/* print bus type/speed/width info */
2191	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2192		 netdev->name,
2193		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2194		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2195		                                            "unknown"),
2196		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2197		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2198		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2199		   "unknown"),
2200		 netdev->dev_addr);
2201
2202	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2203	if (ret_val)
2204		strcpy(part_str, "Unknown");
2205	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2206	dev_info(&pdev->dev,
2207		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2208		adapter->msix_entries ? "MSI-X" :
2209		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2210		adapter->num_rx_queues, adapter->num_tx_queues);
2211	switch (hw->mac.type) {
2212	case e1000_i350:
2213	case e1000_i210:
2214	case e1000_i211:
2215		igb_set_eee_i350(hw);
2216		break;
2217	default:
2218		break;
2219	}
2220
2221	pm_runtime_put_noidle(&pdev->dev);
2222	return 0;
2223
2224err_register:
2225	igb_release_hw_control(adapter);
2226err_eeprom:
2227	if (!igb_check_reset_block(hw))
2228		igb_reset_phy(hw);
2229
2230	if (hw->flash_address)
2231		iounmap(hw->flash_address);
2232err_sw_init:
2233	igb_clear_interrupt_scheme(adapter);
2234	iounmap(hw->hw_addr);
2235err_ioremap:
2236	free_netdev(netdev);
2237err_alloc_etherdev:
2238	pci_release_selected_regions(pdev,
2239	                             pci_select_bars(pdev, IORESOURCE_MEM));
2240err_pci_reg:
2241err_dma:
2242	pci_disable_device(pdev);
2243	return err;
2244}
2245
2246/**
2247 * igb_remove - Device Removal Routine
2248 * @pdev: PCI device information struct
2249 *
2250 * igb_remove is called by the PCI subsystem to alert the driver
2251 * that it should release a PCI device.  This could be caused by a
2252 * Hot-Plug event, or because the driver is going to be removed from
2253 * memory.
2254 **/
2255static void __devexit igb_remove(struct pci_dev *pdev)
2256{
2257	struct net_device *netdev = pci_get_drvdata(pdev);
2258	struct igb_adapter *adapter = netdev_priv(netdev);
2259	struct e1000_hw *hw = &adapter->hw;
2260
2261	pm_runtime_get_noresume(&pdev->dev);
2262#ifdef CONFIG_IGB_PTP
2263	igb_ptp_remove(adapter);
2264
2265#endif
2266	/*
2267	 * The watchdog timer may be rescheduled, so explicitly
2268	 * prevent it from being rescheduled.
2269	 */
2270	set_bit(__IGB_DOWN, &adapter->state);
2271	del_timer_sync(&adapter->watchdog_timer);
2272	del_timer_sync(&adapter->phy_info_timer);
2273
2274	cancel_work_sync(&adapter->reset_task);
2275	cancel_work_sync(&adapter->watchdog_task);
2276
2277#ifdef CONFIG_IGB_DCA
2278	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2279		dev_info(&pdev->dev, "DCA disabled\n");
2280		dca_remove_requester(&pdev->dev);
2281		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2282		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2283	}
2284#endif
2285
2286	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2287	 * would have already happened in close and is redundant. */
2288	igb_release_hw_control(adapter);
2289
2290	unregister_netdev(netdev);
2291
2292	igb_clear_interrupt_scheme(adapter);
2293
2294#ifdef CONFIG_PCI_IOV
2295	/* reclaim resources allocated to VFs */
2296	if (adapter->vf_data) {
2297		/* disable iov and allow time for transactions to clear */
2298		if (!igb_check_vf_assignment(adapter)) {
2299			pci_disable_sriov(pdev);
2300			msleep(500);
2301		} else {
2302			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2303		}
2304
2305		kfree(adapter->vf_data);
2306		adapter->vf_data = NULL;
2307		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2308		wrfl();
2309		msleep(100);
2310		dev_info(&pdev->dev, "IOV Disabled\n");
2311	}
2312#endif
2313
2314	iounmap(hw->hw_addr);
2315	if (hw->flash_address)
2316		iounmap(hw->flash_address);
2317	pci_release_selected_regions(pdev,
2318	                             pci_select_bars(pdev, IORESOURCE_MEM));
2319
2320	kfree(adapter->shadow_vfta);
2321	free_netdev(netdev);
2322
2323	pci_disable_pcie_error_reporting(pdev);
2324
2325	pci_disable_device(pdev);
2326}
2327
2328/**
2329 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2330 * @adapter: board private structure to initialize
2331 *
2332 * This function initializes the vf specific data storage and then attempts to
2333 * allocate the VFs.  The reason for ordering it this way is because it is much
2334 * allocate the VFs.  The ordering matters because it is much more
2335 * expensive time-wise to disable SR-IOV than it is to allocate and free
2336 **/
2337static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2338{
2339#ifdef CONFIG_PCI_IOV
2340	struct pci_dev *pdev = adapter->pdev;
2341	struct e1000_hw *hw = &adapter->hw;
2342	int old_vfs = igb_find_enabled_vfs(adapter);
2343	int i;
2344
2345	/* Virtualization features not supported on i210 family. */
2346	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2347		return;
2348
2349	if (old_vfs) {
2350		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2351			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2352		adapter->vfs_allocated_count = old_vfs;
2353	}
2354
2355	if (!adapter->vfs_allocated_count)
2356		return;
2357
2358	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2359				sizeof(struct vf_data_storage), GFP_KERNEL);
2360
2361	/* if allocation failed then we do not support SR-IOV */
2362	if (!adapter->vf_data) {
2363		adapter->vfs_allocated_count = 0;
2364		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2365			"Data Storage\n");
2366		goto out;
2367	}
2368
2369	if (!old_vfs) {
2370		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2371			goto err_out;
2372	}
2373	dev_info(&pdev->dev, "%d VFs allocated\n",
2374		 adapter->vfs_allocated_count);
2375	for (i = 0; i < adapter->vfs_allocated_count; i++)
2376		igb_vf_configure(adapter, i);
2377
2378	/* DMA Coalescing is not supported in IOV mode. */
2379	adapter->flags &= ~IGB_FLAG_DMAC;
2380	goto out;
2381err_out:
2382	kfree(adapter->vf_data);
2383	adapter->vf_data = NULL;
2384	adapter->vfs_allocated_count = 0;
2385out:
2386	return;
2387#endif /* CONFIG_PCI_IOV */
2388}
2389
2390/**
2391 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2392 * @adapter: board private structure to initialize
2393 *
2394 * igb_sw_init initializes the Adapter private data structure.
2395 * Fields are initialized based on PCI device information and
2396 * OS network device settings (MTU size).
2397 **/
2398static int __devinit igb_sw_init(struct igb_adapter *adapter)
2399{
2400	struct e1000_hw *hw = &adapter->hw;
2401	struct net_device *netdev = adapter->netdev;
2402	struct pci_dev *pdev = adapter->pdev;
2403	u32 max_rss_queues;
2404
2405	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2406
2407	/* set default ring sizes */
2408	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2409	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2410
2411	/* set default ITR values */
2412	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2413	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2414
2415	/* set default work limits */
2416	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2417
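	/* worst-case frame: MTU plus Ethernet header, FCS and a single VLAN tag */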
2418	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2419				  VLAN_HLEN;
2420	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2421
2422	adapter->node = -1;
2423
2424	spin_lock_init(&adapter->stats64_lock);
2425#ifdef CONFIG_PCI_IOV
2426	switch (hw->mac.type) {
2427	case e1000_82576:
2428	case e1000_i350:
2429		if (max_vfs > 7) {
2430			dev_warn(&pdev->dev,
2431				 "Maximum of 7 VFs per PF, using max\n");
2432			adapter->vfs_allocated_count = 7;
2433		} else
2434			adapter->vfs_allocated_count = max_vfs;
2435		break;
2436	default:
2437		break;
2438	}
2439#endif /* CONFIG_PCI_IOV */
2440
2441	/* Determine the maximum number of RSS queues supported. */
2442	switch (hw->mac.type) {
2443	case e1000_i211:
2444		max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2445		break;
2446	case e1000_82575:
2447	case e1000_i210:
2448		max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2449		break;
2450	case e1000_i350:
2451		/* I350 cannot do RSS and SR-IOV at the same time */
2452		if (!!adapter->vfs_allocated_count) {
2453			max_rss_queues = 1;
2454			break;
2455		}
2456		/* fall through */
2457	case e1000_82576:
2458		if (!!adapter->vfs_allocated_count) {
2459			max_rss_queues = 2;
2460			break;
2461		}
2462		/* fall through */
2463	case e1000_82580:
2464	default:
2465		max_rss_queues = IGB_MAX_RX_QUEUES;
2466		break;
2467	}
2468
2469	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
2470
2471	/* Determine if we need to pair queues. */
2472	switch (hw->mac.type) {
2473	case e1000_82575:
2474	case e1000_i211:
2475		/* Device supports enough interrupts without queue pairing. */
2476		break;
2477	case e1000_82576:
2478		/*
2479		 * If VFs are going to be allocated with RSS queues then we
2480		 * should pair the queues in order to conserve interrupts due
2481		 * to limited supply.
2482		 */
2483		if ((adapter->rss_queues > 1) &&
2484		    (adapter->vfs_allocated_count > 6))
2485			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2486		/* fall through */
2487	case e1000_82580:
2488	case e1000_i350:
2489	case e1000_i210:
2490	default:
2491		/*
2492		 * If rss_queues > half of max_rss_queues, pair the queues in
2493		 * order to conserve interrupts due to limited supply.
2494		 */
2495		if (adapter->rss_queues > (max_rss_queues / 2))
2496			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2497		break;
2498	}
2499
2500	/* Setup and initialize a copy of the hw vlan table array */
2501	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2502				E1000_VLAN_FILTER_TBL_SIZE,
2503				GFP_ATOMIC);
2504
2505	/* This call may decrease the number of queues */
2506	if (igb_init_interrupt_scheme(adapter)) {
2507		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2508		return -ENOMEM;
2509	}
2510
2511	igb_probe_vfs(adapter);
2512
2513	/* Explicitly disable IRQ since the NIC can be in any state. */
2514	igb_irq_disable(adapter);
2515
2516	if (hw->mac.type >= e1000_i350)
2517		adapter->flags &= ~IGB_FLAG_DMAC;
2518
2519	set_bit(__IGB_DOWN, &adapter->state);
2520	return 0;
2521}
2522
2523/**
2524 * igb_open - Called when a network interface is made active
2525 * @netdev: network interface device structure
2526 *
2527 * Returns 0 on success, negative value on failure
2528 *
2529 * The open entry point is called when a network interface is made
2530 * active by the system (IFF_UP).  At this point all resources needed
2531 * for transmit and receive operations are allocated, the interrupt
2532 * handler is registered with the OS, the watchdog timer is started,
2533 * and the stack is notified that the interface is ready.
2534 **/
2535static int __igb_open(struct net_device *netdev, bool resuming)
2536{
2537	struct igb_adapter *adapter = netdev_priv(netdev);
2538	struct e1000_hw *hw = &adapter->hw;
2539	struct pci_dev *pdev = adapter->pdev;
2540	int err;
2541	int i;
2542
2543	/* disallow open during test */
2544	if (test_bit(__IGB_TESTING, &adapter->state)) {
2545		WARN_ON(resuming);
2546		return -EBUSY;
2547	}
2548
2549	if (!resuming)
2550		pm_runtime_get_sync(&pdev->dev);
2551
2552	netif_carrier_off(netdev);
2553
2554	/* allocate transmit descriptors */
2555	err = igb_setup_all_tx_resources(adapter);
2556	if (err)
2557		goto err_setup_tx;
2558
2559	/* allocate receive descriptors */
2560	err = igb_setup_all_rx_resources(adapter);
2561	if (err)
2562		goto err_setup_rx;
2563
2564	igb_power_up_link(adapter);
2565
2566	/* before we allocate an interrupt, we must be ready to handle it.
2567	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2568	 * as soon as we call pci_request_irq, so we have to setup our
2569	 * clean_rx handler before we do so.  */
2570	igb_configure(adapter);
2571
2572	err = igb_request_irq(adapter);
2573	if (err)
2574		goto err_req_irq;
2575
2576	/* From here on the code is the same as igb_up() */
2577	clear_bit(__IGB_DOWN, &adapter->state);
2578
2579	for (i = 0; i < adapter->num_q_vectors; i++)
2580		napi_enable(&(adapter->q_vector[i]->napi));
2581
2582	/* Clear any pending interrupts. */
2583	rd32(E1000_ICR);
2584
2585	igb_irq_enable(adapter);
2586
2587	/* notify VFs that reset has been completed */
2588	if (adapter->vfs_allocated_count) {
2589		u32 reg_data = rd32(E1000_CTRL_EXT);
2590		reg_data |= E1000_CTRL_EXT_PFRSTD;
2591		wr32(E1000_CTRL_EXT, reg_data);
2592	}
2593
2594	netif_tx_start_all_queues(netdev);
2595
2596	if (!resuming)
2597		pm_runtime_put(&pdev->dev);
2598
2599	/* start the watchdog. */
2600	hw->mac.get_link_status = 1;
2601	schedule_work(&adapter->watchdog_task);
2602
2603	return 0;
2604
2605err_req_irq:
2606	igb_release_hw_control(adapter);
2607	igb_power_down_link(adapter);
2608	igb_free_all_rx_resources(adapter);
2609err_setup_rx:
2610	igb_free_all_tx_resources(adapter);
2611err_setup_tx:
2612	igb_reset(adapter);
2613	if (!resuming)
2614		pm_runtime_put(&pdev->dev);
2615
2616	return err;
2617}
2618
2619static int igb_open(struct net_device *netdev)
2620{
2621	return __igb_open(netdev, false);
2622}
2623
2624/**
2625 * igb_close - Disables a network interface
2626 * @netdev: network interface device structure
2627 *
2628 * Returns 0, this is not allowed to fail
2629 *
2630 * The close entry point is called when an interface is de-activated
2631 * by the OS.  The hardware is still under the driver's control, but
2632 * needs to be disabled.  A global MAC reset is issued to stop the
2633 * hardware, and all transmit and receive resources are freed.
2634 **/
2635static int __igb_close(struct net_device *netdev, bool suspending)
2636{
2637	struct igb_adapter *adapter = netdev_priv(netdev);
2638	struct pci_dev *pdev = adapter->pdev;
2639
2640	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2641
2642	if (!suspending)
2643		pm_runtime_get_sync(&pdev->dev);
2644
2645	igb_down(adapter);
2646	igb_free_irq(adapter);
2647
2648	igb_free_all_tx_resources(adapter);
2649	igb_free_all_rx_resources(adapter);
2650
2651	if (!suspending)
2652		pm_runtime_put_sync(&pdev->dev);
2653	return 0;
2654}
2655
2656static int igb_close(struct net_device *netdev)
2657{
2658	return __igb_close(netdev, false);
2659}
2660
2661/**
2662 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2663 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2664 *
2665 * Return 0 on success, negative on failure
2666 **/
2667int igb_setup_tx_resources(struct igb_ring *tx_ring)
2668{
2669	struct device *dev = tx_ring->dev;
2670	int orig_node = dev_to_node(dev);
2671	int size;
2672
2673	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
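	/* prefer the ring's NUMA node for the allocation, then fall back to any node */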
2674	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2675	if (!tx_ring->tx_buffer_info)
2676		tx_ring->tx_buffer_info = vzalloc(size);
2677	if (!tx_ring->tx_buffer_info)
2678		goto err;
2679
2680	/* round up to nearest 4K */
2681	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2682	tx_ring->size = ALIGN(tx_ring->size, 4096);
2683
2684	set_dev_node(dev, tx_ring->numa_node);
2685	tx_ring->desc = dma_alloc_coherent(dev,
2686					   tx_ring->size,
2687					   &tx_ring->dma,
2688					   GFP_KERNEL);
2689	set_dev_node(dev, orig_node);
2690	if (!tx_ring->desc)
2691		tx_ring->desc = dma_alloc_coherent(dev,
2692						   tx_ring->size,
2693						   &tx_ring->dma,
2694						   GFP_KERNEL);
2695
2696	if (!tx_ring->desc)
2697		goto err;
2698
2699	tx_ring->next_to_use = 0;
2700	tx_ring->next_to_clean = 0;
2701
2702	return 0;
2703
2704err:
2705	vfree(tx_ring->tx_buffer_info);
2706	dev_err(dev,
2707		"Unable to allocate memory for the transmit descriptor ring\n");
2708	return -ENOMEM;
2709}
2710
2711/**
2712 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2713 *				  (Descriptors) for all queues
2714 * @adapter: board private structure
2715 *
2716 * Return 0 on success, negative on failure
2717 **/
2718static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2719{
2720	struct pci_dev *pdev = adapter->pdev;
2721	int i, err = 0;
2722
2723	for (i = 0; i < adapter->num_tx_queues; i++) {
2724		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2725		if (err) {
2726			dev_err(&pdev->dev,
2727				"Allocation for Tx Queue %u failed\n", i);
2728			for (i--; i >= 0; i--)
2729				igb_free_tx_resources(adapter->tx_ring[i]);
2730			break;
2731		}
2732	}
2733
2734	return err;
2735}
2736
2737/**
2738 * igb_setup_tctl - configure the transmit control registers
2739 * @adapter: Board private structure
2740 **/
2741void igb_setup_tctl(struct igb_adapter *adapter)
2742{
2743	struct e1000_hw *hw = &adapter->hw;
2744	u32 tctl;
2745
2746	/* disable queue 0, which is enabled by default on 82575 and 82576 */
2747	wr32(E1000_TXDCTL(0), 0);
2748
2749	/* Program the Transmit Control Register */
2750	tctl = rd32(E1000_TCTL);
2751	tctl &= ~E1000_TCTL_CT;
2752	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2753		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2754
2755	igb_config_collision_dist(hw);
2756
2757	/* Enable transmits */
2758	tctl |= E1000_TCTL_EN;
2759
2760	wr32(E1000_TCTL, tctl);
2761}
2762
2763/**
2764 * igb_configure_tx_ring - Configure transmit ring after Reset
2765 * @adapter: board private structure
2766 * @ring: tx ring to configure
2767 *
2768 * Configure a transmit ring after a reset.
2769 **/
2770void igb_configure_tx_ring(struct igb_adapter *adapter,
2771                           struct igb_ring *ring)
2772{
2773	struct e1000_hw *hw = &adapter->hw;
2774	u32 txdctl = 0;
2775	u64 tdba = ring->dma;
2776	int reg_idx = ring->reg_idx;
2777
2778	/* disable the queue */
2779	wr32(E1000_TXDCTL(reg_idx), 0);
2780	wrfl();
2781	mdelay(10);
2782
2783	wr32(E1000_TDLEN(reg_idx),
2784	                ring->count * sizeof(union e1000_adv_tx_desc));
2785	wr32(E1000_TDBAL(reg_idx),
2786	                tdba & 0x00000000ffffffffULL);
2787	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2788
2789	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2790	wr32(E1000_TDH(reg_idx), 0);
2791	writel(0, ring->tail);
2792
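	/* prefetch, host and write-back thresholds occupy bit offsets 0, 8 and 16 of TXDCTL */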
2793	txdctl |= IGB_TX_PTHRESH;
2794	txdctl |= IGB_TX_HTHRESH << 8;
2795	txdctl |= IGB_TX_WTHRESH << 16;
2796
2797	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2798	wr32(E1000_TXDCTL(reg_idx), txdctl);
2799}
2800
2801/**
2802 * igb_configure_tx - Configure transmit Unit after Reset
2803 * @adapter: board private structure
2804 *
2805 * Configure the Tx unit of the MAC after a reset.
2806 **/
2807static void igb_configure_tx(struct igb_adapter *adapter)
2808{
2809	int i;
2810
2811	for (i = 0; i < adapter->num_tx_queues; i++)
2812		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2813}
2814
2815/**
2816 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2817 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2818 *
2819 * Returns 0 on success, negative on failure
2820 **/
2821int igb_setup_rx_resources(struct igb_ring *rx_ring)
2822{
2823	struct device *dev = rx_ring->dev;
2824	int orig_node = dev_to_node(dev);
2825	int size, desc_len;
2826
2827	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2828	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2829	if (!rx_ring->rx_buffer_info)
2830		rx_ring->rx_buffer_info = vzalloc(size);
2831	if (!rx_ring->rx_buffer_info)
2832		goto err;
2833
2834	desc_len = sizeof(union e1000_adv_rx_desc);
2835
2836	/* Round up to nearest 4K */
2837	rx_ring->size = rx_ring->count * desc_len;
2838	rx_ring->size = ALIGN(rx_ring->size, 4096);
2839
2840	set_dev_node(dev, rx_ring->numa_node);
2841	rx_ring->desc = dma_alloc_coherent(dev,
2842					   rx_ring->size,
2843					   &rx_ring->dma,
2844					   GFP_KERNEL);
2845	set_dev_node(dev, orig_node);
2846	if (!rx_ring->desc)
2847		rx_ring->desc = dma_alloc_coherent(dev,
2848						   rx_ring->size,
2849						   &rx_ring->dma,
2850						   GFP_KERNEL);
2851
2852	if (!rx_ring->desc)
2853		goto err;
2854
2855	rx_ring->next_to_clean = 0;
2856	rx_ring->next_to_use = 0;
2857
2858	return 0;
2859
2860err:
2861	vfree(rx_ring->rx_buffer_info);
2862	rx_ring->rx_buffer_info = NULL;
2863	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2864		" ring\n");
2865	return -ENOMEM;
2866}
2867
2868/**
2869 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2870 *				  (Descriptors) for all queues
2871 * @adapter: board private structure
2872 *
2873 * Return 0 on success, negative on failure
2874 **/
2875static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2876{
2877	struct pci_dev *pdev = adapter->pdev;
2878	int i, err = 0;
2879
2880	for (i = 0; i < adapter->num_rx_queues; i++) {
2881		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2882		if (err) {
2883			dev_err(&pdev->dev,
2884				"Allocation for Rx Queue %u failed\n", i);
2885			for (i--; i >= 0; i--)
2886				igb_free_rx_resources(adapter->rx_ring[i]);
2887			break;
2888		}
2889	}
2890
2891	return err;
2892}
2893
2894/**
2895 * igb_setup_mrqc - configure the multiple receive queue control registers
2896 * @adapter: Board private structure
2897 **/
2898static void igb_setup_mrqc(struct igb_adapter *adapter)
2899{
2900	struct e1000_hw *hw = &adapter->hw;
2901	u32 mrqc, rxcsum;
2902	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2903	union e1000_reta {
2904		u32 dword;
2905		u8  bytes[4];
2906	} reta;
2907	static const u8 rsshash[40] = {
2908		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2909		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2910		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2911		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2912
2913	/* Fill out hash function seeds */
2914	for (j = 0; j < 10; j++) {
2915		u32 rsskey = rsshash[(j * 4)];
2916		rsskey |= rsshash[(j * 4) + 1] << 8;
2917		rsskey |= rsshash[(j * 4) + 2] << 16;
2918		rsskey |= rsshash[(j * 4) + 3] << 24;
2919		array_wr32(E1000_RSSRK(0), j, rsskey);
2920	}
2921
2922	num_rx_queues = adapter->rss_queues;
2923
2924	if (adapter->vfs_allocated_count) {
2925		/* 82575 and 82576 support 2 RSS queues for VMDq */
2926		switch (hw->mac.type) {
2927		case e1000_i350:
2928		case e1000_82580:
2929			num_rx_queues = 1;
2930			shift = 0;
2931			break;
2932		case e1000_82576:
2933			shift = 3;
2934			num_rx_queues = 2;
2935			break;
2936		case e1000_82575:
2937			shift = 2;
2938			shift2 = 6;
2939		default:
2940			break;
2941		}
2942	} else {
2943		if (hw->mac.type == e1000_82575)
2944			shift = 6;
2945	}
2946
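	/* Program the 128-entry redirection table one dword (four entries) at a
	 * time; each byte steers a hash bucket to an Rx queue, shifted into the
	 * position required by the VMDq configuration selected above.
	 */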
2947	for (j = 0; j < (32 * 4); j++) {
2948		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2949		if (shift2)
2950			reta.bytes[j & 3] |= num_rx_queues << shift2;
2951		if ((j & 3) == 3)
2952			wr32(E1000_RETA(j >> 2), reta.dword);
2953	}
2954
2955	/*
2956	 * Disable raw packet checksumming so that RSS hash is placed in
2957	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2958	 * offloads as they are enabled by default
2959	 */
2960	rxcsum = rd32(E1000_RXCSUM);
2961	rxcsum |= E1000_RXCSUM_PCSD;
2962
2963	if (adapter->hw.mac.type >= e1000_82576)
2964		/* Enable Receive Checksum Offload for SCTP */
2965		rxcsum |= E1000_RXCSUM_CRCOFL;
2966
2967	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2968	wr32(E1000_RXCSUM, rxcsum);
2969	/*
2970	 * Generate RSS hash based on TCP port numbers and/or
2971	 * IPv4/v6 src and dst addresses since UDP cannot be
2972	 * hashed reliably due to IP fragmentation
2973	 */
2974
2975	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2976	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
2977	       E1000_MRQC_RSS_FIELD_IPV6 |
2978	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
2979	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2980
2981	/* If VMDq is enabled then we set the appropriate mode for that, else
2982	 * we default to RSS so that an RSS hash is calculated per packet even
2983	 * if we are only using one queue */
2984	if (adapter->vfs_allocated_count) {
2985		if (hw->mac.type > e1000_82575) {
2986			/* Set the default pool for the PF's first queue */
2987			u32 vtctl = rd32(E1000_VT_CTL);
2988			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2989				   E1000_VT_CTL_DISABLE_DEF_POOL);
2990			vtctl |= adapter->vfs_allocated_count <<
2991				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2992			wr32(E1000_VT_CTL, vtctl);
2993		}
2994		if (adapter->rss_queues > 1)
2995			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2996		else
2997			mrqc |= E1000_MRQC_ENABLE_VMDQ;
2998	} else {
2999		if (hw->mac.type != e1000_i211)
3000			mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
3001	}
3002	igb_vmm_control(adapter);
3003
3004	wr32(E1000_MRQC, mrqc);
3005}
3006
3007/**
3008 * igb_setup_rctl - configure the receive control registers
3009 * @adapter: Board private structure
3010 **/
3011void igb_setup_rctl(struct igb_adapter *adapter)
3012{
3013	struct e1000_hw *hw = &adapter->hw;
3014	u32 rctl;
3015
3016	rctl = rd32(E1000_RCTL);
3017
3018	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3019	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3020
3021	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3022		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3023
3024	/*
3025	 * enable stripping of CRC. It's unlikely this will break BMC
3026	 * redirection as it did with e1000. Newer features require
3027	 * that the HW strips the CRC.
3028	 */
3029	rctl |= E1000_RCTL_SECRC;
3030
3031	/* disable store bad packets and clear size bits. */
3032	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3033
3034	/* enable LPE to prevent packets larger than max_frame_size */
3035	rctl |= E1000_RCTL_LPE;
3036
3037	/* disable queue 0 to prevent tail write w/o re-config */
3038	wr32(E1000_RXDCTL(0), 0);
3039
3040	/* Attention!!!  For SR-IOV PF driver operations you must enable
3041	 * queue drop for all VF and PF queues to prevent head of line blocking
3042	 * if an un-trusted VF does not provide descriptors to hardware.
3043	 */
3044	if (adapter->vfs_allocated_count) {
3045		/* set all queue drop enable bits */
3046		wr32(E1000_QDE, ALL_QUEUES);
3047	}
3048
3049	/* This is useful for sniffing bad packets. */
3050	if (adapter->netdev->features & NETIF_F_RXALL) {
3051		/* UPE and MPE will be handled by normal PROMISC logic
3052		 * in e1000e_set_rx_mode */
3053		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3054			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3055			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3056
3057		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3058			  E1000_RCTL_DPF | /* Allow filtered pause */
3059			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3060		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3061		 * and that breaks VLANs.
3062		 */
3063	}
3064
3065	wr32(E1000_RCTL, rctl);
3066}
3067
3068static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3069                                   int vfn)
3070{
3071	struct e1000_hw *hw = &adapter->hw;
3072	u32 vmolr;
3073
3074	/* if it isn't the PF, check to see if VFs are enabled and
3075	 * increase the size to support VLAN tags */
3076	if (vfn < adapter->vfs_allocated_count &&
3077	    adapter->vf_data[vfn].vlans_enabled)
3078		size += VLAN_TAG_SIZE;
3079
3080	vmolr = rd32(E1000_VMOLR(vfn));
3081	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3082	vmolr |= size | E1000_VMOLR_LPE;
3083	wr32(E1000_VMOLR(vfn), vmolr);
3084
3085	return 0;
3086}
3087
3088/**
3089 * igb_rlpml_set - set maximum receive packet size
3090 * @adapter: board private structure
3091 *
3092 * Configure maximum receivable packet size.
3093 **/
3094static void igb_rlpml_set(struct igb_adapter *adapter)
3095{
3096	u32 max_frame_size = adapter->max_frame_size;
3097	struct e1000_hw *hw = &adapter->hw;
3098	u16 pf_id = adapter->vfs_allocated_count;
3099
3100	if (pf_id) {
3101		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3102		/*
3103		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3104		 * to our max jumbo frame size, in case we need to enable
3105		 * jumbo frames on one of the rings later.
3106		 * This will not pass over-length frames into the default
3107		 * queue because it's gated by the VMOLR.RLPML.
3108		 */
3109		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3110	}
3111
3112	wr32(E1000_RLPML, max_frame_size);
3113}
3114
3115static inline void igb_set_vmolr(struct igb_adapter *adapter,
3116				 int vfn, bool aupe)
3117{
3118	struct e1000_hw *hw = &adapter->hw;
3119	u32 vmolr;
3120
3121	/*
3122	 * This register exists only on 82576 and newer, so on older parts
3123	 * we should exit and do nothing
3124	 */
3125	if (hw->mac.type < e1000_82576)
3126		return;
3127
3128	vmolr = rd32(E1000_VMOLR(vfn));
3129	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3130	if (aupe)
3131		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3132	else
3133		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3134
3135	/* clear all bits that might not be set */
3136	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3137
3138	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3139		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3140	/*
3141	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3142	 * multicast packets
3143	 */
3144	if (vfn <= adapter->vfs_allocated_count)
3145		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3146
3147	wr32(E1000_VMOLR(vfn), vmolr);
3148}
3149
3150/**
3151 * igb_configure_rx_ring - Configure a receive ring after Reset
3152 * @adapter: board private structure
3153 * @ring: receive ring to be configured
3154 *
3155 * Configure the Rx unit of the MAC after a reset.
3156 **/
3157void igb_configure_rx_ring(struct igb_adapter *adapter,
3158                           struct igb_ring *ring)
3159{
3160	struct e1000_hw *hw = &adapter->hw;
3161	u64 rdba = ring->dma;
3162	int reg_idx = ring->reg_idx;
3163	u32 srrctl = 0, rxdctl = 0;
3164
3165	/* disable the queue */
3166	wr32(E1000_RXDCTL(reg_idx), 0);
3167
3168	/* Set DMA base address registers */
3169	wr32(E1000_RDBAL(reg_idx),
3170	     rdba & 0x00000000ffffffffULL);
3171	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3172	wr32(E1000_RDLEN(reg_idx),
3173	               ring->count * sizeof(union e1000_adv_rx_desc));
3174
3175	/* initialize head and tail */
3176	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3177	wr32(E1000_RDH(reg_idx), 0);
3178	writel(0, ring->tail);
3179
3180	/* set descriptor configuration */
3181	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
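	/* use half a page per packet buffer, capped at 16KB */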
3182#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3183	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3184#else
3185	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3186#endif
3187	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3188	if (hw->mac.type >= e1000_82580)
3189		srrctl |= E1000_SRRCTL_TIMESTAMP;
3190	/* Only set Drop Enable if we are supporting multiple queues */
3191	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3192		srrctl |= E1000_SRRCTL_DROP_EN;
3193
3194	wr32(E1000_SRRCTL(reg_idx), srrctl);
3195
3196	/* set filtering for VMDQ pools */
3197	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3198
3199	rxdctl |= IGB_RX_PTHRESH;
3200	rxdctl |= IGB_RX_HTHRESH << 8;
3201	rxdctl |= IGB_RX_WTHRESH << 16;
3202
3203	/* enable receive descriptor fetching */
3204	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3205	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3206}
3207
3208/**
3209 * igb_configure_rx - Configure receive Unit after Reset
3210 * @adapter: board private structure
3211 *
3212 * Configure the Rx unit of the MAC after a reset.
3213 **/
3214static void igb_configure_rx(struct igb_adapter *adapter)
3215{
3216	int i;
3217
3218	/* set UTA to appropriate mode */
3219	igb_set_uta(adapter);
3220
3221	/* set the correct pool for the PF default MAC address in entry 0 */
3222	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3223	                 adapter->vfs_allocated_count);
3224
3225	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3226	 * the Base and Length of the Rx Descriptor Ring */
3227	for (i = 0; i < adapter->num_rx_queues; i++)
3228		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3229}
3230
3231/**
3232 * igb_free_tx_resources - Free Tx Resources per Queue
3233 * @tx_ring: Tx descriptor ring for a specific queue
3234 *
3235 * Free all transmit software resources
3236 **/
3237void igb_free_tx_resources(struct igb_ring *tx_ring)
3238{
3239	igb_clean_tx_ring(tx_ring);
3240
3241	vfree(tx_ring->tx_buffer_info);
3242	tx_ring->tx_buffer_info = NULL;
3243
3244	/* if not set, then don't free */
3245	if (!tx_ring->desc)
3246		return;
3247
3248	dma_free_coherent(tx_ring->dev, tx_ring->size,
3249			  tx_ring->desc, tx_ring->dma);
3250
3251	tx_ring->desc = NULL;
3252}
3253
3254/**
3255 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3256 * @adapter: board private structure
3257 *
3258 * Free all transmit software resources
3259 **/
3260static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3261{
3262	int i;
3263
3264	for (i = 0; i < adapter->num_tx_queues; i++)
3265		igb_free_tx_resources(adapter->tx_ring[i]);
3266}
3267
3268void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3269				    struct igb_tx_buffer *tx_buffer)
3270{
3271	if (tx_buffer->skb) {
3272		dev_kfree_skb_any(tx_buffer->skb);
3273		if (tx_buffer->dma)
3274			dma_unmap_single(ring->dev,
3275					 tx_buffer->dma,
3276					 tx_buffer->length,
3277					 DMA_TO_DEVICE);
3278	} else if (tx_buffer->dma) {
3279		dma_unmap_page(ring->dev,
3280			       tx_buffer->dma,
3281			       tx_buffer->length,
3282			       DMA_TO_DEVICE);
3283	}
3284	tx_buffer->next_to_watch = NULL;
3285	tx_buffer->skb = NULL;
3286	tx_buffer->dma = 0;
3287	/* buffer_info must be completely set up in the transmit path */
3288}
3289
3290/**
3291 * igb_clean_tx_ring - Free Tx Buffers
3292 * @tx_ring: ring to be cleaned
3293 **/
3294static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3295{
3296	struct igb_tx_buffer *buffer_info;
3297	unsigned long size;
3298	u16 i;
3299
3300	if (!tx_ring->tx_buffer_info)
3301		return;
3302	/* Free all the Tx ring sk_buffs */
3303
3304	for (i = 0; i < tx_ring->count; i++) {
3305		buffer_info = &tx_ring->tx_buffer_info[i];
3306		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3307	}
3308
3309	netdev_tx_reset_queue(txring_txq(tx_ring));
3310
3311	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3312	memset(tx_ring->tx_buffer_info, 0, size);
3313
3314	/* Zero out the descriptor ring */
3315	memset(tx_ring->desc, 0, tx_ring->size);
3316
3317	tx_ring->next_to_use = 0;
3318	tx_ring->next_to_clean = 0;
3319}
3320
3321/**
3322 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3323 * @adapter: board private structure
3324 **/
3325static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3326{
3327	int i;
3328
3329	for (i = 0; i < adapter->num_tx_queues; i++)
3330		igb_clean_tx_ring(adapter->tx_ring[i]);
3331}
3332
3333/**
3334 * igb_free_rx_resources - Free Rx Resources
3335 * @rx_ring: ring to clean the resources from
3336 *
3337 * Free all receive software resources
3338 **/
3339void igb_free_rx_resources(struct igb_ring *rx_ring)
3340{
3341	igb_clean_rx_ring(rx_ring);
3342
3343	vfree(rx_ring->rx_buffer_info);
3344	rx_ring->rx_buffer_info = NULL;
3345
3346	/* if not set, then don't free */
3347	if (!rx_ring->desc)
3348		return;
3349
3350	dma_free_coherent(rx_ring->dev, rx_ring->size,
3351			  rx_ring->desc, rx_ring->dma);
3352
3353	rx_ring->desc = NULL;
3354}
3355
3356/**
3357 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3358 * @adapter: board private structure
3359 *
3360 * Free all receive software resources
3361 **/
3362static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3363{
3364	int i;
3365
3366	for (i = 0; i < adapter->num_rx_queues; i++)
3367		igb_free_rx_resources(adapter->rx_ring[i]);
3368}
3369
3370/**
3371 * igb_clean_rx_ring - Free Rx Buffers per Queue
3372 * @rx_ring: ring to free buffers from
3373 **/
3374static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3375{
3376	unsigned long size;
3377	u16 i;
3378
3379	if (!rx_ring->rx_buffer_info)
3380		return;
3381
3382	/* Free all the Rx ring sk_buffs */
3383	for (i = 0; i < rx_ring->count; i++) {
3384		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3385		if (buffer_info->dma) {
3386			dma_unmap_single(rx_ring->dev,
3387			                 buffer_info->dma,
3388					 IGB_RX_HDR_LEN,
3389					 DMA_FROM_DEVICE);
3390			buffer_info->dma = 0;
3391		}
3392
3393		if (buffer_info->skb) {
3394			dev_kfree_skb(buffer_info->skb);
3395			buffer_info->skb = NULL;
3396		}
3397		if (buffer_info->page_dma) {
3398			dma_unmap_page(rx_ring->dev,
3399			               buffer_info->page_dma,
3400				       PAGE_SIZE / 2,
3401				       DMA_FROM_DEVICE);
3402			buffer_info->page_dma = 0;
3403		}
3404		if (buffer_info->page) {
3405			put_page(buffer_info->page);
3406			buffer_info->page = NULL;
3407			buffer_info->page_offset = 0;
3408		}
3409	}
3410
3411	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3412	memset(rx_ring->rx_buffer_info, 0, size);
3413
3414	/* Zero out the descriptor ring */
3415	memset(rx_ring->desc, 0, rx_ring->size);
3416
3417	rx_ring->next_to_clean = 0;
3418	rx_ring->next_to_use = 0;
3419}
3420
3421/**
3422 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3423 * @adapter: board private structure
3424 **/
3425static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3426{
3427	int i;
3428
3429	for (i = 0; i < adapter->num_rx_queues; i++)
3430		igb_clean_rx_ring(adapter->rx_ring[i]);
3431}
3432
3433/**
3434 * igb_set_mac - Change the Ethernet Address of the NIC
3435 * @netdev: network interface device structure
3436 * @p: pointer to an address structure
3437 *
3438 * Returns 0 on success, negative on failure
3439 **/
3440static int igb_set_mac(struct net_device *netdev, void *p)
3441{
3442	struct igb_adapter *adapter = netdev_priv(netdev);
3443	struct e1000_hw *hw = &adapter->hw;
3444	struct sockaddr *addr = p;
3445
3446	if (!is_valid_ether_addr(addr->sa_data))
3447		return -EADDRNOTAVAIL;
3448
3449	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3450	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3451
3452	/* set the correct pool for the new PF MAC address in entry 0 */
3453	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3454	                 adapter->vfs_allocated_count);
3455
3456	return 0;
3457}
3458
3459/**
3460 * igb_write_mc_addr_list - write multicast addresses to MTA
3461 * @netdev: network interface device structure
3462 *
3463 * Writes multicast address list to the MTA hash table.
3464 * Returns: -ENOMEM on failure
3465 *                0 on no addresses written
3466 *                X on writing X addresses to MTA
3467 **/
3468static int igb_write_mc_addr_list(struct net_device *netdev)
3469{
3470	struct igb_adapter *adapter = netdev_priv(netdev);
3471	struct e1000_hw *hw = &adapter->hw;
3472	struct netdev_hw_addr *ha;
3473	u8  *mta_list;
3474	int i;
3475
3476	if (netdev_mc_empty(netdev)) {
3477		/* nothing to program, so clear mc list */
3478		igb_update_mc_addr_list(hw, NULL, 0);
3479		igb_restore_vf_multicasts(adapter);
3480		return 0;
3481	}
3482
3483	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3484	if (!mta_list)
3485		return -ENOMEM;
3486
3487	/* The shared function expects a packed array of only addresses. */
3488	i = 0;
3489	netdev_for_each_mc_addr(ha, netdev)
3490		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3491
3492	igb_update_mc_addr_list(hw, mta_list, i);
3493	kfree(mta_list);
3494
3495	return netdev_mc_count(netdev);
3496}
3497
3498/**
3499 * igb_write_uc_addr_list - write unicast addresses to RAR table
3500 * @netdev: network interface device structure
3501 *
3502 * Writes unicast address list to the RAR table.
3503 * Returns: -ENOMEM on failure/insufficient address space
3504 *                0 on no addresses written
3505 *                X on writing X addresses to the RAR table
3506 **/
3507static int igb_write_uc_addr_list(struct net_device *netdev)
3508{
3509	struct igb_adapter *adapter = netdev_priv(netdev);
3510	struct e1000_hw *hw = &adapter->hw;
3511	unsigned int vfn = adapter->vfs_allocated_count;
3512	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
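	/* entry 0 holds the PF default MAC address and one entry is reserved per VF */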
3513	int count = 0;
3514
3515	/* return ENOMEM indicating insufficient memory for addresses */
3516	if (netdev_uc_count(netdev) > rar_entries)
3517		return -ENOMEM;
3518
3519	if (!netdev_uc_empty(netdev) && rar_entries) {
3520		struct netdev_hw_addr *ha;
3521
3522		netdev_for_each_uc_addr(ha, netdev) {
3523			if (!rar_entries)
3524				break;
3525			igb_rar_set_qsel(adapter, ha->addr,
3526			                 rar_entries--,
3527			                 vfn);
3528			count++;
3529		}
3530	}
3531	/* write the addresses in reverse order to avoid write combining */
3532	for (; rar_entries > 0 ; rar_entries--) {
3533		wr32(E1000_RAH(rar_entries), 0);
3534		wr32(E1000_RAL(rar_entries), 0);
3535	}
3536	wrfl();
3537
3538	return count;
3539}
3540
3541/**
3542 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3543 * @netdev: network interface device structure
3544 *
3545 * The set_rx_mode entry point is called whenever the unicast or multicast
3546 * address lists or the network interface flags are updated.  This routine is
3547 * responsible for configuring the hardware for proper unicast, multicast,
3548 * promiscuous mode, and all-multi behavior.
3549 **/
3550static void igb_set_rx_mode(struct net_device *netdev)
3551{
3552	struct igb_adapter *adapter = netdev_priv(netdev);
3553	struct e1000_hw *hw = &adapter->hw;
3554	unsigned int vfn = adapter->vfs_allocated_count;
3555	u32 rctl, vmolr = 0;
3556	int count;
3557
3558	/* Check for Promiscuous and All Multicast modes */
3559	rctl = rd32(E1000_RCTL);
3560
3561	/* clear the effected bits */
3562	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3563
3564	if (netdev->flags & IFF_PROMISC) {
3565		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3566		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3567	} else {
3568		if (netdev->flags & IFF_ALLMULTI) {
3569			rctl |= E1000_RCTL_MPE;
3570			vmolr |= E1000_VMOLR_MPME;
3571		} else {
3572			/*
3573			 * Write addresses to the MTA; if the attempt fails,
3574			 * fall back to multicast promiscuous mode so that we
3575			 * can at least receive multicast traffic.
3576			 */
3577			count = igb_write_mc_addr_list(netdev);
3578			if (count < 0) {
3579				rctl |= E1000_RCTL_MPE;
3580				vmolr |= E1000_VMOLR_MPME;
3581			} else if (count) {
3582				vmolr |= E1000_VMOLR_ROMPE;
3583			}
3584		}
3585		/*
3586		 * Write addresses to the available RAR registers; if there is
3587		 * not sufficient space to store all of them, enable unicast
3588		 * promiscuous mode.
3589		 */
3590		count = igb_write_uc_addr_list(netdev);
3591		if (count < 0) {
3592			rctl |= E1000_RCTL_UPE;
3593			vmolr |= E1000_VMOLR_ROPE;
3594		}
3595		rctl |= E1000_RCTL_VFE;
3596	}
3597	wr32(E1000_RCTL, rctl);
3598
3599	/*
3600	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3601	 * the VMOLR to enable the appropriate modes.  Without this workaround
3602	 * VLAN tag stripping would not be done for frames that only arrive
3603	 * because we are the default pool.
3604	 */
3605	if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3606		return;
3607
3608	vmolr |= rd32(E1000_VMOLR(vfn)) &
3609	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3610	wr32(E1000_VMOLR(vfn), vmolr);
3611	igb_restore_vf_multicasts(adapter);
3612}
3613
3614static void igb_check_wvbr(struct igb_adapter *adapter)
3615{
3616	struct e1000_hw *hw = &adapter->hw;
3617	u32 wvbr = 0;
3618
3619	switch (hw->mac.type) {
3620	case e1000_82576:
3621	case e1000_i350:
3622		if (!(wvbr = rd32(E1000_WVBR)))
3623			return;
3624		break;
3625	default:
3626		break;
3627	}
3628
3629	adapter->wvbr |= wvbr;
3630}
3631
3632#define IGB_STAGGERED_QUEUE_OFFSET 8
3633
3634static void igb_spoof_check(struct igb_adapter *adapter)
3635{
3636	int j;
3637
3638	if (!adapter->wvbr)
3639		return;
3640
3641	for(j = 0; j < adapter->vfs_allocated_count; j++) {
3642		if (adapter->wvbr & (1 << j) ||
3643		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3644			dev_warn(&adapter->pdev->dev,
3645				"Spoof event(s) detected on VF %d\n", j);
3646			adapter->wvbr &=
3647				~((1 << j) |
3648				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3649		}
3650	}
3651}
3652
3653/* Need to wait a few seconds after link up to get diagnostic information from
3654 * the phy */
3655static void igb_update_phy_info(unsigned long data)
3656{
3657	struct igb_adapter *adapter = (struct igb_adapter *) data;
3658	igb_get_phy_info(&adapter->hw);
3659}
3660
3661/**
3662 * igb_has_link - check shared code for link and determine up/down
3663 * @adapter: pointer to driver private info
3664 **/
3665bool igb_has_link(struct igb_adapter *adapter)
3666{
3667	struct e1000_hw *hw = &adapter->hw;
3668	bool link_active = false;
3669	s32 ret_val = 0;
3670
3671	/* get_link_status is set on LSC (link status) interrupt or
3672	 * rx sequence error interrupt.  It remains set until
3673	 * e1000_check_for_link establishes link; this applies to
3674	 * copper adapters ONLY
3675	 */
3676	switch (hw->phy.media_type) {
3677	case e1000_media_type_copper:
3678		if (hw->mac.get_link_status) {
3679			ret_val = hw->mac.ops.check_for_link(hw);
3680			link_active = !hw->mac.get_link_status;
3681		} else {
3682			link_active = true;
3683		}
3684		break;
3685	case e1000_media_type_internal_serdes:
3686		ret_val = hw->mac.ops.check_for_link(hw);
3687		link_active = hw->mac.serdes_has_link;
3688		break;
3689	default:
3690	case e1000_media_type_unknown:
3691		break;
3692	}
3693
3694	return link_active;
3695}
3696
3697static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3698{
3699	bool ret = false;
3700	u32 ctrl_ext, thstat;
3701
3702	/* check for thermal sensor event on i350 copper only */
3703	if (hw->mac.type == e1000_i350) {
3704		thstat = rd32(E1000_THSTAT);
3705		ctrl_ext = rd32(E1000_CTRL_EXT);
3706
3707		if ((hw->phy.media_type == e1000_media_type_copper) &&
3708		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3709			ret = !!(thstat & event);
3710		}
3711	}
3712
3713	return ret;
3714}
3715
3716/**
3717 * igb_watchdog - Timer Call-back
3718 * @data: pointer to adapter cast into an unsigned long
3719 **/
3720static void igb_watchdog(unsigned long data)
3721{
3722	struct igb_adapter *adapter = (struct igb_adapter *)data;
3723	/* Do the rest outside of interrupt context */
3724	schedule_work(&adapter->watchdog_task);
3725}
3726
3727static void igb_watchdog_task(struct work_struct *work)
3728{
3729	struct igb_adapter *adapter = container_of(work,
3730	                                           struct igb_adapter,
3731                                                   watchdog_task);
3732	struct e1000_hw *hw = &adapter->hw;
3733	struct net_device *netdev = adapter->netdev;
3734	u32 link;
3735	int i;
3736
3737	link = igb_has_link(adapter);
3738	if (link) {
3739		/* Cancel scheduled suspend requests. */
3740		pm_runtime_resume(netdev->dev.parent);
3741
3742		if (!netif_carrier_ok(netdev)) {
3743			u32 ctrl;
3744			hw->mac.ops.get_speed_and_duplex(hw,
3745			                                 &adapter->link_speed,
3746			                                 &adapter->link_duplex);
3747
3748			ctrl = rd32(E1000_CTRL);
3749			/* Link status message must follow this format */
3750			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3751			       "Duplex, Flow Control: %s\n",
3752			       netdev->name,
3753			       adapter->link_speed,
3754			       adapter->link_duplex == FULL_DUPLEX ?
3755			       "Full" : "Half",
3756			       (ctrl & E1000_CTRL_TFCE) &&
3757			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3758			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3759			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3760
3761			/* check for thermal sensor event */
3762			if (igb_thermal_sensor_event(hw,
3763			    E1000_THSTAT_LINK_THROTTLE)) {
3764				netdev_info(netdev, "The network adapter link "
3765					    "speed was downshifted because it "
3766					    "overheated\n");
3767			}
3768
3769			/* adjust timeout factor according to speed/duplex */
3770			adapter->tx_timeout_factor = 1;
3771			switch (adapter->link_speed) {
3772			case SPEED_10:
3773				adapter->tx_timeout_factor = 14;
3774				break;
3775			case SPEED_100:
3776				/* maybe add some timeout factor ? */
3777				break;
3778			}
3779
3780			netif_carrier_on(netdev);
3781
3782			igb_ping_all_vfs(adapter);
3783			igb_check_vf_rate_limit(adapter);
3784
3785			/* link state has changed, schedule phy info update */
3786			if (!test_bit(__IGB_DOWN, &adapter->state))
3787				mod_timer(&adapter->phy_info_timer,
3788					  round_jiffies(jiffies + 2 * HZ));
3789		}
3790	} else {
3791		if (netif_carrier_ok(netdev)) {
3792			adapter->link_speed = 0;
3793			adapter->link_duplex = 0;
3794
3795			/* check for thermal sensor event */
3796			if (igb_thermal_sensor_event(hw,
3797			    E1000_THSTAT_PWR_DOWN)) {
3798				netdev_err(netdev, "The network adapter was "
3799					   "stopped because it overheated\n");
3800			}
3801
3802			/* Link status message must follow this format */
3803			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3804			       netdev->name);
3805			netif_carrier_off(netdev);
3806
3807			igb_ping_all_vfs(adapter);
3808
3809			/* link state has changed, schedule phy info update */
3810			if (!test_bit(__IGB_DOWN, &adapter->state))
3811				mod_timer(&adapter->phy_info_timer,
3812					  round_jiffies(jiffies + 2 * HZ));
3813
3814			pm_schedule_suspend(netdev->dev.parent,
3815					    MSEC_PER_SEC * 5);
3816		}
3817	}
3818
3819	spin_lock(&adapter->stats64_lock);
3820	igb_update_stats(adapter, &adapter->stats64);
3821	spin_unlock(&adapter->stats64_lock);
3822
3823	for (i = 0; i < adapter->num_tx_queues; i++) {
3824		struct igb_ring *tx_ring = adapter->tx_ring[i];
3825		if (!netif_carrier_ok(netdev)) {
3826			/* We've lost link, so the controller stops DMA,
3827			 * but we've got queued Tx work that's never going
3828			 * to get done, so reset controller to flush Tx.
3829			 * (Do the reset outside of interrupt context). */
3830			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3831				adapter->tx_timeout_count++;
3832				schedule_work(&adapter->reset_task);
3833				/* return immediately since reset is imminent */
3834				return;
3835			}
3836		}
3837
3838		/* Force detection of hung controller every watchdog period */
3839		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3840	}
3841
3842	/* Cause software interrupt to ensure rx ring is cleaned */
3843	if (adapter->msix_entries) {
3844		u32 eics = 0;
3845		for (i = 0; i < adapter->num_q_vectors; i++)
3846			eics |= adapter->q_vector[i]->eims_value;
3847		wr32(E1000_EICS, eics);
3848	} else {
3849		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3850	}
3851
3852	igb_spoof_check(adapter);
3853
3854	/* Reset the timer */
3855	if (!test_bit(__IGB_DOWN, &adapter->state))
3856		mod_timer(&adapter->watchdog_timer,
3857			  round_jiffies(jiffies + 2 * HZ));
3858}
3859
3860enum latency_range {
3861	lowest_latency = 0,
3862	low_latency = 1,
3863	bulk_latency = 2,
3864	latency_invalid = 255
3865};
3866
3867/**
3868 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3869 *
3870 *      Stores a new ITR value based strictly on packet size.  This
3871 *      algorithm is less sophisticated than that used in igb_update_itr,
3872 *      due to the difficulty of synchronizing statistics across multiple
3873 *      receive rings.  The divisors and thresholds used by this function
3874 *      were determined based on theoretical maximum wire speed and testing
3875 *      data, in order to minimize response time while increasing bulk
3876 *      throughput.
3877 *      This functionality is controlled by the InterruptThrottleRate module
3878 *      parameter (see igb_param.c)
3879 *      NOTE:  This function is called only when operating in a multiqueue
3880 *             receive environment.
3881 * @q_vector: pointer to q_vector
3882 **/
3883static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3884{
3885	int new_val = q_vector->itr_val;
3886	int avg_wire_size = 0;
3887	struct igb_adapter *adapter = q_vector->adapter;
3888	unsigned int packets;
3889
3890	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3891	 * ints/sec - ITR timer value of 120 ticks.
3892	 */
3893	if (adapter->link_speed != SPEED_1000) {
3894		new_val = IGB_4K_ITR;
3895		goto set_itr_val;
3896	}
3897
3898	packets = q_vector->rx.total_packets;
3899	if (packets)
3900		avg_wire_size = q_vector->rx.total_bytes / packets;
3901
3902	packets = q_vector->tx.total_packets;
3903	if (packets)
3904		avg_wire_size = max_t(u32, avg_wire_size,
3905				      q_vector->tx.total_bytes / packets);
3906
3907	/* if avg_wire_size isn't set no work was done */
3908	if (!avg_wire_size)
3909		goto clear_counts;
3910
3911	/* Add 24 bytes to size to account for CRC, preamble, and gap */
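	/* (4-byte CRC + 8-byte preamble/SFD + 12-byte inter-frame gap) */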
3912	avg_wire_size += 24;
3913
3914	/* Don't starve jumbo frames */
3915	avg_wire_size = min(avg_wire_size, 3000);
3916
3917	/* Give a little boost to mid-size frames */
3918	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3919		new_val = avg_wire_size / 3;
3920	else
3921		new_val = avg_wire_size / 2;
3922
3923	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3924	if (new_val < IGB_20K_ITR &&
3925	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3926	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3927		new_val = IGB_20K_ITR;
3928
3929set_itr_val:
3930	if (new_val != q_vector->itr_val) {
3931		q_vector->itr_val = new_val;
3932		q_vector->set_itr = 1;
3933	}
3934clear_counts:
3935	q_vector->rx.total_bytes = 0;
3936	q_vector->rx.total_packets = 0;
3937	q_vector->tx.total_bytes = 0;
3938	q_vector->tx.total_packets = 0;
3939}
3940
3941/**
3942 * igb_update_itr - update the dynamic ITR value based on statistics
3943 *      Stores a new ITR value based on packet and byte
3944 *      counts during the last interrupt.  The advantage of per-interrupt
3945 *      computation is faster updates and more accurate ITR for the current
3946 *      traffic pattern.  Constants in this function were computed
3947 *      based on theoretical maximum wire speed and thresholds were set based
3948 *      on testing data as well as attempting to minimize response time
3949 *      while increasing bulk throughput.
3950 *      this functionality is controlled by the InterruptThrottleRate module
3951 *      parameter (see igb_param.c)
3952 *      NOTE:  These calculations are only valid when operating in a single-
3953 *             queue environment.
3954 * @q_vector: pointer to q_vector
3955 * @ring_container: ring info to update the itr for
3956 **/
3957static void igb_update_itr(struct igb_q_vector *q_vector,
3958			   struct igb_ring_container *ring_container)
3959{
3960	unsigned int packets = ring_container->total_packets;
3961	unsigned int bytes = ring_container->total_bytes;
3962	u8 itrval = ring_container->itr;
3963
3964	/* no packets, exit with status unchanged */
3965	if (packets == 0)
3966		return;
3967
3968	switch (itrval) {
3969	case lowest_latency:
3970		/* handle TSO and jumbo frames */
3971		if (bytes/packets > 8000)
3972			itrval = bulk_latency;
3973		else if ((packets < 5) && (bytes > 512))
3974			itrval = low_latency;
3975		break;
3976	case low_latency:  /* 50 usec aka 20000 ints/s */
3977		if (bytes > 10000) {
3978			/* this if handles the TSO accounting */
3979			if (bytes/packets > 8000) {
3980				itrval = bulk_latency;
3981			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3982				itrval = bulk_latency;
3983			} else if ((packets > 35)) {
3984				itrval = lowest_latency;
3985			}
3986		} else if (bytes/packets > 2000) {
3987			itrval = bulk_latency;
3988		} else if (packets <= 2 && bytes < 512) {
3989			itrval = lowest_latency;
3990		}
3991		break;
3992	case bulk_latency: /* 250 usec aka 4000 ints/s */
3993		if (bytes > 25000) {
3994			if (packets > 35)
3995				itrval = low_latency;
3996		} else if (bytes < 1500) {
3997			itrval = low_latency;
3998		}
3999		break;
4000	}
4001
4002	/* clear work counters since we have the values we need */
4003	ring_container->total_bytes = 0;
4004	ring_container->total_packets = 0;
4005
4006	/* write updated itr to ring container */
4007	ring_container->itr = itrval;
4008}
4009
4010static void igb_set_itr(struct igb_q_vector *q_vector)
4011{
4012	struct igb_adapter *adapter = q_vector->adapter;
4013	u32 new_itr = q_vector->itr_val;
4014	u8 current_itr = 0;
4015
4016	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4017	if (adapter->link_speed != SPEED_1000) {
4018		current_itr = 0;
4019		new_itr = IGB_4K_ITR;
4020		goto set_itr_now;
4021	}
4022
4023	igb_update_itr(q_vector, &q_vector->tx);
4024	igb_update_itr(q_vector, &q_vector->rx);
4025
4026	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4027
4028	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4029	if (current_itr == lowest_latency &&
4030	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4031	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4032		current_itr = low_latency;
4033
4034	switch (current_itr) {
4035	/* counts and packets in update_itr are dependent on these numbers */
4036	case lowest_latency:
4037		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4038		break;
4039	case low_latency:
4040		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4041		break;
4042	case bulk_latency:
4043		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4044		break;
4045	default:
4046		break;
4047	}
4048
4049set_itr_now:
4050	if (new_itr != q_vector->itr_val) {
4051		/* this attempts to bias the interrupt rate towards Bulk
4052		 * by adding intermediate steps when interrupt rate is
4053		 * increasing */
4054		new_itr = new_itr > q_vector->itr_val ?
4055		             max((new_itr * q_vector->itr_val) /
4056		                 (new_itr + (q_vector->itr_val >> 2)),
4057				 new_itr) :
4058			     new_itr;
4059		/* Don't write the value here; it resets the adapter's
4060		 * internal timer, and causes us to delay far longer than
4061		 * we should between interrupts.  Instead, we write the ITR
4062		 * value at the beginning of the next interrupt so the timing
4063		 * ends up being correct.
4064		 */
4065		q_vector->itr_val = new_itr;
4066		q_vector->set_itr = 1;
4067	}
4068}
4069
4070static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4071			    u32 type_tucmd, u32 mss_l4len_idx)
4072{
4073	struct e1000_adv_tx_context_desc *context_desc;
4074	u16 i = tx_ring->next_to_use;
4075
4076	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4077
4078	i++;
4079	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4080
4081	/* set bits to identify this as an advanced context descriptor */
4082	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4083
4084	/* For 82575, context index must be unique per ring. */
4085	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4086		mss_l4len_idx |= tx_ring->reg_idx << 4;
4087
4088	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4089	context_desc->seqnum_seed	= 0;
4090	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4091	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4092}
4093
4094static int igb_tso(struct igb_ring *tx_ring,
4095		   struct igb_tx_buffer *first,
4096		   u8 *hdr_len)
4097{
4098	struct sk_buff *skb = first->skb;
4099	u32 vlan_macip_lens, type_tucmd;
4100	u32 mss_l4len_idx, l4len;
4101
4102	if (!skb_is_gso(skb))
4103		return 0;
4104
4105	if (skb_header_cloned(skb)) {
4106		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4107		if (err)
4108			return err;
4109	}
4110
4111	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4112	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4113
4114	if (first->protocol == __constant_htons(ETH_P_IP)) {
4115		struct iphdr *iph = ip_hdr(skb);
4116		iph->tot_len = 0;
4117		iph->check = 0;
4118		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4119							 iph->daddr, 0,
4120							 IPPROTO_TCP,
4121							 0);
4122		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4123		first->tx_flags |= IGB_TX_FLAGS_TSO |
4124				   IGB_TX_FLAGS_CSUM |
4125				   IGB_TX_FLAGS_IPV4;
4126	} else if (skb_is_gso_v6(skb)) {
4127		ipv6_hdr(skb)->payload_len = 0;
4128		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4129						       &ipv6_hdr(skb)->daddr,
4130						       0, IPPROTO_TCP, 0);
4131		first->tx_flags |= IGB_TX_FLAGS_TSO |
4132				   IGB_TX_FLAGS_CSUM;
4133	}
4134
4135	/* compute header lengths */
4136	l4len = tcp_hdrlen(skb);
4137	*hdr_len = skb_transport_offset(skb) + l4len;
4138
4139	/* update gso size and bytecount with header size */
4140	first->gso_segs = skb_shinfo(skb)->gso_segs;
4141	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4142
4143	/* MSS L4LEN IDX */
4144	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4145	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4146
4147	/* VLAN MACLEN IPLEN */
4148	vlan_macip_lens = skb_network_header_len(skb);
4149	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4150	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4151
4152	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4153
4154	return 1;
4155}
4156
4157static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4158{
4159	struct sk_buff *skb = first->skb;
4160	u32 vlan_macip_lens = 0;
4161	u32 mss_l4len_idx = 0;
4162	u32 type_tucmd = 0;
4163
4164	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4165		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4166			return;
4167	} else {
4168		u8 l4_hdr = 0;
4169		switch (first->protocol) {
4170		case __constant_htons(ETH_P_IP):
4171			vlan_macip_lens |= skb_network_header_len(skb);
4172			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4173			l4_hdr = ip_hdr(skb)->protocol;
4174			break;
4175		case __constant_htons(ETH_P_IPV6):
4176			vlan_macip_lens |= skb_network_header_len(skb);
4177			l4_hdr = ipv6_hdr(skb)->nexthdr;
4178			break;
4179		default:
4180			if (unlikely(net_ratelimit())) {
4181				dev_warn(tx_ring->dev,
4182				 "partial checksum but proto=%x!\n",
4183				 first->protocol);
4184			}
4185			break;
4186		}
4187
4188		switch (l4_hdr) {
4189		case IPPROTO_TCP:
4190			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4191			mss_l4len_idx = tcp_hdrlen(skb) <<
4192					E1000_ADVTXD_L4LEN_SHIFT;
4193			break;
4194		case IPPROTO_SCTP:
4195			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4196			mss_l4len_idx = sizeof(struct sctphdr) <<
4197					E1000_ADVTXD_L4LEN_SHIFT;
4198			break;
4199		case IPPROTO_UDP:
4200			mss_l4len_idx = sizeof(struct udphdr) <<
4201					E1000_ADVTXD_L4LEN_SHIFT;
4202			break;
4203		default:
4204			if (unlikely(net_ratelimit())) {
4205				dev_warn(tx_ring->dev,
4206				 "partial checksum but l4 proto=%x!\n",
4207				 l4_hdr);
4208			}
4209			break;
4210		}
4211
4212		/* update TX checksum flag */
4213		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4214	}
4215
4216	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4217	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4218
4219	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4220}
4221
4222static __le32 igb_tx_cmd_type(u32 tx_flags)
4223{
4224	/* set type for advanced descriptor with frame checksum insertion */
4225	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4226				      E1000_ADVTXD_DCMD_IFCS |
4227				      E1000_ADVTXD_DCMD_DEXT);
4228
4229	/* set HW vlan bit if vlan is present */
4230	if (tx_flags & IGB_TX_FLAGS_VLAN)
4231		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4232
4233	/* set timestamp bit if present */
4234	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4235		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4236
4237	/* set segmentation bits for TSO */
4238	if (tx_flags & IGB_TX_FLAGS_TSO)
4239		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4240
4241	return cmd_type;
4242}
4243
4244static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4245				 union e1000_adv_tx_desc *tx_desc,
4246				 u32 tx_flags, unsigned int paylen)
4247{
4248	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4249
4250	/* 82575 requires a unique index per ring if any offload is enabled */
4251	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4252	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4253		olinfo_status |= tx_ring->reg_idx << 4;
4254
4255	/* insert L4 checksum */
4256	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4257		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4258
4259		/* insert IPv4 checksum */
4260		if (tx_flags & IGB_TX_FLAGS_IPV4)
4261			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4262	}
4263
4264	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4265}
4266
4267/*
4268 * The largest size we can write to the descriptor is 65535.  In order to
4269 * maintain a power of two alignment we have to limit ourselves to 32K.
4270 */
4271#define IGB_MAX_TXD_PWR	15
4272#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
4273
4274static void igb_tx_map(struct igb_ring *tx_ring,
4275		       struct igb_tx_buffer *first,
4276		       const u8 hdr_len)
4277{
4278	struct sk_buff *skb = first->skb;
4279	struct igb_tx_buffer *tx_buffer_info;
4280	union e1000_adv_tx_desc *tx_desc;
4281	dma_addr_t dma;
4282	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4283	unsigned int data_len = skb->data_len;
4284	unsigned int size = skb_headlen(skb);
4285	unsigned int paylen = skb->len - hdr_len;
4286	__le32 cmd_type;
4287	u32 tx_flags = first->tx_flags;
4288	u16 i = tx_ring->next_to_use;
4289
4290	tx_desc = IGB_TX_DESC(tx_ring, i);
4291
4292	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4293	cmd_type = igb_tx_cmd_type(tx_flags);
4294
4295	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4296	if (dma_mapping_error(tx_ring->dev, dma))
4297		goto dma_error;
4298
4299	/* record length, and DMA address */
4300	first->length = size;
4301	first->dma = dma;
4302	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4303
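	/* walk the skb head and then each frag, splitting any buffer larger
	 * than IGB_MAX_DATA_PER_TXD across multiple descriptors */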
4304	for (;;) {
4305		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4306			tx_desc->read.cmd_type_len =
4307				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4308
4309			i++;
4310			tx_desc++;
4311			if (i == tx_ring->count) {
4312				tx_desc = IGB_TX_DESC(tx_ring, 0);
4313				i = 0;
4314			}
4315
4316			dma += IGB_MAX_DATA_PER_TXD;
4317			size -= IGB_MAX_DATA_PER_TXD;
4318
4319			tx_desc->read.olinfo_status = 0;
4320			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4321		}
4322
4323		if (likely(!data_len))
4324			break;
4325
4326		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4327
4328		i++;
4329		tx_desc++;
4330		if (i == tx_ring->count) {
4331			tx_desc = IGB_TX_DESC(tx_ring, 0);
4332			i = 0;
4333		}
4334
4335		size = skb_frag_size(frag);
4336		data_len -= size;
4337
4338		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4339				   size, DMA_TO_DEVICE);
4340		if (dma_mapping_error(tx_ring->dev, dma))
4341			goto dma_error;
4342
4343		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4344		tx_buffer_info->length = size;
4345		tx_buffer_info->dma = dma;
4346
4347		tx_desc->read.olinfo_status = 0;
4348		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4349
4350		frag++;
4351	}
4352
4353	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4354
4355	/* write last descriptor with RS and EOP bits */
4356	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4357	if (unlikely(skb->no_fcs))
4358		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4359	tx_desc->read.cmd_type_len = cmd_type;
4360
4361	/* set the timestamp */
4362	first->time_stamp = jiffies;
4363
4364	/*
4365	 * Force memory writes to complete before letting h/w know there
4366	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4367	 * memory model archs, such as IA-64).
4368	 *
4369	 * We also need this memory barrier to make certain all of the
4370	 * status bits have been updated before next_to_watch is written.
4371	 */
4372	wmb();
4373
4374	/* set next_to_watch value indicating a packet is present */
4375	first->next_to_watch = tx_desc;
4376
4377	i++;
4378	if (i == tx_ring->count)
4379		i = 0;
4380
4381	tx_ring->next_to_use = i;
4382
4383	writel(i, tx_ring->tail);
4384
4385	/* we need this if more than one processor can write to our tail
4386	 * at a time; it synchronizes IO on IA64/Altix systems */
4387	mmiowb();
4388
4389	return;
4390
4391dma_error:
4392	dev_err(tx_ring->dev, "TX DMA map failed\n");
4393
4394	/* clear dma mappings for failed tx_buffer_info map */
4395	for (;;) {
4396		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4397		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4398		if (tx_buffer_info == first)
4399			break;
4400		if (i == 0)
4401			i = tx_ring->count;
4402		i--;
4403	}
4404
4405	tx_ring->next_to_use = i;
4406}
4407
4408static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4409{
4410	struct net_device *netdev = tx_ring->netdev;
4411
4412	netif_stop_subqueue(netdev, tx_ring->queue_index);
4413
4414	/* Herbert's original patch had:
4415	 *  smp_mb__after_netif_stop_queue();
4416	 * but since that doesn't exist yet, just open code it. */
4417	smp_mb();
4418
4419	/* We need to check again in case another CPU has just
4420	 * made room available. */
4421	if (igb_desc_unused(tx_ring) < size)
4422		return -EBUSY;
4423
4424	/* A reprieve! */
4425	netif_wake_subqueue(netdev, tx_ring->queue_index);
4426
4427	u64_stats_update_begin(&tx_ring->tx_syncp2);
4428	tx_ring->tx_stats.restart_queue2++;
4429	u64_stats_update_end(&tx_ring->tx_syncp2);
4430
4431	return 0;
4432}
4433
4434static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4435{
4436	if (igb_desc_unused(tx_ring) >= size)
4437		return 0;
4438	return __igb_maybe_stop_tx(tx_ring, size);
4439}
4440
4441netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4442				struct igb_ring *tx_ring)
4443{
4444	struct igb_tx_buffer *first;
4445	int tso;
4446	u32 tx_flags = 0;
4447	__be16 protocol = vlan_get_protocol(skb);
4448	u8 hdr_len = 0;
4449
4450	/* need: 1 descriptor per page,
4451	 *       + 2 desc gap to keep tail from touching head,
4452	 *       + 1 desc for skb->data,
4453	 *       + 1 desc for context descriptor,
4454	 * otherwise try next time */
4455	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4456		/* this is a hard error */
4457		return NETDEV_TX_BUSY;
4458	}
4459
4460	/* record the location of the first descriptor for this packet */
4461	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4462	first->skb = skb;
4463	first->bytecount = skb->len;
4464	first->gso_segs = 1;
4465
4466	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4467		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4468		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4469	}
4470
4471	if (vlan_tx_tag_present(skb)) {
4472		tx_flags |= IGB_TX_FLAGS_VLAN;
4473		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4474	}
4475
4476	/* record initial flags and protocol */
4477	first->tx_flags = tx_flags;
4478	first->protocol = protocol;
4479
4480	tso = igb_tso(tx_ring, first, &hdr_len);
4481	if (tso < 0)
4482		goto out_drop;
4483	else if (!tso)
4484		igb_tx_csum(tx_ring, first);
4485
4486	igb_tx_map(tx_ring, first, hdr_len);
4487
4488	/* Make sure there is space in the ring for the next send. */
4489	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4490
4491	return NETDEV_TX_OK;
4492
4493out_drop:
4494	igb_unmap_and_free_tx_resource(tx_ring, first);
4495
4496	return NETDEV_TX_OK;
4497}
4498
4499static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4500						    struct sk_buff *skb)
4501{
4502	unsigned int r_idx = skb->queue_mapping;
4503
4504	if (r_idx >= adapter->num_tx_queues)
4505		r_idx = r_idx % adapter->num_tx_queues;
4506
4507	return adapter->tx_ring[r_idx];
4508}
4509
4510static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4511				  struct net_device *netdev)
4512{
4513	struct igb_adapter *adapter = netdev_priv(netdev);
4514
4515	if (test_bit(__IGB_DOWN, &adapter->state)) {
4516		dev_kfree_skb_any(skb);
4517		return NETDEV_TX_OK;
4518	}
4519
4520	if (skb->len <= 0) {
4521		dev_kfree_skb_any(skb);
4522		return NETDEV_TX_OK;
4523	}
4524
4525	/*
4526	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4527	 * in order to meet this minimum size requirement.
4528	 */
4529	if (skb->len < 17) {
4530		if (skb_padto(skb, 17))
4531			return NETDEV_TX_OK;
4532		skb->len = 17;
4533	}
4534
4535	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4536}
4537
4538/**
4539 * igb_tx_timeout - Respond to a Tx Hang
4540 * @netdev: network interface device structure
4541 **/
4542static void igb_tx_timeout(struct net_device *netdev)
4543{
4544	struct igb_adapter *adapter = netdev_priv(netdev);
4545	struct e1000_hw *hw = &adapter->hw;
4546
4547	/* Do the reset outside of interrupt context */
4548	adapter->tx_timeout_count++;
4549
4550	if (hw->mac.type >= e1000_82580)
4551		hw->dev_spec._82575.global_device_reset = true;
4552
4553	schedule_work(&adapter->reset_task);
4554	wr32(E1000_EICS,
4555	     (adapter->eims_enable_mask & ~adapter->eims_other));
4556}
4557
4558static void igb_reset_task(struct work_struct *work)
4559{
4560	struct igb_adapter *adapter;
4561	adapter = container_of(work, struct igb_adapter, reset_task);
4562
4563	igb_dump(adapter);
4564	netdev_err(adapter->netdev, "Reset adapter\n");
4565	igb_reinit_locked(adapter);
4566}
4567
4568/**
4569 * igb_get_stats64 - Get System Network Statistics
4570 * @netdev: network interface device structure
4571 * @stats: rtnl_link_stats64 pointer
4572 *
4573 **/
4574static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4575						 struct rtnl_link_stats64 *stats)
4576{
4577	struct igb_adapter *adapter = netdev_priv(netdev);
4578
4579	spin_lock(&adapter->stats64_lock);
4580	igb_update_stats(adapter, &adapter->stats64);
4581	memcpy(stats, &adapter->stats64, sizeof(*stats));
4582	spin_unlock(&adapter->stats64_lock);
4583
4584	return stats;
4585}
4586
4587/**
4588 * igb_change_mtu - Change the Maximum Transfer Unit
4589 * @netdev: network interface device structure
4590 * @new_mtu: new value for maximum frame size
4591 *
4592 * Returns 0 on success, negative on failure
4593 **/
4594static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4595{
4596	struct igb_adapter *adapter = netdev_priv(netdev);
4597	struct pci_dev *pdev = adapter->pdev;
4598	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4599
4600	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4601		dev_err(&pdev->dev, "Invalid MTU setting\n");
4602		return -EINVAL;
4603	}
4604
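/* 9238 bytes = 9216-byte jumbo MTU + ETH_HLEN (14) + ETH_FCS_LEN (4)
 * + VLAN_HLEN (4) */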
4605#define MAX_STD_JUMBO_FRAME_SIZE 9238
4606	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4607		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4608		return -EINVAL;
4609	}
4610
4611	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4612		msleep(1);
4613
4614	/* igb_down has a dependency on max_frame_size */
4615	adapter->max_frame_size = max_frame;
4616
4617	if (netif_running(netdev))
4618		igb_down(adapter);
4619
4620	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4621		 netdev->mtu, new_mtu);
4622	netdev->mtu = new_mtu;
4623
4624	if (netif_running(netdev))
4625		igb_up(adapter);
4626	else
4627		igb_reset(adapter);
4628
4629	clear_bit(__IGB_RESETTING, &adapter->state);
4630
4631	return 0;
4632}
4633
4634/**
4635 * igb_update_stats - Update the board statistics counters
4636 * @adapter: board private structure
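 * @net_stats: rtnl_link_stats64 structure to fill with adapter-wide totals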
4637 **/
4638
4639void igb_update_stats(struct igb_adapter *adapter,
4640		      struct rtnl_link_stats64 *net_stats)
4641{
4642	struct e1000_hw *hw = &adapter->hw;
4643	struct pci_dev *pdev = adapter->pdev;
4644	u32 reg, mpc;
4645	u16 phy_tmp;
4646	int i;
4647	u64 bytes, packets;
4648	unsigned int start;
4649	u64 _bytes, _packets;
4650
4651#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4652
4653	/*
4654	 * Prevent stats update while adapter is being reset, or if the pci
4655	 * connection is down.
4656	 */
4657	if (adapter->link_speed == 0)
4658		return;
4659	if (pci_channel_offline(pdev))
4660		return;
4661
4662	bytes = 0;
4663	packets = 0;
4664	for (i = 0; i < adapter->num_rx_queues; i++) {
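		/* RQDPC is the per-queue count of received packets dropped,
		 * typically because no descriptors were available */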
4665		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4666		struct igb_ring *ring = adapter->rx_ring[i];
4667
4668		ring->rx_stats.drops += rqdpc_tmp;
4669		net_stats->rx_fifo_errors += rqdpc_tmp;
4670
4671		do {
4672			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4673			_bytes = ring->rx_stats.bytes;
4674			_packets = ring->rx_stats.packets;
4675		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4676		bytes += _bytes;
4677		packets += _packets;
4678	}
4679
4680	net_stats->rx_bytes = bytes;
4681	net_stats->rx_packets = packets;
4682
4683	bytes = 0;
4684	packets = 0;
4685	for (i = 0; i < adapter->num_tx_queues; i++) {
4686		struct igb_ring *ring = adapter->tx_ring[i];
4687		do {
4688			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4689			_bytes = ring->tx_stats.bytes;
4690			_packets = ring->tx_stats.packets;
4691		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4692		bytes += _bytes;
4693		packets += _packets;
4694	}
4695	net_stats->tx_bytes = bytes;
4696	net_stats->tx_packets = packets;
4697
4698	/* read stats registers */
4699	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4700	adapter->stats.gprc += rd32(E1000_GPRC);
4701	adapter->stats.gorc += rd32(E1000_GORCL);
4702	rd32(E1000_GORCH); /* clear GORCL */
4703	adapter->stats.bprc += rd32(E1000_BPRC);
4704	adapter->stats.mprc += rd32(E1000_MPRC);
4705	adapter->stats.roc += rd32(E1000_ROC);
4706
4707	adapter->stats.prc64 += rd32(E1000_PRC64);
4708	adapter->stats.prc127 += rd32(E1000_PRC127);
4709	adapter->stats.prc255 += rd32(E1000_PRC255);
4710	adapter->stats.prc511 += rd32(E1000_PRC511);
4711	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4712	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4713	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4714	adapter->stats.sec += rd32(E1000_SEC);
4715
4716	mpc = rd32(E1000_MPC);
4717	adapter->stats.mpc += mpc;
4718	net_stats->rx_fifo_errors += mpc;
4719	adapter->stats.scc += rd32(E1000_SCC);
4720	adapter->stats.ecol += rd32(E1000_ECOL);
4721	adapter->stats.mcc += rd32(E1000_MCC);
4722	adapter->stats.latecol += rd32(E1000_LATECOL);
4723	adapter->stats.dc += rd32(E1000_DC);
4724	adapter->stats.rlec += rd32(E1000_RLEC);
4725	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4726	adapter->stats.xontxc += rd32(E1000_XONTXC);
4727	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4728	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4729	adapter->stats.fcruc += rd32(E1000_FCRUC);
4730	adapter->stats.gptc += rd32(E1000_GPTC);
4731	adapter->stats.gotc += rd32(E1000_GOTCL);
4732	rd32(E1000_GOTCH); /* clear GOTCL */
4733	adapter->stats.rnbc += rd32(E1000_RNBC);
4734	adapter->stats.ruc += rd32(E1000_RUC);
4735	adapter->stats.rfc += rd32(E1000_RFC);
4736	adapter->stats.rjc += rd32(E1000_RJC);
4737	adapter->stats.tor += rd32(E1000_TORH);
4738	adapter->stats.tot += rd32(E1000_TOTH);
4739	adapter->stats.tpr += rd32(E1000_TPR);
4740
4741	adapter->stats.ptc64 += rd32(E1000_PTC64);
4742	adapter->stats.ptc127 += rd32(E1000_PTC127);
4743	adapter->stats.ptc255 += rd32(E1000_PTC255);
4744	adapter->stats.ptc511 += rd32(E1000_PTC511);
4745	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4746	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4747
4748	adapter->stats.mptc += rd32(E1000_MPTC);
4749	adapter->stats.bptc += rd32(E1000_BPTC);
4750
4751	adapter->stats.tpt += rd32(E1000_TPT);
4752	adapter->stats.colc += rd32(E1000_COLC);
4753
4754	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4755	/* read internal phy specific stats */
4756	reg = rd32(E1000_CTRL_EXT);
4757	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4758		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4759		adapter->stats.tncrs += rd32(E1000_TNCRS);
4760	}
4761
4762	adapter->stats.tsctc += rd32(E1000_TSCTC);
4763	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4764
4765	adapter->stats.iac += rd32(E1000_IAC);
4766	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4767	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4768	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4769	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4770	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4771	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4772	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4773	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4774
4775	/* Fill out the OS statistics structure */
4776	net_stats->multicast = adapter->stats.mprc;
4777	net_stats->collisions = adapter->stats.colc;
4778
4779	/* Rx Errors */
4780
4781	/* RLEC on some newer hardware can be incorrect so build
4782	 * our own version based on RUC and ROC */
4783	net_stats->rx_errors = adapter->stats.rxerrc +
4784		adapter->stats.crcerrs + adapter->stats.algnerrc +
4785		adapter->stats.ruc + adapter->stats.roc +
4786		adapter->stats.cexterr;
4787	net_stats->rx_length_errors = adapter->stats.ruc +
4788				      adapter->stats.roc;
4789	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4790	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4791	net_stats->rx_missed_errors = adapter->stats.mpc;
4792
4793	/* Tx Errors */
4794	net_stats->tx_errors = adapter->stats.ecol +
4795			       adapter->stats.latecol;
4796	net_stats->tx_aborted_errors = adapter->stats.ecol;
4797	net_stats->tx_window_errors = adapter->stats.latecol;
4798	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4799
4800	/* Tx Dropped needs to be maintained elsewhere */
4801
4802	/* Phy Stats */
4803	if (hw->phy.media_type == e1000_media_type_copper) {
4804		if ((adapter->link_speed == SPEED_1000) &&
4805		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4806			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4807			adapter->phy_stats.idle_errors += phy_tmp;
4808		}
4809	}
4810
4811	/* Management Stats */
4812	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4813	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4814	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4815
4816	/* OS2BMC Stats */
4817	reg = rd32(E1000_MANC);
4818	if (reg & E1000_MANC_EN_BMC2OS) {
4819		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4820		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4821		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4822		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4823	}
4824}
4825
4826static irqreturn_t igb_msix_other(int irq, void *data)
4827{
4828	struct igb_adapter *adapter = data;
4829	struct e1000_hw *hw = &adapter->hw;
4830	u32 icr = rd32(E1000_ICR);
4831	/* reading ICR causes bit 31 of EICR to be cleared */
4832
4833	if (icr & E1000_ICR_DRSTA)
4834		schedule_work(&adapter->reset_task);
4835
4836	if (icr & E1000_ICR_DOUTSYNC) {
4837		/* HW is reporting DMA is out of sync */
4838		adapter->stats.doosync++;
4839		/* The DMA Out of Sync is also an indication of a spoof event
4840		 * in IOV mode. Check the Wrong VM Behavior register to
4841		 * see if it is really a spoof event. */
4842		igb_check_wvbr(adapter);
4843	}
4844
4845	/* Check for a mailbox event */
4846	if (icr & E1000_ICR_VMMB)
4847		igb_msg_task(adapter);
4848
4849	if (icr & E1000_ICR_LSC) {
4850		hw->mac.get_link_status = 1;
4851		/* guard against interrupt when we're going down */
4852		if (!test_bit(__IGB_DOWN, &adapter->state))
4853			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4854	}
4855
4856	wr32(E1000_EIMS, adapter->eims_other);
4857
4858	return IRQ_HANDLED;
4859}
4860
4861static void igb_write_itr(struct igb_q_vector *q_vector)
4862{
4863	struct igb_adapter *adapter = q_vector->adapter;
4864	u32 itr_val = q_vector->itr_val & 0x7FFC;
4865
4866	if (!q_vector->set_itr)
4867		return;
4868
4869	if (!itr_val)
4870		itr_val = 0x4;
4871
4872	if (adapter->hw.mac.type == e1000_82575)
4873		itr_val |= itr_val << 16;
4874	else
4875		itr_val |= E1000_EITR_CNT_IGNR;
4876
4877	writel(itr_val, q_vector->itr_register);
4878	q_vector->set_itr = 0;
4879}
4880
4881static irqreturn_t igb_msix_ring(int irq, void *data)
4882{
4883	struct igb_q_vector *q_vector = data;
4884
4885	/* Write the ITR value calculated from the previous interrupt. */
4886	igb_write_itr(q_vector);
4887
4888	napi_schedule(&q_vector->napi);
4889
4890	return IRQ_HANDLED;
4891}
4892
4893#ifdef CONFIG_IGB_DCA
4894static void igb_update_dca(struct igb_q_vector *q_vector)
4895{
4896	struct igb_adapter *adapter = q_vector->adapter;
4897	struct e1000_hw *hw = &adapter->hw;
4898	int cpu = get_cpu();
4899
4900	if (q_vector->cpu == cpu)
4901		goto out_no_update;
4902
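	/* program this CPU's DCA tag into the queue's TX/RX control registers
	 * so the device steers descriptor writes toward this CPU's cache */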
4903	if (q_vector->tx.ring) {
4904		int q = q_vector->tx.ring->reg_idx;
4905		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4906		if (hw->mac.type == e1000_82575) {
4907			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4908			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4909		} else {
4910			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4911			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4912			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4913		}
4914		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4915		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4916	}
4917	if (q_vector->rx.ring) {
4918		int q = q_vector->rx.ring->reg_idx;
4919		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4920		if (hw->mac.type == e1000_82575) {
4921			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4922			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4923		} else {
4924			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4925			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4926			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4927		}
4928		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4929		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4930		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4931		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4932	}
4933	q_vector->cpu = cpu;
4934out_no_update:
4935	put_cpu();
4936}
4937
4938static void igb_setup_dca(struct igb_adapter *adapter)
4939{
4940	struct e1000_hw *hw = &adapter->hw;
4941	int i;
4942
4943	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4944		return;
4945
4946	/* Always use CB2 mode, difference is masked in the CB driver. */
4947	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4948
4949	for (i = 0; i < adapter->num_q_vectors; i++) {
4950		adapter->q_vector[i]->cpu = -1;
4951		igb_update_dca(adapter->q_vector[i]);
4952	}
4953}
4954
4955static int __igb_notify_dca(struct device *dev, void *data)
4956{
4957	struct net_device *netdev = dev_get_drvdata(dev);
4958	struct igb_adapter *adapter = netdev_priv(netdev);
4959	struct pci_dev *pdev = adapter->pdev;
4960	struct e1000_hw *hw = &adapter->hw;
4961	unsigned long event = *(unsigned long *)data;
4962
4963	switch (event) {
4964	case DCA_PROVIDER_ADD:
4965		/* if already enabled, don't do it again */
4966		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4967			break;
4968		if (dca_add_requester(dev) == 0) {
4969			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4970			dev_info(&pdev->dev, "DCA enabled\n");
4971			igb_setup_dca(adapter);
4972			break;
4973		}
4974		/* Fall Through since DCA is disabled. */
4975	case DCA_PROVIDER_REMOVE:
4976		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4977			/* without this a class_device is left
4978			 * hanging around in the sysfs model */
4979			dca_remove_requester(dev);
4980			dev_info(&pdev->dev, "DCA disabled\n");
4981			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4982			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4983		}
4984		break;
4985	}
4986
4987	return 0;
4988}
4989
4990static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4991                          void *p)
4992{
4993	int ret_val;
4994
4995	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4996	                                 __igb_notify_dca);
4997
4998	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4999}
5000#endif /* CONFIG_IGB_DCA */
5001
5002#ifdef CONFIG_PCI_IOV
5003static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5004{
5005	unsigned char mac_addr[ETH_ALEN];
5006	struct pci_dev *pdev = adapter->pdev;
5007	struct e1000_hw *hw = &adapter->hw;
5008	struct pci_dev *pvfdev;
5009	unsigned int device_id;
5010	u16 thisvf_devfn;
5011
5012	eth_random_addr(mac_addr);
5013	igb_set_vf_mac(adapter, vf, mac_addr);
5014
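	/* derive the VF's PCI devfn: VFs sit at the PF devfn + 0x80, spaced
	 * by the device-specific VF stride chosen below */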
5015	switch (adapter->hw.mac.type) {
5016	case e1000_82576:
5017		device_id = IGB_82576_VF_DEV_ID;
5018		/* VF Stride for 82576 is 2 */
5019		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5020			(pdev->devfn & 1);
5021		break;
5022	case e1000_i350:
5023		device_id = IGB_I350_VF_DEV_ID;
5024		/* VF Stride for I350 is 4 */
5025		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5026				(pdev->devfn & 3);
5027		break;
5028	default:
5029		device_id = 0;
5030		thisvf_devfn = 0;
5031		break;
5032	}
5033
5034	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5035	while (pvfdev) {
5036		if (pvfdev->devfn == thisvf_devfn)
5037			break;
5038		pvfdev = pci_get_device(hw->vendor_id,
5039					device_id, pvfdev);
5040	}
5041
5042	if (pvfdev)
5043		adapter->vf_data[vf].vfdev = pvfdev;
5044	else
5045		dev_err(&pdev->dev,
5046			"Couldn't find pci dev ptr for VF %4.4x\n",
5047			thisvf_devfn);
5048	return pvfdev != NULL;
5049}
5050
5051static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5052{
5053	struct e1000_hw *hw = &adapter->hw;
5054	struct pci_dev *pdev = adapter->pdev;
5055	struct pci_dev *pvfdev;
5056	u16 vf_devfn = 0;
5057	u16 vf_stride;
5058	unsigned int device_id;
5059	int vfs_found = 0;
5060
5061	switch (adapter->hw.mac.type) {
5062	case e1000_82576:
5063		device_id = IGB_82576_VF_DEV_ID;
5064		/* VF Stride for 82576 is 2 */
5065		vf_stride = 2;
5066		break;
5067	case e1000_i350:
5068		device_id = IGB_I350_VF_DEV_ID;
5069		/* VF Stride for I350 is 4 */
5070		vf_stride = 4;
5071		break;
5072	default:
5073		device_id = 0;
5074		vf_stride = 0;
5075		break;
5076	}
5077
5078	vf_devfn = pdev->devfn + 0x80;
5079	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5080	while (pvfdev) {
5081		if (pvfdev->devfn == vf_devfn &&
5082		    (pvfdev->bus->number >= pdev->bus->number))
5083			vfs_found++;
5084		vf_devfn += vf_stride;
5085		pvfdev = pci_get_device(hw->vendor_id,
5086					device_id, pvfdev);
5087	}
5088
5089	return vfs_found;
5090}
5091
5092static int igb_check_vf_assignment(struct igb_adapter *adapter)
5093{
5094	int i;
5095	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5096		if (adapter->vf_data[i].vfdev) {
5097			if (adapter->vf_data[i].vfdev->dev_flags &
5098			    PCI_DEV_FLAGS_ASSIGNED)
5099				return true;
5100		}
5101	}
5102	return false;
5103}
5104
5105#endif
5106static void igb_ping_all_vfs(struct igb_adapter *adapter)
5107{
5108	struct e1000_hw *hw = &adapter->hw;
5109	u32 ping;
5110	int i;
5111
5112	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5113		ping = E1000_PF_CONTROL_MSG;
5114		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5115			ping |= E1000_VT_MSGTYPE_CTS;
5116		igb_write_mbx(hw, &ping, 1, i);
5117	}
5118}
5119
5120static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5121{
5122	struct e1000_hw *hw = &adapter->hw;
5123	u32 vmolr = rd32(E1000_VMOLR(vf));
5124	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5125
5126	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5127	                    IGB_VF_FLAG_MULTI_PROMISC);
5128	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5129
5130	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5131		vmolr |= E1000_VMOLR_MPME;
5132		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5133		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5134	} else {
5135		/*
5136		 * if we have hashes and we are clearing a multicast promisc
5137		 * flag we need to write the hashes to the MTA as this step
5138		 * was previously skipped
5139		 */
5140		if (vf_data->num_vf_mc_hashes > 30) {
5141			vmolr |= E1000_VMOLR_MPME;
5142		} else if (vf_data->num_vf_mc_hashes) {
5143			int j;
5144			vmolr |= E1000_VMOLR_ROMPE;
5145			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5146				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5147		}
5148	}
5149
5150	wr32(E1000_VMOLR(vf), vmolr);
5151
5152	/* there are flags left unprocessed, likely not supported */
5153	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5154		return -EINVAL;
5155
5156	return 0;
5157
5158}
5159
5160static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5161				  u32 *msgbuf, u32 vf)
5162{
5163	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5164	u16 *hash_list = (u16 *)&msgbuf[1];
5165	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5166	int i;
5167
5168	/* salt away the number of multicast addresses assigned
5169	 * to this VF for later use, to restore them when the PF
5170	 * multicast list changes
5171	 */
5172	vf_data->num_vf_mc_hashes = n;
5173
5174	/* only up to 30 hash values supported */
5175	if (n > 30)
5176		n = 30;
5177
5178	/* store the hashes for later use */
5179	for (i = 0; i < n; i++)
5180		vf_data->vf_mc_hashes[i] = hash_list[i];
5181
5182	/* Flush and reset the mta with the new values */
5183	igb_set_rx_mode(adapter->netdev);
5184
5185	return 0;
5186}
5187
5188static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5189{
5190	struct e1000_hw *hw = &adapter->hw;
5191	struct vf_data_storage *vf_data;
5192	int i, j;
5193
5194	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5195		u32 vmolr = rd32(E1000_VMOLR(i));
5196		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5197
5198		vf_data = &adapter->vf_data[i];
5199
5200		if ((vf_data->num_vf_mc_hashes > 30) ||
5201		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5202			vmolr |= E1000_VMOLR_MPME;
5203		} else if (vf_data->num_vf_mc_hashes) {
5204			vmolr |= E1000_VMOLR_ROMPE;
5205			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5206				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5207		}
5208		wr32(E1000_VMOLR(i), vmolr);
5209	}
5210}
5211
5212static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5213{
5214	struct e1000_hw *hw = &adapter->hw;
5215	u32 pool_mask, reg, vid;
5216	int i;
5217
5218	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5219
5220	/* Find the vlan filter for this id */
5221	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5222		reg = rd32(E1000_VLVF(i));
5223
5224		/* remove the vf from the pool */
5225		reg &= ~pool_mask;
5226
5227		/* if pool is empty then remove entry from vfta */
5228		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5229		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5230			vid = reg & E1000_VLVF_VLANID_MASK;
5231			igb_vfta_set(hw, vid, false);
5232			reg = 0;
5233		}
5234
5235		wr32(E1000_VLVF(i), reg);
5236	}
5237
5238	adapter->vf_data[vf].vlans_enabled = 0;
5239}
5240
5241static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5242{
5243	struct e1000_hw *hw = &adapter->hw;
5244	u32 reg, i;
5245
5246	/* The vlvf table only exists on 82576 hardware and newer */
5247	if (hw->mac.type < e1000_82576)
5248		return -1;
5249
5250	/* we only need to do this if VMDq is enabled */
5251	if (!adapter->vfs_allocated_count)
5252		return -1;
5253
5254	/* Find the vlan filter for this id */
5255	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5256		reg = rd32(E1000_VLVF(i));
5257		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5258		    vid == (reg & E1000_VLVF_VLANID_MASK))
5259			break;
5260	}
5261
5262	if (add) {
5263		if (i == E1000_VLVF_ARRAY_SIZE) {
5264			/* Did not find a matching VLAN ID entry that was
5265			 * enabled.  Search for a free filter entry, i.e.
5266			 * one without the enable bit set
5267			 */
5268			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5269				reg = rd32(E1000_VLVF(i));
5270				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5271					break;
5272			}
5273		}
5274		if (i < E1000_VLVF_ARRAY_SIZE) {
5275			/* Found an enabled/available entry */
5276			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5277
5278			/* if !enabled we need to set this up in vfta */
5279			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5280				/* add VID to filter table */
5281				igb_vfta_set(hw, vid, true);
5282				reg |= E1000_VLVF_VLANID_ENABLE;
5283			}
5284			reg &= ~E1000_VLVF_VLANID_MASK;
5285			reg |= vid;
5286			wr32(E1000_VLVF(i), reg);
5287
5288			/* do not modify RLPML for PF devices */
5289			if (vf >= adapter->vfs_allocated_count)
5290				return 0;
5291
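			/* first VLAN for this pool: grow the VMOLR max frame
			 * size (RLPML) by 4 bytes to make room for the tag */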
5292			if (!adapter->vf_data[vf].vlans_enabled) {
5293				u32 size;
5294				reg = rd32(E1000_VMOLR(vf));
5295				size = reg & E1000_VMOLR_RLPML_MASK;
5296				size += 4;
5297				reg &= ~E1000_VMOLR_RLPML_MASK;
5298				reg |= size;
5299				wr32(E1000_VMOLR(vf), reg);
5300			}
5301
5302			adapter->vf_data[vf].vlans_enabled++;
5303		}
5304	} else {
5305		if (i < E1000_VLVF_ARRAY_SIZE) {
5306			/* remove vf from the pool */
5307			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5308			/* if pool is empty then remove entry from vfta */
5309			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5310				reg = 0;
5311				igb_vfta_set(hw, vid, false);
5312			}
5313			wr32(E1000_VLVF(i), reg);
5314
5315			/* do not modify RLPML for PF devices */
5316			if (vf >= adapter->vfs_allocated_count)
5317				return 0;
5318
5319			adapter->vf_data[vf].vlans_enabled--;
5320			if (!adapter->vf_data[vf].vlans_enabled) {
5321				u32 size;
5322				reg = rd32(E1000_VMOLR(vf));
5323				size = reg & E1000_VMOLR_RLPML_MASK;
5324				size -= 4;
5325				reg &= ~E1000_VMOLR_RLPML_MASK;
5326				reg |= size;
5327				wr32(E1000_VMOLR(vf), reg);
5328			}
5329		}
5330	}
5331	return 0;
5332}
5333
5334static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5335{
5336	struct e1000_hw *hw = &adapter->hw;
5337
5338	if (vid)
5339		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5340	else
5341		wr32(E1000_VMVIR(vf), 0);
5342}
5343
5344static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5345			       int vf, u16 vlan, u8 qos)
5346{
5347	int err = 0;
5348	struct igb_adapter *adapter = netdev_priv(netdev);
5349
5350	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5351		return -EINVAL;
5352	if (vlan || qos) {
5353		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5354		if (err)
5355			goto out;
5356		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5357		igb_set_vmolr(adapter, vf, !vlan);
5358		adapter->vf_data[vf].pf_vlan = vlan;
5359		adapter->vf_data[vf].pf_qos = qos;
5360		dev_info(&adapter->pdev->dev,
5361			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5362		if (test_bit(__IGB_DOWN, &adapter->state)) {
5363			dev_warn(&adapter->pdev->dev,
5364				 "The VF VLAN has been set,"
5365				 " but the PF device is not up.\n");
5366			dev_warn(&adapter->pdev->dev,
5367				 "Bring the PF device up before"
5368				 " attempting to use the VF device.\n");
5369		}
5370	} else {
5371		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5372				   false, vf);
5373		igb_set_vmvir(adapter, vlan, vf);
5374		igb_set_vmolr(adapter, vf, true);
5375		adapter->vf_data[vf].pf_vlan = 0;
5376		adapter->vf_data[vf].pf_qos = 0;
5377	}
5378out:
5379	return err;
5380}
5381
5382static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5383{
5384	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5385	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5386
5387	return igb_vlvf_set(adapter, vid, add, vf);
5388}
5389
5390static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5391{
5392	/* clear flags - except flag that indicates PF has set the MAC */
5393	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5394	adapter->vf_data[vf].last_nack = jiffies;
5395
5396	/* reset offloads to defaults */
5397	igb_set_vmolr(adapter, vf, true);
5398
5399	/* reset vlans for device */
5400	igb_clear_vf_vfta(adapter, vf);
5401	if (adapter->vf_data[vf].pf_vlan)
5402		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5403				    adapter->vf_data[vf].pf_vlan,
5404				    adapter->vf_data[vf].pf_qos);
5405	else
5406		igb_clear_vf_vfta(adapter, vf);
5407
5408	/* reset multicast table array for vf */
5409	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5410
5411	/* Flush and reset the mta with the new values */
5412	igb_set_rx_mode(adapter->netdev);
5413}
5414
5415static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5416{
5417	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5418
5419	/* generate a new mac address as we were hotplug removed/added */
5420	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5421		eth_random_addr(vf_mac);
5422
5423	/* process remaining reset events */
5424	igb_vf_reset(adapter, vf);
5425}
5426
5427static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5428{
5429	struct e1000_hw *hw = &adapter->hw;
5430	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5431	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5432	u32 reg, msgbuf[3];
5433	u8 *addr = (u8 *)(&msgbuf[1]);
5434
5435	/* process all the same items cleared in a function level reset */
5436	igb_vf_reset(adapter, vf);
5437
5438	/* set vf mac address */
5439	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5440
5441	/* enable transmit and receive for vf */
5442	reg = rd32(E1000_VFTE);
5443	wr32(E1000_VFTE, reg | (1 << vf));
5444	reg = rd32(E1000_VFRE);
5445	wr32(E1000_VFRE, reg | (1 << vf));
5446
5447	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5448
5449	/* reply to reset with ack and vf mac address */
5450	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5451	memcpy(addr, vf_mac, 6);
5452	igb_write_mbx(hw, msgbuf, 3, vf);
5453}
5454
5455static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5456{
5457	/*
5458	 * The VF MAC Address is stored in a packed array of bytes
5459	 * starting at the second 32 bit word of the msg array
5460	 */
5461	unsigned char *addr = (unsigned char *)&msg[1];
5462	int err = -1;
5463
5464	if (is_valid_ether_addr(addr))
5465		err = igb_set_vf_mac(adapter, vf, addr);
5466
5467	return err;
5468}
5469
5470static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5471{
5472	struct e1000_hw *hw = &adapter->hw;
5473	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5474	u32 msg = E1000_VT_MSGTYPE_NACK;
5475
5476	/* if device isn't clear to send it shouldn't be reading either */
5477	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5478	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5479		igb_write_mbx(hw, &msg, 1, vf);
5480		vf_data->last_nack = jiffies;
5481	}
5482}
5483
5484static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5485{
5486	struct pci_dev *pdev = adapter->pdev;
5487	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5488	struct e1000_hw *hw = &adapter->hw;
5489	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5490	s32 retval;
5491
5492	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5493
5494	if (retval) {
5495		/* if receive failed revoke VF CTS stats and restart init */
5496		dev_err(&pdev->dev, "Error receiving message from VF\n");
5497		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5498		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5499			return;
5500		goto out;
5501	}
5502
5503	/* this is a message we already processed, do nothing */
5504	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5505		return;
5506
5507	/*
5508	 * until the vf completes a reset it should not be
5509	 * allowed to start any configuration.
5510	 */
5511
5512	if (msgbuf[0] == E1000_VF_RESET) {
5513		igb_vf_reset_msg(adapter, vf);
5514		return;
5515	}
5516
5517	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5518		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5519			return;
5520		retval = -1;
5521		goto out;
5522	}
5523
5524	switch ((msgbuf[0] & 0xFFFF)) {
5525	case E1000_VF_SET_MAC_ADDR:
5526		retval = -EINVAL;
5527		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5528			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5529		else
5530			dev_warn(&pdev->dev,
5531				 "VF %d attempted to override administratively "
5532				 "set MAC address\nReload the VF driver to "
5533				 "resume operations\n", vf);
5534		break;
5535	case E1000_VF_SET_PROMISC:
5536		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5537		break;
5538	case E1000_VF_SET_MULTICAST:
5539		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5540		break;
5541	case E1000_VF_SET_LPE:
5542		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5543		break;
5544	case E1000_VF_SET_VLAN:
5545		retval = -1;
5546		if (vf_data->pf_vlan)
5547			dev_warn(&pdev->dev,
5548				 "VF %d attempted to override administratively "
5549				 "set VLAN tag\nReload the VF driver to "
5550				 "resume operations\n", vf);
5551		else
5552			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5553		break;
5554	default:
5555		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5556		retval = -1;
5557		break;
5558	}
5559
5560	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5561out:
5562	/* notify the VF of the results of what it sent us */
5563	if (retval)
5564		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5565	else
5566		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5567
5568	igb_write_mbx(hw, msgbuf, 1, vf);
5569}
5570
5571static void igb_msg_task(struct igb_adapter *adapter)
5572{
5573	struct e1000_hw *hw = &adapter->hw;
5574	u32 vf;
5575
5576	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5577		/* process any reset requests */
5578		if (!igb_check_for_rst(hw, vf))
5579			igb_vf_reset_event(adapter, vf);
5580
5581		/* process any messages pending */
5582		if (!igb_check_for_msg(hw, vf))
5583			igb_rcv_msg_from_vf(adapter, vf);
5584
5585		/* process any acks */
5586		if (!igb_check_for_ack(hw, vf))
5587			igb_rcv_ack_from_vf(adapter, vf);
5588	}
5589}
5590
5591/**
5592 *  igb_set_uta - Set unicast filter table address
5593 *  @adapter: board private structure
5594 *
5595 *  The unicast table address is a register array of 32-bit registers.
5596 *  The table is meant to be used in a way similar to how the MTA is used;
5597 *  however, due to certain limitations in the hardware it is necessary to
5598 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5599 *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
5600 **/
5601static void igb_set_uta(struct igb_adapter *adapter)
5602{
5603	struct e1000_hw *hw = &adapter->hw;
5604	int i;
5605
5606	/* The UTA table only exists on 82576 hardware and newer */
5607	if (hw->mac.type < e1000_82576)
5608		return;
5609
5610	/* we only need to do this if VMDq is enabled */
5611	if (!adapter->vfs_allocated_count)
5612		return;
5613
5614	for (i = 0; i < hw->mac.uta_reg_count; i++)
5615		array_wr32(E1000_UTA, i, ~0);
5616}
5617
5618/**
5619 * igb_intr_msi - Interrupt Handler
5620 * @irq: interrupt number
5621 * @data: pointer to a network interface device structure
5622 **/
5623static irqreturn_t igb_intr_msi(int irq, void *data)
5624{
5625	struct igb_adapter *adapter = data;
5626	struct igb_q_vector *q_vector = adapter->q_vector[0];
5627	struct e1000_hw *hw = &adapter->hw;
5628	/* read ICR disables interrupts using IAM */
5629	u32 icr = rd32(E1000_ICR);
5630
5631	igb_write_itr(q_vector);
5632
5633	if (icr & E1000_ICR_DRSTA)
5634		schedule_work(&adapter->reset_task);
5635
5636	if (icr & E1000_ICR_DOUTSYNC) {
5637		/* HW is reporting DMA is out of sync */
5638		adapter->stats.doosync++;
5639	}
5640
5641	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5642		hw->mac.get_link_status = 1;
5643		if (!test_bit(__IGB_DOWN, &adapter->state))
5644			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5645	}
5646
5647	napi_schedule(&q_vector->napi);
5648
5649	return IRQ_HANDLED;
5650}
5651
5652/**
5653 * igb_intr - Legacy Interrupt Handler
5654 * @irq: interrupt number
5655 * @data: pointer to a network interface device structure
5656 **/
5657static irqreturn_t igb_intr(int irq, void *data)
5658{
5659	struct igb_adapter *adapter = data;
5660	struct igb_q_vector *q_vector = adapter->q_vector[0];
5661	struct e1000_hw *hw = &adapter->hw;
5662	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5663	 * need for the IMC write */
5664	u32 icr = rd32(E1000_ICR);
5665
5666	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5667	 * not set, then the adapter didn't send an interrupt */
5668	if (!(icr & E1000_ICR_INT_ASSERTED))
5669		return IRQ_NONE;
5670
5671	igb_write_itr(q_vector);
5672
5673	if (icr & E1000_ICR_DRSTA)
5674		schedule_work(&adapter->reset_task);
5675
5676	if (icr & E1000_ICR_DOUTSYNC) {
5677		/* HW is reporting DMA is out of sync */
5678		adapter->stats.doosync++;
5679	}
5680
5681	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5682		hw->mac.get_link_status = 1;
5683		/* guard against interrupt when we're going down */
5684		if (!test_bit(__IGB_DOWN, &adapter->state))
5685			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5686	}
5687
5688	napi_schedule(&q_vector->napi);
5689
5690	return IRQ_HANDLED;
5691}
5692
5693static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5694{
5695	struct igb_adapter *adapter = q_vector->adapter;
5696	struct e1000_hw *hw = &adapter->hw;
5697
5698	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5699	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5700		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5701			igb_set_itr(q_vector);
5702		else
5703			igb_update_ring_itr(q_vector);
5704	}
5705
5706	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5707		if (adapter->msix_entries)
5708			wr32(E1000_EIMS, q_vector->eims_value);
5709		else
5710			igb_irq_enable(adapter);
5711	}
5712}
5713
5714/**
5715 * igb_poll - NAPI Rx polling callback
5716 * @napi: napi polling structure
5717 * @budget: count of how many packets we should handle
5718 **/
5719static int igb_poll(struct napi_struct *napi, int budget)
5720{
5721	struct igb_q_vector *q_vector = container_of(napi,
5722	                                             struct igb_q_vector,
5723	                                             napi);
5724	bool clean_complete = true;
5725
5726#ifdef CONFIG_IGB_DCA
5727	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5728		igb_update_dca(q_vector);
5729#endif
5730	if (q_vector->tx.ring)
5731		clean_complete = igb_clean_tx_irq(q_vector);
5732
5733	if (q_vector->rx.ring)
5734		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5735
5736	/* If all work not completed, return budget and keep polling */
5737	if (!clean_complete)
5738		return budget;
5739
5740	/* If not enough Rx work done, exit the polling mode */
5741	napi_complete(napi);
5742	igb_ring_irq_enable(q_vector);
5743
5744	return 0;
5745}
5746
5747#ifdef CONFIG_IGB_PTP
5748/**
5749 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5750 * @q_vector: pointer to q_vector containing needed info
5751 * @buffer_info: pointer to igb_tx_buffer structure
5752 *
5753 * If we were asked to do hardware stamping and such a time stamp is
5754 * available, then it must have been for this skb here because we only
5755 * allow one such packet into the queue.
5756 */
5757static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5758			    struct igb_tx_buffer *buffer_info)
5759{
5760	struct igb_adapter *adapter = q_vector->adapter;
5761	struct e1000_hw *hw = &adapter->hw;
5762	struct skb_shared_hwtstamps shhwtstamps;
5763	u64 regval;
5764
5765	/* bail if no hw timestamp was requested or the TX stamp is not valid */
5766	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5767	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5768		return;
5769
5770	regval = rd32(E1000_TXSTMPL);
5771	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5772
5773	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5774	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5775}
5776
5777#endif
5778/**
5779 * igb_clean_tx_irq - Reclaim resources after transmit completes
5780 * @q_vector: pointer to q_vector containing needed info
5781 *
5782 * returns true if ring is completely cleaned
5783 **/
5784static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5785{
5786	struct igb_adapter *adapter = q_vector->adapter;
5787	struct igb_ring *tx_ring = q_vector->tx.ring;
5788	struct igb_tx_buffer *tx_buffer;
5789	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5790	unsigned int total_bytes = 0, total_packets = 0;
5791	unsigned int budget = q_vector->tx.work_limit;
5792	unsigned int i = tx_ring->next_to_clean;
5793
5794	if (test_bit(__IGB_DOWN, &adapter->state))
5795		return true;
5796
5797	tx_buffer = &tx_ring->tx_buffer_info[i];
5798	tx_desc = IGB_TX_DESC(tx_ring, i);
5799	i -= tx_ring->count;
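	/* i is kept biased by -tx_ring->count so the wrap checks below
	 * reduce to !i; the bias is removed again once the loop completes
	 */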
5800
5801	for (; budget; budget--) {
5802		eop_desc = tx_buffer->next_to_watch;
5803
5804		/* prevent any other reads prior to eop_desc */
5805		rmb();
5806
5807		/* if next_to_watch is not set then there is no work pending */
5808		if (!eop_desc)
5809			break;
5810
5811		/* if DD is not set pending work has not been completed */
5812		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5813			break;
5814
5815		/* clear next_to_watch to prevent false hangs */
5816		tx_buffer->next_to_watch = NULL;
5817
5818		/* update the statistics for this packet */
5819		total_bytes += tx_buffer->bytecount;
5820		total_packets += tx_buffer->gso_segs;
5821
5822#ifdef CONFIG_IGB_PTP
5823		/* retrieve hardware timestamp */
5824		igb_tx_hwtstamp(q_vector, tx_buffer);
5825
5826#endif
5827		/* free the skb */
5828		dev_kfree_skb_any(tx_buffer->skb);
5829		tx_buffer->skb = NULL;
5830
5831		/* unmap skb header data */
5832		dma_unmap_single(tx_ring->dev,
5833				 tx_buffer->dma,
5834				 tx_buffer->length,
5835				 DMA_TO_DEVICE);
5836
5837		/* clear last DMA location and unmap remaining buffers */
5838		while (tx_desc != eop_desc) {
5839			tx_buffer->dma = 0;
5840
5841			tx_buffer++;
5842			tx_desc++;
5843			i++;
5844			if (unlikely(!i)) {
5845				i -= tx_ring->count;
5846				tx_buffer = tx_ring->tx_buffer_info;
5847				tx_desc = IGB_TX_DESC(tx_ring, 0);
5848			}
5849
5850			/* unmap any remaining paged data */
5851			if (tx_buffer->dma) {
5852				dma_unmap_page(tx_ring->dev,
5853					       tx_buffer->dma,
5854					       tx_buffer->length,
5855					       DMA_TO_DEVICE);
5856			}
5857		}
5858
5859		/* clear last DMA location */
5860		tx_buffer->dma = 0;
5861
5862		/* move us one more past the eop_desc for start of next pkt */
5863		tx_buffer++;
5864		tx_desc++;
5865		i++;
5866		if (unlikely(!i)) {
5867			i -= tx_ring->count;
5868			tx_buffer = tx_ring->tx_buffer_info;
5869			tx_desc = IGB_TX_DESC(tx_ring, 0);
5870		}
5871	}
5872
5873	netdev_tx_completed_queue(txring_txq(tx_ring),
5874				  total_packets, total_bytes);
5875	i += tx_ring->count;
5876	tx_ring->next_to_clean = i;
5877	u64_stats_update_begin(&tx_ring->tx_syncp);
5878	tx_ring->tx_stats.bytes += total_bytes;
5879	tx_ring->tx_stats.packets += total_packets;
5880	u64_stats_update_end(&tx_ring->tx_syncp);
5881	q_vector->tx.total_bytes += total_bytes;
5882	q_vector->tx.total_packets += total_packets;
5883
5884	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5885		struct e1000_hw *hw = &adapter->hw;
5886
5887		eop_desc = tx_buffer->next_to_watch;
5888
5889		/* Detect a transmit hang in hardware; this serializes the
5890		 * check with the clearing of time_stamp and the movement of i */
5891		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5892		if (eop_desc &&
5893		    time_after(jiffies, tx_buffer->time_stamp +
5894			       (adapter->tx_timeout_factor * HZ)) &&
5895		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5896
5897			/* detected Tx unit hang */
5898			dev_err(tx_ring->dev,
5899				"Detected Tx Unit Hang\n"
5900				"  Tx Queue             <%d>\n"
5901				"  TDH                  <%x>\n"
5902				"  TDT                  <%x>\n"
5903				"  next_to_use          <%x>\n"
5904				"  next_to_clean        <%x>\n"
5905				"buffer_info[next_to_clean]\n"
5906				"  time_stamp           <%lx>\n"
5907				"  next_to_watch        <%p>\n"
5908				"  jiffies              <%lx>\n"
5909				"  desc.status          <%x>\n",
5910				tx_ring->queue_index,
5911				rd32(E1000_TDH(tx_ring->reg_idx)),
5912				readl(tx_ring->tail),
5913				tx_ring->next_to_use,
5914				tx_ring->next_to_clean,
5915				tx_buffer->time_stamp,
5916				eop_desc,
5917				jiffies,
5918				eop_desc->wb.status);
5919			netif_stop_subqueue(tx_ring->netdev,
5920					    tx_ring->queue_index);
5921
5922			/* we are about to reset, no point in enabling stuff */
5923			return true;
5924		}
5925	}
5926
5927	if (unlikely(total_packets &&
5928		     netif_carrier_ok(tx_ring->netdev) &&
5929		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5930		/* Make sure that anybody stopping the queue after this
5931		 * sees the new next_to_clean.
5932		 */
5933		smp_mb();
5934		if (__netif_subqueue_stopped(tx_ring->netdev,
5935					     tx_ring->queue_index) &&
5936		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5937			netif_wake_subqueue(tx_ring->netdev,
5938					    tx_ring->queue_index);
5939
5940			u64_stats_update_begin(&tx_ring->tx_syncp);
5941			tx_ring->tx_stats.restart_queue++;
5942			u64_stats_update_end(&tx_ring->tx_syncp);
5943		}
5944	}
5945
5946	return !!budget;
5947}
5948
5949static inline void igb_rx_checksum(struct igb_ring *ring,
5950				   union e1000_adv_rx_desc *rx_desc,
5951				   struct sk_buff *skb)
5952{
5953	skb_checksum_none_assert(skb);
5954
5955	/* Ignore Checksum bit is set */
5956	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5957		return;
5958
5959	/* Rx checksum disabled via ethtool */
5960	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5961		return;
5962
5963	/* TCP/UDP checksum error bit is set */
5964	if (igb_test_staterr(rx_desc,
5965			     E1000_RXDEXT_STATERR_TCPE |
5966			     E1000_RXDEXT_STATERR_IPE)) {
5967		/*
5968		 * work around an erratum where the TCPE (aka L4E) bit is set
5969		 * incorrectly on 64 byte (60 byte w/o CRC) SCTP packets; skip
5970		 * the error count and let the stack verify the crc32c instead
5971		 */
5972		if (!((skb->len == 60) &&
5973		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5974			u64_stats_update_begin(&ring->rx_syncp);
5975			ring->rx_stats.csum_err++;
5976			u64_stats_update_end(&ring->rx_syncp);
5977		}
5978		/* let the stack verify checksum errors */
5979		return;
5980	}
5981	/* It must be a TCP or UDP packet with a valid checksum */
5982	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5983				      E1000_RXD_STAT_UDPCS))
5984		skb->ip_summed = CHECKSUM_UNNECESSARY;
5985
5986	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5987		le32_to_cpu(rx_desc->wb.upper.status_error));
5988}
5989
5990static inline void igb_rx_hash(struct igb_ring *ring,
5991			       union e1000_adv_rx_desc *rx_desc,
5992			       struct sk_buff *skb)
5993{
5994	if (ring->netdev->features & NETIF_F_RXHASH)
5995		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5996}
5997
5998#ifdef CONFIG_IGB_PTP
5999static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
6000			    union e1000_adv_rx_desc *rx_desc,
6001			    struct sk_buff *skb)
6002{
6003	struct igb_adapter *adapter = q_vector->adapter;
6004	struct e1000_hw *hw = &adapter->hw;
6005	u64 regval;
6006
6007	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6008				       E1000_RXDADV_STAT_TS))
6009		return;
6010
6011	/*
6012	 * If this bit is set, then the RX registers contain the time stamp. No
6013	 * other packet will be time stamped until we read these registers, so
6014	 * read the registers to make them available again. Because only one
6015	 * packet can be time stamped at a time, we know that the register
6016	 * values must belong to this one here and therefore we don't need to
6017	 * compare any of the additional attributes stored for it.
6018	 *
6019	 * If nothing went wrong, then it should have a shared tx_flags that we
6020	 * can turn into a skb_shared_hwtstamps.
6021	 */
6022	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6023		u32 *stamp = (u32 *)skb->data;
6024		regval = le32_to_cpu(*(stamp + 2));
6025		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6026		skb_pull(skb, IGB_TS_HDR_LEN);
6027	} else {
6028		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6029			return;
6030
6031		regval = rd32(E1000_RXSTMPL);
6032		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6033	}
6034
6035	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6036}
6037
6038#endif
6039static void igb_rx_vlan(struct igb_ring *ring,
6040			union e1000_adv_rx_desc *rx_desc,
6041			struct sk_buff *skb)
6042{
6043	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6044		u16 vid;
6045		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6046		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6047			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6048		else
6049			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6050
6051		__vlan_hwaccel_put_tag(skb, vid);
6052	}
6053}
6054
6055static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6056{
6057	/* HW will not DMA in data larger than the given buffer, even if it
6058	 * parses the (NFS, of course) header to be larger.  In that case, it
6059	 * fills the header buffer and spills the rest into the page.
6060	 */
6061	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6062	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6063	if (hlen > IGB_RX_HDR_LEN)
6064		hlen = IGB_RX_HDR_LEN;
6065	return hlen;
6066}
6067
6068static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6069{
6070	struct igb_ring *rx_ring = q_vector->rx.ring;
6071	union e1000_adv_rx_desc *rx_desc;
6072	const int current_node = numa_node_id();
6073	unsigned int total_bytes = 0, total_packets = 0;
6074	u16 cleaned_count = igb_desc_unused(rx_ring);
6075	u16 i = rx_ring->next_to_clean;
6076
6077	rx_desc = IGB_RX_DESC(rx_ring, i);
6078
6079	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6080		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6081		struct sk_buff *skb = buffer_info->skb;
6082		union e1000_adv_rx_desc *next_rxd;
6083
6084		buffer_info->skb = NULL;
6085		prefetch(skb->data);
6086
6087		i++;
6088		if (i == rx_ring->count)
6089			i = 0;
6090
6091		next_rxd = IGB_RX_DESC(rx_ring, i);
6092		prefetch(next_rxd);
6093
6094		/*
6095		 * This memory barrier is needed to keep us from reading
6096		 * any other fields out of the rx_desc until we know the
6097		 * RXD_STAT_DD bit is set
6098		 */
6099		rmb();
6100
6101		if (!skb_is_nonlinear(skb)) {
6102			__skb_put(skb, igb_get_hlen(rx_desc));
6103			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6104					 IGB_RX_HDR_LEN,
6105					 DMA_FROM_DEVICE);
6106			buffer_info->dma = 0;
6107		}
6108
6109		if (rx_desc->wb.upper.length) {
6110			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6111
6112			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6113						buffer_info->page,
6114						buffer_info->page_offset,
6115						length);
6116
6117			skb->len += length;
6118			skb->data_len += length;
6119			skb->truesize += PAGE_SIZE / 2;
6120
6121			if ((page_count(buffer_info->page) != 1) ||
6122			    (page_to_nid(buffer_info->page) != current_node))
6123				buffer_info->page = NULL;
6124			else
6125				get_page(buffer_info->page);
6126
6127			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6128				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6129			buffer_info->page_dma = 0;
6130		}
6131
6132		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6133			struct igb_rx_buffer *next_buffer;
6134			next_buffer = &rx_ring->rx_buffer_info[i];
6135			buffer_info->skb = next_buffer->skb;
6136			buffer_info->dma = next_buffer->dma;
6137			next_buffer->skb = skb;
6138			next_buffer->dma = 0;
6139			goto next_desc;
6140		}
6141
6142		if (unlikely((igb_test_staterr(rx_desc,
6143					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6144			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6145			dev_kfree_skb_any(skb);
6146			goto next_desc;
6147		}
6148
6149#ifdef CONFIG_IGB_PTP
6150		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6151#endif
6152		igb_rx_hash(rx_ring, rx_desc, skb);
6153		igb_rx_checksum(rx_ring, rx_desc, skb);
6154		igb_rx_vlan(rx_ring, rx_desc, skb);
6155
6156		total_bytes += skb->len;
6157		total_packets++;
6158
6159		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6160
6161		napi_gro_receive(&q_vector->napi, skb);
6162
6163		budget--;
6164next_desc:
6165		if (!budget)
6166			break;
6167
6168		cleaned_count++;
6169		/* return some buffers to hardware, one at a time is too slow */
6170		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6171			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6172			cleaned_count = 0;
6173		}
6174
6175		/* use prefetched values */
6176		rx_desc = next_rxd;
6177	}
6178
6179	rx_ring->next_to_clean = i;
6180	u64_stats_update_begin(&rx_ring->rx_syncp);
6181	rx_ring->rx_stats.packets += total_packets;
6182	rx_ring->rx_stats.bytes += total_bytes;
6183	u64_stats_update_end(&rx_ring->rx_syncp);
6184	q_vector->rx.total_packets += total_packets;
6185	q_vector->rx.total_bytes += total_bytes;
6186
6187	if (cleaned_count)
6188		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6189
6190	return !!budget;
6191}
6192
6193static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6194				 struct igb_rx_buffer *bi)
6195{
6196	struct sk_buff *skb = bi->skb;
6197	dma_addr_t dma = bi->dma;
6198
6199	if (dma)
6200		return true;
6201
6202	if (likely(!skb)) {
6203		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6204						IGB_RX_HDR_LEN);
6205		bi->skb = skb;
6206		if (!skb) {
6207			rx_ring->rx_stats.alloc_failed++;
6208			return false;
6209		}
6210
6211		/* initialize skb for ring */
6212		skb_record_rx_queue(skb, rx_ring->queue_index);
6213	}
6214
6215	dma = dma_map_single(rx_ring->dev, skb->data,
6216			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6217
6218	if (dma_mapping_error(rx_ring->dev, dma)) {
6219		rx_ring->rx_stats.alloc_failed++;
6220		return false;
6221	}
6222
6223	bi->dma = dma;
6224	return true;
6225}
6226
6227static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6228				  struct igb_rx_buffer *bi)
6229{
6230	struct page *page = bi->page;
6231	dma_addr_t page_dma = bi->page_dma;
6232	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6233
6234	if (page_dma)
6235		return true;
6236
6237	if (!page) {
6238		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6239		bi->page = page;
6240		if (unlikely(!page)) {
6241			rx_ring->rx_stats.alloc_failed++;
6242			return false;
6243		}
6244	}
6245
6246	page_dma = dma_map_page(rx_ring->dev, page,
6247				page_offset, PAGE_SIZE / 2,
6248				DMA_FROM_DEVICE);
6249
6250	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6251		rx_ring->rx_stats.alloc_failed++;
6252		return false;
6253	}
6254
6255	bi->page_dma = page_dma;
6256	bi->page_offset = page_offset;
6257	return true;
6258}
6259
6260/**
6261 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6262 * @rx_ring: rx descriptor ring to allocate new receive buffers for
 * @cleaned_count: number of descriptors to refill
6263 **/
6264void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6265{
6266	union e1000_adv_rx_desc *rx_desc;
6267	struct igb_rx_buffer *bi;
6268	u16 i = rx_ring->next_to_use;
6269
6270	rx_desc = IGB_RX_DESC(rx_ring, i);
6271	bi = &rx_ring->rx_buffer_info[i];
6272	i -= rx_ring->count;
6273
6274	while (cleaned_count--) {
6275		if (!igb_alloc_mapped_skb(rx_ring, bi))
6276			break;
6277
6278		/* Refresh the desc even if buffer_addrs didn't change
6279		 * because each write-back erases this info. */
6280		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6281
6282		if (!igb_alloc_mapped_page(rx_ring, bi))
6283			break;
6284
6285		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6286
6287		rx_desc++;
6288		bi++;
6289		i++;
6290		if (unlikely(!i)) {
6291			rx_desc = IGB_RX_DESC(rx_ring, 0);
6292			bi = rx_ring->rx_buffer_info;
6293			i -= rx_ring->count;
6294		}
6295
6296		/* clear the hdr_addr for the next_to_use descriptor */
6297		rx_desc->read.hdr_addr = 0;
6298	}
6299
6300	i += rx_ring->count;
6301
6302	if (rx_ring->next_to_use != i) {
6303		rx_ring->next_to_use = i;
6304
6305		/* Force memory writes to complete before letting h/w
6306		 * know there are new descriptors to fetch.  (Only
6307		 * applicable for weak-ordered memory model archs,
6308		 * such as IA-64). */
6309		wmb();
6310		writel(i, rx_ring->tail);
6311	}
6312}
6313
6314/**
6315 * igb_mii_ioctl - handle MII register access ioctls
6316 * @netdev: network interface device structure
6317 * @ifr: interface request structure containing the MII data
6318 * @cmd: SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG
6319 **/
6320static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6321{
6322	struct igb_adapter *adapter = netdev_priv(netdev);
6323	struct mii_ioctl_data *data = if_mii(ifr);
6324
6325	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6326		return -EOPNOTSUPP;
6327
6328	switch (cmd) {
6329	case SIOCGMIIPHY:
6330		data->phy_id = adapter->hw.phy.addr;
6331		break;
6332	case SIOCGMIIREG:
6333		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6334		                     &data->val_out))
6335			return -EIO;
6336		break;
6337	case SIOCSMIIREG:
6338	default:
6339		return -EOPNOTSUPP;
6340	}
6341	return 0;
6342}
6343
6344/**
6345 * igb_hwtstamp_ioctl - control hardware time stamping
6346 * @netdev: network interface device structure
6347 * @ifr: interface request structure carrying a struct hwtstamp_config
6348 * @cmd: SIOCSHWTSTAMP
6349 *
6350 * Outgoing time stamping can be enabled and disabled. Play nice and
6351 * disable it when requested, although it shouldn't cause any overhead
6352 * when no packet needs it. At most one packet in the queue may be
6353 * marked for time stamping, otherwise it would be impossible to tell
6354 * for sure to which packet the hardware time stamp belongs.
6355 *
6356 * Incoming time stamping has to be configured via the hardware
6357 * filters. Not all combinations are supported, in particular event
6358 * type has to be specified. Matching the kind of event packet is
6359 * not supported, with the exception of "all V2 events regardless of
6360 * level 2 or 4".
6361 * layer 2 or 4".
6362 **/
6363static int igb_hwtstamp_ioctl(struct net_device *netdev,
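/* For reference only - a minimal userspace sketch of driving this ioctl
 * (not part of the driver; the interface name, includes and error handling
 * are illustrative):
 *
 *	struct ifreq ifr;
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
 *		perror("SIOCSHWTSTAMP");
 */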
6364			      struct ifreq *ifr, int cmd)
6365{
6366	struct igb_adapter *adapter = netdev_priv(netdev);
6367	struct e1000_hw *hw = &adapter->hw;
6368	struct hwtstamp_config config;
6369	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6370	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6371	u32 tsync_rx_cfg = 0;
6372	bool is_l4 = false;
6373	bool is_l2 = false;
6374	u32 regval;
6375
6376	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6377		return -EFAULT;
6378
6379	/* reserved for future extensions */
6380	if (config.flags)
6381		return -EINVAL;
6382
6383	switch (config.tx_type) {
6384	case HWTSTAMP_TX_OFF:
6385		tsync_tx_ctl = 0;
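		/* fall through - the OFF case only clears the enable bit */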
6386	case HWTSTAMP_TX_ON:
6387		break;
6388	default:
6389		return -ERANGE;
6390	}
6391
6392	switch (config.rx_filter) {
6393	case HWTSTAMP_FILTER_NONE:
6394		tsync_rx_ctl = 0;
6395		break;
6396	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6397	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6398	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6399	case HWTSTAMP_FILTER_ALL:
6400		/*
6401		 * register TSYNCRXCFG must be set, therefore it is not
6402		 * possible to time stamp both Sync and Delay_Req messages
6403		 * => fall back to time stamping all packets
6404		 */
6405		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6406		config.rx_filter = HWTSTAMP_FILTER_ALL;
6407		break;
6408	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6409		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6410		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6411		is_l4 = true;
6412		break;
6413	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6414		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6415		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6416		is_l4 = true;
6417		break;
6418	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6419	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6420		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6421		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6422		is_l2 = true;
6423		is_l4 = true;
6424		config.rx_filter = HWTSTAMP_FILTER_SOME;
6425		break;
6426	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6427	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6428		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6429		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6430		is_l2 = true;
6431		is_l4 = true;
6432		config.rx_filter = HWTSTAMP_FILTER_SOME;
6433		break;
6434	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6435	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6436	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6437		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6438		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6439		is_l2 = true;
6440		is_l4 = true;
6441		break;
6442	default:
6443		return -ERANGE;
6444	}
6445
6446	if (hw->mac.type == e1000_82575) {
6447		if (tsync_rx_ctl | tsync_tx_ctl)
6448			return -EINVAL;
6449		return 0;
6450	}
6451
6452	/*
6453	 * Per-packet timestamping only works if all packets are
6454	 * timestamped, so enable timestamping in all packets as
6455	 * long as one rx filter was configured.
6456	 */
6457	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6458		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6459		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6460	}
6461
6462	/* enable/disable TX */
6463	regval = rd32(E1000_TSYNCTXCTL);
6464	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6465	regval |= tsync_tx_ctl;
6466	wr32(E1000_TSYNCTXCTL, regval);
6467
6468	/* enable/disable RX */
6469	regval = rd32(E1000_TSYNCRXCTL);
6470	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6471	regval |= tsync_rx_ctl;
6472	wr32(E1000_TSYNCRXCTL, regval);
6473
6474	/* define which PTP packets are time stamped */
6475	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6476
6477	/* define ethertype filter for timestamped packets */
6478	if (is_l2)
6479		wr32(E1000_ETQF(3),
6480		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6481		                 E1000_ETQF_1588 | /* enable timestamping */
6482		                 ETH_P_1588));     /* 1588 eth protocol type */
6483	else
6484		wr32(E1000_ETQF(3), 0);
6485
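/* 319 is the well-known UDP port for PTP event messages */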
6486#define PTP_PORT 319
6487	/* L4 Queue Filter[3]: filter by destination port and protocol */
6488	if (is_l4) {
6489		u32 ftqf = (IPPROTO_UDP /* UDP */
6490			| E1000_FTQF_VF_BP /* VF not compared */
6491			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6492			| E1000_FTQF_MASK); /* mask all inputs */
6493		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6494
6495		wr32(E1000_IMIR(3), htons(PTP_PORT));
6496		wr32(E1000_IMIREXT(3),
6497		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6498		if (hw->mac.type == e1000_82576) {
6499			/* enable source port check */
6500			wr32(E1000_SPQF(3), htons(PTP_PORT));
6501			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6502		}
6503		wr32(E1000_FTQF(3), ftqf);
6504	} else {
6505		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6506	}
6507	wrfl();
6508
6509	adapter->hwtstamp_config = config;
6510
6511	/* clear TX/RX time stamp registers, just to be sure */
6512	regval = rd32(E1000_TXSTMPH);
6513	regval = rd32(E1000_RXSTMPH);
6514
6515	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6516		-EFAULT : 0;
6517}
6518
6519/**
6520 * igb_ioctl - dispatch device-specific ioctls
6521 * @netdev: network interface device structure
6522 * @ifr: interface request structure
6523 * @cmd: ioctl command to execute
6524 **/
6525static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6526{
6527	switch (cmd) {
6528	case SIOCGMIIPHY:
6529	case SIOCGMIIREG:
6530	case SIOCSMIIREG:
6531		return igb_mii_ioctl(netdev, ifr, cmd);
6532	case SIOCSHWTSTAMP:
6533		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6534	default:
6535		return -EOPNOTSUPP;
6536	}
6537}
6538
6539s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6540{
6541	struct igb_adapter *adapter = hw->back;
6542	u16 cap_offset;
6543
6544	cap_offset = adapter->pdev->pcie_cap;
6545	if (!cap_offset)
6546		return -E1000_ERR_CONFIG;
6547
6548	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6549
6550	return 0;
6551}
6552
6553s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6554{
6555	struct igb_adapter *adapter = hw->back;
6556	u16 cap_offset;
6557
6558	cap_offset = adapter->pdev->pcie_cap;
6559	if (!cap_offset)
6560		return -E1000_ERR_CONFIG;
6561
6562	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6563
6564	return 0;
6565}
6566
6567static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6568{
6569	struct igb_adapter *adapter = netdev_priv(netdev);
6570	struct e1000_hw *hw = &adapter->hw;
6571	u32 ctrl, rctl;
6572	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6573
6574	if (enable) {
6575		/* enable VLAN tag insert/strip */
6576		ctrl = rd32(E1000_CTRL);
6577		ctrl |= E1000_CTRL_VME;
6578		wr32(E1000_CTRL, ctrl);
6579
6580		/* Disable CFI check */
6581		rctl = rd32(E1000_RCTL);
6582		rctl &= ~E1000_RCTL_CFIEN;
6583		wr32(E1000_RCTL, rctl);
6584	} else {
6585		/* disable VLAN tag insert/strip */
6586		ctrl = rd32(E1000_CTRL);
6587		ctrl &= ~E1000_CTRL_VME;
6588		wr32(E1000_CTRL, ctrl);
6589	}
6590
6591	igb_rlpml_set(adapter);
6592}
6593
6594static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6595{
6596	struct igb_adapter *adapter = netdev_priv(netdev);
6597	struct e1000_hw *hw = &adapter->hw;
6598	int pf_id = adapter->vfs_allocated_count;
6599
6600	/* attempt to add filter to vlvf array */
6601	igb_vlvf_set(adapter, vid, true, pf_id);
6602
6603	/* add the filter since PF can receive vlans w/o entry in vlvf */
6604	igb_vfta_set(hw, vid, true);
6605
6606	set_bit(vid, adapter->active_vlans);
6607
6608	return 0;
6609}
6610
6611static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6612{
6613	struct igb_adapter *adapter = netdev_priv(netdev);
6614	struct e1000_hw *hw = &adapter->hw;
6615	int pf_id = adapter->vfs_allocated_count;
6616	s32 err;
6617
6618	/* remove vlan from VLVF table array */
6619	err = igb_vlvf_set(adapter, vid, false, pf_id);
6620
6621	/* if vid was not present in VLVF just remove it from table */
6622	if (err)
6623		igb_vfta_set(hw, vid, false);
6624
6625	clear_bit(vid, adapter->active_vlans);
6626
6627	return 0;
6628}
6629
6630static void igb_restore_vlan(struct igb_adapter *adapter)
6631{
6632	u16 vid;
6633
6634	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6635
6636	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6637		igb_vlan_rx_add_vid(adapter->netdev, vid);
6638}
6639
6640int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6641{
6642	struct pci_dev *pdev = adapter->pdev;
6643	struct e1000_mac_info *mac = &adapter->hw.mac;
6644
6645	mac->autoneg = 0;
6646
6647	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6648	 * for the switch() below to work */
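	/* e.g. SPEED_100 (100) + DUPLEX_FULL (1) = 101; the sums below are
	 * only unambiguous because no valid speed has its low bit set
	 */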
6649	if ((spd & 1) || (dplx & ~1))
6650		goto err_inval;
6651
6652	/* Fiber NICs only allow 1000 Mbps Full duplex */
6653	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6654	    (spd != SPEED_1000 ||
6655	     dplx != DUPLEX_FULL))
6656		goto err_inval;
6657
6658	switch (spd + dplx) {
6659	case SPEED_10 + DUPLEX_HALF:
6660		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6661		break;
6662	case SPEED_10 + DUPLEX_FULL:
6663		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6664		break;
6665	case SPEED_100 + DUPLEX_HALF:
6666		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6667		break;
6668	case SPEED_100 + DUPLEX_FULL:
6669		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6670		break;
6671	case SPEED_1000 + DUPLEX_FULL:
6672		mac->autoneg = 1;
6673		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6674		break;
6675	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6676	default:
6677		goto err_inval;
6678	}
6679	return 0;
6680
6681err_inval:
6682	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6683	return -EINVAL;
6684}
6685
6686static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6687			  bool runtime)
6688{
6689	struct net_device *netdev = pci_get_drvdata(pdev);
6690	struct igb_adapter *adapter = netdev_priv(netdev);
6691	struct e1000_hw *hw = &adapter->hw;
6692	u32 ctrl, rctl, status;
6693	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6694#ifdef CONFIG_PM
6695	int retval = 0;
6696#endif
6697
6698	netif_device_detach(netdev);
6699
6700	if (netif_running(netdev))
6701		__igb_close(netdev, true);
6702
6703	igb_clear_interrupt_scheme(adapter);
6704
6705#ifdef CONFIG_PM
6706	retval = pci_save_state(pdev);
6707	if (retval)
6708		return retval;
6709#endif
6710
6711	status = rd32(E1000_STATUS);
6712	if (status & E1000_STATUS_LU)
6713		wufc &= ~E1000_WUFC_LNKC;
6714
6715	if (wufc) {
6716		igb_setup_rctl(adapter);
6717		igb_set_rx_mode(netdev);
6718
6719		/* turn on all-multi mode if wake on multicast is enabled */
6720		if (wufc & E1000_WUFC_MC) {
6721			rctl = rd32(E1000_RCTL);
6722			rctl |= E1000_RCTL_MPE;
6723			wr32(E1000_RCTL, rctl);
6724		}
6725
6726		ctrl = rd32(E1000_CTRL);
6727		/* advertise wake from D3Cold */
6728		#define E1000_CTRL_ADVD3WUC 0x00100000
6729		/* phy power management enable */
6730		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6731		ctrl |= E1000_CTRL_ADVD3WUC;
6732		wr32(E1000_CTRL, ctrl);
6733
6734		/* Allow time for pending master requests to run */
6735		igb_disable_pcie_master(hw);
6736
6737		wr32(E1000_WUC, E1000_WUC_PME_EN);
6738		wr32(E1000_WUFC, wufc);
6739	} else {
6740		wr32(E1000_WUC, 0);
6741		wr32(E1000_WUFC, 0);
6742	}
6743
6744	*enable_wake = wufc || adapter->en_mng_pt;
6745	if (!*enable_wake)
6746		igb_power_down_link(adapter);
6747	else
6748		igb_power_up_link(adapter);
6749
6750	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6751	 * would have already happened in close and is redundant. */
6752	igb_release_hw_control(adapter);
6753
6754	pci_disable_device(pdev);
6755
6756	return 0;
6757}
6758
6759#ifdef CONFIG_PM
6760#ifdef CONFIG_PM_SLEEP
6761static int igb_suspend(struct device *dev)
6762{
6763	int retval;
6764	bool wake;
6765	struct pci_dev *pdev = to_pci_dev(dev);
6766
6767	retval = __igb_shutdown(pdev, &wake, 0);
6768	if (retval)
6769		return retval;
6770
6771	if (wake) {
6772		pci_prepare_to_sleep(pdev);
6773	} else {
6774		pci_wake_from_d3(pdev, false);
6775		pci_set_power_state(pdev, PCI_D3hot);
6776	}
6777
6778	return 0;
6779}
6780#endif /* CONFIG_PM_SLEEP */
6781
6782static int igb_resume(struct device *dev)
6783{
6784	struct pci_dev *pdev = to_pci_dev(dev);
6785	struct net_device *netdev = pci_get_drvdata(pdev);
6786	struct igb_adapter *adapter = netdev_priv(netdev);
6787	struct e1000_hw *hw = &adapter->hw;
6788	u32 err;
6789
6790	pci_set_power_state(pdev, PCI_D0);
6791	pci_restore_state(pdev);
6792	pci_save_state(pdev);
6793
6794	err = pci_enable_device_mem(pdev);
6795	if (err) {
6796		dev_err(&pdev->dev,
6797			"igb: Cannot enable PCI device from suspend\n");
6798		return err;
6799	}
6800	pci_set_master(pdev);
6801
6802	pci_enable_wake(pdev, PCI_D3hot, 0);
6803	pci_enable_wake(pdev, PCI_D3cold, 0);
6804
6805	if (igb_init_interrupt_scheme(adapter)) {
6806		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6807		return -ENOMEM;
6808	}
6809
6810	igb_reset(adapter);
6811
6812	/* let the f/w know that the h/w is now under the control of the
6813	 * driver. */
6814	igb_get_hw_control(adapter);
6815
6816	wr32(E1000_WUS, ~0);
6817
6818	if (netdev->flags & IFF_UP) {
6819		err = __igb_open(netdev, true);
6820		if (err)
6821			return err;
6822	}
6823
6824	netif_device_attach(netdev);
6825	return 0;
6826}
6827
6828#ifdef CONFIG_PM_RUNTIME
6829static int igb_runtime_idle(struct device *dev)
6830{
6831	struct pci_dev *pdev = to_pci_dev(dev);
6832	struct net_device *netdev = pci_get_drvdata(pdev);
6833	struct igb_adapter *adapter = netdev_priv(netdev);
6834
6835	if (!igb_has_link(adapter))
6836		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6837
6838	return -EBUSY;
6839}
6840
6841static int igb_runtime_suspend(struct device *dev)
6842{
6843	struct pci_dev *pdev = to_pci_dev(dev);
6844	int retval;
6845	bool wake;
6846
6847	retval = __igb_shutdown(pdev, &wake, 1);
6848	if (retval)
6849		return retval;
6850
6851	if (wake) {
6852		pci_prepare_to_sleep(pdev);
6853	} else {
6854		pci_wake_from_d3(pdev, false);
6855		pci_set_power_state(pdev, PCI_D3hot);
6856	}
6857
6858	return 0;
6859}
6860
6861static int igb_runtime_resume(struct device *dev)
6862{
6863	return igb_resume(dev);
6864}
6865#endif /* CONFIG_PM_RUNTIME */
6866#endif
6867
6868static void igb_shutdown(struct pci_dev *pdev)
6869{
6870	bool wake;
6871
6872	__igb_shutdown(pdev, &wake, 0);
6873
6874	if (system_state == SYSTEM_POWER_OFF) {
6875		pci_wake_from_d3(pdev, wake);
6876		pci_set_power_state(pdev, PCI_D3hot);
6877	}
6878}
6879
6880#ifdef CONFIG_NET_POLL_CONTROLLER
6881/*
6882 * Polling 'interrupt' - used by things like netconsole to send skbs
6883 * without having to re-enable interrupts. It's not called while
6884 * the interrupt routine is executing.
6885 */
6886static void igb_netpoll(struct net_device *netdev)
6887{
6888	struct igb_adapter *adapter = netdev_priv(netdev);
6889	struct e1000_hw *hw = &adapter->hw;
6890	struct igb_q_vector *q_vector;
6891	int i;
6892
6893	for (i = 0; i < adapter->num_q_vectors; i++) {
6894		q_vector = adapter->q_vector[i];
6895		if (adapter->msix_entries)
6896			wr32(E1000_EIMC, q_vector->eims_value);
6897		else
6898			igb_irq_disable(adapter);
6899		napi_schedule(&q_vector->napi);
6900	}
6901}
6902#endif /* CONFIG_NET_POLL_CONTROLLER */
6903
6904/**
6905 * igb_io_error_detected - called when PCI error is detected
6906 * @pdev: Pointer to PCI device
6907 * @state: The current pci connection state
6908 *
6909 * This function is called after a PCI bus error affecting
6910 * this device has been detected.
6911 */
6912static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6913					      pci_channel_state_t state)
6914{
6915	struct net_device *netdev = pci_get_drvdata(pdev);
6916	struct igb_adapter *adapter = netdev_priv(netdev);
6917
6918	netif_device_detach(netdev);
6919
6920	if (state == pci_channel_io_perm_failure)
6921		return PCI_ERS_RESULT_DISCONNECT;
6922
6923	if (netif_running(netdev))
6924		igb_down(adapter);
6925	pci_disable_device(pdev);
6926
6927	/* Request a slot reset. */
6928	return PCI_ERS_RESULT_NEED_RESET;
6929}
6930
6931/**
6932 * igb_io_slot_reset - called after the pci bus has been reset.
6933 * @pdev: Pointer to PCI device
6934 *
6935 * Restart the card from scratch, as if from a cold-boot. Implementation
6936 * resembles the first-half of the igb_resume routine.
6937 */
6938static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6939{
6940	struct net_device *netdev = pci_get_drvdata(pdev);
6941	struct igb_adapter *adapter = netdev_priv(netdev);
6942	struct e1000_hw *hw = &adapter->hw;
6943	pci_ers_result_t result;
6944	int err;
6945
6946	if (pci_enable_device_mem(pdev)) {
6947		dev_err(&pdev->dev,
6948			"Cannot re-enable PCI device after reset.\n");
6949		result = PCI_ERS_RESULT_DISCONNECT;
6950	} else {
6951		pci_set_master(pdev);
6952		pci_restore_state(pdev);
6953		pci_save_state(pdev);
6954
6955		pci_enable_wake(pdev, PCI_D3hot, 0);
6956		pci_enable_wake(pdev, PCI_D3cold, 0);
6957
6958		igb_reset(adapter);
6959		wr32(E1000_WUS, ~0);
6960		result = PCI_ERS_RESULT_RECOVERED;
6961	}
6962
6963	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6964	if (err) {
6965		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6966		        "failed 0x%0x\n", err);
6967		/* non-fatal, continue */
6968	}
6969
6970	return result;
6971}
6972
6973/**
6974 * igb_io_resume - called when traffic can start flowing again.
6975 * @pdev: Pointer to PCI device
6976 *
6977 * This callback is called when the error recovery driver tells us that
6978 * it's OK to resume normal operation. Implementation resembles the
6979 * second-half of the igb_resume routine.
6980 */
6981static void igb_io_resume(struct pci_dev *pdev)
6982{
6983	struct net_device *netdev = pci_get_drvdata(pdev);
6984	struct igb_adapter *adapter = netdev_priv(netdev);
6985
6986	if (netif_running(netdev)) {
6987		if (igb_up(adapter)) {
6988			dev_err(&pdev->dev, "igb_up failed after reset\n");
6989			return;
6990		}
6991	}
6992
6993	netif_device_attach(netdev);
6994
6995	/* let the f/w know that the h/w is now under the control of the
6996	 * driver. */
6997	igb_get_hw_control(adapter);
6998}
6999
7000static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
7001                             u8 qsel)
7002{
7003	u32 rar_low, rar_high;
7004	struct e1000_hw *hw = &adapter->hw;
7005
7006	/* HW expects these in little endian so we reverse the byte order
7007	 * from network order (big endian) to little endian
7008	 */
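	/* e.g. an (illustrative) address of 00:1b:21:aa:bb:cc packs into
	 * rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the valid
	 * and pool bits are OR'd in below
	 */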
7009	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7010	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7011	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
7012
7013	/* Indicate to hardware the Address is Valid. */
7014	rar_high |= E1000_RAH_AV;
7015
7016	if (hw->mac.type == e1000_82575)
7017		rar_high |= E1000_RAH_POOL_1 * qsel;
7018	else
7019		rar_high |= E1000_RAH_POOL_1 << qsel;
7020
7021	wr32(E1000_RAL(index), rar_low);
7022	wrfl();
7023	wr32(E1000_RAH(index), rar_high);
7024	wrfl();
7025}
7026
7027static int igb_set_vf_mac(struct igb_adapter *adapter,
7028                          int vf, unsigned char *mac_addr)
7029{
7030	struct e1000_hw *hw = &adapter->hw;
7031	/* VF MAC addresses start at the end of the receive addresses and
7032	 * move towards the first, so a collision should not be possible */
7033	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7034
7035	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7036
7037	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7038
7039	return 0;
7040}
7041
7042static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7043{
7044	struct igb_adapter *adapter = netdev_priv(netdev);
7045	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7046		return -EINVAL;
7047	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7048	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7049	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7050				      " change effective.");
7051	if (test_bit(__IGB_DOWN, &adapter->state)) {
7052		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7053			 " but the PF device is not up.\n");
7054		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7055			 " attempting to use the VF device.\n");
7056	}
7057	return igb_set_vf_mac(adapter, vf, mac);
7058}
7059
7060static int igb_link_mbps(int internal_link_speed)
7061{
7062	switch (internal_link_speed) {
7063	case SPEED_100:
7064		return 100;
7065	case SPEED_1000:
7066		return 1000;
7067	default:
7068		return 0;
7069	}
7070}
7071
7072static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7073				  int link_speed)
7074{
7075	int rf_dec, rf_int;
7076	u32 bcnrc_val;
7077
7078	if (tx_rate != 0) {
7079		/* Calculate the rate factor values to set */
7080		rf_int = link_speed / tx_rate;
7081		rf_dec = (link_speed - (rf_int * tx_rate));
7082		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
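		/* illustrative example, assuming E1000_RTTBCNRC_RF_INT_SHIFT
		 * is 14: link_speed = 1000 and tx_rate = 300 give rf_int = 3
		 * and rf_dec = (100 * 16384) / 300 = 5461, i.e. a rate factor
		 * of about 3 + 5461/16384 ~= 3.33 = link_speed / tx_rate
		 */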
7083
7084		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7085		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7086		               E1000_RTTBCNRC_RF_INT_MASK);
7087		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7088	} else {
7089		bcnrc_val = 0;
7090	}
7091
7092	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7093	/*
7094	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
7095	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
7096	 */
7097	wr32(E1000_RTTBCNRM, 0x14);
7098	wr32(E1000_RTTBCNRC, bcnrc_val);
7099}
7100
7101static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7102{
7103	int actual_link_speed, i;
7104	bool reset_rate = false;
7105
7106	/* VF TX rate limit was not set or not supported */
7107	if ((adapter->vf_rate_link_speed == 0) ||
7108	    (adapter->hw.mac.type != e1000_82576))
7109		return;
7110
7111	actual_link_speed = igb_link_mbps(adapter->link_speed);
7112	if (actual_link_speed != adapter->vf_rate_link_speed) {
7113		reset_rate = true;
7114		adapter->vf_rate_link_speed = 0;
7115		dev_info(&adapter->pdev->dev,
7116		         "Link speed has been changed. VF Transmit "
7117		         "rate is disabled\n");
7118	}
7119
7120	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7121		if (reset_rate)
7122			adapter->vf_data[i].tx_rate = 0;
7123
7124		igb_set_vf_rate_limit(&adapter->hw, i,
7125		                      adapter->vf_data[i].tx_rate,
7126		                      actual_link_speed);
7127	}
7128}
7129
7130static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7131{
7132	struct igb_adapter *adapter = netdev_priv(netdev);
7133	struct e1000_hw *hw = &adapter->hw;
7134	int actual_link_speed;
7135
7136	if (hw->mac.type != e1000_82576)
7137		return -EOPNOTSUPP;
7138
7139	actual_link_speed = igb_link_mbps(adapter->link_speed);
7140	if ((vf >= adapter->vfs_allocated_count) ||
7141	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7142	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7143		return -EINVAL;
7144
7145	adapter->vf_rate_link_speed = actual_link_speed;
7146	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7147	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7148
7149	return 0;
7150}
7151
7152static int igb_ndo_get_vf_config(struct net_device *netdev,
7153				 int vf, struct ifla_vf_info *ivi)
7154{
7155	struct igb_adapter *adapter = netdev_priv(netdev);
7156	if (vf >= adapter->vfs_allocated_count)
7157		return -EINVAL;
7158	ivi->vf = vf;
7159	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7160	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7161	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7162	ivi->qos = adapter->vf_data[vf].pf_qos;
7163	return 0;
7164}
7165
7166static void igb_vmm_control(struct igb_adapter *adapter)
7167{
7168	struct e1000_hw *hw = &adapter->hw;
7169	u32 reg;
7170
7171	switch (hw->mac.type) {
7172	case e1000_82575:
7173	case e1000_i210:
7174	case e1000_i211:
7175	default:
7176		/* replication is not supported for 82575 */
7177		return;
7178	case e1000_82576:
7179		/* notify HW that the MAC is adding vlan tags */
7180		reg = rd32(E1000_DTXCTL);
7181		reg |= E1000_DTXCTL_VLAN_ADDED;
7182		wr32(E1000_DTXCTL, reg);
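		/* fall through - the 82576 also needs the RPLOLR setting below */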
7183	case e1000_82580:
7184		/* enable replication vlan tag stripping */
7185		reg = rd32(E1000_RPLOLR);
7186		reg |= E1000_RPLOLR_STRVLAN;
7187		wr32(E1000_RPLOLR, reg);
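		/* fall through */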
7188	case e1000_i350:
7189		/* none of the above registers are supported by i350 */
7190		break;
7191	}
7192
7193	if (adapter->vfs_allocated_count) {
7194		igb_vmdq_set_loopback_pf(hw, true);
7195		igb_vmdq_set_replication_pf(hw, true);
7196		igb_vmdq_set_anti_spoofing_pf(hw, true,
7197						adapter->vfs_allocated_count);
7198	} else {
7199		igb_vmdq_set_loopback_pf(hw, false);
7200		igb_vmdq_set_replication_pf(hw, false);
7201	}
7202}
7203
7204static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7205{
7206	struct e1000_hw *hw = &adapter->hw;
7207	u32 dmac_thr;
7208	u16 hwm;
7209
7210	if (hw->mac.type > e1000_82580) {
7211		if (adapter->flags & IGB_FLAG_DMAC) {
7212			u32 reg;
7213
7214			/* force threshold to 0. */
7215			wr32(E1000_DMCTXTH, 0);
7216
7217			/*
7218			 * DMA Coalescing high water mark needs to be greater
7219			 * than the Rx threshold. Set hwm to PBA - max frame
7220			 * size in 16B units, capping it at PBA - 6KB.
7221			 */
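			/* with illustrative numbers pba = 34 (KB) and
			 * max_frame_size = 1522, hwm = 64 * 34 - 1522 / 16 =
			 * 2176 - 95 = 2081, above the 64 * (34 - 6) = 1792
			 * floor enforced below
			 */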
7222			hwm = 64 * pba - adapter->max_frame_size / 16;
7223			if (hwm < 64 * (pba - 6))
7224				hwm = 64 * (pba - 6);
7225			reg = rd32(E1000_FCRTC);
7226			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7227			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7228				& E1000_FCRTC_RTH_COAL_MASK);
7229			wr32(E1000_FCRTC, reg);
7230
7231			/*
7232			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7233			 * frame size, capping it at PBA - 10KB.
7234			 */
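			/* with the same illustrative numbers, dmac_thr =
			 * 34 - 1522 / 512 = 32, above the pba - 10 = 24 floor
			 * enforced below
			 */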
7235			dmac_thr = pba - adapter->max_frame_size / 512;
7236			if (dmac_thr < pba - 10)
7237				dmac_thr = pba - 10;
7238			reg = rd32(E1000_DMACR);
7239			reg &= ~E1000_DMACR_DMACTHR_MASK;
7240			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7241				& E1000_DMACR_DMACTHR_MASK);
7242
7243			/* transition to L0s or L1 if available */
7244			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7245
7246			/* watchdog timer = ~1000 usec, programmed in 32 usec units */
7247			reg |= (1000 >> 5);
7248
7249			/* Disable BMC-to-OS Watchdog Enable */
7250			reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7251			wr32(E1000_DMACR, reg);
7252
7253			/*
7254			 * no lower threshold to disable
7255			 * coalescing (smart fifo) - UTRESH=0
7256			 */
7257			wr32(E1000_DMCRTRH, 0);
7258
7259			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7260
7261			wr32(E1000_DMCTLX, reg);
7262
7263			/*
7264			 * free space in tx packet buffer to wake from
7265			 * DMA coal
7266			 */
7267			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7268			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7269
7270			/*
7271			 * make low power state decision controlled
7272			 * by DMA coal
7273			 */
7274			reg = rd32(E1000_PCIEMISC);
7275			reg &= ~E1000_PCIEMISC_LX_DECISION;
7276			wr32(E1000_PCIEMISC, reg);
7277		} /* endif adapter->dmac is not disabled */
7278	} else if (hw->mac.type == e1000_82580) {
7279		u32 reg = rd32(E1000_PCIEMISC);
7280		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7281		wr32(E1000_DMACR, 0);
7282	}
7283}
7284
7285/* igb_main.c */
7286