igb_main.c revision 7efd26d0db5917b9e53d72e76e52338b2600ae20
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 4
63#define MIN 0
64#define BUILD 1
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
103	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
104	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
105	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
106	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
107	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
108	/* required last entry */
109	{0, }
110};
111
112MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
113
114void igb_reset(struct igb_adapter *);
115static int igb_setup_all_tx_resources(struct igb_adapter *);
116static int igb_setup_all_rx_resources(struct igb_adapter *);
117static void igb_free_all_tx_resources(struct igb_adapter *);
118static void igb_free_all_rx_resources(struct igb_adapter *);
119static void igb_setup_mrqc(struct igb_adapter *);
120static int igb_probe(struct pci_dev *, const struct pci_device_id *);
121static void __devexit igb_remove(struct pci_dev *pdev);
122static int igb_sw_init(struct igb_adapter *);
123static int igb_open(struct net_device *);
124static int igb_close(struct net_device *);
125static void igb_configure_tx(struct igb_adapter *);
126static void igb_configure_rx(struct igb_adapter *);
127static void igb_clean_all_tx_rings(struct igb_adapter *);
128static void igb_clean_all_rx_rings(struct igb_adapter *);
129static void igb_clean_tx_ring(struct igb_ring *);
130static void igb_clean_rx_ring(struct igb_ring *);
131static void igb_set_rx_mode(struct net_device *);
132static void igb_update_phy_info(unsigned long);
133static void igb_watchdog(unsigned long);
134static void igb_watchdog_task(struct work_struct *);
135static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
136static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
137						 struct rtnl_link_stats64 *stats);
138static int igb_change_mtu(struct net_device *, int);
139static int igb_set_mac(struct net_device *, void *);
140static void igb_set_uta(struct igb_adapter *adapter);
141static irqreturn_t igb_intr(int irq, void *);
142static irqreturn_t igb_intr_msi(int irq, void *);
143static irqreturn_t igb_msix_other(int irq, void *);
144static irqreturn_t igb_msix_ring(int irq, void *);
145#ifdef CONFIG_IGB_DCA
146static void igb_update_dca(struct igb_q_vector *);
147static void igb_setup_dca(struct igb_adapter *);
148#endif /* CONFIG_IGB_DCA */
149static int igb_poll(struct napi_struct *, int);
150static bool igb_clean_tx_irq(struct igb_q_vector *);
151static bool igb_clean_rx_irq(struct igb_q_vector *, int);
152static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
153static void igb_tx_timeout(struct net_device *);
154static void igb_reset_task(struct work_struct *);
155static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
156static int igb_vlan_rx_add_vid(struct net_device *, u16);
157static int igb_vlan_rx_kill_vid(struct net_device *, u16);
158static void igb_restore_vlan(struct igb_adapter *);
159static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
160static void igb_ping_all_vfs(struct igb_adapter *);
161static void igb_msg_task(struct igb_adapter *);
162static void igb_vmm_control(struct igb_adapter *);
163static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
164static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
165static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
166static int igb_ndo_set_vf_vlan(struct net_device *netdev,
167			       int vf, u16 vlan, u8 qos);
168static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
169static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
170				 struct ifla_vf_info *ivi);
171static void igb_check_vf_rate_limit(struct igb_adapter *);
172
173#ifdef CONFIG_PCI_IOV
174static int igb_vf_configure(struct igb_adapter *adapter, int vf);
175static int igb_find_enabled_vfs(struct igb_adapter *adapter);
176static int igb_check_vf_assignment(struct igb_adapter *adapter);
177#endif
178
179#ifdef CONFIG_PM
180#ifdef CONFIG_PM_SLEEP
181static int igb_suspend(struct device *);
182#endif
183static int igb_resume(struct device *);
184#ifdef CONFIG_PM_RUNTIME
185static int igb_runtime_suspend(struct device *dev);
186static int igb_runtime_resume(struct device *dev);
187static int igb_runtime_idle(struct device *dev);
188#endif
189static const struct dev_pm_ops igb_pm_ops = {
190	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
191	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
192			igb_runtime_idle)
193};
194#endif
195static void igb_shutdown(struct pci_dev *);
196#ifdef CONFIG_IGB_DCA
197static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
198static struct notifier_block dca_notifier = {
199	.notifier_call	= igb_notify_dca,
200	.next		= NULL,
201	.priority	= 0
202};
203#endif
204#ifdef CONFIG_NET_POLL_CONTROLLER
205/* for netdump / net console */
206static void igb_netpoll(struct net_device *);
207#endif
208#ifdef CONFIG_PCI_IOV
209static unsigned int max_vfs = 0;
210module_param(max_vfs, uint, 0);
211MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
212                 "per physical function");
213#endif /* CONFIG_PCI_IOV */
214
215static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
216		     pci_channel_state_t);
217static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
218static void igb_io_resume(struct pci_dev *);
219
220static struct pci_error_handlers igb_err_handler = {
221	.error_detected = igb_io_error_detected,
222	.slot_reset = igb_io_slot_reset,
223	.resume = igb_io_resume,
224};
225
226static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
227
228static struct pci_driver igb_driver = {
229	.name     = igb_driver_name,
230	.id_table = igb_pci_tbl,
231	.probe    = igb_probe,
232	.remove   = __devexit_p(igb_remove),
233#ifdef CONFIG_PM
234	.driver.pm = &igb_pm_ops,
235#endif
236	.shutdown = igb_shutdown,
237	.err_handler = &igb_err_handler
238};
239
240MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
241MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
242MODULE_LICENSE("GPL");
243MODULE_VERSION(DRV_VERSION);
244
245#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
246static int debug = -1;
247module_param(debug, int, 0);
248MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
249
250struct igb_reg_info {
251	u32 ofs;
252	char *name;
253};
254
255static const struct igb_reg_info igb_reg_info_tbl[] = {
256
257	/* General Registers */
258	{E1000_CTRL, "CTRL"},
259	{E1000_STATUS, "STATUS"},
260	{E1000_CTRL_EXT, "CTRL_EXT"},
261
262	/* Interrupt Registers */
263	{E1000_ICR, "ICR"},
264
265	/* RX Registers */
266	{E1000_RCTL, "RCTL"},
267	{E1000_RDLEN(0), "RDLEN"},
268	{E1000_RDH(0), "RDH"},
269	{E1000_RDT(0), "RDT"},
270	{E1000_RXDCTL(0), "RXDCTL"},
271	{E1000_RDBAL(0), "RDBAL"},
272	{E1000_RDBAH(0), "RDBAH"},
273
274	/* TX Registers */
275	{E1000_TCTL, "TCTL"},
276	{E1000_TDBAL(0), "TDBAL"},
277	{E1000_TDBAH(0), "TDBAH"},
278	{E1000_TDLEN(0), "TDLEN"},
279	{E1000_TDH(0), "TDH"},
280	{E1000_TDT(0), "TDT"},
281	{E1000_TXDCTL(0), "TXDCTL"},
282	{E1000_TDFH, "TDFH"},
283	{E1000_TDFT, "TDFT"},
284	{E1000_TDFHS, "TDFHS"},
285	{E1000_TDFPC, "TDFPC"},
286
287	/* List Terminator */
288	{}
289};
290
291/*
292 * igb_regdump - register printout routine
293 */
294static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
295{
296	int n = 0;
297	char rname[16];
298	u32 regs[8];
299
300	switch (reginfo->ofs) {
301	case E1000_RDLEN(0):
302		for (n = 0; n < 4; n++)
303			regs[n] = rd32(E1000_RDLEN(n));
304		break;
305	case E1000_RDH(0):
306		for (n = 0; n < 4; n++)
307			regs[n] = rd32(E1000_RDH(n));
308		break;
309	case E1000_RDT(0):
310		for (n = 0; n < 4; n++)
311			regs[n] = rd32(E1000_RDT(n));
312		break;
313	case E1000_RXDCTL(0):
314		for (n = 0; n < 4; n++)
315			regs[n] = rd32(E1000_RXDCTL(n));
316		break;
317	case E1000_RDBAL(0):
318		for (n = 0; n < 4; n++)
319			regs[n] = rd32(E1000_RDBAL(n));
320		break;
321	case E1000_RDBAH(0):
322		for (n = 0; n < 4; n++)
323			regs[n] = rd32(E1000_RDBAH(n));
324		break;
325	case E1000_TDBAL(0):
326		for (n = 0; n < 4; n++)
327			regs[n] = rd32(E1000_TDBAL(n));
328		break;
329	case E1000_TDBAH(0):
330		for (n = 0; n < 4; n++)
331			regs[n] = rd32(E1000_TDBAH(n));
332		break;
333	case E1000_TDLEN(0):
334		for (n = 0; n < 4; n++)
335			regs[n] = rd32(E1000_TDLEN(n));
336		break;
337	case E1000_TDH(0):
338		for (n = 0; n < 4; n++)
339			regs[n] = rd32(E1000_TDH(n));
340		break;
341	case E1000_TDT(0):
342		for (n = 0; n < 4; n++)
343			regs[n] = rd32(E1000_TDT(n));
344		break;
345	case E1000_TXDCTL(0):
346		for (n = 0; n < 4; n++)
347			regs[n] = rd32(E1000_TXDCTL(n));
348		break;
349	default:
350		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
351		return;
352	}
353
354	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
355	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
356		regs[2], regs[3]);
357}
358
359/*
360 * igb_dump - Print registers, tx-rings and rx-rings
361 */
362static void igb_dump(struct igb_adapter *adapter)
363{
364	struct net_device *netdev = adapter->netdev;
365	struct e1000_hw *hw = &adapter->hw;
366	struct igb_reg_info *reginfo;
367	struct igb_ring *tx_ring;
368	union e1000_adv_tx_desc *tx_desc;
369	struct my_u0 { u64 a; u64 b; } *u0;
370	struct igb_ring *rx_ring;
371	union e1000_adv_rx_desc *rx_desc;
372	u32 staterr;
373	u16 i, n;
374
375	if (!netif_msg_hw(adapter))
376		return;
377
378	/* Print netdevice Info */
379	if (netdev) {
380		dev_info(&adapter->pdev->dev, "Net device Info\n");
381		pr_info("Device Name     state            trans_start      "
382			"last_rx\n");
383		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
384			netdev->state, netdev->trans_start, netdev->last_rx);
385	}
386
387	/* Print Registers */
388	dev_info(&adapter->pdev->dev, "Register Dump\n");
389	pr_info(" Register Name   Value\n");
390	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
391	     reginfo->name; reginfo++) {
392		igb_regdump(hw, reginfo);
393	}
394
395	/* Print TX Ring Summary */
396	if (!netdev || !netif_running(netdev))
397		goto exit;
398
399	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
400	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
401	for (n = 0; n < adapter->num_tx_queues; n++) {
402		struct igb_tx_buffer *buffer_info;
403		tx_ring = adapter->tx_ring[n];
404		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
405		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
406			n, tx_ring->next_to_use, tx_ring->next_to_clean,
407			(u64)buffer_info->dma,
408			buffer_info->length,
409			buffer_info->next_to_watch,
410			(u64)buffer_info->time_stamp);
411	}
412
413	/* Print TX Rings */
414	if (!netif_msg_tx_done(adapter))
415		goto rx_ring_summary;
416
417	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
418
419	/* Transmit Descriptor Formats
420	 *
421	 * Advanced Transmit Descriptor
422	 *   +--------------------------------------------------------------+
423	 * 0 |         Buffer Address [63:0]                                |
424	 *   +--------------------------------------------------------------+
425	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
426	 *   +--------------------------------------------------------------+
427	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
428	 */
429
430	for (n = 0; n < adapter->num_tx_queues; n++) {
431		tx_ring = adapter->tx_ring[n];
432		pr_info("------------------------------------\n");
433		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
434		pr_info("------------------------------------\n");
435		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
436			"[bi->dma       ] leng  ntw timestamp        "
437			"bi->skb\n");
438
439		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
440			const char *next_desc;
441			struct igb_tx_buffer *buffer_info;
442			tx_desc = IGB_TX_DESC(tx_ring, i);
443			buffer_info = &tx_ring->tx_buffer_info[i];
444			u0 = (struct my_u0 *)tx_desc;
445			if (i == tx_ring->next_to_use &&
446			    i == tx_ring->next_to_clean)
447				next_desc = " NTC/U";
448			else if (i == tx_ring->next_to_use)
449				next_desc = " NTU";
450			else if (i == tx_ring->next_to_clean)
451				next_desc = " NTC";
452			else
453				next_desc = "";
454
455			pr_info("T [0x%03X]    %016llX %016llX %016llX"
456				" %04X  %p %016llX %p%s\n", i,
457				le64_to_cpu(u0->a),
458				le64_to_cpu(u0->b),
459				(u64)buffer_info->dma,
460				buffer_info->length,
461				buffer_info->next_to_watch,
462				(u64)buffer_info->time_stamp,
463				buffer_info->skb, next_desc);
464
465			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
466				print_hex_dump(KERN_INFO, "",
467					DUMP_PREFIX_ADDRESS,
468					16, 1, phys_to_virt(buffer_info->dma),
469					buffer_info->length, true);
470		}
471	}
472
473	/* Print RX Rings Summary */
474rx_ring_summary:
475	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
476	pr_info("Queue [NTU] [NTC]\n");
477	for (n = 0; n < adapter->num_rx_queues; n++) {
478		rx_ring = adapter->rx_ring[n];
479		pr_info(" %5d %5X %5X\n",
480			n, rx_ring->next_to_use, rx_ring->next_to_clean);
481	}
482
483	/* Print RX Rings */
484	if (!netif_msg_rx_status(adapter))
485		goto exit;
486
487	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
488
489	/* Advanced Receive Descriptor (Read) Format
490	 *    63                                           1        0
491	 *    +-----------------------------------------------------+
492	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
493	 *    +----------------------------------------------+------+
494	 *  8 |       Header Buffer Address [63:1]           |  DD  |
495	 *    +-----------------------------------------------------+
496	 *
497	 *
498	 * Advanced Receive Descriptor (Write-Back) Format
499	 *
500	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
501	 *   +------------------------------------------------------+
502	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
503	 *   | Checksum   Ident  |   |           |    | Type | Type |
504	 *   +------------------------------------------------------+
505	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
506	 *   +------------------------------------------------------+
507	 *   63       48 47    32 31            20 19               0
508	 */
509
510	for (n = 0; n < adapter->num_rx_queues; n++) {
511		rx_ring = adapter->rx_ring[n];
512		pr_info("------------------------------------\n");
513		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
514		pr_info("------------------------------------\n");
515		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
516			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
517		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
518			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
519
520		for (i = 0; i < rx_ring->count; i++) {
521			const char *next_desc;
522			struct igb_rx_buffer *buffer_info;
523			buffer_info = &rx_ring->rx_buffer_info[i];
524			rx_desc = IGB_RX_DESC(rx_ring, i);
525			u0 = (struct my_u0 *)rx_desc;
526			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
527
528			if (i == rx_ring->next_to_use)
529				next_desc = " NTU";
530			else if (i == rx_ring->next_to_clean)
531				next_desc = " NTC";
532			else
533				next_desc = "";
534
535			if (staterr & E1000_RXD_STAT_DD) {
536				/* Descriptor Done */
537				pr_info("%s[0x%03X]     %016llX %016llX -------"
538					"--------- %p%s\n", "RWB", i,
539					le64_to_cpu(u0->a),
540					le64_to_cpu(u0->b),
541					buffer_info->skb, next_desc);
542			} else {
543				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
544					" %p%s\n", "R  ", i,
545					le64_to_cpu(u0->a),
546					le64_to_cpu(u0->b),
547					(u64)buffer_info->dma,
548					buffer_info->skb, next_desc);
549
550				if (netif_msg_pktdata(adapter)) {
551					print_hex_dump(KERN_INFO, "",
552						DUMP_PREFIX_ADDRESS,
553						16, 1,
554						phys_to_virt(buffer_info->dma),
555						IGB_RX_HDR_LEN, true);
556					print_hex_dump(KERN_INFO, "",
557					  DUMP_PREFIX_ADDRESS,
558					  16, 1,
559					  phys_to_virt(
560					    buffer_info->page_dma +
561					    buffer_info->page_offset),
562					  PAGE_SIZE/2, true);
563				}
564			}
565		}
566	}
567
568exit:
569	return;
570}
571
572/**
573 * igb_get_hw_dev - return device
574 * used by hardware layer to print debugging information
575 **/
576struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577{
578	struct igb_adapter *adapter = hw->back;
579	return adapter->netdev;
580}
581
582/**
583 * igb_init_module - Driver Registration Routine
584 *
585 * igb_init_module is the first routine called when the driver is
586 * loaded. All it does is register with the PCI subsystem.
587 **/
588static int __init igb_init_module(void)
589{
590	int ret;
591	pr_info("%s - version %s\n",
592	       igb_driver_string, igb_driver_version);
593
594	pr_info("%s\n", igb_copyright);
595
596#ifdef CONFIG_IGB_DCA
597	dca_register_notify(&dca_notifier);
598#endif
599	ret = pci_register_driver(&igb_driver);
600	return ret;
601}
602
603module_init(igb_init_module);
604
605/**
606 * igb_exit_module - Driver Exit Cleanup Routine
607 *
608 * igb_exit_module is called just before the driver is removed
609 * from memory.
610 **/
611static void __exit igb_exit_module(void)
612{
613#ifdef CONFIG_IGB_DCA
614	dca_unregister_notify(&dca_notifier);
615#endif
616	pci_unregister_driver(&igb_driver);
617}
618
619module_exit(igb_exit_module);
620
621#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
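/*
 * Q_IDX_82576 maps ring index i onto the interleaved hardware queue layout
 * described below: Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1,
 * Q_IDX_82576(3) = 9, and so on.
 */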
622/**
623 * igb_cache_ring_register - Descriptor ring to register mapping
624 * @adapter: board private structure to initialize
625 *
626 * Once we know the feature-set enabled for the device, we'll cache
627 * the register offset the descriptor ring is assigned to.
628 **/
629static void igb_cache_ring_register(struct igb_adapter *adapter)
630{
631	int i = 0, j = 0;
632	u32 rbase_offset = adapter->vfs_allocated_count;
633
634	switch (adapter->hw.mac.type) {
635	case e1000_82576:
636		/* The queues are allocated for virtualization such that VF 0
637		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638		 * In order to avoid collision we start at the first free queue
639		 * and continue consuming queues in the same sequence
640		 */
641		if (adapter->vfs_allocated_count) {
642			for (; i < adapter->rss_queues; i++)
643				adapter->rx_ring[i]->reg_idx = rbase_offset +
644				                               Q_IDX_82576(i);
645		}
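		/* Fall through - remaining queues are mapped linearly below */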
646	case e1000_82575:
647	case e1000_82580:
648	case e1000_i350:
649	case e1000_i210:
650	case e1000_i211:
651	default:
652		for (; i < adapter->num_rx_queues; i++)
653			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654		for (; j < adapter->num_tx_queues; j++)
655			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
656		break;
657	}
658}
659
660static void igb_free_queues(struct igb_adapter *adapter)
661{
662	int i;
663
664	for (i = 0; i < adapter->num_tx_queues; i++) {
665		kfree(adapter->tx_ring[i]);
666		adapter->tx_ring[i] = NULL;
667	}
668	for (i = 0; i < adapter->num_rx_queues; i++) {
669		kfree(adapter->rx_ring[i]);
670		adapter->rx_ring[i] = NULL;
671	}
672	adapter->num_rx_queues = 0;
673	adapter->num_tx_queues = 0;
674}
675
676/**
677 * igb_alloc_queues - Allocate memory for all rings
678 * @adapter: board private structure to initialize
679 *
680 * We allocate one ring per queue at run-time since we don't know the
681 * number of queues at compile-time.
682 **/
683static int igb_alloc_queues(struct igb_adapter *adapter)
684{
685	struct igb_ring *ring;
686	int i;
687	int orig_node = adapter->node;
688
689	for (i = 0; i < adapter->num_tx_queues; i++) {
690		if (orig_node == -1) {
691			int cur_node = next_online_node(adapter->node);
692			if (cur_node == MAX_NUMNODES)
693				cur_node = first_online_node;
694			adapter->node = cur_node;
695		}
696		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
697				    adapter->node);
698		if (!ring)
699			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
700		if (!ring)
701			goto err;
702		ring->count = adapter->tx_ring_count;
703		ring->queue_index = i;
704		ring->dev = &adapter->pdev->dev;
705		ring->netdev = adapter->netdev;
706		ring->numa_node = adapter->node;
707		/* For 82575, context index must be unique per ring. */
708		if (adapter->hw.mac.type == e1000_82575)
709			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
710		adapter->tx_ring[i] = ring;
711	}
712	/* Restore the adapter's original node */
713	adapter->node = orig_node;
714
715	for (i = 0; i < adapter->num_rx_queues; i++) {
716		if (orig_node == -1) {
717			int cur_node = next_online_node(adapter->node);
718			if (cur_node == MAX_NUMNODES)
719				cur_node = first_online_node;
720			adapter->node = cur_node;
721		}
722		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
723				    adapter->node);
724		if (!ring)
725			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
726		if (!ring)
727			goto err;
728		ring->count = adapter->rx_ring_count;
729		ring->queue_index = i;
730		ring->dev = &adapter->pdev->dev;
731		ring->netdev = adapter->netdev;
732		ring->numa_node = adapter->node;
733		/* set flag indicating ring supports SCTP checksum offload */
734		if (adapter->hw.mac.type >= e1000_82576)
735			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
736
737		/*
738		 * On i350, i210, and i211, loopback VLAN packets
739		 * have the tag byte-swapped.
740		 */
741		if (adapter->hw.mac.type >= e1000_i350)
742			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
743
744		adapter->rx_ring[i] = ring;
745	}
746	/* Restore the adapter's original node */
747	adapter->node = orig_node;
748
749	igb_cache_ring_register(adapter);
750
751	return 0;
752
753err:
754	/* Restore the adapter's original node */
755	adapter->node = orig_node;
756	igb_free_queues(adapter);
757
758	return -ENOMEM;
759}
760
761/**
762 *  igb_write_ivar - configure ivar for given MSI-X vector
763 *  @hw: pointer to the HW structure
764 *  @msix_vector: vector number we are allocating to a given ring
765 *  @index: row index of IVAR register to write within IVAR table
766 *  @offset: column offset within IVAR, should be a multiple of 8
767 *
768 *  This function is intended to handle the writing of the IVAR register
769 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
770 *  each containing a cause allocation for an Rx and Tx ring, and a
771 *  variable number of rows depending on the number of queues supported.
772 **/
773static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
774			   int index, int offset)
775{
776	u32 ivar = array_rd32(E1000_IVAR0, index);
777
778	/* clear any bits that are currently set */
779	ivar &= ~((u32)0xFF << offset);
780
781	/* write vector and valid bit */
782	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
783
784	array_wr32(E1000_IVAR0, index, ivar);
785}
786
787#define IGB_N0_QUEUE -1
788static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
789{
790	struct igb_adapter *adapter = q_vector->adapter;
791	struct e1000_hw *hw = &adapter->hw;
792	int rx_queue = IGB_N0_QUEUE;
793	int tx_queue = IGB_N0_QUEUE;
794	u32 msixbm = 0;
795
796	if (q_vector->rx.ring)
797		rx_queue = q_vector->rx.ring->reg_idx;
798	if (q_vector->tx.ring)
799		tx_queue = q_vector->tx.ring->reg_idx;
800
801	switch (hw->mac.type) {
802	case e1000_82575:
803		/* The 82575 assigns vectors using a bitmask, which matches the
804		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
805		   or more queues to a vector, we write the appropriate bits
806		   into the MSIXBM register for that vector. */
807		if (rx_queue > IGB_N0_QUEUE)
808			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
809		if (tx_queue > IGB_N0_QUEUE)
810			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
811		if (!adapter->msix_entries && msix_vector == 0)
812			msixbm |= E1000_EIMS_OTHER;
813		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
814		q_vector->eims_value = msixbm;
815		break;
816	case e1000_82576:
817		/*
818		 * 82576 uses a table that essentially consists of 2 columns
819		 * with 8 rows.  The ordering is column-major so we use the
820		 * lower 3 bits as the row index, and the 4th bit as the
821		 * column offset.
822		 */
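		/*
		 * e.g. Rx queue 0 lands in IVAR row 0, byte 0, and Rx queue 8
		 * in row 0, byte 2; the matching Tx queues use bytes 1 and 3
		 * of the same row.
		 */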
823		if (rx_queue > IGB_N0_QUEUE)
824			igb_write_ivar(hw, msix_vector,
825				       rx_queue & 0x7,
826				       (rx_queue & 0x8) << 1);
827		if (tx_queue > IGB_N0_QUEUE)
828			igb_write_ivar(hw, msix_vector,
829				       tx_queue & 0x7,
830				       ((tx_queue & 0x8) << 1) + 8);
831		q_vector->eims_value = 1 << msix_vector;
832		break;
833	case e1000_82580:
834	case e1000_i350:
835	case e1000_i210:
836	case e1000_i211:
837		/*
838		 * On 82580 and newer adapters the scheme is similar to 82576
839		 * however instead of ordering column-major we have things
840		 * ordered row-major.  So we traverse the table by using
841		 * bit 0 as the column offset, and the remaining bits as the
842		 * row index.
843		 */
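		/*
		 * e.g. Rx queue 0 -> row 0, byte 0; Tx queue 0 -> row 0,
		 * byte 1; Rx queue 1 -> row 0, byte 2; Tx queue 1 -> row 0,
		 * byte 3; Rx queue 2 then starts row 1.
		 */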
844		if (rx_queue > IGB_N0_QUEUE)
845			igb_write_ivar(hw, msix_vector,
846				       rx_queue >> 1,
847				       (rx_queue & 0x1) << 4);
848		if (tx_queue > IGB_N0_QUEUE)
849			igb_write_ivar(hw, msix_vector,
850				       tx_queue >> 1,
851				       ((tx_queue & 0x1) << 4) + 8);
852		q_vector->eims_value = 1 << msix_vector;
853		break;
854	default:
855		BUG();
856		break;
857	}
858
859	/* add q_vector eims value to global eims_enable_mask */
860	adapter->eims_enable_mask |= q_vector->eims_value;
861
862	/* configure q_vector to set itr on first interrupt */
863	q_vector->set_itr = 1;
864}
865
866/**
867 * igb_configure_msix - Configure MSI-X hardware
868 *
869 * igb_configure_msix sets up the hardware to properly
870 * generate MSI-X interrupts.
871 **/
872static void igb_configure_msix(struct igb_adapter *adapter)
873{
874	u32 tmp;
875	int i, vector = 0;
876	struct e1000_hw *hw = &adapter->hw;
877
878	adapter->eims_enable_mask = 0;
879
880	/* set vector for other causes, i.e. link changes */
881	switch (hw->mac.type) {
882	case e1000_82575:
883		tmp = rd32(E1000_CTRL_EXT);
884		/* enable MSI-X PBA support*/
885		tmp |= E1000_CTRL_EXT_PBA_CLR;
886
887		/* Auto-Mask interrupts upon ICR read. */
888		tmp |= E1000_CTRL_EXT_EIAME;
889		tmp |= E1000_CTRL_EXT_IRCA;
890
891		wr32(E1000_CTRL_EXT, tmp);
892
893		/* enable msix_other interrupt */
894		array_wr32(E1000_MSIXBM(0), vector++,
895		                      E1000_EIMS_OTHER);
896		adapter->eims_other = E1000_EIMS_OTHER;
897
898		break;
899
900	case e1000_82576:
901	case e1000_82580:
902	case e1000_i350:
903	case e1000_i210:
904	case e1000_i211:
905		/* Turn on MSI-X capability first, or our settings
906		 * won't stick.  And it will take days to debug. */
907		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
908		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
909		                E1000_GPIE_NSICR);
910
911		/* enable msix_other interrupt */
912		adapter->eims_other = 1 << vector;
913		tmp = (vector++ | E1000_IVAR_VALID) << 8;
914
915		wr32(E1000_IVAR_MISC, tmp);
916		break;
917	default:
918		/* do nothing, since nothing else supports MSI-X */
919		break;
920	} /* switch (hw->mac.type) */
921
922	adapter->eims_enable_mask |= adapter->eims_other;
923
924	for (i = 0; i < adapter->num_q_vectors; i++)
925		igb_assign_vector(adapter->q_vector[i], vector++);
926
927	wrfl();
928}
929
930/**
931 * igb_request_msix - Initialize MSI-X interrupts
932 *
933 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
934 * kernel.
935 **/
936static int igb_request_msix(struct igb_adapter *adapter)
937{
938	struct net_device *netdev = adapter->netdev;
939	struct e1000_hw *hw = &adapter->hw;
940	int i, err = 0, vector = 0;
941
942	err = request_irq(adapter->msix_entries[vector].vector,
943	                  igb_msix_other, 0, netdev->name, adapter);
944	if (err)
945		goto out;
946	vector++;
947
948	for (i = 0; i < adapter->num_q_vectors; i++) {
949		struct igb_q_vector *q_vector = adapter->q_vector[i];
950
951		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
952
953		if (q_vector->rx.ring && q_vector->tx.ring)
954			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
955				q_vector->rx.ring->queue_index);
956		else if (q_vector->tx.ring)
957			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
958				q_vector->tx.ring->queue_index);
959		else if (q_vector->rx.ring)
960			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
961				q_vector->rx.ring->queue_index);
962		else
963			sprintf(q_vector->name, "%s-unused", netdev->name);
964
965		err = request_irq(adapter->msix_entries[vector].vector,
966		                  igb_msix_ring, 0, q_vector->name,
967		                  q_vector);
968		if (err)
969			goto out;
970		vector++;
971	}
972
973	igb_configure_msix(adapter);
974	return 0;
975out:
976	return err;
977}
978
979static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
980{
981	if (adapter->msix_entries) {
982		pci_disable_msix(adapter->pdev);
983		kfree(adapter->msix_entries);
984		adapter->msix_entries = NULL;
985	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
986		pci_disable_msi(adapter->pdev);
987	}
988}
989
990/**
991 * igb_free_q_vectors - Free memory allocated for interrupt vectors
992 * @adapter: board private structure to initialize
993 *
994 * This function frees the memory allocated to the q_vectors.  In addition if
995 * NAPI is enabled it will delete any references to the NAPI struct prior
996 * to freeing the q_vector.
997 **/
998static void igb_free_q_vectors(struct igb_adapter *adapter)
999{
1000	int v_idx;
1001
1002	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1003		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1004		adapter->q_vector[v_idx] = NULL;
1005		if (!q_vector)
1006			continue;
1007		netif_napi_del(&q_vector->napi);
1008		kfree(q_vector);
1009	}
1010	adapter->num_q_vectors = 0;
1011}
1012
1013/**
1014 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1015 *
1016 * This function resets the device so that it has 0 rx queues, tx queues, and
1017 * MSI-X interrupts allocated.
1018 */
1019static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1020{
1021	igb_free_queues(adapter);
1022	igb_free_q_vectors(adapter);
1023	igb_reset_interrupt_capability(adapter);
1024}
1025
1026/**
1027 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1028 *
1029 * Attempt to configure interrupts using the best available
1030 * capabilities of the hardware and kernel.
1031 **/
1032static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1033{
1034	int err;
1035	int numvecs, i;
1036
1037	/* Number of supported queues. */
1038	adapter->num_rx_queues = adapter->rss_queues;
1039	if (adapter->vfs_allocated_count)
1040		adapter->num_tx_queues = 1;
1041	else
1042		adapter->num_tx_queues = adapter->rss_queues;
1043
1044	/* start with one vector for every rx queue */
1045	numvecs = adapter->num_rx_queues;
1046
1047	/* if tx handler is separate add 1 for every tx queue */
1048	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1049		numvecs += adapter->num_tx_queues;
1050
1051	/* store the number of vectors reserved for queues */
1052	adapter->num_q_vectors = numvecs;
1053
1054	/* add 1 vector for link status interrupts */
1055	numvecs++;
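	/*
	 * e.g. with 4 RSS queues and queue pairing enabled this requests
	 * 4 + 1 = 5 vectors; with pairing disabled it would be 4 + 4 + 1 = 9.
	 */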
1056	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1057					GFP_KERNEL);
1058
1059	if (!adapter->msix_entries)
1060		goto msi_only;
1061
1062	for (i = 0; i < numvecs; i++)
1063		adapter->msix_entries[i].entry = i;
1064
1065	err = pci_enable_msix(adapter->pdev,
1066			      adapter->msix_entries,
1067			      numvecs);
1068	if (err == 0)
1069		goto out;
1070
1071	igb_reset_interrupt_capability(adapter);
1072
1073	/* If we can't do MSI-X, try MSI */
1074msi_only:
1075#ifdef CONFIG_PCI_IOV
1076	/* disable SR-IOV for non MSI-X configurations */
1077	if (adapter->vf_data) {
1078		struct e1000_hw *hw = &adapter->hw;
1079		/* disable iov and allow time for transactions to clear */
1080		pci_disable_sriov(adapter->pdev);
1081		msleep(500);
1082
1083		kfree(adapter->vf_data);
1084		adapter->vf_data = NULL;
1085		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1086		wrfl();
1087		msleep(100);
1088		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1089	}
1090#endif
1091	adapter->vfs_allocated_count = 0;
1092	adapter->rss_queues = 1;
1093	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1094	adapter->num_rx_queues = 1;
1095	adapter->num_tx_queues = 1;
1096	adapter->num_q_vectors = 1;
1097	if (!pci_enable_msi(adapter->pdev))
1098		adapter->flags |= IGB_FLAG_HAS_MSI;
1099out:
1100	/* Notify the stack of the (possibly) reduced queue counts. */
1101	rtnl_lock();
1102	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1103	err = netif_set_real_num_rx_queues(adapter->netdev,
1104		adapter->num_rx_queues);
1105	rtnl_unlock();
1106	return err;
1107}
1108
1109/**
1110 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1111 * @adapter: board private structure to initialize
1112 *
1113 * We allocate one q_vector per queue interrupt.  If allocation fails we
1114 * return -ENOMEM.
1115 **/
1116static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1117{
1118	struct igb_q_vector *q_vector;
1119	struct e1000_hw *hw = &adapter->hw;
1120	int v_idx;
1121	int orig_node = adapter->node;
1122
1123	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1124		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1125						adapter->num_tx_queues)) &&
1126		    (adapter->num_rx_queues == v_idx))
1127			adapter->node = orig_node;
1128		if (orig_node == -1) {
1129			int cur_node = next_online_node(adapter->node);
1130			if (cur_node == MAX_NUMNODES)
1131				cur_node = first_online_node;
1132			adapter->node = cur_node;
1133		}
1134		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1135					adapter->node);
1136		if (!q_vector)
1137			q_vector = kzalloc(sizeof(struct igb_q_vector),
1138					   GFP_KERNEL);
1139		if (!q_vector)
1140			goto err_out;
1141		q_vector->adapter = adapter;
1142		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1143		q_vector->itr_val = IGB_START_ITR;
1144		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1145		adapter->q_vector[v_idx] = q_vector;
1146	}
1147	/* Restore the adapter's original node */
1148	adapter->node = orig_node;
1149
1150	return 0;
1151
1152err_out:
1153	/* Restore the adapter's original node */
1154	adapter->node = orig_node;
1155	igb_free_q_vectors(adapter);
1156	return -ENOMEM;
1157}
1158
1159static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1160                                      int ring_idx, int v_idx)
1161{
1162	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1163
1164	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1165	q_vector->rx.ring->q_vector = q_vector;
1166	q_vector->rx.count++;
1167	q_vector->itr_val = adapter->rx_itr_setting;
1168	if (q_vector->itr_val && q_vector->itr_val <= 3)
1169		q_vector->itr_val = IGB_START_ITR;
1170}
1171
1172static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1173                                      int ring_idx, int v_idx)
1174{
1175	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1176
1177	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1178	q_vector->tx.ring->q_vector = q_vector;
1179	q_vector->tx.count++;
1180	q_vector->itr_val = adapter->tx_itr_setting;
1181	q_vector->tx.work_limit = adapter->tx_work_limit;
1182	if (q_vector->itr_val && q_vector->itr_val <= 3)
1183		q_vector->itr_val = IGB_START_ITR;
1184}
1185
1186/**
1187 * igb_map_ring_to_vector - maps allocated queues to vectors
1188 *
1189 * This function maps the recently allocated queues to vectors.
1190 **/
1191static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1192{
1193	int i;
1194	int v_idx = 0;
1195
1196	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1197	    (adapter->num_q_vectors < adapter->num_tx_queues))
1198		return -ENOMEM;
1199
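	/*
	 * If there are enough vectors, give every Rx and Tx ring its own
	 * vector; otherwise let each shared vector service one Rx/Tx ring
	 * pair and map any leftover Tx rings to their own vectors.
	 */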
1200	if (adapter->num_q_vectors >=
1201	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1202		for (i = 0; i < adapter->num_rx_queues; i++)
1203			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1204		for (i = 0; i < adapter->num_tx_queues; i++)
1205			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1206	} else {
1207		for (i = 0; i < adapter->num_rx_queues; i++) {
1208			if (i < adapter->num_tx_queues)
1209				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1210			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1211		}
1212		for (; i < adapter->num_tx_queues; i++)
1213			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1214	}
1215	return 0;
1216}
1217
1218/**
1219 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1220 *
1221 * This function initializes the interrupts and allocates all of the queues.
1222 **/
1223static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1224{
1225	struct pci_dev *pdev = adapter->pdev;
1226	int err;
1227
1228	err = igb_set_interrupt_capability(adapter);
1229	if (err)
1230		return err;
1231
1232	err = igb_alloc_q_vectors(adapter);
1233	if (err) {
1234		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1235		goto err_alloc_q_vectors;
1236	}
1237
1238	err = igb_alloc_queues(adapter);
1239	if (err) {
1240		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1241		goto err_alloc_queues;
1242	}
1243
1244	err = igb_map_ring_to_vector(adapter);
1245	if (err) {
1246		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1247		goto err_map_queues;
1248	}
1249
1250
1251	return 0;
1252err_map_queues:
1253	igb_free_queues(adapter);
1254err_alloc_queues:
1255	igb_free_q_vectors(adapter);
1256err_alloc_q_vectors:
1257	igb_reset_interrupt_capability(adapter);
1258	return err;
1259}
1260
1261/**
1262 * igb_request_irq - initialize interrupts
1263 *
1264 * Attempts to configure interrupts using the best available
1265 * capabilities of the hardware and kernel.
1266 **/
1267static int igb_request_irq(struct igb_adapter *adapter)
1268{
1269	struct net_device *netdev = adapter->netdev;
1270	struct pci_dev *pdev = adapter->pdev;
1271	int err = 0;
1272
1273	if (adapter->msix_entries) {
1274		err = igb_request_msix(adapter);
1275		if (!err)
1276			goto request_done;
1277		/* fall back to MSI */
1278		igb_clear_interrupt_scheme(adapter);
1279		if (!pci_enable_msi(pdev))
1280			adapter->flags |= IGB_FLAG_HAS_MSI;
1281		igb_free_all_tx_resources(adapter);
1282		igb_free_all_rx_resources(adapter);
1283		adapter->num_tx_queues = 1;
1284		adapter->num_rx_queues = 1;
1285		adapter->num_q_vectors = 1;
1286		err = igb_alloc_q_vectors(adapter);
1287		if (err) {
1288			dev_err(&pdev->dev,
1289			        "Unable to allocate memory for vectors\n");
1290			goto request_done;
1291		}
1292		err = igb_alloc_queues(adapter);
1293		if (err) {
1294			dev_err(&pdev->dev,
1295			        "Unable to allocate memory for queues\n");
1296			igb_free_q_vectors(adapter);
1297			goto request_done;
1298		}
1299		igb_setup_all_tx_resources(adapter);
1300		igb_setup_all_rx_resources(adapter);
1301	}
1302
1303	igb_assign_vector(adapter->q_vector[0], 0);
1304
1305	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1306		err = request_irq(pdev->irq, igb_intr_msi, 0,
1307				  netdev->name, adapter);
1308		if (!err)
1309			goto request_done;
1310
1311		/* fall back to legacy interrupts */
1312		igb_reset_interrupt_capability(adapter);
1313		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1314	}
1315
1316	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1317			  netdev->name, adapter);
1318
1319	if (err)
1320		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1321			err);
1322
1323request_done:
1324	return err;
1325}
1326
1327static void igb_free_irq(struct igb_adapter *adapter)
1328{
1329	if (adapter->msix_entries) {
1330		int vector = 0, i;
1331
1332		free_irq(adapter->msix_entries[vector++].vector, adapter);
1333
1334		for (i = 0; i < adapter->num_q_vectors; i++)
1335			free_irq(adapter->msix_entries[vector++].vector,
1336				 adapter->q_vector[i]);
1337	} else {
1338		free_irq(adapter->pdev->irq, adapter);
1339	}
1340}
1341
1342/**
1343 * igb_irq_disable - Mask off interrupt generation on the NIC
1344 * @adapter: board private structure
1345 **/
1346static void igb_irq_disable(struct igb_adapter *adapter)
1347{
1348	struct e1000_hw *hw = &adapter->hw;
1349
1350	/*
1351	 * we need to be careful when disabling interrupts.  The VFs are also
1352	 * mapped into these registers and so clearing the bits can cause
1353	 * issues on the VF drivers so we only need to clear what we set
1354	 */
1355	if (adapter->msix_entries) {
1356		u32 regval = rd32(E1000_EIAM);
1357		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1358		wr32(E1000_EIMC, adapter->eims_enable_mask);
1359		regval = rd32(E1000_EIAC);
1360		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1361	}
1362
1363	wr32(E1000_IAM, 0);
1364	wr32(E1000_IMC, ~0);
1365	wrfl();
1366	if (adapter->msix_entries) {
1367		int i;
1368		for (i = 0; i < adapter->num_q_vectors; i++)
1369			synchronize_irq(adapter->msix_entries[i].vector);
1370	} else {
1371		synchronize_irq(adapter->pdev->irq);
1372	}
1373}
1374
1375/**
1376 * igb_irq_enable - Enable default interrupt generation settings
1377 * @adapter: board private structure
1378 **/
1379static void igb_irq_enable(struct igb_adapter *adapter)
1380{
1381	struct e1000_hw *hw = &adapter->hw;
1382
1383	if (adapter->msix_entries) {
1384		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1385		u32 regval = rd32(E1000_EIAC);
1386		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1387		regval = rd32(E1000_EIAM);
1388		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1389		wr32(E1000_EIMS, adapter->eims_enable_mask);
1390		if (adapter->vfs_allocated_count) {
1391			wr32(E1000_MBVFIMR, 0xFF);
1392			ims |= E1000_IMS_VMMB;
1393		}
1394		wr32(E1000_IMS, ims);
1395	} else {
1396		wr32(E1000_IMS, IMS_ENABLE_MASK |
1397				E1000_IMS_DRSTA);
1398		wr32(E1000_IAM, IMS_ENABLE_MASK |
1399				E1000_IMS_DRSTA);
1400	}
1401}
1402
1403static void igb_update_mng_vlan(struct igb_adapter *adapter)
1404{
1405	struct e1000_hw *hw = &adapter->hw;
1406	u16 vid = adapter->hw.mng_cookie.vlan_id;
1407	u16 old_vid = adapter->mng_vlan_id;
1408
1409	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1410		/* add VID to filter table */
1411		igb_vfta_set(hw, vid, true);
1412		adapter->mng_vlan_id = vid;
1413	} else {
1414		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1415	}
1416
1417	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1418	    (vid != old_vid) &&
1419	    !test_bit(old_vid, adapter->active_vlans)) {
1420		/* remove VID from filter table */
1421		igb_vfta_set(hw, old_vid, false);
1422	}
1423}
1424
1425/**
1426 * igb_release_hw_control - release control of the h/w to f/w
1427 * @adapter: address of board private structure
1428 *
1429 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1430 * For ASF and Pass Through versions of f/w this means that the
1431 * driver is no longer loaded.
1432 *
1433 **/
1434static void igb_release_hw_control(struct igb_adapter *adapter)
1435{
1436	struct e1000_hw *hw = &adapter->hw;
1437	u32 ctrl_ext;
1438
1439	/* Let firmware take over control of h/w */
1440	ctrl_ext = rd32(E1000_CTRL_EXT);
1441	wr32(E1000_CTRL_EXT,
1442			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1443}
1444
1445/**
1446 * igb_get_hw_control - get control of the h/w from f/w
1447 * @adapter: address of board private structure
1448 *
1449 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1450 * For ASF and Pass Through versions of f/w this means that
1451 * the driver is loaded.
1452 *
1453 **/
1454static void igb_get_hw_control(struct igb_adapter *adapter)
1455{
1456	struct e1000_hw *hw = &adapter->hw;
1457	u32 ctrl_ext;
1458
1459	/* Let firmware know the driver has taken over */
1460	ctrl_ext = rd32(E1000_CTRL_EXT);
1461	wr32(E1000_CTRL_EXT,
1462			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1463}
1464
1465/**
1466 * igb_configure - configure the hardware for RX and TX
1467 * @adapter: private board structure
1468 **/
1469static void igb_configure(struct igb_adapter *adapter)
1470{
1471	struct net_device *netdev = adapter->netdev;
1472	int i;
1473
1474	igb_get_hw_control(adapter);
1475	igb_set_rx_mode(netdev);
1476
1477	igb_restore_vlan(adapter);
1478
1479	igb_setup_tctl(adapter);
1480	igb_setup_mrqc(adapter);
1481	igb_setup_rctl(adapter);
1482
1483	igb_configure_tx(adapter);
1484	igb_configure_rx(adapter);
1485
1486	igb_rx_fifo_flush_82575(&adapter->hw);
1487
1488	/* call igb_desc_unused which always leaves
1489	 * at least 1 descriptor unused to make sure
1490	 * next_to_use != next_to_clean */
1491	for (i = 0; i < adapter->num_rx_queues; i++) {
1492		struct igb_ring *ring = adapter->rx_ring[i];
1493		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1494	}
1495}
1496
1497/**
1498 * igb_power_up_link - Power up the phy/serdes link
1499 * @adapter: address of board private structure
1500 **/
1501void igb_power_up_link(struct igb_adapter *adapter)
1502{
1503	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1504		igb_power_up_phy_copper(&adapter->hw);
1505	else
1506		igb_power_up_serdes_link_82575(&adapter->hw);
1507	igb_reset_phy(&adapter->hw);
1508}
1509
1510/**
1511 * igb_power_down_link - Power down the phy/serdes link
1512 * @adapter: address of board private structure
1513 */
1514static void igb_power_down_link(struct igb_adapter *adapter)
1515{
1516	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1517		igb_power_down_phy_copper_82575(&adapter->hw);
1518	else
1519		igb_shutdown_serdes_link_82575(&adapter->hw);
1520}
1521
1522/**
1523 * igb_up - Open the interface and prepare it to handle traffic
1524 * @adapter: board private structure
1525 **/
1526int igb_up(struct igb_adapter *adapter)
1527{
1528	struct e1000_hw *hw = &adapter->hw;
1529	int i;
1530
1531	/* hardware has been reset, we need to reload some things */
1532	igb_configure(adapter);
1533
1534	clear_bit(__IGB_DOWN, &adapter->state);
1535
1536	for (i = 0; i < adapter->num_q_vectors; i++)
1537		napi_enable(&(adapter->q_vector[i]->napi));
1538
1539	if (adapter->msix_entries)
1540		igb_configure_msix(adapter);
1541	else
1542		igb_assign_vector(adapter->q_vector[0], 0);
1543
1544	/* Clear any pending interrupts. */
1545	rd32(E1000_ICR);
1546	igb_irq_enable(adapter);
1547
1548	/* notify VFs that reset has been completed */
1549	if (adapter->vfs_allocated_count) {
1550		u32 reg_data = rd32(E1000_CTRL_EXT);
1551		reg_data |= E1000_CTRL_EXT_PFRSTD;
1552		wr32(E1000_CTRL_EXT, reg_data);
1553	}
1554
1555	netif_tx_start_all_queues(adapter->netdev);
1556
1557	/* start the watchdog. */
1558	hw->mac.get_link_status = 1;
1559	schedule_work(&adapter->watchdog_task);
1560
1561	return 0;
1562}
1563
1564void igb_down(struct igb_adapter *adapter)
1565{
1566	struct net_device *netdev = adapter->netdev;
1567	struct e1000_hw *hw = &adapter->hw;
1568	u32 tctl, rctl;
1569	int i;
1570
1571	/* signal that we're down so the interrupt handler does not
1572	 * reschedule our watchdog timer */
1573	set_bit(__IGB_DOWN, &adapter->state);
1574
1575	/* disable receives in the hardware */
1576	rctl = rd32(E1000_RCTL);
1577	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1578	/* flush and sleep below */
1579
1580	netif_tx_stop_all_queues(netdev);
1581
1582	/* disable transmits in the hardware */
1583	tctl = rd32(E1000_TCTL);
1584	tctl &= ~E1000_TCTL_EN;
1585	wr32(E1000_TCTL, tctl);
1586	/* flush both disables and wait for them to finish */
1587	wrfl();
1588	msleep(10);
1589
1590	for (i = 0; i < adapter->num_q_vectors; i++)
1591		napi_disable(&(adapter->q_vector[i]->napi));
1592
1593	igb_irq_disable(adapter);
1594
1595	del_timer_sync(&adapter->watchdog_timer);
1596	del_timer_sync(&adapter->phy_info_timer);
1597
1598	netif_carrier_off(netdev);
1599
1600	/* record the stats before reset*/
1601	spin_lock(&adapter->stats64_lock);
1602	igb_update_stats(adapter, &adapter->stats64);
1603	spin_unlock(&adapter->stats64_lock);
1604
1605	adapter->link_speed = 0;
1606	adapter->link_duplex = 0;
1607
1608	if (!pci_channel_offline(adapter->pdev))
1609		igb_reset(adapter);
1610	igb_clean_all_tx_rings(adapter);
1611	igb_clean_all_rx_rings(adapter);
1612#ifdef CONFIG_IGB_DCA
1613
1614	/* since we reset the hardware DCA settings were cleared */
1615	igb_setup_dca(adapter);
1616#endif
1617}
1618
1619void igb_reinit_locked(struct igb_adapter *adapter)
1620{
1621	WARN_ON(in_interrupt());
1622	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1623		msleep(1);
1624	igb_down(adapter);
1625	igb_up(adapter);
1626	clear_bit(__IGB_RESETTING, &adapter->state);
1627}
1628
1629void igb_reset(struct igb_adapter *adapter)
1630{
1631	struct pci_dev *pdev = adapter->pdev;
1632	struct e1000_hw *hw = &adapter->hw;
1633	struct e1000_mac_info *mac = &hw->mac;
1634	struct e1000_fc_info *fc = &hw->fc;
1635	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1636	u16 hwm;
1637
1638	/* Repartition PBA for MTUs greater than 9K.
1639	 * To take effect, CTRL.RST is required.
1640	 */
1641	switch (mac->type) {
1642	case e1000_i350:
1643	case e1000_82580:
1644		pba = rd32(E1000_RXPBS);
1645		pba = igb_rxpbs_adjust_82580(pba);
1646		break;
1647	case e1000_82576:
1648		pba = rd32(E1000_RXPBS);
1649		pba &= E1000_RXPBS_SIZE_MASK_82576;
1650		break;
1651	case e1000_82575:
1652	case e1000_i210:
1653	case e1000_i211:
1654	default:
1655		pba = E1000_PBA_34K;
1656		break;
1657	}
1658
1659	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1660	    (mac->type < e1000_82576)) {
1661		/* adjust PBA for jumbo frames */
1662		wr32(E1000_PBA, pba);
1663
1664		/* To maintain wire speed transmits, the Tx FIFO should be
1665		 * large enough to accommodate two full transmit packets,
1666		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1667		 * the Rx FIFO should be large enough to accommodate at least
1668		 * one full receive packet and is similarly rounded up and
1669		 * expressed in KB. */
1670		pba = rd32(E1000_PBA);
1671		/* upper 16 bits has Tx packet buffer allocation size in KB */
1672		tx_space = pba >> 16;
1673		/* lower 16 bits has Rx packet buffer allocation size in KB */
1674		pba &= 0xffff;
1675		/* the Tx FIFO also stores 16 bytes of information about the Tx
1676		 * packet; don't include the Ethernet FCS, as hardware appends it */
1677		min_tx_space = (adapter->max_frame_size +
1678				sizeof(union e1000_adv_tx_desc) -
1679				ETH_FCS_LEN) * 2;
1680		min_tx_space = ALIGN(min_tx_space, 1024);
1681		min_tx_space >>= 10;
1682		/* software strips receive CRC, so leave room for it */
1683		min_rx_space = adapter->max_frame_size;
1684		min_rx_space = ALIGN(min_rx_space, 1024);
1685		min_rx_space >>= 10;
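		/*
		 * e.g. assuming a 9000-byte MTU (~9018 byte frames), this works
		 * out to min_tx_space = 18KB and min_rx_space = 9KB.
		 */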
1686
1687		/* If current Tx allocation is less than the min Tx FIFO size,
1688		 * and the min Tx FIFO size is less than the current Rx FIFO
1689		 * allocation, take space away from current Rx allocation */
1690		if (tx_space < min_tx_space &&
1691		    ((min_tx_space - tx_space) < pba)) {
1692			pba = pba - (min_tx_space - tx_space);
1693
1694			/* if short on rx space, rx wins and must trump tx
1695			 * adjustment */
1696			if (pba < min_rx_space)
1697				pba = min_rx_space;
1698		}
1699		wr32(E1000_PBA, pba);
1700	}
1701
1702	/* flow control settings */
1703	/* The high water mark must be low enough to fit one full frame
1704	 * (or the size used for early receive) above it in the Rx FIFO.
1705	 * Set it to the lower of:
1706	 * - 90% of the Rx FIFO size, or
1707	 * - the full Rx FIFO size minus one full frame */
1708	hwm = min(((pba << 10) * 9 / 10),
1709			((pba << 10) - 2 * adapter->max_frame_size));
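	/*
	 * e.g. assuming a 34KB PBA and a ~1522 byte max frame:
	 * min(34816 * 9 / 10, 34816 - 2 * 1522) = min(31334, 31772) = 31334,
	 * which the 16-byte mask below rounds down to 31328.
	 */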
1710
1711	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1712	fc->low_water = fc->high_water - 16;
1713	fc->pause_time = 0xFFFF;
1714	fc->send_xon = 1;
1715	fc->current_mode = fc->requested_mode;
1716
1717	/* disable receive for all VFs and wait one second */
1718	if (adapter->vfs_allocated_count) {
1719		int i;
1720		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1721			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1722
1723		/* ping all the active vfs to let them know we are going down */
1724		igb_ping_all_vfs(adapter);
1725
1726		/* disable transmits and receives */
1727		wr32(E1000_VFRE, 0);
1728		wr32(E1000_VFTE, 0);
1729	}
1730
1731	/* Allow time for pending master requests to run */
1732	hw->mac.ops.reset_hw(hw);
1733	wr32(E1000_WUC, 0);
1734
1735	if (hw->mac.ops.init_hw(hw))
1736		dev_err(&pdev->dev, "Hardware Error\n");
1737
1738	/*
1739	 * Flow control settings reset on hardware reset, so guarantee flow
1740	 * control is off when forcing speed.
1741	 */
1742	if (!hw->mac.autoneg)
1743		igb_force_mac_fc(hw);
1744
1745	igb_init_dmac(adapter, pba);
1746	if (!netif_running(adapter->netdev))
1747		igb_power_down_link(adapter);
1748
1749	igb_update_mng_vlan(adapter);
1750
1751	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1752	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1753
1754	igb_get_phy_info(hw);
1755}
1756
1757static netdev_features_t igb_fix_features(struct net_device *netdev,
1758	netdev_features_t features)
1759{
1760	/*
1761	 * Since there is no support for separate Rx/Tx vlan accel enable/disable,
1762	 * make sure the Tx flag is always in the same state as the Rx flag.
1763	 */
1764	if (features & NETIF_F_HW_VLAN_RX)
1765		features |= NETIF_F_HW_VLAN_TX;
1766	else
1767		features &= ~NETIF_F_HW_VLAN_TX;
1768
1769	return features;
1770}
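/*
 * Example of the coupling enforced above: a request such as
 * "ethtool -K eth0 rxvlan off" (interface name is hypothetical) clears
 * NETIF_F_HW_VLAN_RX, and this callback then clears NETIF_F_HW_VLAN_TX as
 * well, so Rx and Tx VLAN acceleration are always toggled together.
 */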
1771
1772static int igb_set_features(struct net_device *netdev,
1773	netdev_features_t features)
1774{
1775	netdev_features_t changed = netdev->features ^ features;
1776	struct igb_adapter *adapter = netdev_priv(netdev);
1777
1778	if (changed & NETIF_F_HW_VLAN_RX)
1779		igb_vlan_mode(netdev, features);
1780
1781	if (!(changed & NETIF_F_RXALL))
1782		return 0;
1783
1784	netdev->features = features;
1785
1786	if (netif_running(netdev))
1787		igb_reinit_locked(adapter);
1788	else
1789		igb_reset(adapter);
1790
1791	return 0;
1792}
1793
1794static const struct net_device_ops igb_netdev_ops = {
1795	.ndo_open		= igb_open,
1796	.ndo_stop		= igb_close,
1797	.ndo_start_xmit		= igb_xmit_frame,
1798	.ndo_get_stats64	= igb_get_stats64,
1799	.ndo_set_rx_mode	= igb_set_rx_mode,
1800	.ndo_set_mac_address	= igb_set_mac,
1801	.ndo_change_mtu		= igb_change_mtu,
1802	.ndo_do_ioctl		= igb_ioctl,
1803	.ndo_tx_timeout		= igb_tx_timeout,
1804	.ndo_validate_addr	= eth_validate_addr,
1805	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1806	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1807	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1808	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1809	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1810	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1811#ifdef CONFIG_NET_POLL_CONTROLLER
1812	.ndo_poll_controller	= igb_netpoll,
1813#endif
1814	.ndo_fix_features	= igb_fix_features,
1815	.ndo_set_features	= igb_set_features,
1816};
1817
1818/**
1819 * igb_set_fw_version - Configure version string for ethtool
1820 * @adapter: adapter struct
1821 *
1822 **/
1823void igb_set_fw_version(struct igb_adapter *adapter)
1824{
1825	struct e1000_hw *hw = &adapter->hw;
1826	u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset;
1827	u16 major, build, patch, fw_version;
1828	u32 etrack_id;
1829
1830	hw->nvm.ops.read(hw, 5, 1, &fw_version);
1831	if (adapter->hw.mac.type != e1000_i211) {
1832		hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verh);
1833		hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verl);
1834		etrack_id = (eeprom_verh << IGB_ETRACK_SHIFT) | eeprom_verl;
1835
1836		/* combo image version needs to be found */
1837		hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
1838		if ((comb_offset != 0x0) &&
1839		    (comb_offset != IGB_NVM_VER_INVALID)) {
1840			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
1841					 + 1), 1, &comb_verh);
1842			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
1843					 1, &comb_verl);
1844
1845			/* Only display Option ROM if it exists and is valid */
1846			if ((comb_verh && comb_verl) &&
1847			    ((comb_verh != IGB_NVM_VER_INVALID) &&
1848			     (comb_verl != IGB_NVM_VER_INVALID))) {
1849				major = comb_verl >> IGB_COMB_VER_SHFT;
1850				build = (comb_verl << IGB_COMB_VER_SHFT) |
1851					(comb_verh >> IGB_COMB_VER_SHFT);
1852				patch = comb_verh & IGB_COMB_VER_MASK;
1853				snprintf(adapter->fw_version,
1854					 sizeof(adapter->fw_version),
1855					 "%d.%d%d, 0x%08x, %d.%d.%d",
1856					 (fw_version & IGB_MAJOR_MASK) >>
1857					 IGB_MAJOR_SHIFT,
1858					 (fw_version & IGB_MINOR_MASK) >>
1859					 IGB_MINOR_SHIFT,
1860					 (fw_version & IGB_BUILD_MASK),
1861					 etrack_id, major, build, patch);
1862				goto out;
1863			}
1864		}
1865		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1866			 "%d.%d%d, 0x%08x",
1867			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1868			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1869			 (fw_version & IGB_BUILD_MASK), etrack_id);
1870	} else {
1871		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
1872			 "%d.%d%d",
1873			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
1874			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
1875			 (fw_version & IGB_BUILD_MASK));
1876	}
1877out:
1878	return;
1879}
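/*
 * For illustration, the strings built above take one of the following
 * shapes (the numbers are made up, not read from any device):
 *
 *   "1.63, 0x80000d15, 1.949.2"  - EEPROM version, eTrack ID, combo image
 *   "1.63, 0x80000d15"           - EEPROM version and eTrack ID only
 *   "1.63"                       - i211 parts, EEPROM version only
 *
 * This is the string ethtool -i reports as the firmware version.
 */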
1880
1881/**
1882 * igb_probe - Device Initialization Routine
1883 * @pdev: PCI device information struct
1884 * @ent: entry in igb_pci_tbl
1885 *
1886 * Returns 0 on success, negative on failure
1887 *
1888 * igb_probe initializes an adapter identified by a pci_dev structure.
1889 * The OS initialization, configuring of the adapter private structure,
1890 * and a hardware reset occur.
1891 **/
1892static int __devinit igb_probe(struct pci_dev *pdev,
1893			       const struct pci_device_id *ent)
1894{
1895	struct net_device *netdev;
1896	struct igb_adapter *adapter;
1897	struct e1000_hw *hw;
1898	u16 eeprom_data = 0;
1899	s32 ret_val;
1900	static int global_quad_port_a; /* global quad port a indication */
1901	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1902	unsigned long mmio_start, mmio_len;
1903	int err, pci_using_dac;
1904	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1905	u8 part_str[E1000_PBANUM_LENGTH];
1906
1907	/* Catch broken hardware that put the wrong VF device ID in
1908	 * the PCIe SR-IOV capability.
1909	 */
1910	if (pdev->is_virtfn) {
1911		WARN(1, "%s (%hx:%hx) should not be a VF!\n",
1912			pci_name(pdev), pdev->vendor, pdev->device);
1913		return -EINVAL;
1914	}
1915
1916	err = pci_enable_device_mem(pdev);
1917	if (err)
1918		return err;
1919
1920	pci_using_dac = 0;
1921	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1922	if (!err) {
1923		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1924		if (!err)
1925			pci_using_dac = 1;
1926	} else {
1927		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1928		if (err) {
1929			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1930			if (err) {
1931				dev_err(&pdev->dev, "No usable DMA "
1932					"configuration, aborting\n");
1933				goto err_dma;
1934			}
1935		}
1936	}
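	/* For example, on a platform whose DMA layer cannot provide 64-bit
	 * addressing, the dma_set_mask(DMA_BIT_MASK(64)) call above fails,
	 * the driver falls back to a 32-bit mask, and pci_using_dac stays 0,
	 * so NETIF_F_HIGHDMA is not advertised later in probe.
	 */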
1937
1938	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1939	                                   IORESOURCE_MEM),
1940	                                   igb_driver_name);
1941	if (err)
1942		goto err_pci_reg;
1943
1944	pci_enable_pcie_error_reporting(pdev);
1945
1946	pci_set_master(pdev);
1947	pci_save_state(pdev);
1948
1949	err = -ENOMEM;
1950	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1951				   IGB_MAX_TX_QUEUES);
1952	if (!netdev)
1953		goto err_alloc_etherdev;
1954
1955	SET_NETDEV_DEV(netdev, &pdev->dev);
1956
1957	pci_set_drvdata(pdev, netdev);
1958	adapter = netdev_priv(netdev);
1959	adapter->netdev = netdev;
1960	adapter->pdev = pdev;
1961	hw = &adapter->hw;
1962	hw->back = adapter;
1963	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1964
1965	mmio_start = pci_resource_start(pdev, 0);
1966	mmio_len = pci_resource_len(pdev, 0);
1967
1968	err = -EIO;
1969	hw->hw_addr = ioremap(mmio_start, mmio_len);
1970	if (!hw->hw_addr)
1971		goto err_ioremap;
1972
1973	netdev->netdev_ops = &igb_netdev_ops;
1974	igb_set_ethtool_ops(netdev);
1975	netdev->watchdog_timeo = 5 * HZ;
1976
1977	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1978
1979	netdev->mem_start = mmio_start;
1980	netdev->mem_end = mmio_start + mmio_len;
1981
1982	/* PCI config space info */
1983	hw->vendor_id = pdev->vendor;
1984	hw->device_id = pdev->device;
1985	hw->revision_id = pdev->revision;
1986	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1987	hw->subsystem_device_id = pdev->subsystem_device;
1988
1989	/* Copy the default MAC, PHY and NVM function pointers */
1990	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1991	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1992	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1993	/* Initialize skew-specific constants */
1994	err = ei->get_invariants(hw);
1995	if (err)
1996		goto err_sw_init;
1997
1998	/* setup the private structure */
1999	err = igb_sw_init(adapter);
2000	if (err)
2001		goto err_sw_init;
2002
2003	igb_get_bus_info_pcie(hw);
2004
2005	hw->phy.autoneg_wait_to_complete = false;
2006
2007	/* Copper options */
2008	if (hw->phy.media_type == e1000_media_type_copper) {
2009		hw->phy.mdix = AUTO_ALL_MODES;
2010		hw->phy.disable_polarity_correction = false;
2011		hw->phy.ms_type = e1000_ms_hw_default;
2012	}
2013
2014	if (igb_check_reset_block(hw))
2015		dev_info(&pdev->dev,
2016			"PHY reset is blocked due to SOL/IDER session.\n");
2017
2018	/*
2019	 * features is initialized to 0 on allocation, but it might already have
2020	 * bits set by igb_sw_init, so we should use an OR instead of an
2021	 * assignment.
2022	 */
2023	netdev->features |= NETIF_F_SG |
2024			    NETIF_F_IP_CSUM |
2025			    NETIF_F_IPV6_CSUM |
2026			    NETIF_F_TSO |
2027			    NETIF_F_TSO6 |
2028			    NETIF_F_RXHASH |
2029			    NETIF_F_RXCSUM |
2030			    NETIF_F_HW_VLAN_RX |
2031			    NETIF_F_HW_VLAN_TX;
2032
2033	/* copy netdev features into list of user selectable features */
2034	netdev->hw_features |= netdev->features;
2035	netdev->hw_features |= NETIF_F_RXALL;
2036
2037	/* set this bit last since it cannot be part of hw_features */
2038	netdev->features |= NETIF_F_HW_VLAN_FILTER;
2039
2040	netdev->vlan_features |= NETIF_F_TSO |
2041				 NETIF_F_TSO6 |
2042				 NETIF_F_IP_CSUM |
2043				 NETIF_F_IPV6_CSUM |
2044				 NETIF_F_SG;
2045
2046	netdev->priv_flags |= IFF_SUPP_NOFCS;
2047
2048	if (pci_using_dac) {
2049		netdev->features |= NETIF_F_HIGHDMA;
2050		netdev->vlan_features |= NETIF_F_HIGHDMA;
2051	}
2052
2053	if (hw->mac.type >= e1000_82576) {
2054		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2055		netdev->features |= NETIF_F_SCTP_CSUM;
2056	}
2057
2058	netdev->priv_flags |= IFF_UNICAST_FLT;
2059
2060	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2061
2062	/* before reading the NVM, reset the controller to put the device in a
2063	 * known good starting state */
2064	hw->mac.ops.reset_hw(hw);
2065
2066	/*
2067	 * make sure the NVM is good; i211 parts have special NVM that
2068	 * doesn't contain a checksum
2069	 */
2070	if (hw->mac.type != e1000_i211) {
2071		if (hw->nvm.ops.validate(hw) < 0) {
2072			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2073			err = -EIO;
2074			goto err_eeprom;
2075		}
2076	}
2077
2078	/* copy the MAC address out of the NVM */
2079	if (hw->mac.ops.read_mac_addr(hw))
2080		dev_err(&pdev->dev, "NVM Read Error\n");
2081
2082	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2083	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2084
2085	if (!is_valid_ether_addr(netdev->perm_addr)) {
2086		dev_err(&pdev->dev, "Invalid MAC Address\n");
2087		err = -EIO;
2088		goto err_eeprom;
2089	}
2090
2091	/* get firmware version for ethtool -i */
2092	igb_set_fw_version(adapter);
2093
2094	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2095	            (unsigned long) adapter);
2096	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2097	            (unsigned long) adapter);
2098
2099	INIT_WORK(&adapter->reset_task, igb_reset_task);
2100	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2101
2102	/* Initialize link properties that are user-changeable */
2103	adapter->fc_autoneg = true;
2104	hw->mac.autoneg = true;
2105	hw->phy.autoneg_advertised = 0x2f;
2106
2107	hw->fc.requested_mode = e1000_fc_default;
2108	hw->fc.current_mode = e1000_fc_default;
2109
2110	igb_validate_mdi_setting(hw);
2111
2112	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2113	 * enable the ACPI Magic Packet filter
2114	 */
2115
2116	if (hw->bus.func == 0)
2117		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2118	else if (hw->mac.type >= e1000_82580)
2119		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2120		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2121		                 &eeprom_data);
2122	else if (hw->bus.func == 1)
2123		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2124
2125	if (eeprom_data & eeprom_apme_mask)
2126		adapter->eeprom_wol |= E1000_WUFC_MAG;
2127
2128	/* now that we have the eeprom settings, apply the special cases where
2129	 * the eeprom may be wrong or the board simply won't support wake on
2130	 * lan on a particular port */
2131	switch (pdev->device) {
2132	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2133		adapter->eeprom_wol = 0;
2134		break;
2135	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2136	case E1000_DEV_ID_82576_FIBER:
2137	case E1000_DEV_ID_82576_SERDES:
2138		/* Wake events only supported on port A for dual fiber
2139		 * regardless of eeprom setting */
2140		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2141			adapter->eeprom_wol = 0;
2142		break;
2143	case E1000_DEV_ID_82576_QUAD_COPPER:
2144	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2145		/* if quad port adapter, disable WoL on all but port A */
2146		if (global_quad_port_a != 0)
2147			adapter->eeprom_wol = 0;
2148		else
2149			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2150		/* Reset for multiple quad port adapters */
2151		if (++global_quad_port_a == 4)
2152			global_quad_port_a = 0;
2153		break;
2154	}
2155
2156	/* initialize the wol settings based on the eeprom settings */
2157	adapter->wol = adapter->eeprom_wol;
2158	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2159
2160	/* reset the hardware with the new settings */
2161	igb_reset(adapter);
2162
2163	/* let the f/w know that the h/w is now under the control of the
2164	 * driver. */
2165	igb_get_hw_control(adapter);
2166
2167	strcpy(netdev->name, "eth%d");
2168	err = register_netdev(netdev);
2169	if (err)
2170		goto err_register;
2171
2172	/* carrier off reporting is important to ethtool even BEFORE open */
2173	netif_carrier_off(netdev);
2174
2175#ifdef CONFIG_IGB_DCA
2176	if (dca_add_requester(&pdev->dev) == 0) {
2177		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2178		dev_info(&pdev->dev, "DCA enabled\n");
2179		igb_setup_dca(adapter);
2180	}
2181
2182#endif
2183#ifdef CONFIG_IGB_PTP
2184	/* do hw tstamp init after resetting */
2185	igb_ptp_init(adapter);
2186
2187#endif
2188	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2189	/* print bus type/speed/width info */
2190	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2191		 netdev->name,
2192		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2193		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2194		                                            "unknown"),
2195		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2196		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2197		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2198		   "unknown"),
2199		 netdev->dev_addr);
2200
2201	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2202	if (ret_val)
2203		strcpy(part_str, "Unknown");
2204	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2205	dev_info(&pdev->dev,
2206		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2207		adapter->msix_entries ? "MSI-X" :
2208		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2209		adapter->num_rx_queues, adapter->num_tx_queues);
2210	switch (hw->mac.type) {
2211	case e1000_i350:
2212	case e1000_i210:
2213	case e1000_i211:
2214		igb_set_eee_i350(hw);
2215		break;
2216	default:
2217		break;
2218	}
2219
2220	pm_runtime_put_noidle(&pdev->dev);
2221	return 0;
2222
2223err_register:
2224	igb_release_hw_control(adapter);
2225err_eeprom:
2226	if (!igb_check_reset_block(hw))
2227		igb_reset_phy(hw);
2228
2229	if (hw->flash_address)
2230		iounmap(hw->flash_address);
2231err_sw_init:
2232	igb_clear_interrupt_scheme(adapter);
2233	iounmap(hw->hw_addr);
2234err_ioremap:
2235	free_netdev(netdev);
2236err_alloc_etherdev:
2237	pci_release_selected_regions(pdev,
2238	                             pci_select_bars(pdev, IORESOURCE_MEM));
2239err_pci_reg:
2240err_dma:
2241	pci_disable_device(pdev);
2242	return err;
2243}
2244
2245/**
2246 * igb_remove - Device Removal Routine
2247 * @pdev: PCI device information struct
2248 *
2249 * igb_remove is called by the PCI subsystem to alert the driver
2250 * that it should release a PCI device.  This could be caused by a
2251 * Hot-Plug event, or because the driver is going to be removed from
2252 * memory.
2253 **/
2254static void __devexit igb_remove(struct pci_dev *pdev)
2255{
2256	struct net_device *netdev = pci_get_drvdata(pdev);
2257	struct igb_adapter *adapter = netdev_priv(netdev);
2258	struct e1000_hw *hw = &adapter->hw;
2259
2260	pm_runtime_get_noresume(&pdev->dev);
2261#ifdef CONFIG_IGB_PTP
2262	igb_ptp_remove(adapter);
2263
2264#endif
2265	/*
2266	 * The watchdog timer may be rescheduled, so explicitly
2267	 * prevent it from being rescheduled.
2268	 */
2269	set_bit(__IGB_DOWN, &adapter->state);
2270	del_timer_sync(&adapter->watchdog_timer);
2271	del_timer_sync(&adapter->phy_info_timer);
2272
2273	cancel_work_sync(&adapter->reset_task);
2274	cancel_work_sync(&adapter->watchdog_task);
2275
2276#ifdef CONFIG_IGB_DCA
2277	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2278		dev_info(&pdev->dev, "DCA disabled\n");
2279		dca_remove_requester(&pdev->dev);
2280		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2281		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2282	}
2283#endif
2284
2285	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2286	 * would have already happened in close and is redundant. */
2287	igb_release_hw_control(adapter);
2288
2289	unregister_netdev(netdev);
2290
2291	igb_clear_interrupt_scheme(adapter);
2292
2293#ifdef CONFIG_PCI_IOV
2294	/* reclaim resources allocated to VFs */
2295	if (adapter->vf_data) {
2296		/* disable iov and allow time for transactions to clear */
2297		if (!igb_check_vf_assignment(adapter)) {
2298			pci_disable_sriov(pdev);
2299			msleep(500);
2300		} else {
2301			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2302		}
2303
2304		kfree(adapter->vf_data);
2305		adapter->vf_data = NULL;
2306		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2307		wrfl();
2308		msleep(100);
2309		dev_info(&pdev->dev, "IOV Disabled\n");
2310	}
2311#endif
2312
2313	iounmap(hw->hw_addr);
2314	if (hw->flash_address)
2315		iounmap(hw->flash_address);
2316	pci_release_selected_regions(pdev,
2317	                             pci_select_bars(pdev, IORESOURCE_MEM));
2318
2319	kfree(adapter->shadow_vfta);
2320	free_netdev(netdev);
2321
2322	pci_disable_pcie_error_reporting(pdev);
2323
2324	pci_disable_device(pdev);
2325}
2326
2327/**
2328 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2329 * @adapter: board private structure to initialize
2330 *
2331 * This function initializes the vf specific data storage and then attempts to
2332 * allocate the VFs.  The reason for ordering it this way is because it is much
2333 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2334 * the memory for the VFs.
2335 **/
2336static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2337{
2338#ifdef CONFIG_PCI_IOV
2339	struct pci_dev *pdev = adapter->pdev;
2340	struct e1000_hw *hw = &adapter->hw;
2341	int old_vfs = igb_find_enabled_vfs(adapter);
2342	int i;
2343
2344	/* Virtualization features not supported on i210 family. */
2345	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2346		return;
2347
2348	if (old_vfs) {
2349		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2350			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2351		adapter->vfs_allocated_count = old_vfs;
2352	}
2353
2354	if (!adapter->vfs_allocated_count)
2355		return;
2356
2357	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2358				sizeof(struct vf_data_storage), GFP_KERNEL);
2359
2360	/* if allocation failed then we do not support SR-IOV */
2361	if (!adapter->vf_data) {
2362		adapter->vfs_allocated_count = 0;
2363		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2364			"Data Storage\n");
2365		goto out;
2366	}
2367
2368	if (!old_vfs) {
2369		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2370			goto err_out;
2371	}
2372	dev_info(&pdev->dev, "%d VFs allocated\n",
2373		 adapter->vfs_allocated_count);
2374	for (i = 0; i < adapter->vfs_allocated_count; i++)
2375		igb_vf_configure(adapter, i);
2376
2377	/* DMA Coalescing is not supported in IOV mode. */
2378	adapter->flags &= ~IGB_FLAG_DMAC;
2379	goto out;
2380err_out:
2381	kfree(adapter->vf_data);
2382	adapter->vf_data = NULL;
2383	adapter->vfs_allocated_count = 0;
2384out:
2385	return;
2386#endif /* CONFIG_PCI_IOV */
2387}
2388
2389/**
2390 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2391 * @adapter: board private structure to initialize
2392 *
2393 * igb_sw_init initializes the Adapter private data structure.
2394 * Fields are initialized based on PCI device information and
2395 * OS network device settings (MTU size).
2396 **/
2397static int __devinit igb_sw_init(struct igb_adapter *adapter)
2398{
2399	struct e1000_hw *hw = &adapter->hw;
2400	struct net_device *netdev = adapter->netdev;
2401	struct pci_dev *pdev = adapter->pdev;
2402	u32 max_rss_queues;
2403
2404	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2405
2406	/* set default ring sizes */
2407	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2408	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2409
2410	/* set default ITR values */
2411	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2412	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2413
2414	/* set default work limits */
2415	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2416
2417	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2418				  VLAN_HLEN;
2419	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
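	/* Worked example: with the default 1500-byte MTU this gives
	 * max_frame_size = 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) +
	 * 4 (VLAN_HLEN) = 1522 bytes, and min_frame_size = 60 (ETH_ZLEN) +
	 * 4 = 64 bytes.
	 */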
2420
2421	adapter->node = -1;
2422
2423	spin_lock_init(&adapter->stats64_lock);
2424#ifdef CONFIG_PCI_IOV
2425	switch (hw->mac.type) {
2426	case e1000_82576:
2427	case e1000_i350:
2428		if (max_vfs > 7) {
2429			dev_warn(&pdev->dev,
2430				 "Maximum of 7 VFs per PF, using max\n");
2431			adapter->vfs_allocated_count = 7;
2432		} else
2433			adapter->vfs_allocated_count = max_vfs;
2434		break;
2435	default:
2436		break;
2437	}
2438#endif /* CONFIG_PCI_IOV */
2439
2440	/* Determine the maximum number of RSS queues supported. */
2441	switch (hw->mac.type) {
2442	case e1000_i211:
2443		max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2444		break;
2445	case e1000_82575:
2446	case e1000_i210:
2447		max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2448		break;
2449	case e1000_i350:
2450		/* I350 cannot do RSS and SR-IOV at the same time */
2451		if (!!adapter->vfs_allocated_count) {
2452			max_rss_queues = 1;
2453			break;
2454		}
2455		/* fall through */
2456	case e1000_82576:
2457		if (!!adapter->vfs_allocated_count) {
2458			max_rss_queues = 2;
2459			break;
2460		}
2461		/* fall through */
2462	case e1000_82580:
2463	default:
2464		max_rss_queues = IGB_MAX_RX_QUEUES;
2465		break;
2466	}
2467
2468	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
2469
2470	/* Determine if we need to pair queues. */
2471	switch (hw->mac.type) {
2472	case e1000_82575:
2473	case e1000_i211:
2474		/* Device supports enough interrupts without queue pairing. */
2475		break;
2476	case e1000_82576:
2477		/*
2478		 * If VFs are going to be allocated with RSS queues then we
2479		 * should pair the queues in order to conserve interrupts due
2480		 * to limited supply.
2481		 */
2482		if ((adapter->rss_queues > 1) &&
2483		    (adapter->vfs_allocated_count > 6))
2484			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2485		/* fall through */
2486	case e1000_82580:
2487	case e1000_i350:
2488	case e1000_i210:
2489	default:
2490		/*
2491		 * If rss_queues > half of max_rss_queues, pair the queues in
2492		 * order to conserve interrupts due to limited supply.
2493		 */
2494		if (adapter->rss_queues > (max_rss_queues / 2))
2495			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2496		break;
2497	}
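	/* For example (hypothetical counts, not queried from hardware): if
	 * max_rss_queues were 8 and the system had 16 online CPUs,
	 * rss_queues would be 8, which is more than max_rss_queues / 2, so
	 * IGB_FLAG_QUEUE_PAIRS would be set and each Tx ring would share an
	 * interrupt vector with its Rx counterpart.
	 */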
2498
2499	/* Setup and initialize a copy of the hw vlan table array */
2500	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2501				E1000_VLAN_FILTER_TBL_SIZE,
2502				GFP_ATOMIC);
2503
2504	/* This call may decrease the number of queues */
2505	if (igb_init_interrupt_scheme(adapter)) {
2506		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2507		return -ENOMEM;
2508	}
2509
2510	igb_probe_vfs(adapter);
2511
2512	/* Explicitly disable IRQ since the NIC can be in any state. */
2513	igb_irq_disable(adapter);
2514
2515	if (hw->mac.type >= e1000_i350)
2516		adapter->flags &= ~IGB_FLAG_DMAC;
2517
2518	set_bit(__IGB_DOWN, &adapter->state);
2519	return 0;
2520}
2521
2522/**
2523 * igb_open - Called when a network interface is made active
2524 * @netdev: network interface device structure
2525 *
2526 * Returns 0 on success, negative value on failure
2527 *
2528 * The open entry point is called when a network interface is made
2529 * active by the system (IFF_UP).  At this point all resources needed
2530 * for transmit and receive operations are allocated, the interrupt
2531 * handler is registered with the OS, the watchdog timer is started,
2532 * and the stack is notified that the interface is ready.
2533 **/
2534static int __igb_open(struct net_device *netdev, bool resuming)
2535{
2536	struct igb_adapter *adapter = netdev_priv(netdev);
2537	struct e1000_hw *hw = &adapter->hw;
2538	struct pci_dev *pdev = adapter->pdev;
2539	int err;
2540	int i;
2541
2542	/* disallow open during test */
2543	if (test_bit(__IGB_TESTING, &adapter->state)) {
2544		WARN_ON(resuming);
2545		return -EBUSY;
2546	}
2547
2548	if (!resuming)
2549		pm_runtime_get_sync(&pdev->dev);
2550
2551	netif_carrier_off(netdev);
2552
2553	/* allocate transmit descriptors */
2554	err = igb_setup_all_tx_resources(adapter);
2555	if (err)
2556		goto err_setup_tx;
2557
2558	/* allocate receive descriptors */
2559	err = igb_setup_all_rx_resources(adapter);
2560	if (err)
2561		goto err_setup_rx;
2562
2563	igb_power_up_link(adapter);
2564
2565	/* before we allocate an interrupt, we must be ready to handle it.
2566	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2567	 * as soon as we call igb_request_irq, so we have to setup our
2568	 * clean_rx handler before we do so.  */
2569	igb_configure(adapter);
2570
2571	err = igb_request_irq(adapter);
2572	if (err)
2573		goto err_req_irq;
2574
2575	/* From here on the code is the same as igb_up() */
2576	clear_bit(__IGB_DOWN, &adapter->state);
2577
2578	for (i = 0; i < adapter->num_q_vectors; i++)
2579		napi_enable(&(adapter->q_vector[i]->napi));
2580
2581	/* Clear any pending interrupts. */
2582	rd32(E1000_ICR);
2583
2584	igb_irq_enable(adapter);
2585
2586	/* notify VFs that reset has been completed */
2587	if (adapter->vfs_allocated_count) {
2588		u32 reg_data = rd32(E1000_CTRL_EXT);
2589		reg_data |= E1000_CTRL_EXT_PFRSTD;
2590		wr32(E1000_CTRL_EXT, reg_data);
2591	}
2592
2593	netif_tx_start_all_queues(netdev);
2594
2595	if (!resuming)
2596		pm_runtime_put(&pdev->dev);
2597
2598	/* start the watchdog. */
2599	hw->mac.get_link_status = 1;
2600	schedule_work(&adapter->watchdog_task);
2601
2602	return 0;
2603
2604err_req_irq:
2605	igb_release_hw_control(adapter);
2606	igb_power_down_link(adapter);
2607	igb_free_all_rx_resources(adapter);
2608err_setup_rx:
2609	igb_free_all_tx_resources(adapter);
2610err_setup_tx:
2611	igb_reset(adapter);
2612	if (!resuming)
2613		pm_runtime_put(&pdev->dev);
2614
2615	return err;
2616}
2617
2618static int igb_open(struct net_device *netdev)
2619{
2620	return __igb_open(netdev, false);
2621}
2622
2623/**
2624 * igb_close - Disables a network interface
2625 * @netdev: network interface device structure
2626 *
2627 * Returns 0, this is not allowed to fail
2628 *
2629 * The close entry point is called when an interface is de-activated
2630 * by the OS.  The hardware is still under the driver's control, but
2631 * needs to be disabled.  A global MAC reset is issued to stop the
2632 * hardware, and all transmit and receive resources are freed.
2633 **/
2634static int __igb_close(struct net_device *netdev, bool suspending)
2635{
2636	struct igb_adapter *adapter = netdev_priv(netdev);
2637	struct pci_dev *pdev = adapter->pdev;
2638
2639	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2640
2641	if (!suspending)
2642		pm_runtime_get_sync(&pdev->dev);
2643
2644	igb_down(adapter);
2645	igb_free_irq(adapter);
2646
2647	igb_free_all_tx_resources(adapter);
2648	igb_free_all_rx_resources(adapter);
2649
2650	if (!suspending)
2651		pm_runtime_put_sync(&pdev->dev);
2652	return 0;
2653}
2654
2655static int igb_close(struct net_device *netdev)
2656{
2657	return __igb_close(netdev, false);
2658}
2659
2660/**
2661 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2662 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2663 *
2664 * Return 0 on success, negative on failure
2665 **/
2666int igb_setup_tx_resources(struct igb_ring *tx_ring)
2667{
2668	struct device *dev = tx_ring->dev;
2669	int orig_node = dev_to_node(dev);
2670	int size;
2671
2672	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2673	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2674	if (!tx_ring->tx_buffer_info)
2675		tx_ring->tx_buffer_info = vzalloc(size);
2676	if (!tx_ring->tx_buffer_info)
2677		goto err;
2678
2679	/* round up to nearest 4K */
2680	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2681	tx_ring->size = ALIGN(tx_ring->size, 4096);
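	/* For example, a 256-descriptor ring needs 256 * 16 bytes = 4096
	 * bytes (each advanced Tx descriptor is 16 bytes), which is already
	 * 4K aligned; a 320-descriptor ring would need 5120 bytes and be
	 * rounded up to 8192.
	 */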
2682
2683	set_dev_node(dev, tx_ring->numa_node);
2684	tx_ring->desc = dma_alloc_coherent(dev,
2685					   tx_ring->size,
2686					   &tx_ring->dma,
2687					   GFP_KERNEL);
2688	set_dev_node(dev, orig_node);
2689	if (!tx_ring->desc)
2690		tx_ring->desc = dma_alloc_coherent(dev,
2691						   tx_ring->size,
2692						   &tx_ring->dma,
2693						   GFP_KERNEL);
2694
2695	if (!tx_ring->desc)
2696		goto err;
2697
2698	tx_ring->next_to_use = 0;
2699	tx_ring->next_to_clean = 0;
2700
2701	return 0;
2702
2703err:
2704	vfree(tx_ring->tx_buffer_info);
2705	dev_err(dev,
2706		"Unable to allocate memory for the transmit descriptor ring\n");
2707	return -ENOMEM;
2708}
2709
2710/**
2711 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2712 *				  (Descriptors) for all queues
2713 * @adapter: board private structure
2714 *
2715 * Return 0 on success, negative on failure
2716 **/
2717static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2718{
2719	struct pci_dev *pdev = adapter->pdev;
2720	int i, err = 0;
2721
2722	for (i = 0; i < adapter->num_tx_queues; i++) {
2723		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2724		if (err) {
2725			dev_err(&pdev->dev,
2726				"Allocation for Tx Queue %u failed\n", i);
2727			for (i--; i >= 0; i--)
2728				igb_free_tx_resources(adapter->tx_ring[i]);
2729			break;
2730		}
2731	}
2732
2733	return err;
2734}
2735
2736/**
2737 * igb_setup_tctl - configure the transmit control registers
2738 * @adapter: Board private structure
2739 **/
2740void igb_setup_tctl(struct igb_adapter *adapter)
2741{
2742	struct e1000_hw *hw = &adapter->hw;
2743	u32 tctl;
2744
2745	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2746	wr32(E1000_TXDCTL(0), 0);
2747
2748	/* Program the Transmit Control Register */
2749	tctl = rd32(E1000_TCTL);
2750	tctl &= ~E1000_TCTL_CT;
2751	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2752		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2753
2754	igb_config_collision_dist(hw);
2755
2756	/* Enable transmits */
2757	tctl |= E1000_TCTL_EN;
2758
2759	wr32(E1000_TCTL, tctl);
2760}
2761
2762/**
2763 * igb_configure_tx_ring - Configure transmit ring after Reset
2764 * @adapter: board private structure
2765 * @ring: tx ring to configure
2766 *
2767 * Configure a transmit ring after a reset.
2768 **/
2769void igb_configure_tx_ring(struct igb_adapter *adapter,
2770                           struct igb_ring *ring)
2771{
2772	struct e1000_hw *hw = &adapter->hw;
2773	u32 txdctl = 0;
2774	u64 tdba = ring->dma;
2775	int reg_idx = ring->reg_idx;
2776
2777	/* disable the queue */
2778	wr32(E1000_TXDCTL(reg_idx), 0);
2779	wrfl();
2780	mdelay(10);
2781
2782	wr32(E1000_TDLEN(reg_idx),
2783	                ring->count * sizeof(union e1000_adv_tx_desc));
2784	wr32(E1000_TDBAL(reg_idx),
2785	                tdba & 0x00000000ffffffffULL);
2786	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2787
2788	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2789	wr32(E1000_TDH(reg_idx), 0);
2790	writel(0, ring->tail);
2791
2792	txdctl |= IGB_TX_PTHRESH;
2793	txdctl |= IGB_TX_HTHRESH << 8;
2794	txdctl |= IGB_TX_WTHRESH << 16;
2795
2796	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2797	wr32(E1000_TXDCTL(reg_idx), txdctl);
2798}
2799
2800/**
2801 * igb_configure_tx - Configure transmit Unit after Reset
2802 * @adapter: board private structure
2803 *
2804 * Configure the Tx unit of the MAC after a reset.
2805 **/
2806static void igb_configure_tx(struct igb_adapter *adapter)
2807{
2808	int i;
2809
2810	for (i = 0; i < adapter->num_tx_queues; i++)
2811		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2812}
2813
2814/**
2815 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2816 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2817 *
2818 * Returns 0 on success, negative on failure
2819 **/
2820int igb_setup_rx_resources(struct igb_ring *rx_ring)
2821{
2822	struct device *dev = rx_ring->dev;
2823	int orig_node = dev_to_node(dev);
2824	int size, desc_len;
2825
2826	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2827	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2828	if (!rx_ring->rx_buffer_info)
2829		rx_ring->rx_buffer_info = vzalloc(size);
2830	if (!rx_ring->rx_buffer_info)
2831		goto err;
2832
2833	desc_len = sizeof(union e1000_adv_rx_desc);
2834
2835	/* Round up to nearest 4K */
2836	rx_ring->size = rx_ring->count * desc_len;
2837	rx_ring->size = ALIGN(rx_ring->size, 4096);
2838
2839	set_dev_node(dev, rx_ring->numa_node);
2840	rx_ring->desc = dma_alloc_coherent(dev,
2841					   rx_ring->size,
2842					   &rx_ring->dma,
2843					   GFP_KERNEL);
2844	set_dev_node(dev, orig_node);
2845	if (!rx_ring->desc)
2846		rx_ring->desc = dma_alloc_coherent(dev,
2847						   rx_ring->size,
2848						   &rx_ring->dma,
2849						   GFP_KERNEL);
2850
2851	if (!rx_ring->desc)
2852		goto err;
2853
2854	rx_ring->next_to_clean = 0;
2855	rx_ring->next_to_use = 0;
2856
2857	return 0;
2858
2859err:
2860	vfree(rx_ring->rx_buffer_info);
2861	rx_ring->rx_buffer_info = NULL;
2862	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2863		" ring\n");
2864	return -ENOMEM;
2865}
2866
2867/**
2868 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2869 *				  (Descriptors) for all queues
2870 * @adapter: board private structure
2871 *
2872 * Return 0 on success, negative on failure
2873 **/
2874static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2875{
2876	struct pci_dev *pdev = adapter->pdev;
2877	int i, err = 0;
2878
2879	for (i = 0; i < adapter->num_rx_queues; i++) {
2880		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2881		if (err) {
2882			dev_err(&pdev->dev,
2883				"Allocation for Rx Queue %u failed\n", i);
2884			for (i--; i >= 0; i--)
2885				igb_free_rx_resources(adapter->rx_ring[i]);
2886			break;
2887		}
2888	}
2889
2890	return err;
2891}
2892
2893/**
2894 * igb_setup_mrqc - configure the multiple receive queue control registers
2895 * @adapter: Board private structure
2896 **/
2897static void igb_setup_mrqc(struct igb_adapter *adapter)
2898{
2899	struct e1000_hw *hw = &adapter->hw;
2900	u32 mrqc, rxcsum;
2901	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2902	union e1000_reta {
2903		u32 dword;
2904		u8  bytes[4];
2905	} reta;
2906	static const u8 rsshash[40] = {
2907		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2908		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2909		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2910		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2911
2912	/* Fill out hash function seeds */
2913	for (j = 0; j < 10; j++) {
2914		u32 rsskey = rsshash[(j * 4)];
2915		rsskey |= rsshash[(j * 4) + 1] << 8;
2916		rsskey |= rsshash[(j * 4) + 2] << 16;
2917		rsskey |= rsshash[(j * 4) + 3] << 24;
2918		array_wr32(E1000_RSSRK(0), j, rsskey);
2919	}
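	/* Worked example of the packing above: the first RSSRK word is built
	 * from rsshash[0..3] = 0x6d, 0x5a, 0x56, 0xda in little-endian byte
	 * order, i.e. the value written is 0xda565a6d.
	 */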
2920
2921	num_rx_queues = adapter->rss_queues;
2922
2923	if (adapter->vfs_allocated_count) {
2924		/* 82575 and 82576 support 2 RSS queues for VMDq */
2925		switch (hw->mac.type) {
2926		case e1000_i350:
2927		case e1000_82580:
2928			num_rx_queues = 1;
2929			shift = 0;
2930			break;
2931		case e1000_82576:
2932			shift = 3;
2933			num_rx_queues = 2;
2934			break;
2935		case e1000_82575:
2936			shift = 2;
2937			shift2 = 6;
2938		default:
2939			break;
2940		}
2941	} else {
2942		if (hw->mac.type == e1000_82575)
2943			shift = 6;
2944	}
2945
2946	for (j = 0; j < (32 * 4); j++) {
2947		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2948		if (shift2)
2949			reta.bytes[j & 3] |= num_rx_queues << shift2;
2950		if ((j & 3) == 3)
2951			wr32(E1000_RETA(j >> 2), reta.dword);
2952	}
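	/* Worked example of the redirection table fill above: with
	 * num_rx_queues = 4 and shift = 0 (no VFs, non-82575 MAC), the 128
	 * RETA entries simply cycle 0, 1, 2, 3, 0, 1, ... so hashed flows
	 * are spread evenly across the four RSS queues.
	 */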
2953
2954	/*
2955	 * Disable raw packet checksumming so that RSS hash is placed in
2956	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2957	 * offloads as they are enabled by default
2958	 */
2959	rxcsum = rd32(E1000_RXCSUM);
2960	rxcsum |= E1000_RXCSUM_PCSD;
2961
2962	if (adapter->hw.mac.type >= e1000_82576)
2963		/* Enable Receive Checksum Offload for SCTP */
2964		rxcsum |= E1000_RXCSUM_CRCOFL;
2965
2966	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2967	wr32(E1000_RXCSUM, rxcsum);
2968	/*
2969	 * Generate RSS hash based on TCP port numbers and/or
2970	 * IPv4/v6 src and dst addresses since UDP cannot be
2971	 * hashed reliably due to IP fragmentation
2972	 */
2973
2974	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2975	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
2976	       E1000_MRQC_RSS_FIELD_IPV6 |
2977	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
2978	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2979
2980	/* If VMDq is enabled then we set the appropriate mode for that, else
2981	 * we default to RSS so that an RSS hash is calculated per packet even
2982	 * if we are only using one queue */
2983	if (adapter->vfs_allocated_count) {
2984		if (hw->mac.type > e1000_82575) {
2985			/* Set the default pool for the PF's first queue */
2986			u32 vtctl = rd32(E1000_VT_CTL);
2987			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2988				   E1000_VT_CTL_DISABLE_DEF_POOL);
2989			vtctl |= adapter->vfs_allocated_count <<
2990				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2991			wr32(E1000_VT_CTL, vtctl);
2992		}
2993		if (adapter->rss_queues > 1)
2994			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2995		else
2996			mrqc |= E1000_MRQC_ENABLE_VMDQ;
2997	} else {
2998		if (hw->mac.type != e1000_i211)
2999			mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
3000	}
3001	igb_vmm_control(adapter);
3002
3003	wr32(E1000_MRQC, mrqc);
3004}
3005
3006/**
3007 * igb_setup_rctl - configure the receive control registers
3008 * @adapter: Board private structure
3009 **/
3010void igb_setup_rctl(struct igb_adapter *adapter)
3011{
3012	struct e1000_hw *hw = &adapter->hw;
3013	u32 rctl;
3014
3015	rctl = rd32(E1000_RCTL);
3016
3017	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3018	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3019
3020	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3021		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3022
3023	/*
3024	 * enable stripping of CRC. It's unlikely this will break BMC
3025	 * redirection as it did with e1000. Newer features require
3026	 * that the HW strips the CRC.
3027	 */
3028	rctl |= E1000_RCTL_SECRC;
3029
3030	/* disable store bad packets and clear size bits. */
3031	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3032
3033	/* enable LPE to prevent packets larger than max_frame_size */
3034	rctl |= E1000_RCTL_LPE;
3035
3036	/* disable queue 0 to prevent tail write w/o re-config */
3037	wr32(E1000_RXDCTL(0), 0);
3038
3039	/* Attention!!!  For SR-IOV PF driver operations you must enable
3040	 * queue drop for all VF and PF queues to prevent head of line blocking
3041	 * if an un-trusted VF does not provide descriptors to hardware.
3042	 */
3043	if (adapter->vfs_allocated_count) {
3044		/* set all queue drop enable bits */
3045		wr32(E1000_QDE, ALL_QUEUES);
3046	}
3047
3048	/* This is useful for sniffing bad packets. */
3049	if (adapter->netdev->features & NETIF_F_RXALL) {
3050		/* UPE and MPE will be handled by normal PROMISC logic
3051		 * in igb_set_rx_mode */
3052		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3053			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3054			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3055
3056		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3057			  E1000_RCTL_DPF | /* Allow filtered pause */
3058			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3059		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3060		 * and that breaks VLANs.
3061		 */
3062	}
3063
3064	wr32(E1000_RCTL, rctl);
3065}
3066
3067static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3068                                   int vfn)
3069{
3070	struct e1000_hw *hw = &adapter->hw;
3071	u32 vmolr;
3072
3073	/* if it isn't the PF, check to see if VFs are enabled and
3074	 * increase the size to support vlan tags */
3075	if (vfn < adapter->vfs_allocated_count &&
3076	    adapter->vf_data[vfn].vlans_enabled)
3077		size += VLAN_TAG_SIZE;
3078
3079	vmolr = rd32(E1000_VMOLR(vfn));
3080	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3081	vmolr |= size | E1000_VMOLR_LPE;
3082	wr32(E1000_VMOLR(vfn), vmolr);
3083
3084	return 0;
3085}
3086
3087/**
3088 * igb_rlpml_set - set maximum receive packet size
3089 * @adapter: board private structure
3090 *
3091 * Configure maximum receivable packet size.
3092 **/
3093static void igb_rlpml_set(struct igb_adapter *adapter)
3094{
3095	u32 max_frame_size = adapter->max_frame_size;
3096	struct e1000_hw *hw = &adapter->hw;
3097	u16 pf_id = adapter->vfs_allocated_count;
3098
3099	if (pf_id) {
3100		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3101		/*
3102		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3103		 * to our max jumbo frame size, in case we need to enable
3104		 * jumbo frames on one of the rings later.
3105		 * This will not pass over-length frames into the default
3106		 * queue because it's gated by the VMOLR.RLPML.
3107		 */
3108		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3109	}
3110
3111	wr32(E1000_RLPML, max_frame_size);
3112}
3113
3114static inline void igb_set_vmolr(struct igb_adapter *adapter,
3115				 int vfn, bool aupe)
3116{
3117	struct e1000_hw *hw = &adapter->hw;
3118	u32 vmolr;
3119
3120	/*
3121	 * This register exists only on 82576 and newer, so on older MACs
3122	 * we should exit and do nothing
3123	 */
3124	if (hw->mac.type < e1000_82576)
3125		return;
3126
3127	vmolr = rd32(E1000_VMOLR(vfn));
3128	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3129	if (aupe)
3130		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3131	else
3132		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3133
3134	/* clear all bits that might not be set */
3135	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3136
3137	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3138		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3139	/*
3140	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3141	 * multicast packets
3142	 */
3143	if (vfn <= adapter->vfs_allocated_count)
3144		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3145
3146	wr32(E1000_VMOLR(vfn), vmolr);
3147}
3148
3149/**
3150 * igb_configure_rx_ring - Configure a receive ring after Reset
3151 * @adapter: board private structure
3152 * @ring: receive ring to be configured
3153 *
3154 * Configure the Rx unit of the MAC after a reset.
3155 **/
3156void igb_configure_rx_ring(struct igb_adapter *adapter,
3157                           struct igb_ring *ring)
3158{
3159	struct e1000_hw *hw = &adapter->hw;
3160	u64 rdba = ring->dma;
3161	int reg_idx = ring->reg_idx;
3162	u32 srrctl = 0, rxdctl = 0;
3163
3164	/* disable the queue */
3165	wr32(E1000_RXDCTL(reg_idx), 0);
3166
3167	/* Set DMA base address registers */
3168	wr32(E1000_RDBAL(reg_idx),
3169	     rdba & 0x00000000ffffffffULL);
3170	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3171	wr32(E1000_RDLEN(reg_idx),
3172	               ring->count * sizeof(union e1000_adv_rx_desc));
3173
3174	/* initialize head and tail */
3175	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3176	wr32(E1000_RDH(reg_idx), 0);
3177	writel(0, ring->tail);
3178
3179	/* set descriptor configuration */
3180	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3181#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3182	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3183#else
3184	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3185#endif
3186	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3187	if (hw->mac.type >= e1000_82580)
3188		srrctl |= E1000_SRRCTL_TIMESTAMP;
3189	/* Only set Drop Enable if we are supporting multiple queues */
3190	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3191		srrctl |= E1000_SRRCTL_DROP_EN;
3192
3193	wr32(E1000_SRRCTL(reg_idx), srrctl);
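	/* For example, on a system with 4 KB pages the packet buffer
	 * programmed above is PAGE_SIZE / 2 = 2048 bytes and the header
	 * buffer is IGB_RX_HDR_LEN bytes; the BSIZEPKT and BSIZEHDRSIZE
	 * shifts convert those byte counts into the 1 KB and 64-byte units
	 * the SRRCTL register expects.
	 */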
3194
3195	/* set filtering for VMDQ pools */
3196	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3197
3198	rxdctl |= IGB_RX_PTHRESH;
3199	rxdctl |= IGB_RX_HTHRESH << 8;
3200	rxdctl |= IGB_RX_WTHRESH << 16;
3201
3202	/* enable receive descriptor fetching */
3203	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3204	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3205}
3206
3207/**
3208 * igb_configure_rx - Configure receive Unit after Reset
3209 * @adapter: board private structure
3210 *
3211 * Configure the Rx unit of the MAC after a reset.
3212 **/
3213static void igb_configure_rx(struct igb_adapter *adapter)
3214{
3215	int i;
3216
3217	/* set UTA to appropriate mode */
3218	igb_set_uta(adapter);
3219
3220	/* set the correct pool for the PF default MAC address in entry 0 */
3221	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3222	                 adapter->vfs_allocated_count);
3223
3224	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3225	 * the Base and Length of the Rx Descriptor Ring */
3226	for (i = 0; i < adapter->num_rx_queues; i++)
3227		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3228}
3229
3230/**
3231 * igb_free_tx_resources - Free Tx Resources per Queue
3232 * @tx_ring: Tx descriptor ring for a specific queue
3233 *
3234 * Free all transmit software resources
3235 **/
3236void igb_free_tx_resources(struct igb_ring *tx_ring)
3237{
3238	igb_clean_tx_ring(tx_ring);
3239
3240	vfree(tx_ring->tx_buffer_info);
3241	tx_ring->tx_buffer_info = NULL;
3242
3243	/* if not set, then don't free */
3244	if (!tx_ring->desc)
3245		return;
3246
3247	dma_free_coherent(tx_ring->dev, tx_ring->size,
3248			  tx_ring->desc, tx_ring->dma);
3249
3250	tx_ring->desc = NULL;
3251}
3252
3253/**
3254 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3255 * @adapter: board private structure
3256 *
3257 * Free all transmit software resources
3258 **/
3259static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3260{
3261	int i;
3262
3263	for (i = 0; i < adapter->num_tx_queues; i++)
3264		igb_free_tx_resources(adapter->tx_ring[i]);
3265}
3266
3267void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3268				    struct igb_tx_buffer *tx_buffer)
3269{
3270	if (tx_buffer->skb) {
3271		dev_kfree_skb_any(tx_buffer->skb);
3272		if (tx_buffer->dma)
3273			dma_unmap_single(ring->dev,
3274					 tx_buffer->dma,
3275					 tx_buffer->length,
3276					 DMA_TO_DEVICE);
3277	} else if (tx_buffer->dma) {
3278		dma_unmap_page(ring->dev,
3279			       tx_buffer->dma,
3280			       tx_buffer->length,
3281			       DMA_TO_DEVICE);
3282	}
3283	tx_buffer->next_to_watch = NULL;
3284	tx_buffer->skb = NULL;
3285	tx_buffer->dma = 0;
3286	/* buffer_info must be completely set up in the transmit path */
3287}
3288
3289/**
3290 * igb_clean_tx_ring - Free Tx Buffers
3291 * @tx_ring: ring to be cleaned
3292 **/
3293static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3294{
3295	struct igb_tx_buffer *buffer_info;
3296	unsigned long size;
3297	u16 i;
3298
3299	if (!tx_ring->tx_buffer_info)
3300		return;
3301	/* Free all the Tx ring sk_buffs */
3302
3303	for (i = 0; i < tx_ring->count; i++) {
3304		buffer_info = &tx_ring->tx_buffer_info[i];
3305		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3306	}
3307
3308	netdev_tx_reset_queue(txring_txq(tx_ring));
3309
3310	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3311	memset(tx_ring->tx_buffer_info, 0, size);
3312
3313	/* Zero out the descriptor ring */
3314	memset(tx_ring->desc, 0, tx_ring->size);
3315
3316	tx_ring->next_to_use = 0;
3317	tx_ring->next_to_clean = 0;
3318}
3319
3320/**
3321 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3322 * @adapter: board private structure
3323 **/
3324static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3325{
3326	int i;
3327
3328	for (i = 0; i < adapter->num_tx_queues; i++)
3329		igb_clean_tx_ring(adapter->tx_ring[i]);
3330}
3331
3332/**
3333 * igb_free_rx_resources - Free Rx Resources
3334 * @rx_ring: ring to clean the resources from
3335 *
3336 * Free all receive software resources
3337 **/
3338void igb_free_rx_resources(struct igb_ring *rx_ring)
3339{
3340	igb_clean_rx_ring(rx_ring);
3341
3342	vfree(rx_ring->rx_buffer_info);
3343	rx_ring->rx_buffer_info = NULL;
3344
3345	/* if not set, then don't free */
3346	if (!rx_ring->desc)
3347		return;
3348
3349	dma_free_coherent(rx_ring->dev, rx_ring->size,
3350			  rx_ring->desc, rx_ring->dma);
3351
3352	rx_ring->desc = NULL;
3353}
3354
3355/**
3356 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3357 * @adapter: board private structure
3358 *
3359 * Free all receive software resources
3360 **/
3361static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3362{
3363	int i;
3364
3365	for (i = 0; i < adapter->num_rx_queues; i++)
3366		igb_free_rx_resources(adapter->rx_ring[i]);
3367}
3368
3369/**
3370 * igb_clean_rx_ring - Free Rx Buffers per Queue
3371 * @rx_ring: ring to free buffers from
3372 **/
3373static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3374{
3375	unsigned long size;
3376	u16 i;
3377
3378	if (!rx_ring->rx_buffer_info)
3379		return;
3380
3381	/* Free all the Rx ring sk_buffs */
3382	for (i = 0; i < rx_ring->count; i++) {
3383		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3384		if (buffer_info->dma) {
3385			dma_unmap_single(rx_ring->dev,
3386			                 buffer_info->dma,
3387					 IGB_RX_HDR_LEN,
3388					 DMA_FROM_DEVICE);
3389			buffer_info->dma = 0;
3390		}
3391
3392		if (buffer_info->skb) {
3393			dev_kfree_skb(buffer_info->skb);
3394			buffer_info->skb = NULL;
3395		}
3396		if (buffer_info->page_dma) {
3397			dma_unmap_page(rx_ring->dev,
3398			               buffer_info->page_dma,
3399				       PAGE_SIZE / 2,
3400				       DMA_FROM_DEVICE);
3401			buffer_info->page_dma = 0;
3402		}
3403		if (buffer_info->page) {
3404			put_page(buffer_info->page);
3405			buffer_info->page = NULL;
3406			buffer_info->page_offset = 0;
3407		}
3408	}
3409
3410	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3411	memset(rx_ring->rx_buffer_info, 0, size);
3412
3413	/* Zero out the descriptor ring */
3414	memset(rx_ring->desc, 0, rx_ring->size);
3415
3416	rx_ring->next_to_clean = 0;
3417	rx_ring->next_to_use = 0;
3418}
3419
3420/**
3421 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3422 * @adapter: board private structure
3423 **/
3424static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3425{
3426	int i;
3427
3428	for (i = 0; i < adapter->num_rx_queues; i++)
3429		igb_clean_rx_ring(adapter->rx_ring[i]);
3430}
3431
3432/**
3433 * igb_set_mac - Change the Ethernet Address of the NIC
3434 * @netdev: network interface device structure
3435 * @p: pointer to an address structure
3436 *
3437 * Returns 0 on success, negative on failure
3438 **/
3439static int igb_set_mac(struct net_device *netdev, void *p)
3440{
3441	struct igb_adapter *adapter = netdev_priv(netdev);
3442	struct e1000_hw *hw = &adapter->hw;
3443	struct sockaddr *addr = p;
3444
3445	if (!is_valid_ether_addr(addr->sa_data))
3446		return -EADDRNOTAVAIL;
3447
3448	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3449	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3450
3451	/* set the correct pool for the new PF MAC address in entry 0 */
3452	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3453	                 adapter->vfs_allocated_count);
3454
3455	return 0;
3456}
3457
3458/**
3459 * igb_write_mc_addr_list - write multicast addresses to MTA
3460 * @netdev: network interface device structure
3461 *
3462 * Writes multicast address list to the MTA hash table.
3463 * Returns: -ENOMEM on failure
3464 *                0 on no addresses written
3465 *                X on writing X addresses to MTA
3466 **/
3467static int igb_write_mc_addr_list(struct net_device *netdev)
3468{
3469	struct igb_adapter *adapter = netdev_priv(netdev);
3470	struct e1000_hw *hw = &adapter->hw;
3471	struct netdev_hw_addr *ha;
3472	u8  *mta_list;
3473	int i;
3474
3475	if (netdev_mc_empty(netdev)) {
3476		/* nothing to program, so clear mc list */
3477		igb_update_mc_addr_list(hw, NULL, 0);
3478		igb_restore_vf_multicasts(adapter);
3479		return 0;
3480	}
3481
3482	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3483	if (!mta_list)
3484		return -ENOMEM;
3485
3486	/* The shared function expects a packed array of only addresses. */
3487	i = 0;
3488	netdev_for_each_mc_addr(ha, netdev)
3489		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3490
3491	igb_update_mc_addr_list(hw, mta_list, i);
3492	kfree(mta_list);
3493
3494	return netdev_mc_count(netdev);
3495}
3496
3497/**
3498 * igb_write_uc_addr_list - write unicast addresses to RAR table
3499 * @netdev: network interface device structure
3500 *
3501 * Writes unicast address list to the RAR table.
3502 * Returns: -ENOMEM on failure/insufficient address space
3503 *                0 on no addresses written
3504 *                X on writing X addresses to the RAR table
3505 **/
3506static int igb_write_uc_addr_list(struct net_device *netdev)
3507{
3508	struct igb_adapter *adapter = netdev_priv(netdev);
3509	struct e1000_hw *hw = &adapter->hw;
3510	unsigned int vfn = adapter->vfs_allocated_count;
3511	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3512	int count = 0;
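	/* For example (hypothetical numbers): a MAC with 24 RAR entries and
	 * 7 allocated VFs leaves 24 - (7 + 1) = 16 entries for additional
	 * unicast filters, since entry 0 holds the PF's own MAC address and
	 * one entry is reserved per VF.
	 */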
3513
3514	/* return ENOMEM indicating insufficient memory for addresses */
3515	if (netdev_uc_count(netdev) > rar_entries)
3516		return -ENOMEM;
3517
3518	if (!netdev_uc_empty(netdev) && rar_entries) {
3519		struct netdev_hw_addr *ha;
3520
3521		netdev_for_each_uc_addr(ha, netdev) {
3522			if (!rar_entries)
3523				break;
3524			igb_rar_set_qsel(adapter, ha->addr,
3525			                 rar_entries--,
3526			                 vfn);
3527			count++;
3528		}
3529	}
3530	/* write the addresses in reverse order to avoid write combining */
3531	for (; rar_entries > 0 ; rar_entries--) {
3532		wr32(E1000_RAH(rar_entries), 0);
3533		wr32(E1000_RAL(rar_entries), 0);
3534	}
3535	wrfl();
3536
3537	return count;
3538}
3539
3540/**
3541 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3542 * @netdev: network interface device structure
3543 *
3544 * The set_rx_mode entry point is called whenever the unicast or multicast
3545 * address lists or the network interface flags are updated.  This routine is
3546 * responsible for configuring the hardware for proper unicast, multicast,
3547 * promiscuous mode, and all-multi behavior.
3548 **/
3549static void igb_set_rx_mode(struct net_device *netdev)
3550{
3551	struct igb_adapter *adapter = netdev_priv(netdev);
3552	struct e1000_hw *hw = &adapter->hw;
3553	unsigned int vfn = adapter->vfs_allocated_count;
3554	u32 rctl, vmolr = 0;
3555	int count;
3556
3557	/* Check for Promiscuous and All Multicast modes */
3558	rctl = rd32(E1000_RCTL);
3559
3560	/* clear the affected bits */
3561	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3562
3563	if (netdev->flags & IFF_PROMISC) {
3564		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3565		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3566	} else {
3567		if (netdev->flags & IFF_ALLMULTI) {
3568			rctl |= E1000_RCTL_MPE;
3569			vmolr |= E1000_VMOLR_MPME;
3570		} else {
3571			/*
3572			 * Write addresses to the MTA; if that fails, fall back
3573			 * to multicast promiscuous mode so that we can at least
3574			 * receive multicast traffic
3575			 */
3576			count = igb_write_mc_addr_list(netdev);
3577			if (count < 0) {
3578				rctl |= E1000_RCTL_MPE;
3579				vmolr |= E1000_VMOLR_MPME;
3580			} else if (count) {
3581				vmolr |= E1000_VMOLR_ROMPE;
3582			}
3583		}
3584		/*
3585		 * Write addresses to the available RAR registers; if there is
3586		 * not enough space to store them all, enable unicast
3587		 * promiscuous mode
3588		 */
3589		count = igb_write_uc_addr_list(netdev);
3590		if (count < 0) {
3591			rctl |= E1000_RCTL_UPE;
3592			vmolr |= E1000_VMOLR_ROPE;
3593		}
3594		rctl |= E1000_RCTL_VFE;
3595	}
3596	wr32(E1000_RCTL, rctl);
3597
3598	/*
3599	 * In order to support SR-IOV and eventually VMDq, the VMOLR must be set
3600	 * to enable the appropriate modes.  Without this workaround, VLAN tag
3601	 * stripping is not done for frames that only arrive because we are the
3602	 * default pool.
3603	 */
3604	if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3605		return;
3606
3607	vmolr |= rd32(E1000_VMOLR(vfn)) &
3608	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3609	wr32(E1000_VMOLR(vfn), vmolr);
3610	igb_restore_vf_multicasts(adapter);
3611}
3612
3613static void igb_check_wvbr(struct igb_adapter *adapter)
3614{
3615	struct e1000_hw *hw = &adapter->hw;
3616	u32 wvbr = 0;
3617
3618	switch (hw->mac.type) {
3619	case e1000_82576:
3620	case e1000_i350:
3621		if (!(wvbr = rd32(E1000_WVBR)))
3622			return;
3623		break;
3624	default:
3625		break;
3626	}
3627
3628	adapter->wvbr |= wvbr;
3629}
3630
3631#define IGB_STAGGERED_QUEUE_OFFSET 8
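/* WVBR flags wrong-VM-behavior (spoof) events per queue; igb_spoof_check()
 * below tests bit n and bit n + IGB_STAGGERED_QUEUE_OFFSET for each VF,
 * presumably because VF n owns queue n and queue n + 8 in the staggered
 * queue layout used when VFs are enabled.
 */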
3632
3633static void igb_spoof_check(struct igb_adapter *adapter)
3634{
3635	int j;
3636
3637	if (!adapter->wvbr)
3638		return;
3639
3640	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3641		if (adapter->wvbr & (1 << j) ||
3642		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3643			dev_warn(&adapter->pdev->dev,
3644				"Spoof event(s) detected on VF %d\n", j);
3645			adapter->wvbr &=
3646				~((1 << j) |
3647				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3648		}
3649	}
3650}
3651
3652/* Need to wait a few seconds after link up to get diagnostic information from
3653 * the phy */
3654static void igb_update_phy_info(unsigned long data)
3655{
3656	struct igb_adapter *adapter = (struct igb_adapter *) data;
3657	igb_get_phy_info(&adapter->hw);
3658}
3659
3660/**
3661 * igb_has_link - check shared code for link and determine up/down
3662 * @adapter: pointer to driver private info
3663 **/
3664bool igb_has_link(struct igb_adapter *adapter)
3665{
3666	struct e1000_hw *hw = &adapter->hw;
3667	bool link_active = false;
3668	s32 ret_val = 0;
3669
3670	/* get_link_status is set on LSC (link status) interrupt or
3671	 * rx sequence error interrupt.  It stays set, and link stays
3672	 * reported as down, until check_for_link establishes link
3673	 * for copper adapters ONLY
3674	 */
3675	switch (hw->phy.media_type) {
3676	case e1000_media_type_copper:
3677		if (hw->mac.get_link_status) {
3678			ret_val = hw->mac.ops.check_for_link(hw);
3679			link_active = !hw->mac.get_link_status;
3680		} else {
3681			link_active = true;
3682		}
3683		break;
3684	case e1000_media_type_internal_serdes:
3685		ret_val = hw->mac.ops.check_for_link(hw);
3686		link_active = hw->mac.serdes_has_link;
3687		break;
3688	default:
3689	case e1000_media_type_unknown:
3690		break;
3691	}
3692
3693	return link_active;
3694}
3695
3696static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3697{
3698	bool ret = false;
3699	u32 ctrl_ext, thstat;
3700
3701	/* check for thermal sensor event on i350 copper only */
3702	if (hw->mac.type == e1000_i350) {
3703		thstat = rd32(E1000_THSTAT);
3704		ctrl_ext = rd32(E1000_CTRL_EXT);
3705
3706		if ((hw->phy.media_type == e1000_media_type_copper) &&
3707		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3708			ret = !!(thstat & event);
3709		}
3710	}
3711
3712	return ret;
3713}
3714
3715/**
3716 * igb_watchdog - Timer Call-back
3717 * @data: pointer to adapter cast into an unsigned long
3718 **/
3719static void igb_watchdog(unsigned long data)
3720{
3721	struct igb_adapter *adapter = (struct igb_adapter *)data;
3722	/* Do the rest outside of interrupt context */
3723	schedule_work(&adapter->watchdog_task);
3724}
3725
3726static void igb_watchdog_task(struct work_struct *work)
3727{
3728	struct igb_adapter *adapter = container_of(work,
3729	                                           struct igb_adapter,
3730                                                   watchdog_task);
3731	struct e1000_hw *hw = &adapter->hw;
3732	struct net_device *netdev = adapter->netdev;
3733	u32 link;
3734	int i;
3735
3736	link = igb_has_link(adapter);
3737	if (link) {
3738		/* Cancel scheduled suspend requests. */
3739		pm_runtime_resume(netdev->dev.parent);
3740
3741		if (!netif_carrier_ok(netdev)) {
3742			u32 ctrl;
3743			hw->mac.ops.get_speed_and_duplex(hw,
3744			                                 &adapter->link_speed,
3745			                                 &adapter->link_duplex);
3746
3747			ctrl = rd32(E1000_CTRL);
3748			/* Link status message must follow this format */
3749			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3750			       "Duplex, Flow Control: %s\n",
3751			       netdev->name,
3752			       adapter->link_speed,
3753			       adapter->link_duplex == FULL_DUPLEX ?
3754			       "Full" : "Half",
3755			       (ctrl & E1000_CTRL_TFCE) &&
3756			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3757			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3758			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3759
3760			/* check for thermal sensor event */
3761			if (igb_thermal_sensor_event(hw,
3762			    E1000_THSTAT_LINK_THROTTLE)) {
3763				netdev_info(netdev, "The network adapter link "
3764					    "speed was downshifted because it "
3765					    "overheated\n");
3766			}
3767
3768			/* adjust timeout factor according to speed/duplex */
3769			adapter->tx_timeout_factor = 1;
3770			switch (adapter->link_speed) {
3771			case SPEED_10:
3772				adapter->tx_timeout_factor = 14;
3773				break;
3774			case SPEED_100:
3775				/* maybe add some timeout factor ? */
3776				break;
3777			}
3778
3779			netif_carrier_on(netdev);
3780
3781			igb_ping_all_vfs(adapter);
3782			igb_check_vf_rate_limit(adapter);
3783
3784			/* link state has changed, schedule phy info update */
3785			if (!test_bit(__IGB_DOWN, &adapter->state))
3786				mod_timer(&adapter->phy_info_timer,
3787					  round_jiffies(jiffies + 2 * HZ));
3788		}
3789	} else {
3790		if (netif_carrier_ok(netdev)) {
3791			adapter->link_speed = 0;
3792			adapter->link_duplex = 0;
3793
3794			/* check for thermal sensor event */
3795			if (igb_thermal_sensor_event(hw,
3796			    E1000_THSTAT_PWR_DOWN)) {
3797				netdev_err(netdev, "The network adapter was "
3798					   "stopped because it overheated\n");
3799			}
3800
3801			/* Link status message must follow this format */
3802			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3803			       netdev->name);
3804			netif_carrier_off(netdev);
3805
3806			igb_ping_all_vfs(adapter);
3807
3808			/* link state has changed, schedule phy info update */
3809			if (!test_bit(__IGB_DOWN, &adapter->state))
3810				mod_timer(&adapter->phy_info_timer,
3811					  round_jiffies(jiffies + 2 * HZ));
3812
3813			pm_schedule_suspend(netdev->dev.parent,
3814					    MSEC_PER_SEC * 5);
3815		}
3816	}
3817
3818	spin_lock(&adapter->stats64_lock);
3819	igb_update_stats(adapter, &adapter->stats64);
3820	spin_unlock(&adapter->stats64_lock);
3821
3822	for (i = 0; i < adapter->num_tx_queues; i++) {
3823		struct igb_ring *tx_ring = adapter->tx_ring[i];
3824		if (!netif_carrier_ok(netdev)) {
3825			/* We've lost link, so the controller stops DMA,
3826			 * but we've got queued Tx work that's never going
3827			 * to get done, so reset controller to flush Tx.
3828			 * (Do the reset outside of interrupt context). */
3829			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3830				adapter->tx_timeout_count++;
3831				schedule_work(&adapter->reset_task);
3832				/* return immediately since reset is imminent */
3833				return;
3834			}
3835		}
3836
3837		/* Force detection of hung controller every watchdog period */
3838		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3839	}
3840
3841	/* Cause software interrupt to ensure rx ring is cleaned */
3842	if (adapter->msix_entries) {
3843		u32 eics = 0;
3844		for (i = 0; i < adapter->num_q_vectors; i++)
3845			eics |= adapter->q_vector[i]->eims_value;
3846		wr32(E1000_EICS, eics);
3847	} else {
3848		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3849	}
3850
3851	igb_spoof_check(adapter);
3852
3853	/* Reset the timer */
3854	if (!test_bit(__IGB_DOWN, &adapter->state))
3855		mod_timer(&adapter->watchdog_timer,
3856			  round_jiffies(jiffies + 2 * HZ));
3857}
3858
3859enum latency_range {
3860	lowest_latency = 0,
3861	low_latency = 1,
3862	bulk_latency = 2,
3863	latency_invalid = 255
3864};
3865
3866/**
3867 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3868 *
3869 *      Stores a new ITR value based strictly on packet size.  This
3870 *      algorithm is less sophisticated than that used in igb_update_itr,
3871 *      due to the difficulty of synchronizing statistics across multiple
3872 *      receive rings.  The divisors and thresholds used by this function
3873 *      were determined based on theoretical maximum wire speed and testing
3874 *      data, in order to minimize response time while increasing bulk
3875 *      throughput.
3876 *      This functionality is controlled by the InterruptThrottleRate module
3877 *      parameter (see igb_param.c)
3878 *      NOTE:  This function is called only when operating in a multiqueue
3879 *             receive environment.
3880 * @q_vector: pointer to q_vector
3881 **/
3882static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3883{
3884	int new_val = q_vector->itr_val;
3885	int avg_wire_size = 0;
3886	struct igb_adapter *adapter = q_vector->adapter;
3887	unsigned int packets;
3888
3889	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3890	 * ints/sec (IGB_4K_ITR).
3891	 */
3892	if (adapter->link_speed != SPEED_1000) {
3893		new_val = IGB_4K_ITR;
3894		goto set_itr_val;
3895	}
3896
3897	packets = q_vector->rx.total_packets;
3898	if (packets)
3899		avg_wire_size = q_vector->rx.total_bytes / packets;
3900
3901	packets = q_vector->tx.total_packets;
3902	if (packets)
3903		avg_wire_size = max_t(u32, avg_wire_size,
3904				      q_vector->tx.total_bytes / packets);
3905
3906	/* if avg_wire_size isn't set no work was done */
3907	if (!avg_wire_size)
3908		goto clear_counts;
3909
3910	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3911	avg_wire_size += 24;
3912
3913	/* Don't starve jumbo frames */
3914	avg_wire_size = min(avg_wire_size, 3000);
3915
3916	/* Give a little boost to mid-size frames */
3917	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3918		new_val = avg_wire_size / 3;
3919	else
3920		new_val = avg_wire_size / 2;
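	/* For example, an average of 600 wire bytes (mid-size) gives
	 * new_val = 600 / 3 = 200, while a 1524-byte average gives
	 * 1524 / 2 = 762; a larger EITR value means a longer interval
	 * between interrupts.
	 */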
3921
3922	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3923	if (new_val < IGB_20K_ITR &&
3924	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3925	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3926		new_val = IGB_20K_ITR;
3927
3928set_itr_val:
3929	if (new_val != q_vector->itr_val) {
3930		q_vector->itr_val = new_val;
3931		q_vector->set_itr = 1;
3932	}
3933clear_counts:
3934	q_vector->rx.total_bytes = 0;
3935	q_vector->rx.total_packets = 0;
3936	q_vector->tx.total_bytes = 0;
3937	q_vector->tx.total_packets = 0;
3938}
3939
3940/**
3941 * igb_update_itr - update the dynamic ITR value based on statistics
3942 *      Stores a new ITR value based on packet and byte
3943 *      counts during the last interrupt.  The advantage of per-interrupt
3944 *      computation is faster updates and more accurate ITR for the current
3945 *      traffic pattern.  Constants in this function were computed
3946 *      based on theoretical maximum wire speed and thresholds were set based
3947 *      on testing data as well as attempting to minimize response time
3948 *      while increasing bulk throughput.
3949 *      This functionality is controlled by the InterruptThrottleRate module
3950 *      parameter (see igb_param.c)
3951 *      NOTE:  These calculations are only valid when operating in a single-
3952 *             queue environment.
3953 * @q_vector: pointer to q_vector
3954 * @ring_container: ring info to update the itr for
3955 **/
3956static void igb_update_itr(struct igb_q_vector *q_vector,
3957			   struct igb_ring_container *ring_container)
3958{
3959	unsigned int packets = ring_container->total_packets;
3960	unsigned int bytes = ring_container->total_bytes;
3961	u8 itrval = ring_container->itr;
3962
3963	/* no packets, exit with status unchanged */
3964	if (packets == 0)
3965		return;
3966
3967	switch (itrval) {
3968	case lowest_latency:
3969		/* handle TSO and jumbo frames */
3970		if (bytes/packets > 8000)
3971			itrval = bulk_latency;
3972		else if ((packets < 5) && (bytes > 512))
3973			itrval = low_latency;
3974		break;
3975	case low_latency:  /* 50 usec aka 20000 ints/s */
3976		if (bytes > 10000) {
3977			/* this if handles the TSO accounting */
3978			if (bytes/packets > 8000) {
3979				itrval = bulk_latency;
3980			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3981				itrval = bulk_latency;
3982			} else if (packets > 35) {
3983				itrval = lowest_latency;
3984			}
3985		} else if (bytes/packets > 2000) {
3986			itrval = bulk_latency;
3987		} else if (packets <= 2 && bytes < 512) {
3988			itrval = lowest_latency;
3989		}
3990		break;
3991	case bulk_latency: /* 250 usec aka 4000 ints/s */
3992		if (bytes > 25000) {
3993			if (packets > 35)
3994				itrval = low_latency;
3995		} else if (bytes < 1500) {
3996			itrval = low_latency;
3997		}
3998		break;
3999	}
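	/* Example: 20 packets totalling 30000 bytes while in low_latency:
	 * bytes > 10000 and bytes/packets = 1500 > 1200, so itrval moves
	 * to bulk_latency.
	 */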
4000
4001	/* clear work counters since we have the values we need */
4002	ring_container->total_bytes = 0;
4003	ring_container->total_packets = 0;
4004
4005	/* write updated itr to ring container */
4006	ring_container->itr = itrval;
4007}
4008
4009static void igb_set_itr(struct igb_q_vector *q_vector)
4010{
4011	struct igb_adapter *adapter = q_vector->adapter;
4012	u32 new_itr = q_vector->itr_val;
4013	u8 current_itr = 0;
4014
4015	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4016	if (adapter->link_speed != SPEED_1000) {
4017		current_itr = 0;
4018		new_itr = IGB_4K_ITR;
4019		goto set_itr_now;
4020	}
4021
4022	igb_update_itr(q_vector, &q_vector->tx);
4023	igb_update_itr(q_vector, &q_vector->rx);
4024
4025	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4026
4027	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4028	if (current_itr == lowest_latency &&
4029	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4030	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4031		current_itr = low_latency;
4032
4033	switch (current_itr) {
4034	/* counts and packets in update_itr are dependent on these numbers */
4035	case lowest_latency:
4036		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4037		break;
4038	case low_latency:
4039		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4040		break;
4041	case bulk_latency:
4042		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4043		break;
4044	default:
4045		break;
4046	}
4047
4048set_itr_now:
4049	if (new_itr != q_vector->itr_val) {
4050		/* this attempts to bias the interrupt rate towards Bulk
4051		 * by adding intermediate steps when interrupt rate is
4052		 * increasing */
4053		new_itr = new_itr > q_vector->itr_val ?
4054		             max((new_itr * q_vector->itr_val) /
4055		                 (new_itr + (q_vector->itr_val >> 2)),
4056				 new_itr) :
4057			     new_itr;
4058		/* Don't write the value here; it resets the adapter's
4059		 * internal timer, and causes us to delay far longer than
4060		 * we should between interrupts.  Instead, we write the ITR
4061		 * value at the beginning of the next interrupt so the timing
4062		 * ends up being correct.
4063		 */
4064		q_vector->itr_val = new_itr;
4065		q_vector->set_itr = 1;
4066	}
4067}
4068
4069static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4070			    u32 type_tucmd, u32 mss_l4len_idx)
4071{
4072	struct e1000_adv_tx_context_desc *context_desc;
4073	u16 i = tx_ring->next_to_use;
4074
4075	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4076
4077	i++;
4078	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4079
4080	/* set bits to identify this as an advanced context descriptor */
4081	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4082
4083	/* For 82575, context index must be unique per ring. */
4084	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4085		mss_l4len_idx |= tx_ring->reg_idx << 4;
4086
4087	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4088	context_desc->seqnum_seed	= 0;
4089	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4090	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4091}
4092
4093static int igb_tso(struct igb_ring *tx_ring,
4094		   struct igb_tx_buffer *first,
4095		   u8 *hdr_len)
4096{
4097	struct sk_buff *skb = first->skb;
4098	u32 vlan_macip_lens, type_tucmd;
4099	u32 mss_l4len_idx, l4len;
4100
4101	if (!skb_is_gso(skb))
4102		return 0;
4103
4104	if (skb_header_cloned(skb)) {
4105		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4106		if (err)
4107			return err;
4108	}
4109
4110	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4111	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4112
4113	if (first->protocol == __constant_htons(ETH_P_IP)) {
4114		struct iphdr *iph = ip_hdr(skb);
4115		iph->tot_len = 0;
4116		iph->check = 0;
4117		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4118							 iph->daddr, 0,
4119							 IPPROTO_TCP,
4120							 0);
4121		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4122		first->tx_flags |= IGB_TX_FLAGS_TSO |
4123				   IGB_TX_FLAGS_CSUM |
4124				   IGB_TX_FLAGS_IPV4;
4125	} else if (skb_is_gso_v6(skb)) {
4126		ipv6_hdr(skb)->payload_len = 0;
4127		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4128						       &ipv6_hdr(skb)->daddr,
4129						       0, IPPROTO_TCP, 0);
4130		first->tx_flags |= IGB_TX_FLAGS_TSO |
4131				   IGB_TX_FLAGS_CSUM;
4132	}
4133
4134	/* compute header lengths */
4135	l4len = tcp_hdrlen(skb);
4136	*hdr_len = skb_transport_offset(skb) + l4len;
4137
4138	/* update gso size and bytecount with header size */
4139	first->gso_segs = skb_shinfo(skb)->gso_segs;
4140	first->bytecount += (first->gso_segs - 1) * *hdr_len;
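	/* skb->len counts the protocol headers only once, but each of the
	 * gso_segs segments goes out with its own copy, so the extra
	 * (gso_segs - 1) * hdr_len bytes are added to the on-wire byte count.
	 * For an untagged IPv4/TCP frame with no options, hdr_len is
	 * 14 + 20 + 20 = 54.
	 */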
4141
4142	/* MSS L4LEN IDX */
4143	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4144	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4145
4146	/* VLAN MACLEN IPLEN */
4147	vlan_macip_lens = skb_network_header_len(skb);
4148	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4149	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4150
4151	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4152
4153	return 1;
4154}
4155
4156static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4157{
4158	struct sk_buff *skb = first->skb;
4159	u32 vlan_macip_lens = 0;
4160	u32 mss_l4len_idx = 0;
4161	u32 type_tucmd = 0;
4162
4163	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4164		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4165			return;
4166	} else {
4167		u8 l4_hdr = 0;
4168		switch (first->protocol) {
4169		case __constant_htons(ETH_P_IP):
4170			vlan_macip_lens |= skb_network_header_len(skb);
4171			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4172			l4_hdr = ip_hdr(skb)->protocol;
4173			break;
4174		case __constant_htons(ETH_P_IPV6):
4175			vlan_macip_lens |= skb_network_header_len(skb);
4176			l4_hdr = ipv6_hdr(skb)->nexthdr;
4177			break;
4178		default:
4179			if (unlikely(net_ratelimit())) {
4180				dev_warn(tx_ring->dev,
4181				 "partial checksum but proto=%x!\n",
4182				 first->protocol);
4183			}
4184			break;
4185		}
4186
4187		switch (l4_hdr) {
4188		case IPPROTO_TCP:
4189			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4190			mss_l4len_idx = tcp_hdrlen(skb) <<
4191					E1000_ADVTXD_L4LEN_SHIFT;
4192			break;
4193		case IPPROTO_SCTP:
4194			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4195			mss_l4len_idx = sizeof(struct sctphdr) <<
4196					E1000_ADVTXD_L4LEN_SHIFT;
4197			break;
4198		case IPPROTO_UDP:
4199			mss_l4len_idx = sizeof(struct udphdr) <<
4200					E1000_ADVTXD_L4LEN_SHIFT;
4201			break;
4202		default:
4203			if (unlikely(net_ratelimit())) {
4204				dev_warn(tx_ring->dev,
4205				 "partial checksum but l4 proto=%x!\n",
4206				 l4_hdr);
4207			}
4208			break;
4209		}
4210
4211		/* update TX checksum flag */
4212		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4213	}
4214
4215	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4216	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4217
4218	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4219}
4220
4221static __le32 igb_tx_cmd_type(u32 tx_flags)
4222{
4223	/* set type for advanced descriptor with frame checksum insertion */
4224	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4225				      E1000_ADVTXD_DCMD_IFCS |
4226				      E1000_ADVTXD_DCMD_DEXT);
4227
4228	/* set HW vlan bit if vlan is present */
4229	if (tx_flags & IGB_TX_FLAGS_VLAN)
4230		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4231
4232	/* set timestamp bit if present */
4233	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4234		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4235
4236	/* set segmentation bits for TSO */
4237	if (tx_flags & IGB_TX_FLAGS_TSO)
4238		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4239
4240	return cmd_type;
4241}
4242
4243static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4244				 union e1000_adv_tx_desc *tx_desc,
4245				 u32 tx_flags, unsigned int paylen)
4246{
4247	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4248
4249	/* 82575 requires a unique index per ring if any offload is enabled */
4250	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4251	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4252		olinfo_status |= tx_ring->reg_idx << 4;
4253
4254	/* insert L4 checksum */
4255	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4256		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4257
4258		/* insert IPv4 checksum */
4259		if (tx_flags & IGB_TX_FLAGS_IPV4)
4260			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4261	}
4262
4263	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4264}
4265
4266/*
4267 * The largest size we can write to the descriptor is 65535.  In order to
4268 * maintain a power of two alignment we have to limit ourselves to 32K.
4269 */
4270#define IGB_MAX_TXD_PWR	15
4271#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
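/* For example, a 60 KB (61440 byte) linear buffer is split by the loop in
 * igb_tx_map() into a 32768-byte descriptor followed by a 28672-byte one.
 */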
4272
4273static void igb_tx_map(struct igb_ring *tx_ring,
4274		       struct igb_tx_buffer *first,
4275		       const u8 hdr_len)
4276{
4277	struct sk_buff *skb = first->skb;
4278	struct igb_tx_buffer *tx_buffer_info;
4279	union e1000_adv_tx_desc *tx_desc;
4280	dma_addr_t dma;
4281	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4282	unsigned int data_len = skb->data_len;
4283	unsigned int size = skb_headlen(skb);
4284	unsigned int paylen = skb->len - hdr_len;
4285	__le32 cmd_type;
4286	u32 tx_flags = first->tx_flags;
4287	u16 i = tx_ring->next_to_use;
4288
4289	tx_desc = IGB_TX_DESC(tx_ring, i);
4290
4291	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4292	cmd_type = igb_tx_cmd_type(tx_flags);
4293
4294	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4295	if (dma_mapping_error(tx_ring->dev, dma))
4296		goto dma_error;
4297
4298	/* record length, and DMA address */
4299	first->length = size;
4300	first->dma = dma;
4301	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4302
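	/* The outer loop walks the skb head and then each page fragment;
	 * the inner while loop splits any buffer larger than
	 * IGB_MAX_DATA_PER_TXD across multiple descriptors.
	 */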
4303	for (;;) {
4304		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4305			tx_desc->read.cmd_type_len =
4306				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4307
4308			i++;
4309			tx_desc++;
4310			if (i == tx_ring->count) {
4311				tx_desc = IGB_TX_DESC(tx_ring, 0);
4312				i = 0;
4313			}
4314
4315			dma += IGB_MAX_DATA_PER_TXD;
4316			size -= IGB_MAX_DATA_PER_TXD;
4317
4318			tx_desc->read.olinfo_status = 0;
4319			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4320		}
4321
4322		if (likely(!data_len))
4323			break;
4324
4325		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4326
4327		i++;
4328		tx_desc++;
4329		if (i == tx_ring->count) {
4330			tx_desc = IGB_TX_DESC(tx_ring, 0);
4331			i = 0;
4332		}
4333
4334		size = skb_frag_size(frag);
4335		data_len -= size;
4336
4337		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4338				   size, DMA_TO_DEVICE);
4339		if (dma_mapping_error(tx_ring->dev, dma))
4340			goto dma_error;
4341
4342		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4343		tx_buffer_info->length = size;
4344		tx_buffer_info->dma = dma;
4345
4346		tx_desc->read.olinfo_status = 0;
4347		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4348
4349		frag++;
4350	}
4351
4352	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4353
4354	/* write last descriptor with RS and EOP bits */
4355	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4356	if (unlikely(skb->no_fcs))
4357		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4358	tx_desc->read.cmd_type_len = cmd_type;
4359
4360	/* set the timestamp */
4361	first->time_stamp = jiffies;
4362
4363	/*
4364	 * Force memory writes to complete before letting h/w know there
4365	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4366	 * memory model archs, such as IA-64).
4367	 *
4368	 * We also need this memory barrier to make certain all of the
4369	 * status bits have been updated before next_to_watch is written.
4370	 */
4371	wmb();
4372
4373	/* set next_to_watch value indicating a packet is present */
4374	first->next_to_watch = tx_desc;
4375
4376	i++;
4377	if (i == tx_ring->count)
4378		i = 0;
4379
4380	tx_ring->next_to_use = i;
4381
4382	writel(i, tx_ring->tail);
4383
4384	/* we need this if more than one processor can write to our tail
4385	 * at a time; it synchronizes IO on IA64/Altix systems */
4386	mmiowb();
4387
4388	return;
4389
4390dma_error:
4391	dev_err(tx_ring->dev, "TX DMA map failed\n");
4392
4393	/* clear dma mappings for failed tx_buffer_info map */
4394	for (;;) {
4395		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4396		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4397		if (tx_buffer_info == first)
4398			break;
4399		if (i == 0)
4400			i = tx_ring->count;
4401		i--;
4402	}
4403
4404	tx_ring->next_to_use = i;
4405}
4406
4407static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4408{
4409	struct net_device *netdev = tx_ring->netdev;
4410
4411	netif_stop_subqueue(netdev, tx_ring->queue_index);
4412
4413	/* Herbert's original patch had:
4414	 *  smp_mb__after_netif_stop_queue();
4415	 * but since that doesn't exist yet, just open code it. */
4416	smp_mb();
4417
4418	/* We need to check again in case another CPU has just
4419	 * made room available. */
4420	if (igb_desc_unused(tx_ring) < size)
4421		return -EBUSY;
4422
4423	/* A reprieve! */
4424	netif_wake_subqueue(netdev, tx_ring->queue_index);
4425
4426	u64_stats_update_begin(&tx_ring->tx_syncp2);
4427	tx_ring->tx_stats.restart_queue2++;
4428	u64_stats_update_end(&tx_ring->tx_syncp2);
4429
4430	return 0;
4431}
4432
4433static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4434{
4435	if (igb_desc_unused(tx_ring) >= size)
4436		return 0;
4437	return __igb_maybe_stop_tx(tx_ring, size);
4438}
4439
4440netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4441				struct igb_ring *tx_ring)
4442{
4443	struct igb_tx_buffer *first;
4444	int tso;
4445	u32 tx_flags = 0;
4446	__be16 protocol = vlan_get_protocol(skb);
4447	u8 hdr_len = 0;
4448
4449	/* need: 1 descriptor per page,
4450	 *       + 2 desc gap to keep tail from touching head,
4451	 *       + 1 desc for skb->data,
4452	 *       + 1 desc for context descriptor,
4453	 * otherwise try next time */
4454	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4455		/* this is a hard error */
4456		return NETDEV_TX_BUSY;
4457	}
4458
4459	/* record the location of the first descriptor for this packet */
4460	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4461	first->skb = skb;
4462	first->bytecount = skb->len;
4463	first->gso_segs = 1;
4464
4465	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4466		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4467		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4468	}
4469
4470	if (vlan_tx_tag_present(skb)) {
4471		tx_flags |= IGB_TX_FLAGS_VLAN;
4472		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4473	}
4474
4475	/* record initial flags and protocol */
4476	first->tx_flags = tx_flags;
4477	first->protocol = protocol;
4478
4479	tso = igb_tso(tx_ring, first, &hdr_len);
4480	if (tso < 0)
4481		goto out_drop;
4482	else if (!tso)
4483		igb_tx_csum(tx_ring, first);
4484
4485	igb_tx_map(tx_ring, first, hdr_len);
4486
4487	/* Make sure there is space in the ring for the next send. */
4488	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
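	/* Stopping the queue here, once fewer than MAX_SKB_FRAGS + 4
	 * descriptors remain, means the NETDEV_TX_BUSY path at the top of
	 * this function should rarely, if ever, be taken.
	 */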
4489
4490	return NETDEV_TX_OK;
4491
4492out_drop:
4493	igb_unmap_and_free_tx_resource(tx_ring, first);
4494
4495	return NETDEV_TX_OK;
4496}
4497
4498static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4499						    struct sk_buff *skb)
4500{
4501	unsigned int r_idx = skb->queue_mapping;
4502
4503	if (r_idx >= adapter->num_tx_queues)
4504		r_idx = r_idx % adapter->num_tx_queues;
4505
4506	return adapter->tx_ring[r_idx];
4507}
4508
4509static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4510				  struct net_device *netdev)
4511{
4512	struct igb_adapter *adapter = netdev_priv(netdev);
4513
4514	if (test_bit(__IGB_DOWN, &adapter->state)) {
4515		dev_kfree_skb_any(skb);
4516		return NETDEV_TX_OK;
4517	}
4518
4519	if (skb->len <= 0) {
4520		dev_kfree_skb_any(skb);
4521		return NETDEV_TX_OK;
4522	}
4523
4524	/*
4525	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4526	 * in order to meet this minimum size requirement.
4527	 */
4528	if (skb->len < 17) {
4529		if (skb_padto(skb, 17))
4530			return NETDEV_TX_OK;
4531		skb->len = 17;
4532	}
4533
4534	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4535}
4536
4537/**
4538 * igb_tx_timeout - Respond to a Tx Hang
4539 * @netdev: network interface device structure
4540 **/
4541static void igb_tx_timeout(struct net_device *netdev)
4542{
4543	struct igb_adapter *adapter = netdev_priv(netdev);
4544	struct e1000_hw *hw = &adapter->hw;
4545
4546	/* Do the reset outside of interrupt context */
4547	adapter->tx_timeout_count++;
4548
4549	if (hw->mac.type >= e1000_82580)
4550		hw->dev_spec._82575.global_device_reset = true;
4551
4552	schedule_work(&adapter->reset_task);
4553	wr32(E1000_EICS,
4554	     (adapter->eims_enable_mask & ~adapter->eims_other));
4555}
4556
4557static void igb_reset_task(struct work_struct *work)
4558{
4559	struct igb_adapter *adapter;
4560	adapter = container_of(work, struct igb_adapter, reset_task);
4561
4562	igb_dump(adapter);
4563	netdev_err(adapter->netdev, "Reset adapter\n");
4564	igb_reinit_locked(adapter);
4565}
4566
4567/**
4568 * igb_get_stats64 - Get System Network Statistics
4569 * @netdev: network interface device structure
4570 * @stats: rtnl_link_stats64 pointer
4571 *
4572 **/
4573static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4574						 struct rtnl_link_stats64 *stats)
4575{
4576	struct igb_adapter *adapter = netdev_priv(netdev);
4577
4578	spin_lock(&adapter->stats64_lock);
4579	igb_update_stats(adapter, &adapter->stats64);
4580	memcpy(stats, &adapter->stats64, sizeof(*stats));
4581	spin_unlock(&adapter->stats64_lock);
4582
4583	return stats;
4584}
4585
4586/**
4587 * igb_change_mtu - Change the Maximum Transfer Unit
4588 * @netdev: network interface device structure
4589 * @new_mtu: new value for maximum frame size
4590 *
4591 * Returns 0 on success, negative on failure
4592 **/
4593static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4594{
4595	struct igb_adapter *adapter = netdev_priv(netdev);
4596	struct pci_dev *pdev = adapter->pdev;
4597	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
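	/* e.g. the default MTU of 1500 gives max_frame = 1500 + 14 + 4 + 4
	 * = 1522 bytes (Ethernet header, FCS and one VLAN tag).
	 */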
4598
4599	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4600		dev_err(&pdev->dev, "Invalid MTU setting\n");
4601		return -EINVAL;
4602	}
4603
4604#define MAX_STD_JUMBO_FRAME_SIZE 9238
4605	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4606		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4607		return -EINVAL;
4608	}
4609
4610	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4611		msleep(1);
4612
4613	/* igb_down has a dependency on max_frame_size */
4614	adapter->max_frame_size = max_frame;
4615
4616	if (netif_running(netdev))
4617		igb_down(adapter);
4618
4619	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4620		 netdev->mtu, new_mtu);
4621	netdev->mtu = new_mtu;
4622
4623	if (netif_running(netdev))
4624		igb_up(adapter);
4625	else
4626		igb_reset(adapter);
4627
4628	clear_bit(__IGB_RESETTING, &adapter->state);
4629
4630	return 0;
4631}
4632
4633/**
4634 * igb_update_stats - Update the board statistics counters
4635 * @adapter: board private structure
4636 **/
4637
4638void igb_update_stats(struct igb_adapter *adapter,
4639		      struct rtnl_link_stats64 *net_stats)
4640{
4641	struct e1000_hw *hw = &adapter->hw;
4642	struct pci_dev *pdev = adapter->pdev;
4643	u32 reg, mpc;
4644	u16 phy_tmp;
4645	int i;
4646	u64 bytes, packets;
4647	unsigned int start;
4648	u64 _bytes, _packets;
4649
4650#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4651
4652	/*
4653	 * Prevent stats update while adapter is being reset, or if the pci
4654	 * connection is down.
4655	 */
4656	if (adapter->link_speed == 0)
4657		return;
4658	if (pci_channel_offline(pdev))
4659		return;
4660
4661	bytes = 0;
4662	packets = 0;
4663	for (i = 0; i < adapter->num_rx_queues; i++) {
4664		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4665		struct igb_ring *ring = adapter->rx_ring[i];
4666
4667		ring->rx_stats.drops += rqdpc_tmp;
4668		net_stats->rx_fifo_errors += rqdpc_tmp;
4669
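		/* u64_stats_fetch_begin_bh/retry guard against torn reads of
		 * the 64-bit ring counters on 32-bit systems while the rings
		 * are concurrently updating them.
		 */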
4670		do {
4671			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4672			_bytes = ring->rx_stats.bytes;
4673			_packets = ring->rx_stats.packets;
4674		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4675		bytes += _bytes;
4676		packets += _packets;
4677	}
4678
4679	net_stats->rx_bytes = bytes;
4680	net_stats->rx_packets = packets;
4681
4682	bytes = 0;
4683	packets = 0;
4684	for (i = 0; i < adapter->num_tx_queues; i++) {
4685		struct igb_ring *ring = adapter->tx_ring[i];
4686		do {
4687			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4688			_bytes = ring->tx_stats.bytes;
4689			_packets = ring->tx_stats.packets;
4690		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4691		bytes += _bytes;
4692		packets += _packets;
4693	}
4694	net_stats->tx_bytes = bytes;
4695	net_stats->tx_packets = packets;
4696
4697	/* read stats registers */
4698	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4699	adapter->stats.gprc += rd32(E1000_GPRC);
4700	adapter->stats.gorc += rd32(E1000_GORCL);
4701	rd32(E1000_GORCH); /* clear GORCL */
4702	adapter->stats.bprc += rd32(E1000_BPRC);
4703	adapter->stats.mprc += rd32(E1000_MPRC);
4704	adapter->stats.roc += rd32(E1000_ROC);
4705
4706	adapter->stats.prc64 += rd32(E1000_PRC64);
4707	adapter->stats.prc127 += rd32(E1000_PRC127);
4708	adapter->stats.prc255 += rd32(E1000_PRC255);
4709	adapter->stats.prc511 += rd32(E1000_PRC511);
4710	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4711	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4712	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4713	adapter->stats.sec += rd32(E1000_SEC);
4714
4715	mpc = rd32(E1000_MPC);
4716	adapter->stats.mpc += mpc;
4717	net_stats->rx_fifo_errors += mpc;
4718	adapter->stats.scc += rd32(E1000_SCC);
4719	adapter->stats.ecol += rd32(E1000_ECOL);
4720	adapter->stats.mcc += rd32(E1000_MCC);
4721	adapter->stats.latecol += rd32(E1000_LATECOL);
4722	adapter->stats.dc += rd32(E1000_DC);
4723	adapter->stats.rlec += rd32(E1000_RLEC);
4724	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4725	adapter->stats.xontxc += rd32(E1000_XONTXC);
4726	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4727	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4728	adapter->stats.fcruc += rd32(E1000_FCRUC);
4729	adapter->stats.gptc += rd32(E1000_GPTC);
4730	adapter->stats.gotc += rd32(E1000_GOTCL);
4731	rd32(E1000_GOTCH); /* clear GOTCL */
4732	adapter->stats.rnbc += rd32(E1000_RNBC);
4733	adapter->stats.ruc += rd32(E1000_RUC);
4734	adapter->stats.rfc += rd32(E1000_RFC);
4735	adapter->stats.rjc += rd32(E1000_RJC);
4736	adapter->stats.tor += rd32(E1000_TORH);
4737	adapter->stats.tot += rd32(E1000_TOTH);
4738	adapter->stats.tpr += rd32(E1000_TPR);
4739
4740	adapter->stats.ptc64 += rd32(E1000_PTC64);
4741	adapter->stats.ptc127 += rd32(E1000_PTC127);
4742	adapter->stats.ptc255 += rd32(E1000_PTC255);
4743	adapter->stats.ptc511 += rd32(E1000_PTC511);
4744	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4745	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4746
4747	adapter->stats.mptc += rd32(E1000_MPTC);
4748	adapter->stats.bptc += rd32(E1000_BPTC);
4749
4750	adapter->stats.tpt += rd32(E1000_TPT);
4751	adapter->stats.colc += rd32(E1000_COLC);
4752
4753	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4754	/* read internal phy specific stats */
4755	reg = rd32(E1000_CTRL_EXT);
4756	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4757		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4758		adapter->stats.tncrs += rd32(E1000_TNCRS);
4759	}
4760
4761	adapter->stats.tsctc += rd32(E1000_TSCTC);
4762	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4763
4764	adapter->stats.iac += rd32(E1000_IAC);
4765	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4766	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4767	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4768	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4769	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4770	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4771	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4772	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4773
4774	/* Fill out the OS statistics structure */
4775	net_stats->multicast = adapter->stats.mprc;
4776	net_stats->collisions = adapter->stats.colc;
4777
4778	/* Rx Errors */
4779
4780	/* RLEC on some newer hardware can be incorrect so build
4781	 * our own version based on RUC and ROC */
4782	net_stats->rx_errors = adapter->stats.rxerrc +
4783		adapter->stats.crcerrs + adapter->stats.algnerrc +
4784		adapter->stats.ruc + adapter->stats.roc +
4785		adapter->stats.cexterr;
4786	net_stats->rx_length_errors = adapter->stats.ruc +
4787				      adapter->stats.roc;
4788	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4789	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4790	net_stats->rx_missed_errors = adapter->stats.mpc;
4791
4792	/* Tx Errors */
4793	net_stats->tx_errors = adapter->stats.ecol +
4794			       adapter->stats.latecol;
4795	net_stats->tx_aborted_errors = adapter->stats.ecol;
4796	net_stats->tx_window_errors = adapter->stats.latecol;
4797	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4798
4799	/* Tx Dropped needs to be maintained elsewhere */
4800
4801	/* Phy Stats */
4802	if (hw->phy.media_type == e1000_media_type_copper) {
4803		if ((adapter->link_speed == SPEED_1000) &&
4804		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4805			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4806			adapter->phy_stats.idle_errors += phy_tmp;
4807		}
4808	}
4809
4810	/* Management Stats */
4811	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4812	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4813	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4814
4815	/* OS2BMC Stats */
4816	reg = rd32(E1000_MANC);
4817	if (reg & E1000_MANC_EN_BMC2OS) {
4818		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4819		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4820		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4821		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4822	}
4823}
4824
4825static irqreturn_t igb_msix_other(int irq, void *data)
4826{
4827	struct igb_adapter *adapter = data;
4828	struct e1000_hw *hw = &adapter->hw;
4829	u32 icr = rd32(E1000_ICR);
4830	/* reading ICR causes bit 31 of EICR to be cleared */
4831
4832	if (icr & E1000_ICR_DRSTA)
4833		schedule_work(&adapter->reset_task);
4834
4835	if (icr & E1000_ICR_DOUTSYNC) {
4836		/* HW is reporting DMA is out of sync */
4837		adapter->stats.doosync++;
4838		/* The DMA Out of Sync is also an indication of a spoof event
4839		 * in IOV mode. Check the Wrong VM Behavior register to
4840		 * see if it is really a spoof event. */
4841		igb_check_wvbr(adapter);
4842	}
4843
4844	/* Check for a mailbox event */
4845	if (icr & E1000_ICR_VMMB)
4846		igb_msg_task(adapter);
4847
4848	if (icr & E1000_ICR_LSC) {
4849		hw->mac.get_link_status = 1;
4850		/* guard against interrupt when we're going down */
4851		if (!test_bit(__IGB_DOWN, &adapter->state))
4852			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4853	}
4854
4855	wr32(E1000_EIMS, adapter->eims_other);
4856
4857	return IRQ_HANDLED;
4858}
4859
4860static void igb_write_itr(struct igb_q_vector *q_vector)
4861{
4862	struct igb_adapter *adapter = q_vector->adapter;
4863	u32 itr_val = q_vector->itr_val & 0x7FFC;
4864
4865	if (!q_vector->set_itr)
4866		return;
4867
4868	if (!itr_val)
4869		itr_val = 0x4;
4870
4871	if (adapter->hw.mac.type == e1000_82575)
4872		itr_val |= itr_val << 16;
4873	else
4874		itr_val |= E1000_EITR_CNT_IGNR;
4875
4876	writel(itr_val, q_vector->itr_register);
4877	q_vector->set_itr = 0;
4878}
4879
4880static irqreturn_t igb_msix_ring(int irq, void *data)
4881{
4882	struct igb_q_vector *q_vector = data;
4883
4884	/* Write the ITR value calculated from the previous interrupt. */
4885	igb_write_itr(q_vector);
4886
4887	napi_schedule(&q_vector->napi);
4888
4889	return IRQ_HANDLED;
4890}
4891
4892#ifdef CONFIG_IGB_DCA
4893static void igb_update_dca(struct igb_q_vector *q_vector)
4894{
4895	struct igb_adapter *adapter = q_vector->adapter;
4896	struct e1000_hw *hw = &adapter->hw;
4897	int cpu = get_cpu();
4898
4899	if (q_vector->cpu == cpu)
4900		goto out_no_update;
4901
4902	if (q_vector->tx.ring) {
4903		int q = q_vector->tx.ring->reg_idx;
4904		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4905		if (hw->mac.type == e1000_82575) {
4906			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4907			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4908		} else {
4909			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4910			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4911			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4912		}
4913		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4914		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4915	}
4916	if (q_vector->rx.ring) {
4917		int q = q_vector->rx.ring->reg_idx;
4918		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4919		if (hw->mac.type == e1000_82575) {
4920			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4921			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4922		} else {
4923			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4924			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4925			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4926		}
4927		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4928		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4929		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4930		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4931	}
4932	q_vector->cpu = cpu;
4933out_no_update:
4934	put_cpu();
4935}
4936
4937static void igb_setup_dca(struct igb_adapter *adapter)
4938{
4939	struct e1000_hw *hw = &adapter->hw;
4940	int i;
4941
4942	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4943		return;
4944
4945	/* Always use CB2 mode, difference is masked in the CB driver. */
4946	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4947
4948	for (i = 0; i < adapter->num_q_vectors; i++) {
4949		adapter->q_vector[i]->cpu = -1;
4950		igb_update_dca(adapter->q_vector[i]);
4951	}
4952}
4953
4954static int __igb_notify_dca(struct device *dev, void *data)
4955{
4956	struct net_device *netdev = dev_get_drvdata(dev);
4957	struct igb_adapter *adapter = netdev_priv(netdev);
4958	struct pci_dev *pdev = adapter->pdev;
4959	struct e1000_hw *hw = &adapter->hw;
4960	unsigned long event = *(unsigned long *)data;
4961
4962	switch (event) {
4963	case DCA_PROVIDER_ADD:
4964		/* if already enabled, don't do it again */
4965		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4966			break;
4967		if (dca_add_requester(dev) == 0) {
4968			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4969			dev_info(&pdev->dev, "DCA enabled\n");
4970			igb_setup_dca(adapter);
4971			break;
4972		}
4973		/* Fall Through since DCA is disabled. */
4974	case DCA_PROVIDER_REMOVE:
4975		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4976			/* without this a class_device is left
4977			 * hanging around in the sysfs model */
4978			dca_remove_requester(dev);
4979			dev_info(&pdev->dev, "DCA disabled\n");
4980			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4981			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4982		}
4983		break;
4984	}
4985
4986	return 0;
4987}
4988
4989static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4990                          void *p)
4991{
4992	int ret_val;
4993
4994	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4995	                                 __igb_notify_dca);
4996
4997	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4998}
4999#endif /* CONFIG_IGB_DCA */
5000
5001#ifdef CONFIG_PCI_IOV
5002static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5003{
5004	unsigned char mac_addr[ETH_ALEN];
5005	struct pci_dev *pdev = adapter->pdev;
5006	struct e1000_hw *hw = &adapter->hw;
5007	struct pci_dev *pvfdev;
5008	unsigned int device_id;
5009	u16 thisvf_devfn;
5010
5011	eth_random_addr(mac_addr);
5012	igb_set_vf_mac(adapter, vf, mac_addr);
5013
5014	switch (adapter->hw.mac.type) {
5015	case e1000_82576:
5016		device_id = IGB_82576_VF_DEV_ID;
5017		/* VF Stride for 82576 is 2 */
5018		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5019			(pdev->devfn & 1);
5020		break;
5021	case e1000_i350:
5022		device_id = IGB_I350_VF_DEV_ID;
5023		/* VF Stride for I350 is 4 */
5024		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5025				(pdev->devfn & 3);
5026		break;
5027	default:
5028		device_id = 0;
5029		thisvf_devfn = 0;
5030		break;
5031	}
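	/* Example, assuming the PF is at devfn 0: an 82576 (stride 2) puts
	 * VF 0 at devfn 0x80 and VF 1 at devfn 0x82, while an i350 (stride 4)
	 * puts VF 1 at devfn 0x84.
	 */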
5032
5033	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5034	while (pvfdev) {
5035		if (pvfdev->devfn == thisvf_devfn)
5036			break;
5037		pvfdev = pci_get_device(hw->vendor_id,
5038					device_id, pvfdev);
5039	}
5040
5041	if (pvfdev)
5042		adapter->vf_data[vf].vfdev = pvfdev;
5043	else
5044		dev_err(&pdev->dev,
5045			"Couldn't find pci dev ptr for VF %4.4x\n",
5046			thisvf_devfn);
5047	return pvfdev != NULL;
5048}
5049
5050static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5051{
5052	struct e1000_hw *hw = &adapter->hw;
5053	struct pci_dev *pdev = adapter->pdev;
5054	struct pci_dev *pvfdev;
5055	u16 vf_devfn = 0;
5056	u16 vf_stride;
5057	unsigned int device_id;
5058	int vfs_found = 0;
5059
5060	switch (adapter->hw.mac.type) {
5061	case e1000_82576:
5062		device_id = IGB_82576_VF_DEV_ID;
5063		/* VF Stride for 82576 is 2 */
5064		vf_stride = 2;
5065		break;
5066	case e1000_i350:
5067		device_id = IGB_I350_VF_DEV_ID;
5068		/* VF Stride for I350 is 4 */
5069		vf_stride = 4;
5070		break;
5071	default:
5072		device_id = 0;
5073		vf_stride = 0;
5074		break;
5075	}
5076
5077	vf_devfn = pdev->devfn + 0x80;
5078	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5079	while (pvfdev) {
5080		if (pvfdev->devfn == vf_devfn &&
5081		    (pvfdev->bus->number >= pdev->bus->number))
5082			vfs_found++;
5083		vf_devfn += vf_stride;
5084		pvfdev = pci_get_device(hw->vendor_id,
5085					device_id, pvfdev);
5086	}
5087
5088	return vfs_found;
5089}
5090
5091static int igb_check_vf_assignment(struct igb_adapter *adapter)
5092{
5093	int i;
5094	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5095		if (adapter->vf_data[i].vfdev) {
5096			if (adapter->vf_data[i].vfdev->dev_flags &
5097			    PCI_DEV_FLAGS_ASSIGNED)
5098				return true;
5099		}
5100	}
5101	return false;
5102}
5103
5104#endif
5105static void igb_ping_all_vfs(struct igb_adapter *adapter)
5106{
5107	struct e1000_hw *hw = &adapter->hw;
5108	u32 ping;
5109	int i;
5110
5111	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5112		ping = E1000_PF_CONTROL_MSG;
5113		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5114			ping |= E1000_VT_MSGTYPE_CTS;
5115		igb_write_mbx(hw, &ping, 1, i);
5116	}
5117}
5118
5119static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5120{
5121	struct e1000_hw *hw = &adapter->hw;
5122	u32 vmolr = rd32(E1000_VMOLR(vf));
5123	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5124
5125	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5126	                    IGB_VF_FLAG_MULTI_PROMISC);
5127	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5128
5129	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5130		vmolr |= E1000_VMOLR_MPME;
5131		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5132		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5133	} else {
5134		/*
5135		 * If we have hashes and we are clearing the multicast promisc
5136		 * flag, we need to write the hashes to the MTA, as this step
5137		 * was previously skipped
5138		 */
5139		if (vf_data->num_vf_mc_hashes > 30) {
5140			vmolr |= E1000_VMOLR_MPME;
5141		} else if (vf_data->num_vf_mc_hashes) {
5142			int j;
5143			vmolr |= E1000_VMOLR_ROMPE;
5144			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5145				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5146		}
5147	}
5148
5149	wr32(E1000_VMOLR(vf), vmolr);
5150
5151	/* there are flags left unprocessed, likely not supported */
5152	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5153		return -EINVAL;
5154
5155	return 0;
5156
5157}
5158
5159static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5160				  u32 *msgbuf, u32 vf)
5161{
5162	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5163	u16 *hash_list = (u16 *)&msgbuf[1];
5164	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5165	int i;
5166
5167	/* salt away the number of multicast addresses assigned
5168	 * to this VF for later use to restore when the PF multicast
5169	 * list changes
5170	 */
5171	vf_data->num_vf_mc_hashes = n;
5172
5173	/* only up to 30 hash values supported */
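	/* The VF mailbox buffer is 16 dwords; msgbuf[0] carries the command
	 * word, leaving 15 dwords, i.e. 30 16-bit hash entries.
	 */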
5174	if (n > 30)
5175		n = 30;
5176
5177	/* store the hashes for later use */
5178	for (i = 0; i < n; i++)
5179		vf_data->vf_mc_hashes[i] = hash_list[i];
5180
5181	/* Flush and reset the mta with the new values */
5182	igb_set_rx_mode(adapter->netdev);
5183
5184	return 0;
5185}
5186
5187static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5188{
5189	struct e1000_hw *hw = &adapter->hw;
5190	struct vf_data_storage *vf_data;
5191	int i, j;
5192
5193	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5194		u32 vmolr = rd32(E1000_VMOLR(i));
5195		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5196
5197		vf_data = &adapter->vf_data[i];
5198
5199		if ((vf_data->num_vf_mc_hashes > 30) ||
5200		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5201			vmolr |= E1000_VMOLR_MPME;
5202		} else if (vf_data->num_vf_mc_hashes) {
5203			vmolr |= E1000_VMOLR_ROMPE;
5204			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5205				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5206		}
5207		wr32(E1000_VMOLR(i), vmolr);
5208	}
5209}
5210
5211static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5212{
5213	struct e1000_hw *hw = &adapter->hw;
5214	u32 pool_mask, reg, vid;
5215	int i;
5216
5217	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5218
5219	/* Find the vlan filter for this id */
5220	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5221		reg = rd32(E1000_VLVF(i));
5222
5223		/* remove the vf from the pool */
5224		reg &= ~pool_mask;
5225
5226		/* if pool is empty then remove entry from vfta */
5227		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5228		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5229			vid = reg & E1000_VLVF_VLANID_MASK;
5230			reg = 0;
5231			igb_vfta_set(hw, vid, false);
5232		}
5233
5234		wr32(E1000_VLVF(i), reg);
5235	}
5236
5237	adapter->vf_data[vf].vlans_enabled = 0;
5238}
5239
5240static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5241{
5242	struct e1000_hw *hw = &adapter->hw;
5243	u32 reg, i;
5244
5245	/* The vlvf table only exists on 82576 hardware and newer */
5246	if (hw->mac.type < e1000_82576)
5247		return -1;
5248
5249	/* we only need to do this if VMDq is enabled */
5250	if (!adapter->vfs_allocated_count)
5251		return -1;
5252
5253	/* Find the vlan filter for this id */
5254	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5255		reg = rd32(E1000_VLVF(i));
5256		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5257		    vid == (reg & E1000_VLVF_VLANID_MASK))
5258			break;
5259	}
5260
5261	if (add) {
5262		if (i == E1000_VLVF_ARRAY_SIZE) {
5263			/* Did not find a matching VLAN ID entry that was
5264			 * enabled.  Search for a free filter entry, i.e.
5265			 * one without the enable bit set
5266			 */
5267			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5268				reg = rd32(E1000_VLVF(i));
5269				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5270					break;
5271			}
5272		}
5273		if (i < E1000_VLVF_ARRAY_SIZE) {
5274			/* Found an enabled/available entry */
5275			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5276
5277			/* if !enabled we need to set this up in vfta */
5278			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5279				/* add VID to filter table */
5280				igb_vfta_set(hw, vid, true);
5281				reg |= E1000_VLVF_VLANID_ENABLE;
5282			}
5283			reg &= ~E1000_VLVF_VLANID_MASK;
5284			reg |= vid;
5285			wr32(E1000_VLVF(i), reg);
5286
5287			/* do not modify RLPML for PF devices */
5288			if (vf >= adapter->vfs_allocated_count)
5289				return 0;
5290
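			/* The first VLAN enabled for this VF grows its max rx
			 * packet length (VMOLR.RLPML) by 4 bytes to leave room
			 * for the VLAN tag; the removal path below shrinks it
			 * back.
			 */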
5291			if (!adapter->vf_data[vf].vlans_enabled) {
5292				u32 size;
5293				reg = rd32(E1000_VMOLR(vf));
5294				size = reg & E1000_VMOLR_RLPML_MASK;
5295				size += 4;
5296				reg &= ~E1000_VMOLR_RLPML_MASK;
5297				reg |= size;
5298				wr32(E1000_VMOLR(vf), reg);
5299			}
5300
5301			adapter->vf_data[vf].vlans_enabled++;
5302		}
5303	} else {
5304		if (i < E1000_VLVF_ARRAY_SIZE) {
5305			/* remove vf from the pool */
5306			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5307			/* if pool is empty then remove entry from vfta */
5308			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5309				reg = 0;
5310				igb_vfta_set(hw, vid, false);
5311			}
5312			wr32(E1000_VLVF(i), reg);
5313
5314			/* do not modify RLPML for PF devices */
5315			if (vf >= adapter->vfs_allocated_count)
5316				return 0;
5317
5318			adapter->vf_data[vf].vlans_enabled--;
5319			if (!adapter->vf_data[vf].vlans_enabled) {
5320				u32 size;
5321				reg = rd32(E1000_VMOLR(vf));
5322				size = reg & E1000_VMOLR_RLPML_MASK;
5323				size -= 4;
5324				reg &= ~E1000_VMOLR_RLPML_MASK;
5325				reg |= size;
5326				wr32(E1000_VMOLR(vf), reg);
5327			}
5328		}
5329	}
5330	return 0;
5331}
5332
5333static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5334{
5335	struct e1000_hw *hw = &adapter->hw;
5336
5337	if (vid)
5338		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5339	else
5340		wr32(E1000_VMVIR(vf), 0);
5341}
5342
5343static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5344			       int vf, u16 vlan, u8 qos)
5345{
5346	int err = 0;
5347	struct igb_adapter *adapter = netdev_priv(netdev);
5348
5349	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5350		return -EINVAL;
5351	if (vlan || qos) {
5352		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5353		if (err)
5354			goto out;
5355		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5356		igb_set_vmolr(adapter, vf, !vlan);
5357		adapter->vf_data[vf].pf_vlan = vlan;
5358		adapter->vf_data[vf].pf_qos = qos;
5359		dev_info(&adapter->pdev->dev,
5360			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5361		if (test_bit(__IGB_DOWN, &adapter->state)) {
5362			dev_warn(&adapter->pdev->dev,
5363				 "The VF VLAN has been set,"
5364				 " but the PF device is not up.\n");
5365			dev_warn(&adapter->pdev->dev,
5366				 "Bring the PF device up before"
5367				 " attempting to use the VF device.\n");
5368		}
5369	} else {
5370		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5371				   false, vf);
5372		igb_set_vmvir(adapter, vlan, vf);
5373		igb_set_vmolr(adapter, vf, true);
5374		adapter->vf_data[vf].pf_vlan = 0;
5375		adapter->vf_data[vf].pf_qos = 0;
5376	}
5377out:
5378	return err;
5379}
5380
5381static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5382{
5383	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5384	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5385
5386	return igb_vlvf_set(adapter, vid, add, vf);
5387}
5388
5389static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5390{
5391	/* clear flags - except flag that indicates PF has set the MAC */
5392	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5393	adapter->vf_data[vf].last_nack = jiffies;
5394
5395	/* reset offloads to defaults */
5396	igb_set_vmolr(adapter, vf, true);
5397
5398	/* reset vlans for device */
5399	igb_clear_vf_vfta(adapter, vf);
5400	if (adapter->vf_data[vf].pf_vlan)
5401		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5402				    adapter->vf_data[vf].pf_vlan,
5403				    adapter->vf_data[vf].pf_qos);
5404	else
5405		igb_clear_vf_vfta(adapter, vf);
5406
5407	/* reset multicast table array for vf */
5408	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5409
5410	/* Flush and reset the mta with the new values */
5411	igb_set_rx_mode(adapter->netdev);
5412}
5413
5414static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5415{
5416	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5417
5418	/* generate a new mac address as we were hotplug removed/added */
5419	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5420		eth_random_addr(vf_mac);
5421
5422	/* process remaining reset events */
5423	igb_vf_reset(adapter, vf);
5424}
5425
5426static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5427{
5428	struct e1000_hw *hw = &adapter->hw;
5429	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5430	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5431	u32 reg, msgbuf[3];
5432	u8 *addr = (u8 *)(&msgbuf[1]);
5433
5434	/* process all the same items cleared in a function level reset */
5435	igb_vf_reset(adapter, vf);
5436
5437	/* set vf mac address */
5438	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5439
5440	/* enable transmit and receive for vf */
5441	reg = rd32(E1000_VFTE);
5442	wr32(E1000_VFTE, reg | (1 << vf));
5443	reg = rd32(E1000_VFRE);
5444	wr32(E1000_VFRE, reg | (1 << vf));
5445
5446	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5447
5448	/* reply to reset with ack and vf mac address */
5449	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5450	memcpy(addr, vf_mac, ETH_ALEN);
5451	igb_write_mbx(hw, msgbuf, 3, vf);
5452}
5453
5454static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5455{
5456	/*
5457	 * The VF MAC Address is stored in a packed array of bytes
5458	 * starting at the second 32 bit word of the msg array
5459	 */
5460	unsigned char *addr = (unsigned char *)&msg[1];
5461	int err = -1;
5462
5463	if (is_valid_ether_addr(addr))
5464		err = igb_set_vf_mac(adapter, vf, addr);
5465
5466	return err;
5467}
5468
5469static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5470{
5471	struct e1000_hw *hw = &adapter->hw;
5472	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5473	u32 msg = E1000_VT_MSGTYPE_NACK;
5474
5475	/* if device isn't clear to send it shouldn't be reading either */
5476	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5477	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5478		igb_write_mbx(hw, &msg, 1, vf);
5479		vf_data->last_nack = jiffies;
5480	}
5481}
5482
5483static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5484{
5485	struct pci_dev *pdev = adapter->pdev;
5486	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5487	struct e1000_hw *hw = &adapter->hw;
5488	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5489	s32 retval;
5490
5491	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5492
5493	if (retval) {
5494		/* if receive failed, revoke VF CTS status and restart init */
5495		dev_err(&pdev->dev, "Error receiving message from VF\n");
5496		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5497		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5498			return;
5499		goto out;
5500	}
5501
5502	/* this is a message we already processed, do nothing */
5503	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5504		return;
5505
5506	/*
5507	 * until the vf completes a reset it should not be
5508	 * allowed to start any configuration.
5509	 */
5510
5511	if (msgbuf[0] == E1000_VF_RESET) {
5512		igb_vf_reset_msg(adapter, vf);
5513		return;
5514	}
5515
5516	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5517		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5518			return;
5519		retval = -1;
5520		goto out;
5521	}
5522
5523	switch ((msgbuf[0] & 0xFFFF)) {
5524	case E1000_VF_SET_MAC_ADDR:
5525		retval = -EINVAL;
5526		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5527			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5528		else
5529			dev_warn(&pdev->dev,
5530				 "VF %d attempted to override administratively "
5531				 "set MAC address\nReload the VF driver to "
5532				 "resume operations\n", vf);
5533		break;
5534	case E1000_VF_SET_PROMISC:
5535		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5536		break;
5537	case E1000_VF_SET_MULTICAST:
5538		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5539		break;
5540	case E1000_VF_SET_LPE:
5541		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5542		break;
5543	case E1000_VF_SET_VLAN:
5544		retval = -1;
5545		if (vf_data->pf_vlan)
5546			dev_warn(&pdev->dev,
5547				 "VF %d attempted to override administratively "
5548				 "set VLAN tag\nReload the VF driver to "
5549				 "resume operations\n", vf);
5550		else
5551			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5552		break;
5553	default:
5554		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5555		retval = -1;
5556		break;
5557	}
5558
5559	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5560out:
5561	/* notify the VF of the results of what it sent us */
5562	if (retval)
5563		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5564	else
5565		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5566
5567	igb_write_mbx(hw, msgbuf, 1, vf);
5568}
5569
5570static void igb_msg_task(struct igb_adapter *adapter)
5571{
5572	struct e1000_hw *hw = &adapter->hw;
5573	u32 vf;
5574
5575	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5576		/* process any reset requests */
5577		if (!igb_check_for_rst(hw, vf))
5578			igb_vf_reset_event(adapter, vf);
5579
5580		/* process any messages pending */
5581		if (!igb_check_for_msg(hw, vf))
5582			igb_rcv_msg_from_vf(adapter, vf);
5583
5584		/* process any acks */
5585		if (!igb_check_for_ack(hw, vf))
5586			igb_rcv_ack_from_vf(adapter, vf);
5587	}
5588}
5589
5590/**
5591 *  igb_set_uta - Set unicast filter table address
5592 *  @adapter: board private structure
5593 *
5594 *  The unicast table address is a register array of 32-bit registers.
5595 *  The table is meant to be used in a way similar to how the MTA is used;
5596 *  however, due to certain limitations in the hardware it is necessary to
5597 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5598 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
5599 **/
5600static void igb_set_uta(struct igb_adapter *adapter)
5601{
5602	struct e1000_hw *hw = &adapter->hw;
5603	int i;
5604
5605	/* The UTA table only exists on 82576 hardware and newer */
5606	if (hw->mac.type < e1000_82576)
5607		return;
5608
5609	/* we only need to do this if VMDq is enabled */
5610	if (!adapter->vfs_allocated_count)
5611		return;
5612
5613	for (i = 0; i < hw->mac.uta_reg_count; i++)
5614		array_wr32(E1000_UTA, i, ~0);
5615}
5616
5617/**
5618 * igb_intr_msi - Interrupt Handler
5619 * @irq: interrupt number
5620 * @data: pointer to a network interface device structure
5621 **/
5622static irqreturn_t igb_intr_msi(int irq, void *data)
5623{
5624	struct igb_adapter *adapter = data;
5625	struct igb_q_vector *q_vector = adapter->q_vector[0];
5626	struct e1000_hw *hw = &adapter->hw;
5627	/* read ICR disables interrupts using IAM */
5628	u32 icr = rd32(E1000_ICR);
5629
5630	igb_write_itr(q_vector);
5631
5632	if (icr & E1000_ICR_DRSTA)
5633		schedule_work(&adapter->reset_task);
5634
5635	if (icr & E1000_ICR_DOUTSYNC) {
5636		/* HW is reporting DMA is out of sync */
5637		adapter->stats.doosync++;
5638	}
5639
5640	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5641		hw->mac.get_link_status = 1;
5642		if (!test_bit(__IGB_DOWN, &adapter->state))
5643			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5644	}
5645
5646	napi_schedule(&q_vector->napi);
5647
5648	return IRQ_HANDLED;
5649}
5650
5651/**
5652 * igb_intr - Legacy Interrupt Handler
5653 * @irq: interrupt number
5654 * @data: pointer to a network interface device structure
5655 **/
5656static irqreturn_t igb_intr(int irq, void *data)
5657{
5658	struct igb_adapter *adapter = data;
5659	struct igb_q_vector *q_vector = adapter->q_vector[0];
5660	struct e1000_hw *hw = &adapter->hw;
5661	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5662	 * need for the IMC write */
5663	u32 icr = rd32(E1000_ICR);
5664
5665	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5666	 * not set, then the adapter didn't send an interrupt */
5667	if (!(icr & E1000_ICR_INT_ASSERTED))
5668		return IRQ_NONE;
5669
5670	igb_write_itr(q_vector);
5671
5672	if (icr & E1000_ICR_DRSTA)
5673		schedule_work(&adapter->reset_task);
5674
5675	if (icr & E1000_ICR_DOUTSYNC) {
5676		/* HW is reporting DMA is out of sync */
5677		adapter->stats.doosync++;
5678	}
5679
5680	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5681		hw->mac.get_link_status = 1;
5682		/* guard against interrupt when we're going down */
5683		if (!test_bit(__IGB_DOWN, &adapter->state))
5684			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5685	}
5686
5687	napi_schedule(&q_vector->napi);
5688
5689	return IRQ_HANDLED;
5690}
5691
5692static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5693{
5694	struct igb_adapter *adapter = q_vector->adapter;
5695	struct e1000_hw *hw = &adapter->hw;
5696
5697	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5698	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5699		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5700			igb_set_itr(q_vector);
5701		else
5702			igb_update_ring_itr(q_vector);
5703	}
5704
5705	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5706		if (adapter->msix_entries)
5707			wr32(E1000_EIMS, q_vector->eims_value);
5708		else
5709			igb_irq_enable(adapter);
5710	}
5711}
5712
5713/**
5714 * igb_poll - NAPI Rx polling callback
5715 * @napi: napi polling structure
5716 * @budget: count of how many packets we should handle
5717 **/
5718static int igb_poll(struct napi_struct *napi, int budget)
5719{
5720	struct igb_q_vector *q_vector = container_of(napi,
5721	                                             struct igb_q_vector,
5722	                                             napi);
5723	bool clean_complete = true;
5724
5725#ifdef CONFIG_IGB_DCA
5726	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5727		igb_update_dca(q_vector);
5728#endif
5729	if (q_vector->tx.ring)
5730		clean_complete = igb_clean_tx_irq(q_vector);
5731
5732	if (q_vector->rx.ring)
5733		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5734
5735	/* If all work not completed, return budget and keep polling */
5736	if (!clean_complete)
5737		return budget;
5738
5739	/* If not enough Rx work done, exit the polling mode */
5740	napi_complete(napi);
5741	igb_ring_irq_enable(q_vector);
5742
5743	return 0;
5744}
5745
5746#ifdef CONFIG_IGB_PTP
5747/**
5748 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5749 * @q_vector: pointer to q_vector containing needed info
5750 * @buffer_info: pointer to igb_tx_buffer structure
5751 *
5752 * If we were asked to do hardware stamping and such a time stamp is
5753 * available, then it must have been for this skb here because we only
5754 * allow one such packet into the queue.
5755 */
5756static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5757			    struct igb_tx_buffer *buffer_info)
5758{
5759	struct igb_adapter *adapter = q_vector->adapter;
5760	struct e1000_hw *hw = &adapter->hw;
5761	struct skb_shared_hwtstamps shhwtstamps;
5762	u64 regval;
5763
5764	/* if skb does not support hw timestamp or TX stamp not valid exit */
5765	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5766	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5767		return;
5768
5769	regval = rd32(E1000_TXSTMPL);
5770	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5771
5772	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5773	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5774}
5775
5776#endif
5777/**
5778 * igb_clean_tx_irq - Reclaim resources after transmit completes
5779 * @q_vector: pointer to q_vector containing needed info
5780 *
5781 * returns true if ring is completely cleaned
5782 **/
5783static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5784{
5785	struct igb_adapter *adapter = q_vector->adapter;
5786	struct igb_ring *tx_ring = q_vector->tx.ring;
5787	struct igb_tx_buffer *tx_buffer;
5788	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5789	unsigned int total_bytes = 0, total_packets = 0;
5790	unsigned int budget = q_vector->tx.work_limit;
5791	unsigned int i = tx_ring->next_to_clean;
5792
5793	if (test_bit(__IGB_DOWN, &adapter->state))
5794		return true;
5795
5796	tx_buffer = &tx_ring->tx_buffer_info[i];
5797	tx_desc = IGB_TX_DESC(tx_ring, i);
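	/* offset i by -count so the ring wrap check below is simply (!i) */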
5798	i -= tx_ring->count;
5799
5800	for (; budget; budget--) {
5801		eop_desc = tx_buffer->next_to_watch;
5802
5803		/* prevent any other reads prior to eop_desc */
5804		rmb();
5805
5806		/* if next_to_watch is not set then there is no work pending */
5807		if (!eop_desc)
5808			break;
5809
5810		/* if DD is not set pending work has not been completed */
5811		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5812			break;
5813
5814		/* clear next_to_watch to prevent false hangs */
5815		tx_buffer->next_to_watch = NULL;
5816
5817		/* update the statistics for this packet */
5818		total_bytes += tx_buffer->bytecount;
5819		total_packets += tx_buffer->gso_segs;
5820
5821#ifdef CONFIG_IGB_PTP
5822		/* retrieve hardware timestamp */
5823		igb_tx_hwtstamp(q_vector, tx_buffer);
5824
5825#endif
5826		/* free the skb */
5827		dev_kfree_skb_any(tx_buffer->skb);
5828		tx_buffer->skb = NULL;
5829
5830		/* unmap skb header data */
5831		dma_unmap_single(tx_ring->dev,
5832				 tx_buffer->dma,
5833				 tx_buffer->length,
5834				 DMA_TO_DEVICE);
5835
5836		/* clear last DMA location and unmap remaining buffers */
5837		while (tx_desc != eop_desc) {
5838			tx_buffer->dma = 0;
5839
5840			tx_buffer++;
5841			tx_desc++;
5842			i++;
5843			if (unlikely(!i)) {
5844				i -= tx_ring->count;
5845				tx_buffer = tx_ring->tx_buffer_info;
5846				tx_desc = IGB_TX_DESC(tx_ring, 0);
5847			}
5848
5849			/* unmap any remaining paged data */
5850			if (tx_buffer->dma) {
5851				dma_unmap_page(tx_ring->dev,
5852					       tx_buffer->dma,
5853					       tx_buffer->length,
5854					       DMA_TO_DEVICE);
5855			}
5856		}
5857
5858		/* clear last DMA location */
5859		tx_buffer->dma = 0;
5860
5861		/* move us one more past the eop_desc for start of next pkt */
5862		tx_buffer++;
5863		tx_desc++;
5864		i++;
5865		if (unlikely(!i)) {
5866			i -= tx_ring->count;
5867			tx_buffer = tx_ring->tx_buffer_info;
5868			tx_desc = IGB_TX_DESC(tx_ring, 0);
5869		}
5870	}
5871
5872	netdev_tx_completed_queue(txring_txq(tx_ring),
5873				  total_packets, total_bytes);
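	/* undo the -count offset applied above to get the true ring index */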
5874	i += tx_ring->count;
5875	tx_ring->next_to_clean = i;
5876	u64_stats_update_begin(&tx_ring->tx_syncp);
5877	tx_ring->tx_stats.bytes += total_bytes;
5878	tx_ring->tx_stats.packets += total_packets;
5879	u64_stats_update_end(&tx_ring->tx_syncp);
5880	q_vector->tx.total_bytes += total_bytes;
5881	q_vector->tx.total_packets += total_packets;
5882
5883	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5884		struct e1000_hw *hw = &adapter->hw;
5885
5886		eop_desc = tx_buffer->next_to_watch;
5887
5888		/* Detect a transmit hang in hardware; this serializes the
5889		 * check with the clearing of time_stamp and movement of i */
5890		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5891		if (eop_desc &&
5892		    time_after(jiffies, tx_buffer->time_stamp +
5893			       (adapter->tx_timeout_factor * HZ)) &&
5894		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5895
5896			/* detected Tx unit hang */
5897			dev_err(tx_ring->dev,
5898				"Detected Tx Unit Hang\n"
5899				"  Tx Queue             <%d>\n"
5900				"  TDH                  <%x>\n"
5901				"  TDT                  <%x>\n"
5902				"  next_to_use          <%x>\n"
5903				"  next_to_clean        <%x>\n"
5904				"buffer_info[next_to_clean]\n"
5905				"  time_stamp           <%lx>\n"
5906				"  next_to_watch        <%p>\n"
5907				"  jiffies              <%lx>\n"
5908				"  desc.status          <%x>\n",
5909				tx_ring->queue_index,
5910				rd32(E1000_TDH(tx_ring->reg_idx)),
5911				readl(tx_ring->tail),
5912				tx_ring->next_to_use,
5913				tx_ring->next_to_clean,
5914				tx_buffer->time_stamp,
5915				eop_desc,
5916				jiffies,
5917				eop_desc->wb.status);
5918			netif_stop_subqueue(tx_ring->netdev,
5919					    tx_ring->queue_index);
5920
5921			/* we are about to reset, no point in enabling stuff */
5922			return true;
5923		}
5924	}
5925
5926	if (unlikely(total_packets &&
5927		     netif_carrier_ok(tx_ring->netdev) &&
5928		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5929		/* Make sure that anybody stopping the queue after this
5930		 * sees the new next_to_clean.
5931		 */
5932		smp_mb();
5933		if (__netif_subqueue_stopped(tx_ring->netdev,
5934					     tx_ring->queue_index) &&
5935		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5936			netif_wake_subqueue(tx_ring->netdev,
5937					    tx_ring->queue_index);
5938
5939			u64_stats_update_begin(&tx_ring->tx_syncp);
5940			tx_ring->tx_stats.restart_queue++;
5941			u64_stats_update_end(&tx_ring->tx_syncp);
5942		}
5943	}
5944
5945	return !!budget;
5946}
5947
5948static inline void igb_rx_checksum(struct igb_ring *ring,
5949				   union e1000_adv_rx_desc *rx_desc,
5950				   struct sk_buff *skb)
5951{
5952	skb_checksum_none_assert(skb);
5953
5954	/* Ignore Checksum bit is set */
5955	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5956		return;
5957
5958	/* Rx checksum disabled via ethtool */
5959	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5960		return;
5961
5962	/* TCP/UDP checksum error bit is set */
5963	if (igb_test_staterr(rx_desc,
5964			     E1000_RXDEXT_STATERR_TCPE |
5965			     E1000_RXDEXT_STATERR_IPE)) {
5966		/*
5967		 * work around an erratum with SCTP packets where the TCPE (aka
5968		 * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5969		 * packets; let the stack verify the CRC32c instead
5970		 */
5971		if (!((skb->len == 60) &&
5972		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5973			u64_stats_update_begin(&ring->rx_syncp);
5974			ring->rx_stats.csum_err++;
5975			u64_stats_update_end(&ring->rx_syncp);
5976		}
5977		/* let the stack verify checksum errors */
5978		return;
5979	}
5980	/* It must be a TCP or UDP packet with a valid checksum */
5981	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5982				      E1000_RXD_STAT_UDPCS))
5983		skb->ip_summed = CHECKSUM_UNNECESSARY;
5984
5985	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5986		le32_to_cpu(rx_desc->wb.upper.status_error));
5987}
5988
5989static inline void igb_rx_hash(struct igb_ring *ring,
5990			       union e1000_adv_rx_desc *rx_desc,
5991			       struct sk_buff *skb)
5992{
5993	if (ring->netdev->features & NETIF_F_RXHASH)
5994		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5995}
5996
5997#ifdef CONFIG_IGB_PTP
5998static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5999			    union e1000_adv_rx_desc *rx_desc,
6000			    struct sk_buff *skb)
6001{
6002	struct igb_adapter *adapter = q_vector->adapter;
6003	struct e1000_hw *hw = &adapter->hw;
6004	u64 regval;
6005
6006	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6007				       E1000_RXDADV_STAT_TS))
6008		return;
6009
6010	/*
6011	 * If this bit is set, then the RX registers contain the time stamp. No
6012	 * other packet will be time stamped until we read these registers, so
6013	 * read the registers to make them available again. Because only one
6014	 * packet can be time stamped at a time, we know that the register
6015	 * values must belong to this one here and therefore we don't need to
6016	 * compare any of the additional attributes stored for it.
6017	 *
6018	 * If nothing went wrong, then it should have a shared tx_flags that we
6019	 * can turn into a skb_shared_hwtstamps.
6020	 */
6021	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6022		u32 *stamp = (u32 *)skb->data;
6023		regval = le32_to_cpu(*(stamp + 2));
6024		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6025		skb_pull(skb, IGB_TS_HDR_LEN);
6026	} else {
6027		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6028			return;
6029
6030		regval = rd32(E1000_RXSTMPL);
6031		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6032	}
6033
6034	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6035}
6036
6037#endif
6038static void igb_rx_vlan(struct igb_ring *ring,
6039			union e1000_adv_rx_desc *rx_desc,
6040			struct sk_buff *skb)
6041{
6042	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6043		u16 vid;
6044		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6045		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6046			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6047		else
6048			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6049
6050		__vlan_hwaccel_put_tag(skb, vid);
6051	}
6052}
6053
6054static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6055{
6056	/* HW will not DMA in data larger than the given buffer, even if it
6057	 * parses the (NFS, of course) header to be larger.  In that case, it
6058	 * fills the header buffer and spills the rest into the page.
6059	 */
6060	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6061	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6062	if (hlen > IGB_RX_HDR_LEN)
6063		hlen = IGB_RX_HDR_LEN;
6064	return hlen;
6065}
6066
6067static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6068{
6069	struct igb_ring *rx_ring = q_vector->rx.ring;
6070	union e1000_adv_rx_desc *rx_desc;
6071	const int current_node = numa_node_id();
6072	unsigned int total_bytes = 0, total_packets = 0;
6073	u16 cleaned_count = igb_desc_unused(rx_ring);
6074	u16 i = rx_ring->next_to_clean;
6075
6076	rx_desc = IGB_RX_DESC(rx_ring, i);
6077
6078	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6079		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6080		struct sk_buff *skb = buffer_info->skb;
6081		union e1000_adv_rx_desc *next_rxd;
6082
6083		buffer_info->skb = NULL;
6084		prefetch(skb->data);
6085
6086		i++;
6087		if (i == rx_ring->count)
6088			i = 0;
6089
6090		next_rxd = IGB_RX_DESC(rx_ring, i);
6091		prefetch(next_rxd);
6092
6093		/*
6094		 * This memory barrier is needed to keep us from reading
6095		 * any other fields out of the rx_desc until we know the
6096		 * RXD_STAT_DD bit is set
6097		 */
6098		rmb();
6099
6100		if (!skb_is_nonlinear(skb)) {
6101			__skb_put(skb, igb_get_hlen(rx_desc));
6102			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6103					 IGB_RX_HDR_LEN,
6104					 DMA_FROM_DEVICE);
6105			buffer_info->dma = 0;
6106		}
6107
6108		if (rx_desc->wb.upper.length) {
6109			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6110
6111			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6112						buffer_info->page,
6113						buffer_info->page_offset,
6114						length);
6115
6116			skb->len += length;
6117			skb->data_len += length;
6118			skb->truesize += PAGE_SIZE / 2;
6119
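			/*
			 * reuse the page only if we are its sole owner and
			 * it is local to this NUMA node
			 */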
6120			if ((page_count(buffer_info->page) != 1) ||
6121			    (page_to_nid(buffer_info->page) != current_node))
6122				buffer_info->page = NULL;
6123			else
6124				get_page(buffer_info->page);
6125
6126			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6127				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6128			buffer_info->page_dma = 0;
6129		}
6130
6131		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6132			struct igb_rx_buffer *next_buffer;
6133			next_buffer = &rx_ring->rx_buffer_info[i];
6134			buffer_info->skb = next_buffer->skb;
6135			buffer_info->dma = next_buffer->dma;
6136			next_buffer->skb = skb;
6137			next_buffer->dma = 0;
6138			goto next_desc;
6139		}
6140
6141		if (unlikely((igb_test_staterr(rx_desc,
6142					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6143			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6144			dev_kfree_skb_any(skb);
6145			goto next_desc;
6146		}
6147
6148#ifdef CONFIG_IGB_PTP
6149		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6150#endif
6151		igb_rx_hash(rx_ring, rx_desc, skb);
6152		igb_rx_checksum(rx_ring, rx_desc, skb);
6153		igb_rx_vlan(rx_ring, rx_desc, skb);
6154
6155		total_bytes += skb->len;
6156		total_packets++;
6157
6158		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6159
6160		napi_gro_receive(&q_vector->napi, skb);
6161
6162		budget--;
6163next_desc:
6164		if (!budget)
6165			break;
6166
6167		cleaned_count++;
6168		/* return some buffers to hardware, one at a time is too slow */
6169		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6170			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6171			cleaned_count = 0;
6172		}
6173
6174		/* use prefetched values */
6175		rx_desc = next_rxd;
6176	}
6177
6178	rx_ring->next_to_clean = i;
6179	u64_stats_update_begin(&rx_ring->rx_syncp);
6180	rx_ring->rx_stats.packets += total_packets;
6181	rx_ring->rx_stats.bytes += total_bytes;
6182	u64_stats_update_end(&rx_ring->rx_syncp);
6183	q_vector->rx.total_packets += total_packets;
6184	q_vector->rx.total_bytes += total_bytes;
6185
6186	if (cleaned_count)
6187		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6188
6189	return !!budget;
6190}
6191
6192static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6193				 struct igb_rx_buffer *bi)
6194{
6195	struct sk_buff *skb = bi->skb;
6196	dma_addr_t dma = bi->dma;
6197
6198	if (dma)
6199		return true;
6200
6201	if (likely(!skb)) {
6202		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6203						IGB_RX_HDR_LEN);
6204		bi->skb = skb;
6205		if (!skb) {
6206			rx_ring->rx_stats.alloc_failed++;
6207			return false;
6208		}
6209
6210		/* initialize skb for ring */
6211		skb_record_rx_queue(skb, rx_ring->queue_index);
6212	}
6213
6214	dma = dma_map_single(rx_ring->dev, skb->data,
6215			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6216
6217	if (dma_mapping_error(rx_ring->dev, dma)) {
6218		rx_ring->rx_stats.alloc_failed++;
6219		return false;
6220	}
6221
6222	bi->dma = dma;
6223	return true;
6224}
6225
6226static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6227				  struct igb_rx_buffer *bi)
6228{
6229	struct page *page = bi->page;
6230	dma_addr_t page_dma = bi->page_dma;
6231	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
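	/*
	 * the XOR above alternates between the two half-page offsets so
	 * both halves of the page get used as Rx buffers
	 */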
6232
6233	if (page_dma)
6234		return true;
6235
6236	if (!page) {
6237		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6238		bi->page = page;
6239		if (unlikely(!page)) {
6240			rx_ring->rx_stats.alloc_failed++;
6241			return false;
6242		}
6243	}
6244
6245	page_dma = dma_map_page(rx_ring->dev, page,
6246				page_offset, PAGE_SIZE / 2,
6247				DMA_FROM_DEVICE);
6248
6249	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6250		rx_ring->rx_stats.alloc_failed++;
6251		return false;
6252	}
6253
6254	bi->page_dma = page_dma;
6255	bi->page_offset = page_offset;
6256	return true;
6257}
6258
6259/**
6260 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6261 * @rx_ring: Rx descriptor ring to refill
 * @cleaned_count: number of descriptors to refresh
6262 **/
6263void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6264{
6265	union e1000_adv_rx_desc *rx_desc;
6266	struct igb_rx_buffer *bi;
6267	u16 i = rx_ring->next_to_use;
6268
6269	rx_desc = IGB_RX_DESC(rx_ring, i);
6270	bi = &rx_ring->rx_buffer_info[i];
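	/* offset i by -count so the ring wrap check below is simply (!i) */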
6271	i -= rx_ring->count;
6272
6273	while (cleaned_count--) {
6274		if (!igb_alloc_mapped_skb(rx_ring, bi))
6275			break;
6276
6277		/* Refresh the desc even if buffer_addrs didn't change
6278		 * because each write-back erases this info. */
6279		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6280
6281		if (!igb_alloc_mapped_page(rx_ring, bi))
6282			break;
6283
6284		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6285
6286		rx_desc++;
6287		bi++;
6288		i++;
6289		if (unlikely(!i)) {
6290			rx_desc = IGB_RX_DESC(rx_ring, 0);
6291			bi = rx_ring->rx_buffer_info;
6292			i -= rx_ring->count;
6293		}
6294
6295		/* clear the hdr_addr for the next_to_use descriptor */
6296		rx_desc->read.hdr_addr = 0;
6297	}
6298
6299	i += rx_ring->count;
6300
6301	if (rx_ring->next_to_use != i) {
6302		rx_ring->next_to_use = i;
6303
6304		/* Force memory writes to complete before letting h/w
6305		 * know there are new descriptors to fetch.  (Only
6306		 * applicable for weak-ordered memory model archs,
6307		 * such as IA-64). */
6308		wmb();
6309		writel(i, rx_ring->tail);
6310	}
6311}
6312
6313/**
6314 * igb_mii_ioctl - handle MII ioctl requests
6315 * @netdev: network interface device structure
6316 * @ifr: interface request containing the MII register data
6317 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6318 **/
6319static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6320{
6321	struct igb_adapter *adapter = netdev_priv(netdev);
6322	struct mii_ioctl_data *data = if_mii(ifr);
6323
6324	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6325		return -EOPNOTSUPP;
6326
6327	switch (cmd) {
6328	case SIOCGMIIPHY:
6329		data->phy_id = adapter->hw.phy.addr;
6330		break;
6331	case SIOCGMIIREG:
6332		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6333		                     &data->val_out))
6334			return -EIO;
6335		break;
6336	case SIOCSMIIREG:
6337	default:
6338		return -EOPNOTSUPP;
6339	}
6340	return 0;
6341}
6342
6343/**
6344 * igb_hwtstamp_ioctl - control hardware time stamping
6345 * @netdev: network interface device structure
6346 * @ifr: interface request containing the hwtstamp_config
6347 * @cmd: ioctl command
6348 *
6349 * Outgoing time stamping can be enabled and disabled. Play nice and
6350 * disable it when requested, although it shouldn't cause any overhead
6351 * when no packet needs it. At most one packet in the queue may be
6352 * marked for time stamping, otherwise it would be impossible to tell
6353 * for sure to which packet the hardware time stamp belongs.
6354 *
6355 * Incoming time stamping has to be configured via the hardware
6356 * filters. Not all combinations are supported, in particular event
6357 * type has to be specified. Matching the kind of event packet is
6358 * not supported, with the exception of "all V2 events regardless of
6359 * layer 2 or 4".
6360 *
6361 **/
6362static int igb_hwtstamp_ioctl(struct net_device *netdev,
6363			      struct ifreq *ifr, int cmd)
6364{
6365	struct igb_adapter *adapter = netdev_priv(netdev);
6366	struct e1000_hw *hw = &adapter->hw;
6367	struct hwtstamp_config config;
6368	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6369	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6370	u32 tsync_rx_cfg = 0;
6371	bool is_l4 = false;
6372	bool is_l2 = false;
6373	u32 regval;
6374
6375	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6376		return -EFAULT;
6377
6378	/* reserved for future extensions */
6379	if (config.flags)
6380		return -EINVAL;
6381
6382	switch (config.tx_type) {
6383	case HWTSTAMP_TX_OFF:
6384		tsync_tx_ctl = 0;
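		/* fall through */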
6385	case HWTSTAMP_TX_ON:
6386		break;
6387	default:
6388		return -ERANGE;
6389	}
6390
6391	switch (config.rx_filter) {
6392	case HWTSTAMP_FILTER_NONE:
6393		tsync_rx_ctl = 0;
6394		break;
6395	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6396	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6397	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6398	case HWTSTAMP_FILTER_ALL:
6399		/*
6400		 * register TSYNCRXCFG must be set, therefore it is not
6401		 * possible to time stamp both Sync and Delay_Req messages
6402		 * => fall back to time stamping all packets
6403		 */
6404		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6405		config.rx_filter = HWTSTAMP_FILTER_ALL;
6406		break;
6407	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6408		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6409		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6410		is_l4 = true;
6411		break;
6412	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6413		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6414		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6415		is_l4 = true;
6416		break;
6417	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6418	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6419		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6420		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6421		is_l2 = true;
6422		is_l4 = true;
6423		config.rx_filter = HWTSTAMP_FILTER_SOME;
6424		break;
6425	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6426	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6427		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6428		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6429		is_l2 = true;
6430		is_l4 = true;
6431		config.rx_filter = HWTSTAMP_FILTER_SOME;
6432		break;
6433	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6434	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6435	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6436		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6437		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6438		is_l2 = true;
6439		is_l4 = true;
6440		break;
6441	default:
6442		return -ERANGE;
6443	}
6444
6445	if (hw->mac.type == e1000_82575) {
6446		if (tsync_rx_ctl | tsync_tx_ctl)
6447			return -EINVAL;
6448		return 0;
6449	}
6450
6451	/*
6452	 * Per-packet timestamping only works if all packets are
6453	 * timestamped, so enable timestamping in all packets as
6454	 * long as one rx filter was configured.
6455	 */
6456	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6457		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6458		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6459	}
6460
6461	/* enable/disable TX */
6462	regval = rd32(E1000_TSYNCTXCTL);
6463	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6464	regval |= tsync_tx_ctl;
6465	wr32(E1000_TSYNCTXCTL, regval);
6466
6467	/* enable/disable RX */
6468	regval = rd32(E1000_TSYNCRXCTL);
6469	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6470	regval |= tsync_rx_ctl;
6471	wr32(E1000_TSYNCRXCTL, regval);
6472
6473	/* define which PTP packets are time stamped */
6474	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6475
6476	/* define ethertype filter for timestamped packets */
6477	if (is_l2)
6478		wr32(E1000_ETQF(3),
6479		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6480		                 E1000_ETQF_1588 | /* enable timestamping */
6481		                 ETH_P_1588));     /* 1588 eth protocol type */
6482	else
6483		wr32(E1000_ETQF(3), 0);
6484
6485#define PTP_PORT 319
6486	/* L4 Queue Filter[3]: filter by destination port and protocol */
6487	if (is_l4) {
6488		u32 ftqf = (IPPROTO_UDP /* UDP */
6489			| E1000_FTQF_VF_BP /* VF not compared */
6490			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6491			| E1000_FTQF_MASK); /* mask all inputs */
6492		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6493
6494		wr32(E1000_IMIR(3), htons(PTP_PORT));
6495		wr32(E1000_IMIREXT(3),
6496		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6497		if (hw->mac.type == e1000_82576) {
6498			/* enable source port check */
6499			wr32(E1000_SPQF(3), htons(PTP_PORT));
6500			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6501		}
6502		wr32(E1000_FTQF(3), ftqf);
6503	} else {
6504		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6505	}
6506	wrfl();
6507
6508	adapter->hwtstamp_config = config;
6509
6510	/* clear TX/RX time stamp registers, just to be sure */
6511	regval = rd32(E1000_TXSTMPH);
6512	regval = rd32(E1000_RXSTMPH);
6513
6514	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6515		-EFAULT : 0;
6516}
6517
6518/**
6519 * igb_ioctl - dispatch device-specific ioctl requests
6520 * @netdev: network interface device structure
6521 * @ifr: interface request
6522 * @cmd: ioctl command
6523 **/
6524static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6525{
6526	switch (cmd) {
6527	case SIOCGMIIPHY:
6528	case SIOCGMIIREG:
6529	case SIOCSMIIREG:
6530		return igb_mii_ioctl(netdev, ifr, cmd);
6531	case SIOCSHWTSTAMP:
6532		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6533	default:
6534		return -EOPNOTSUPP;
6535	}
6536}
6537
6538s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6539{
6540	struct igb_adapter *adapter = hw->back;
6541	u16 cap_offset;
6542
6543	cap_offset = adapter->pdev->pcie_cap;
6544	if (!cap_offset)
6545		return -E1000_ERR_CONFIG;
6546
6547	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6548
6549	return 0;
6550}
6551
6552s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6553{
6554	struct igb_adapter *adapter = hw->back;
6555	u16 cap_offset;
6556
6557	cap_offset = adapter->pdev->pcie_cap;
6558	if (!cap_offset)
6559		return -E1000_ERR_CONFIG;
6560
6561	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6562
6563	return 0;
6564}
6565
6566static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6567{
6568	struct igb_adapter *adapter = netdev_priv(netdev);
6569	struct e1000_hw *hw = &adapter->hw;
6570	u32 ctrl, rctl;
6571	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6572
6573	if (enable) {
6574		/* enable VLAN tag insert/strip */
6575		ctrl = rd32(E1000_CTRL);
6576		ctrl |= E1000_CTRL_VME;
6577		wr32(E1000_CTRL, ctrl);
6578
6579		/* Disable CFI check */
6580		rctl = rd32(E1000_RCTL);
6581		rctl &= ~E1000_RCTL_CFIEN;
6582		wr32(E1000_RCTL, rctl);
6583	} else {
6584		/* disable VLAN tag insert/strip */
6585		ctrl = rd32(E1000_CTRL);
6586		ctrl &= ~E1000_CTRL_VME;
6587		wr32(E1000_CTRL, ctrl);
6588	}
6589
6590	igb_rlpml_set(adapter);
6591}
6592
6593static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6594{
6595	struct igb_adapter *adapter = netdev_priv(netdev);
6596	struct e1000_hw *hw = &adapter->hw;
6597	int pf_id = adapter->vfs_allocated_count;
6598
6599	/* attempt to add filter to vlvf array */
6600	igb_vlvf_set(adapter, vid, true, pf_id);
6601
6602	/* add the filter since PF can receive vlans w/o entry in vlvf */
6603	igb_vfta_set(hw, vid, true);
6604
6605	set_bit(vid, adapter->active_vlans);
6606
6607	return 0;
6608}
6609
6610static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6611{
6612	struct igb_adapter *adapter = netdev_priv(netdev);
6613	struct e1000_hw *hw = &adapter->hw;
6614	int pf_id = adapter->vfs_allocated_count;
6615	s32 err;
6616
6617	/* remove vlan from VLVF table array */
6618	err = igb_vlvf_set(adapter, vid, false, pf_id);
6619
6620	/* if vid was not present in VLVF just remove it from table */
6621	if (err)
6622		igb_vfta_set(hw, vid, false);
6623
6624	clear_bit(vid, adapter->active_vlans);
6625
6626	return 0;
6627}
6628
6629static void igb_restore_vlan(struct igb_adapter *adapter)
6630{
6631	u16 vid;
6632
6633	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6634
6635	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6636		igb_vlan_rx_add_vid(adapter->netdev, vid);
6637}
6638
6639int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6640{
6641	struct pci_dev *pdev = adapter->pdev;
6642	struct e1000_mac_info *mac = &adapter->hw.mac;
6643
6644	mac->autoneg = 0;
6645
6646	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6647	 * for the switch() below to work */
6648	if ((spd & 1) || (dplx & ~1))
6649		goto err_inval;
6650
6651	/* Fiber NICs only allow 1000 Mbps full duplex */
6652	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6653	    (spd != SPEED_1000 ||
6654	     dplx != DUPLEX_FULL))
6655		goto err_inval;
6656
6657	switch (spd + dplx) {
6658	case SPEED_10 + DUPLEX_HALF:
6659		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6660		break;
6661	case SPEED_10 + DUPLEX_FULL:
6662		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6663		break;
6664	case SPEED_100 + DUPLEX_HALF:
6665		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6666		break;
6667	case SPEED_100 + DUPLEX_FULL:
6668		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6669		break;
6670	case SPEED_1000 + DUPLEX_FULL:
6671		mac->autoneg = 1;
6672		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6673		break;
6674	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6675	default:
6676		goto err_inval;
6677	}
6678	return 0;
6679
6680err_inval:
6681	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6682	return -EINVAL;
6683}
6684
6685static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6686			  bool runtime)
6687{
6688	struct net_device *netdev = pci_get_drvdata(pdev);
6689	struct igb_adapter *adapter = netdev_priv(netdev);
6690	struct e1000_hw *hw = &adapter->hw;
6691	u32 ctrl, rctl, status;
6692	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6693#ifdef CONFIG_PM
6694	int retval = 0;
6695#endif
6696
6697	netif_device_detach(netdev);
6698
6699	if (netif_running(netdev))
6700		__igb_close(netdev, true);
6701
6702	igb_clear_interrupt_scheme(adapter);
6703
6704#ifdef CONFIG_PM
6705	retval = pci_save_state(pdev);
6706	if (retval)
6707		return retval;
6708#endif
6709
6710	status = rd32(E1000_STATUS);
6711	if (status & E1000_STATUS_LU)
6712		wufc &= ~E1000_WUFC_LNKC;
6713
6714	if (wufc) {
6715		igb_setup_rctl(adapter);
6716		igb_set_rx_mode(netdev);
6717
6718		/* turn on all-multi mode if wake on multicast is enabled */
6719		if (wufc & E1000_WUFC_MC) {
6720			rctl = rd32(E1000_RCTL);
6721			rctl |= E1000_RCTL_MPE;
6722			wr32(E1000_RCTL, rctl);
6723		}
6724
6725		ctrl = rd32(E1000_CTRL);
6726		/* advertise wake from D3Cold */
6727		#define E1000_CTRL_ADVD3WUC 0x00100000
6728		/* phy power management enable */
6729		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6730		ctrl |= E1000_CTRL_ADVD3WUC;
6731		wr32(E1000_CTRL, ctrl);
6732
6733		/* Allow time for pending master requests to run */
6734		igb_disable_pcie_master(hw);
6735
6736		wr32(E1000_WUC, E1000_WUC_PME_EN);
6737		wr32(E1000_WUFC, wufc);
6738	} else {
6739		wr32(E1000_WUC, 0);
6740		wr32(E1000_WUFC, 0);
6741	}
6742
6743	*enable_wake = wufc || adapter->en_mng_pt;
6744	if (!*enable_wake)
6745		igb_power_down_link(adapter);
6746	else
6747		igb_power_up_link(adapter);
6748
6749	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6750	 * would have already happened in close and is redundant. */
6751	igb_release_hw_control(adapter);
6752
6753	pci_disable_device(pdev);
6754
6755	return 0;
6756}
6757
6758#ifdef CONFIG_PM
6759#ifdef CONFIG_PM_SLEEP
6760static int igb_suspend(struct device *dev)
6761{
6762	int retval;
6763	bool wake;
6764	struct pci_dev *pdev = to_pci_dev(dev);
6765
6766	retval = __igb_shutdown(pdev, &wake, 0);
6767	if (retval)
6768		return retval;
6769
6770	if (wake) {
6771		pci_prepare_to_sleep(pdev);
6772	} else {
6773		pci_wake_from_d3(pdev, false);
6774		pci_set_power_state(pdev, PCI_D3hot);
6775	}
6776
6777	return 0;
6778}
6779#endif /* CONFIG_PM_SLEEP */
6780
6781static int igb_resume(struct device *dev)
6782{
6783	struct pci_dev *pdev = to_pci_dev(dev);
6784	struct net_device *netdev = pci_get_drvdata(pdev);
6785	struct igb_adapter *adapter = netdev_priv(netdev);
6786	struct e1000_hw *hw = &adapter->hw;
6787	u32 err;
6788
6789	pci_set_power_state(pdev, PCI_D0);
6790	pci_restore_state(pdev);
6791	pci_save_state(pdev);
6792
6793	err = pci_enable_device_mem(pdev);
6794	if (err) {
6795		dev_err(&pdev->dev,
6796			"igb: Cannot enable PCI device from suspend\n");
6797		return err;
6798	}
6799	pci_set_master(pdev);
6800
6801	pci_enable_wake(pdev, PCI_D3hot, 0);
6802	pci_enable_wake(pdev, PCI_D3cold, 0);
6803
6804	if (igb_init_interrupt_scheme(adapter)) {
6805		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6806		return -ENOMEM;
6807	}
6808
6809	igb_reset(adapter);
6810
6811	/* let the f/w know that the h/w is now under the control of the
6812	 * driver. */
6813	igb_get_hw_control(adapter);
6814
6815	wr32(E1000_WUS, ~0);
6816
6817	if (netdev->flags & IFF_UP) {
6818		err = __igb_open(netdev, true);
6819		if (err)
6820			return err;
6821	}
6822
6823	netif_device_attach(netdev);
6824	return 0;
6825}
6826
6827#ifdef CONFIG_PM_RUNTIME
6828static int igb_runtime_idle(struct device *dev)
6829{
6830	struct pci_dev *pdev = to_pci_dev(dev);
6831	struct net_device *netdev = pci_get_drvdata(pdev);
6832	struct igb_adapter *adapter = netdev_priv(netdev);
6833
6834	if (!igb_has_link(adapter))
6835		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6836
6837	return -EBUSY;
6838}
6839
6840static int igb_runtime_suspend(struct device *dev)
6841{
6842	struct pci_dev *pdev = to_pci_dev(dev);
6843	int retval;
6844	bool wake;
6845
6846	retval = __igb_shutdown(pdev, &wake, 1);
6847	if (retval)
6848		return retval;
6849
6850	if (wake) {
6851		pci_prepare_to_sleep(pdev);
6852	} else {
6853		pci_wake_from_d3(pdev, false);
6854		pci_set_power_state(pdev, PCI_D3hot);
6855	}
6856
6857	return 0;
6858}
6859
6860static int igb_runtime_resume(struct device *dev)
6861{
6862	return igb_resume(dev);
6863}
6864#endif /* CONFIG_PM_RUNTIME */
6865#endif
6866
6867static void igb_shutdown(struct pci_dev *pdev)
6868{
6869	bool wake;
6870
6871	__igb_shutdown(pdev, &wake, 0);
6872
6873	if (system_state == SYSTEM_POWER_OFF) {
6874		pci_wake_from_d3(pdev, wake);
6875		pci_set_power_state(pdev, PCI_D3hot);
6876	}
6877}
6878
6879#ifdef CONFIG_NET_POLL_CONTROLLER
6880/*
6881 * Polling 'interrupt' - used by things like netconsole to send skbs
6882 * without having to re-enable interrupts. It's not called while
6883 * the interrupt routine is executing.
6884 */
6885static void igb_netpoll(struct net_device *netdev)
6886{
6887	struct igb_adapter *adapter = netdev_priv(netdev);
6888	struct e1000_hw *hw = &adapter->hw;
6889	struct igb_q_vector *q_vector;
6890	int i;
6891
6892	for (i = 0; i < adapter->num_q_vectors; i++) {
6893		q_vector = adapter->q_vector[i];
6894		if (adapter->msix_entries)
6895			wr32(E1000_EIMC, q_vector->eims_value);
6896		else
6897			igb_irq_disable(adapter);
6898		napi_schedule(&q_vector->napi);
6899	}
6900}
6901#endif /* CONFIG_NET_POLL_CONTROLLER */
6902
6903/**
6904 * igb_io_error_detected - called when PCI error is detected
6905 * @pdev: Pointer to PCI device
6906 * @state: The current pci connection state
6907 *
6908 * This function is called after a PCI bus error affecting
6909 * this device has been detected.
6910 */
6911static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6912					      pci_channel_state_t state)
6913{
6914	struct net_device *netdev = pci_get_drvdata(pdev);
6915	struct igb_adapter *adapter = netdev_priv(netdev);
6916
6917	netif_device_detach(netdev);
6918
6919	if (state == pci_channel_io_perm_failure)
6920		return PCI_ERS_RESULT_DISCONNECT;
6921
6922	if (netif_running(netdev))
6923		igb_down(adapter);
6924	pci_disable_device(pdev);
6925
6926	/* Request a slot reset. */
6927	return PCI_ERS_RESULT_NEED_RESET;
6928}
6929
6930/**
6931 * igb_io_slot_reset - called after the pci bus has been reset.
6932 * @pdev: Pointer to PCI device
6933 *
6934 * Restart the card from scratch, as if from a cold-boot. Implementation
6935 * resembles the first-half of the igb_resume routine.
6936 */
6937static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6938{
6939	struct net_device *netdev = pci_get_drvdata(pdev);
6940	struct igb_adapter *adapter = netdev_priv(netdev);
6941	struct e1000_hw *hw = &adapter->hw;
6942	pci_ers_result_t result;
6943	int err;
6944
6945	if (pci_enable_device_mem(pdev)) {
6946		dev_err(&pdev->dev,
6947			"Cannot re-enable PCI device after reset.\n");
6948		result = PCI_ERS_RESULT_DISCONNECT;
6949	} else {
6950		pci_set_master(pdev);
6951		pci_restore_state(pdev);
6952		pci_save_state(pdev);
6953
6954		pci_enable_wake(pdev, PCI_D3hot, 0);
6955		pci_enable_wake(pdev, PCI_D3cold, 0);
6956
6957		igb_reset(adapter);
6958		wr32(E1000_WUS, ~0);
6959		result = PCI_ERS_RESULT_RECOVERED;
6960	}
6961
6962	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6963	if (err) {
6964		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6965		        "failed 0x%0x\n", err);
6966		/* non-fatal, continue */
6967	}
6968
6969	return result;
6970}
6971
6972/**
6973 * igb_io_resume - called when traffic can start flowing again.
6974 * @pdev: Pointer to PCI device
6975 *
6976 * This callback is called when the error recovery driver tells us that
6977 * it's OK to resume normal operation. Implementation resembles the
6978 * second-half of the igb_resume routine.
6979 */
6980static void igb_io_resume(struct pci_dev *pdev)
6981{
6982	struct net_device *netdev = pci_get_drvdata(pdev);
6983	struct igb_adapter *adapter = netdev_priv(netdev);
6984
6985	if (netif_running(netdev)) {
6986		if (igb_up(adapter)) {
6987			dev_err(&pdev->dev, "igb_up failed after reset\n");
6988			return;
6989		}
6990	}
6991
6992	netif_device_attach(netdev);
6993
6994	/* let the f/w know that the h/w is now under the control of the
6995	 * driver. */
6996	igb_get_hw_control(adapter);
6997}
6998
6999static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
7000                             u8 qsel)
7001{
7002	u32 rar_low, rar_high;
7003	struct e1000_hw *hw = &adapter->hw;
7004
7005	/* HW expects these in little endian so we reverse the byte order
7006	 * from network order (big endian) to little endian
7007	 */
7008	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7009	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7010	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
7011
7012	/* Indicate to hardware the Address is Valid. */
7013	rar_high |= E1000_RAH_AV;
7014
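	/*
	 * 82575 encodes the target pool as a value in RAH; later MACs
	 * treat qsel as a bit position in a pool select mask
	 */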
7015	if (hw->mac.type == e1000_82575)
7016		rar_high |= E1000_RAH_POOL_1 * qsel;
7017	else
7018		rar_high |= E1000_RAH_POOL_1 << qsel;
7019
7020	wr32(E1000_RAL(index), rar_low);
7021	wrfl();
7022	wr32(E1000_RAH(index), rar_high);
7023	wrfl();
7024}
7025
7026static int igb_set_vf_mac(struct igb_adapter *adapter,
7027                          int vf, unsigned char *mac_addr)
7028{
7029	struct e1000_hw *hw = &adapter->hw;
7030	/* VF MAC addresses start at the end of the receive addresses and move
7031	 * towards the first; as a result a collision should not be possible */
7032	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7033
7034	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7035
7036	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7037
7038	return 0;
7039}
7040
7041static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7042{
7043	struct igb_adapter *adapter = netdev_priv(netdev);
7044	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7045		return -EINVAL;
7046	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7047	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7048	dev_info(&adapter->pdev->dev,
7049		 "Reload the VF driver to make this change effective.\n");
7050	if (test_bit(__IGB_DOWN, &adapter->state)) {
7051		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7052			 " but the PF device is not up.\n");
7053		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7054			 " attempting to use the VF device.\n");
7055	}
7056	return igb_set_vf_mac(adapter, vf, mac);
7057}
7058
7059static int igb_link_mbps(int internal_link_speed)
7060{
7061	switch (internal_link_speed) {
7062	case SPEED_100:
7063		return 100;
7064	case SPEED_1000:
7065		return 1000;
7066	default:
7067		return 0;
7068	}
7069}
7070
7071static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7072				  int link_speed)
7073{
7074	int rf_dec, rf_int;
7075	u32 bcnrc_val;
7076
7077	if (tx_rate != 0) {
7078		/* Calculate the rate factor values to set */
7079		rf_int = link_speed / tx_rate;
7080		rf_dec = (link_speed - (rf_int * tx_rate));
7081		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
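		/*
		 * rf_int.rf_dec is link_speed/tx_rate expressed as a
		 * fixed-point rate factor, with the fraction scaled by
		 * 2^E1000_RTTBCNRC_RF_INT_SHIFT
		 */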
7082
7083		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7084		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7085		               E1000_RTTBCNRC_RF_INT_MASK);
7086		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7087	} else {
7088		bcnrc_val = 0;
7089	}
7090
7091	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7092	/*
7093	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
7094	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
7095	 */
7096	wr32(E1000_RTTBCNRM, 0x14);
7097	wr32(E1000_RTTBCNRC, bcnrc_val);
7098}
7099
7100static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7101{
7102	int actual_link_speed, i;
7103	bool reset_rate = false;
7104
7105	/* VF TX rate limit was not set or not supported */
7106	if ((adapter->vf_rate_link_speed == 0) ||
7107	    (adapter->hw.mac.type != e1000_82576))
7108		return;
7109
7110	actual_link_speed = igb_link_mbps(adapter->link_speed);
7111	if (actual_link_speed != adapter->vf_rate_link_speed) {
7112		reset_rate = true;
7113		adapter->vf_rate_link_speed = 0;
7114		dev_info(&adapter->pdev->dev,
7115		         "Link speed has been changed. VF Transmit "
7116		         "rate is disabled\n");
7117	}
7118
7119	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7120		if (reset_rate)
7121			adapter->vf_data[i].tx_rate = 0;
7122
7123		igb_set_vf_rate_limit(&adapter->hw, i,
7124		                      adapter->vf_data[i].tx_rate,
7125		                      actual_link_speed);
7126	}
7127}
7128
7129static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7130{
7131	struct igb_adapter *adapter = netdev_priv(netdev);
7132	struct e1000_hw *hw = &adapter->hw;
7133	int actual_link_speed;
7134
7135	if (hw->mac.type != e1000_82576)
7136		return -EOPNOTSUPP;
7137
7138	actual_link_speed = igb_link_mbps(adapter->link_speed);
7139	if ((vf >= adapter->vfs_allocated_count) ||
7140	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7141	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7142		return -EINVAL;
7143
7144	adapter->vf_rate_link_speed = actual_link_speed;
7145	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7146	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7147
7148	return 0;
7149}
7150
7151static int igb_ndo_get_vf_config(struct net_device *netdev,
7152				 int vf, struct ifla_vf_info *ivi)
7153{
7154	struct igb_adapter *adapter = netdev_priv(netdev);
7155	if (vf >= adapter->vfs_allocated_count)
7156		return -EINVAL;
7157	ivi->vf = vf;
7158	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7159	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7160	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7161	ivi->qos = adapter->vf_data[vf].pf_qos;
7162	return 0;
7163}
7164
7165static void igb_vmm_control(struct igb_adapter *adapter)
7166{
7167	struct e1000_hw *hw = &adapter->hw;
7168	u32 reg;
7169
7170	switch (hw->mac.type) {
7171	case e1000_82575:
7172	case e1000_i210:
7173	case e1000_i211:
7174	default:
7175		/* replication is not supported for 82575 */
7176		return;
7177	case e1000_82576:
7178		/* notify HW that the MAC is adding vlan tags */
7179		reg = rd32(E1000_DTXCTL);
7180		reg |= E1000_DTXCTL_VLAN_ADDED;
7181		wr32(E1000_DTXCTL, reg);
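		/* fall through */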
7182	case e1000_82580:
7183		/* enable replication vlan tag stripping */
7184		reg = rd32(E1000_RPLOLR);
7185		reg |= E1000_RPLOLR_STRVLAN;
7186		wr32(E1000_RPLOLR, reg);
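		/* fall through */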
7187	case e1000_i350:
7188		/* none of the above registers are supported by i350 */
7189		break;
7190	}
7191
7192	if (adapter->vfs_allocated_count) {
7193		igb_vmdq_set_loopback_pf(hw, true);
7194		igb_vmdq_set_replication_pf(hw, true);
7195		igb_vmdq_set_anti_spoofing_pf(hw, true,
7196						adapter->vfs_allocated_count);
7197	} else {
7198		igb_vmdq_set_loopback_pf(hw, false);
7199		igb_vmdq_set_replication_pf(hw, false);
7200	}
7201}
7202
7203static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7204{
7205	struct e1000_hw *hw = &adapter->hw;
7206	u32 dmac_thr;
7207	u16 hwm;
7208
7209	if (hw->mac.type > e1000_82580) {
7210		if (adapter->flags & IGB_FLAG_DMAC) {
7211			u32 reg;
7212
7213			/* force threshold to 0. */
7214			wr32(E1000_DMCTXTH, 0);
7215
7216			/*
7217			 * DMA Coalescing high water mark needs to be greater
7218			 * than the Rx threshold. Set hwm to PBA - max frame
7219			 * size in 16B units, capping it at PBA - 6KB.
7220			 */
7221			hwm = 64 * pba - adapter->max_frame_size / 16;
7222			if (hwm < 64 * (pba - 6))
7223				hwm = 64 * (pba - 6);
7224			reg = rd32(E1000_FCRTC);
7225			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7226			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7227				& E1000_FCRTC_RTH_COAL_MASK);
7228			wr32(E1000_FCRTC, reg);
7229
7230			/*
7231			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7232			 * frame size, capping it at PBA - 10KB.
7233			 */
7234			dmac_thr = pba - adapter->max_frame_size / 512;
7235			if (dmac_thr < pba - 10)
7236				dmac_thr = pba - 10;
7237			reg = rd32(E1000_DMACR);
7238			reg &= ~E1000_DMACR_DMACTHR_MASK;
7239			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7240				& E1000_DMACR_DMACTHR_MASK);
7241
7242			/* transition to L0s or L1 if available */
7243			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7244
7245			/* watchdog timer = ~1000 usec, in 32 usec intervals */
7246			reg |= (1000 >> 5);
7247
7248			/* Disable BMC-to-OS Watchdog Enable */
7249			reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
7250			wr32(E1000_DMACR, reg);
7251
7252			/*
7253			 * no lower threshold to disable
7254			 * coalescing (smart fifo) - UTRESH=0
7255			 */
7256			wr32(E1000_DMCRTRH, 0);
7257
7258			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7259
7260			wr32(E1000_DMCTLX, reg);
7261
7262			/*
7263			 * free space in tx packet buffer to wake from
7264			 * DMA coal
7265			 */
7266			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7267			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7268
7269			/*
7270			 * make low power state decision controlled
7271			 * by DMA coal
7272			 */
7273			reg = rd32(E1000_PCIEMISC);
7274			reg &= ~E1000_PCIEMISC_LX_DECISION;
7275			wr32(E1000_PCIEMISC, reg);
7276		} /* endif adapter->dmac is not disabled */
7277	} else if (hw->mac.type == e1000_82580) {
7278		u32 reg = rd32(E1000_PCIEMISC);
7279		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7280		wr32(E1000_DMACR, 0);
7281	}
7282}
7283
7284/* igb_main.c */
7285