igb_main.c revision 6e861326b1d78bb439c0724864a6ca83ec23d289
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 3
63#define MIN 2
64#define BUILD 10
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103	/* required last entry */
104	{0, }
105};
106
107MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108
109void igb_reset(struct igb_adapter *);
110static int igb_setup_all_tx_resources(struct igb_adapter *);
111static int igb_setup_all_rx_resources(struct igb_adapter *);
112static void igb_free_all_tx_resources(struct igb_adapter *);
113static void igb_free_all_rx_resources(struct igb_adapter *);
114static void igb_setup_mrqc(struct igb_adapter *);
115static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116static void __devexit igb_remove(struct pci_dev *pdev);
117static void igb_init_hw_timer(struct igb_adapter *adapter);
118static int igb_sw_init(struct igb_adapter *);
119static int igb_open(struct net_device *);
120static int igb_close(struct net_device *);
121static void igb_configure_tx(struct igb_adapter *);
122static void igb_configure_rx(struct igb_adapter *);
123static void igb_clean_all_tx_rings(struct igb_adapter *);
124static void igb_clean_all_rx_rings(struct igb_adapter *);
125static void igb_clean_tx_ring(struct igb_ring *);
126static void igb_clean_rx_ring(struct igb_ring *);
127static void igb_set_rx_mode(struct net_device *);
128static void igb_update_phy_info(unsigned long);
129static void igb_watchdog(unsigned long);
130static void igb_watchdog_task(struct work_struct *);
131static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133						 struct rtnl_link_stats64 *stats);
134static int igb_change_mtu(struct net_device *, int);
135static int igb_set_mac(struct net_device *, void *);
136static void igb_set_uta(struct igb_adapter *adapter);
137static irqreturn_t igb_intr(int irq, void *);
138static irqreturn_t igb_intr_msi(int irq, void *);
139static irqreturn_t igb_msix_other(int irq, void *);
140static irqreturn_t igb_msix_ring(int irq, void *);
141#ifdef CONFIG_IGB_DCA
142static void igb_update_dca(struct igb_q_vector *);
143static void igb_setup_dca(struct igb_adapter *);
144#endif /* CONFIG_IGB_DCA */
145static int igb_poll(struct napi_struct *, int);
146static bool igb_clean_tx_irq(struct igb_q_vector *);
147static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149static void igb_tx_timeout(struct net_device *);
150static void igb_reset_task(struct work_struct *);
151static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152static int igb_vlan_rx_add_vid(struct net_device *, u16);
153static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154static void igb_restore_vlan(struct igb_adapter *);
155static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
156static void igb_ping_all_vfs(struct igb_adapter *);
157static void igb_msg_task(struct igb_adapter *);
158static void igb_vmm_control(struct igb_adapter *);
159static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163			       int vf, u16 vlan, u8 qos);
164static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166				 struct ifla_vf_info *ivi);
167static void igb_check_vf_rate_limit(struct igb_adapter *);
168
169#ifdef CONFIG_PCI_IOV
170static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172static int igb_check_vf_assignment(struct igb_adapter *adapter);
173#endif
174
175#ifdef CONFIG_PM
176static int igb_suspend(struct device *);
177static int igb_resume(struct device *);
178#ifdef CONFIG_PM_RUNTIME
179static int igb_runtime_suspend(struct device *dev);
180static int igb_runtime_resume(struct device *dev);
181static int igb_runtime_idle(struct device *dev);
182#endif
183static const struct dev_pm_ops igb_pm_ops = {
184	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
185	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
186			igb_runtime_idle)
187};
188#endif
189static void igb_shutdown(struct pci_dev *);
190#ifdef CONFIG_IGB_DCA
191static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
192static struct notifier_block dca_notifier = {
193	.notifier_call	= igb_notify_dca,
194	.next		= NULL,
195	.priority	= 0
196};
197#endif
198#ifdef CONFIG_NET_POLL_CONTROLLER
199/* for netdump / net console */
200static void igb_netpoll(struct net_device *);
201#endif
202#ifdef CONFIG_PCI_IOV
203static unsigned int max_vfs = 0;
204module_param(max_vfs, uint, 0);
205MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
206                 "per physical function");
207#endif /* CONFIG_PCI_IOV */
208
209static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
210		     pci_channel_state_t);
211static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
212static void igb_io_resume(struct pci_dev *);
213
214static struct pci_error_handlers igb_err_handler = {
215	.error_detected = igb_io_error_detected,
216	.slot_reset = igb_io_slot_reset,
217	.resume = igb_io_resume,
218};
219
220static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
221
222static struct pci_driver igb_driver = {
223	.name     = igb_driver_name,
224	.id_table = igb_pci_tbl,
225	.probe    = igb_probe,
226	.remove   = __devexit_p(igb_remove),
227#ifdef CONFIG_PM
228	.driver.pm = &igb_pm_ops,
229#endif
230	.shutdown = igb_shutdown,
231	.err_handler = &igb_err_handler
232};
233
234MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
235MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
236MODULE_LICENSE("GPL");
237MODULE_VERSION(DRV_VERSION);
238
239struct igb_reg_info {
240	u32 ofs;
241	char *name;
242};
243
244static const struct igb_reg_info igb_reg_info_tbl[] = {
245
246	/* General Registers */
247	{E1000_CTRL, "CTRL"},
248	{E1000_STATUS, "STATUS"},
249	{E1000_CTRL_EXT, "CTRL_EXT"},
250
251	/* Interrupt Registers */
252	{E1000_ICR, "ICR"},
253
254	/* RX Registers */
255	{E1000_RCTL, "RCTL"},
256	{E1000_RDLEN(0), "RDLEN"},
257	{E1000_RDH(0), "RDH"},
258	{E1000_RDT(0), "RDT"},
259	{E1000_RXDCTL(0), "RXDCTL"},
260	{E1000_RDBAL(0), "RDBAL"},
261	{E1000_RDBAH(0), "RDBAH"},
262
263	/* TX Registers */
264	{E1000_TCTL, "TCTL"},
265	{E1000_TDBAL(0), "TDBAL"},
266	{E1000_TDBAH(0), "TDBAH"},
267	{E1000_TDLEN(0), "TDLEN"},
268	{E1000_TDH(0), "TDH"},
269	{E1000_TDT(0), "TDT"},
270	{E1000_TXDCTL(0), "TXDCTL"},
271	{E1000_TDFH, "TDFH"},
272	{E1000_TDFT, "TDFT"},
273	{E1000_TDFHS, "TDFHS"},
274	{E1000_TDFPC, "TDFPC"},
275
276	/* List Terminator */
277	{}
278};
279
280/*
281 * igb_regdump - register printout routine
282 */
283static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
284{
285	int n = 0;
286	char rname[16];
287	u32 regs[8];
288
289	switch (reginfo->ofs) {
290	case E1000_RDLEN(0):
291		for (n = 0; n < 4; n++)
292			regs[n] = rd32(E1000_RDLEN(n));
293		break;
294	case E1000_RDH(0):
295		for (n = 0; n < 4; n++)
296			regs[n] = rd32(E1000_RDH(n));
297		break;
298	case E1000_RDT(0):
299		for (n = 0; n < 4; n++)
300			regs[n] = rd32(E1000_RDT(n));
301		break;
302	case E1000_RXDCTL(0):
303		for (n = 0; n < 4; n++)
304			regs[n] = rd32(E1000_RXDCTL(n));
305		break;
306	case E1000_RDBAL(0):
307		for (n = 0; n < 4; n++)
308			regs[n] = rd32(E1000_RDBAL(n));
309		break;
310	case E1000_RDBAH(0):
311		for (n = 0; n < 4; n++)
312			regs[n] = rd32(E1000_RDBAH(n));
313		break;
314	case E1000_TDBAL(0):
315		for (n = 0; n < 4; n++)
316			regs[n] = rd32(E1000_RDBAL(n));
317		break;
318	case E1000_TDBAH(0):
319		for (n = 0; n < 4; n++)
320			regs[n] = rd32(E1000_TDBAH(n));
321		break;
322	case E1000_TDLEN(0):
323		for (n = 0; n < 4; n++)
324			regs[n] = rd32(E1000_TDLEN(n));
325		break;
326	case E1000_TDH(0):
327		for (n = 0; n < 4; n++)
328			regs[n] = rd32(E1000_TDH(n));
329		break;
330	case E1000_TDT(0):
331		for (n = 0; n < 4; n++)
332			regs[n] = rd32(E1000_TDT(n));
333		break;
334	case E1000_TXDCTL(0):
335		for (n = 0; n < 4; n++)
336			regs[n] = rd32(E1000_TXDCTL(n));
337		break;
338	default:
339		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
340		return;
341	}
342
343	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
344	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
345		regs[2], regs[3]);
346}
347
348/*
349 * igb_dump - Print registers, tx-rings and rx-rings
350 */
351static void igb_dump(struct igb_adapter *adapter)
352{
353	struct net_device *netdev = adapter->netdev;
354	struct e1000_hw *hw = &adapter->hw;
355	struct igb_reg_info *reginfo;
356	struct igb_ring *tx_ring;
357	union e1000_adv_tx_desc *tx_desc;
358	struct my_u0 { u64 a; u64 b; } *u0;
359	struct igb_ring *rx_ring;
360	union e1000_adv_rx_desc *rx_desc;
361	u32 staterr;
362	u16 i, n;
363
364	if (!netif_msg_hw(adapter))
365		return;
366
367	/* Print netdevice Info */
368	if (netdev) {
369		dev_info(&adapter->pdev->dev, "Net device Info\n");
370		pr_info("Device Name     state            trans_start      "
371			"last_rx\n");
372		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
373			netdev->state, netdev->trans_start, netdev->last_rx);
374	}
375
376	/* Print Registers */
377	dev_info(&adapter->pdev->dev, "Register Dump\n");
378	pr_info(" Register Name   Value\n");
379	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
380	     reginfo->name; reginfo++) {
381		igb_regdump(hw, reginfo);
382	}
383
384	/* Print TX Ring Summary */
385	if (!netdev || !netif_running(netdev))
386		goto exit;
387
388	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
389	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
390	for (n = 0; n < adapter->num_tx_queues; n++) {
391		struct igb_tx_buffer *buffer_info;
392		tx_ring = adapter->tx_ring[n];
393		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
394		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
395			n, tx_ring->next_to_use, tx_ring->next_to_clean,
396			(u64)buffer_info->dma,
397			buffer_info->length,
398			buffer_info->next_to_watch,
399			(u64)buffer_info->time_stamp);
400	}
401
402	/* Print TX Rings */
403	if (!netif_msg_tx_done(adapter))
404		goto rx_ring_summary;
405
406	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
407
408	/* Transmit Descriptor Formats
409	 *
410	 * Advanced Transmit Descriptor
411	 *   +--------------------------------------------------------------+
412	 * 0 |         Buffer Address [63:0]                                |
413	 *   +--------------------------------------------------------------+
414	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
415	 *   +--------------------------------------------------------------+
416	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
417	 */
418
419	for (n = 0; n < adapter->num_tx_queues; n++) {
420		tx_ring = adapter->tx_ring[n];
421		pr_info("------------------------------------\n");
422		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
423		pr_info("------------------------------------\n");
424		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
425			"[bi->dma       ] leng  ntw timestamp        "
426			"bi->skb\n");
427
428		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
429			const char *next_desc;
430			struct igb_tx_buffer *buffer_info;
431			tx_desc = IGB_TX_DESC(tx_ring, i);
432			buffer_info = &tx_ring->tx_buffer_info[i];
433			u0 = (struct my_u0 *)tx_desc;
434			if (i == tx_ring->next_to_use &&
435			    i == tx_ring->next_to_clean)
436				next_desc = " NTC/U";
437			else if (i == tx_ring->next_to_use)
438				next_desc = " NTU";
439			else if (i == tx_ring->next_to_clean)
440				next_desc = " NTC";
441			else
442				next_desc = "";
443
444			pr_info("T [0x%03X]    %016llX %016llX %016llX"
445				" %04X  %p %016llX %p%s\n", i,
446				le64_to_cpu(u0->a),
447				le64_to_cpu(u0->b),
448				(u64)buffer_info->dma,
449				buffer_info->length,
450				buffer_info->next_to_watch,
451				(u64)buffer_info->time_stamp,
452				buffer_info->skb, next_desc);
453
454			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
455				print_hex_dump(KERN_INFO, "",
456					DUMP_PREFIX_ADDRESS,
457					16, 1, phys_to_virt(buffer_info->dma),
458					buffer_info->length, true);
459		}
460	}
461
462	/* Print RX Rings Summary */
463rx_ring_summary:
464	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
465	pr_info("Queue [NTU] [NTC]\n");
466	for (n = 0; n < adapter->num_rx_queues; n++) {
467		rx_ring = adapter->rx_ring[n];
468		pr_info(" %5d %5X %5X\n",
469			n, rx_ring->next_to_use, rx_ring->next_to_clean);
470	}
471
472	/* Print RX Rings */
473	if (!netif_msg_rx_status(adapter))
474		goto exit;
475
476	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
477
478	/* Advanced Receive Descriptor (Read) Format
479	 *    63                                           1        0
480	 *    +-----------------------------------------------------+
481	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
482	 *    +----------------------------------------------+------+
483	 *  8 |       Header Buffer Address [63:1]           |  DD  |
484	 *    +-----------------------------------------------------+
485	 *
486	 *
487	 * Advanced Receive Descriptor (Write-Back) Format
488	 *
489	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
490	 *   +------------------------------------------------------+
491	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
492	 *   | Checksum   Ident  |   |           |    | Type | Type |
493	 *   +------------------------------------------------------+
494	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
495	 *   +------------------------------------------------------+
496	 *   63       48 47    32 31            20 19               0
497	 */
498
499	for (n = 0; n < adapter->num_rx_queues; n++) {
500		rx_ring = adapter->rx_ring[n];
501		pr_info("------------------------------------\n");
502		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
503		pr_info("------------------------------------\n");
504		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
505			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
506		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
507			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
508
509		for (i = 0; i < rx_ring->count; i++) {
510			const char *next_desc;
511			struct igb_rx_buffer *buffer_info;
512			buffer_info = &rx_ring->rx_buffer_info[i];
513			rx_desc = IGB_RX_DESC(rx_ring, i);
514			u0 = (struct my_u0 *)rx_desc;
515			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
516
517			if (i == rx_ring->next_to_use)
518				next_desc = " NTU";
519			else if (i == rx_ring->next_to_clean)
520				next_desc = " NTC";
521			else
522				next_desc = "";
523
524			if (staterr & E1000_RXD_STAT_DD) {
525				/* Descriptor Done */
526				pr_info("%s[0x%03X]     %016llX %016llX -------"
527					"--------- %p%s\n", "RWB", i,
528					le64_to_cpu(u0->a),
529					le64_to_cpu(u0->b),
530					buffer_info->skb, next_desc);
531			} else {
532				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
533					" %p%s\n", "R  ", i,
534					le64_to_cpu(u0->a),
535					le64_to_cpu(u0->b),
536					(u64)buffer_info->dma,
537					buffer_info->skb, next_desc);
538
539				if (netif_msg_pktdata(adapter)) {
540					print_hex_dump(KERN_INFO, "",
541						DUMP_PREFIX_ADDRESS,
542						16, 1,
543						phys_to_virt(buffer_info->dma),
544						IGB_RX_HDR_LEN, true);
545					print_hex_dump(KERN_INFO, "",
546					  DUMP_PREFIX_ADDRESS,
547					  16, 1,
548					  phys_to_virt(
549					    buffer_info->page_dma +
550					    buffer_info->page_offset),
551					  PAGE_SIZE/2, true);
552				}
553			}
554		}
555	}
556
557exit:
558	return;
559}
560
561
562/**
563 * igb_read_clock - read raw cycle counter (to be used by time counter)
564 */
565static cycle_t igb_read_clock(const struct cyclecounter *tc)
566{
567	struct igb_adapter *adapter =
568		container_of(tc, struct igb_adapter, cycles);
569	struct e1000_hw *hw = &adapter->hw;
570	u64 stamp = 0;
571	int shift = 0;
572
573	/*
574	 * The timestamp latches on lowest register read. For the 82580
575	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
576	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
577	 */
578	if (hw->mac.type >= e1000_82580) {
579		stamp = rd32(E1000_SYSTIMR) >> 8;
580		shift = IGB_82580_TSYNC_SHIFT;
581	}
582
583	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
584	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
585	return stamp;
586}
587
588/**
589 * igb_get_hw_dev - return device
590 * used by hardware layer to print debugging information
591 **/
592struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
593{
594	struct igb_adapter *adapter = hw->back;
595	return adapter->netdev;
596}
597
598/**
599 * igb_init_module - Driver Registration Routine
600 *
601 * igb_init_module is the first routine called when the driver is
602 * loaded. All it does is register with the PCI subsystem.
603 **/
604static int __init igb_init_module(void)
605{
606	int ret;
607	pr_info("%s - version %s\n",
608	       igb_driver_string, igb_driver_version);
609
610	pr_info("%s\n", igb_copyright);
611
612#ifdef CONFIG_IGB_DCA
613	dca_register_notify(&dca_notifier);
614#endif
615	ret = pci_register_driver(&igb_driver);
616	return ret;
617}
618
619module_init(igb_init_module);
620
621/**
622 * igb_exit_module - Driver Exit Cleanup Routine
623 *
624 * igb_exit_module is called just before the driver is removed
625 * from memory.
626 **/
627static void __exit igb_exit_module(void)
628{
629#ifdef CONFIG_IGB_DCA
630	dca_unregister_notify(&dca_notifier);
631#endif
632	pci_unregister_driver(&igb_driver);
633}
634
635module_exit(igb_exit_module);
636
637#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
638/**
639 * igb_cache_ring_register - Descriptor ring to register mapping
640 * @adapter: board private structure to initialize
641 *
642 * Once we know the feature-set enabled for the device, we'll cache
643 * the register offset the descriptor ring is assigned to.
644 **/
645static void igb_cache_ring_register(struct igb_adapter *adapter)
646{
647	int i = 0, j = 0;
648	u32 rbase_offset = adapter->vfs_allocated_count;
649
650	switch (adapter->hw.mac.type) {
651	case e1000_82576:
652		/* The queues are allocated for virtualization such that VF 0
653		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
654		 * In order to avoid collision we start at the first free queue
655		 * and continue consuming queues in the same sequence
656		 */
657		if (adapter->vfs_allocated_count) {
658			for (; i < adapter->rss_queues; i++)
659				adapter->rx_ring[i]->reg_idx = rbase_offset +
660				                               Q_IDX_82576(i);
661		}
662	case e1000_82575:
663	case e1000_82580:
664	case e1000_i350:
665	default:
666		for (; i < adapter->num_rx_queues; i++)
667			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
668		for (; j < adapter->num_tx_queues; j++)
669			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
670		break;
671	}
672}
673
674static void igb_free_queues(struct igb_adapter *adapter)
675{
676	int i;
677
678	for (i = 0; i < adapter->num_tx_queues; i++) {
679		kfree(adapter->tx_ring[i]);
680		adapter->tx_ring[i] = NULL;
681	}
682	for (i = 0; i < adapter->num_rx_queues; i++) {
683		kfree(adapter->rx_ring[i]);
684		adapter->rx_ring[i] = NULL;
685	}
686	adapter->num_rx_queues = 0;
687	adapter->num_tx_queues = 0;
688}
689
690/**
691 * igb_alloc_queues - Allocate memory for all rings
692 * @adapter: board private structure to initialize
693 *
694 * We allocate one ring per queue at run-time since we don't know the
695 * number of queues at compile-time.
696 **/
697static int igb_alloc_queues(struct igb_adapter *adapter)
698{
699	struct igb_ring *ring;
700	int i;
701	int orig_node = adapter->node;
702
703	for (i = 0; i < adapter->num_tx_queues; i++) {
704		if (orig_node == -1) {
705			int cur_node = next_online_node(adapter->node);
706			if (cur_node == MAX_NUMNODES)
707				cur_node = first_online_node;
708			adapter->node = cur_node;
709		}
710		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
711				    adapter->node);
712		if (!ring)
713			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
714		if (!ring)
715			goto err;
716		ring->count = adapter->tx_ring_count;
717		ring->queue_index = i;
718		ring->dev = &adapter->pdev->dev;
719		ring->netdev = adapter->netdev;
720		ring->numa_node = adapter->node;
721		/* For 82575, context index must be unique per ring. */
722		if (adapter->hw.mac.type == e1000_82575)
723			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
724		adapter->tx_ring[i] = ring;
725	}
726	/* Restore the adapter's original node */
727	adapter->node = orig_node;
728
729	for (i = 0; i < adapter->num_rx_queues; i++) {
730		if (orig_node == -1) {
731			int cur_node = next_online_node(adapter->node);
732			if (cur_node == MAX_NUMNODES)
733				cur_node = first_online_node;
734			adapter->node = cur_node;
735		}
736		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
737				    adapter->node);
738		if (!ring)
739			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
740		if (!ring)
741			goto err;
742		ring->count = adapter->rx_ring_count;
743		ring->queue_index = i;
744		ring->dev = &adapter->pdev->dev;
745		ring->netdev = adapter->netdev;
746		ring->numa_node = adapter->node;
747		/* set flag indicating ring supports SCTP checksum offload */
748		if (adapter->hw.mac.type >= e1000_82576)
749			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
750
751		/* On i350, loopback VLAN packets have the tag byte-swapped. */
752		if (adapter->hw.mac.type == e1000_i350)
753			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
754
755		adapter->rx_ring[i] = ring;
756	}
757	/* Restore the adapter's original node */
758	adapter->node = orig_node;
759
760	igb_cache_ring_register(adapter);
761
762	return 0;
763
764err:
765	/* Restore the adapter's original node */
766	adapter->node = orig_node;
767	igb_free_queues(adapter);
768
769	return -ENOMEM;
770}
771
772/**
773 *  igb_write_ivar - configure ivar for given MSI-X vector
774 *  @hw: pointer to the HW structure
775 *  @msix_vector: vector number we are allocating to a given ring
776 *  @index: row index of IVAR register to write within IVAR table
777 *  @offset: column offset of in IVAR, should be multiple of 8
778 *
779 *  This function is intended to handle the writing of the IVAR register
780 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
781 *  each containing an cause allocation for an Rx and Tx ring, and a
782 *  variable number of rows depending on the number of queues supported.
783 **/
784static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
785			   int index, int offset)
786{
787	u32 ivar = array_rd32(E1000_IVAR0, index);
788
789	/* clear any bits that are currently set */
790	ivar &= ~((u32)0xFF << offset);
791
792	/* write vector and valid bit */
793	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
794
795	array_wr32(E1000_IVAR0, index, ivar);
796}
797
798#define IGB_N0_QUEUE -1
799static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
800{
801	struct igb_adapter *adapter = q_vector->adapter;
802	struct e1000_hw *hw = &adapter->hw;
803	int rx_queue = IGB_N0_QUEUE;
804	int tx_queue = IGB_N0_QUEUE;
805	u32 msixbm = 0;
806
807	if (q_vector->rx.ring)
808		rx_queue = q_vector->rx.ring->reg_idx;
809	if (q_vector->tx.ring)
810		tx_queue = q_vector->tx.ring->reg_idx;
811
812	switch (hw->mac.type) {
813	case e1000_82575:
814		/* The 82575 assigns vectors using a bitmask, which matches the
815		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
816		   or more queues to a vector, we write the appropriate bits
817		   into the MSIXBM register for that vector. */
818		if (rx_queue > IGB_N0_QUEUE)
819			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
820		if (tx_queue > IGB_N0_QUEUE)
821			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
822		if (!adapter->msix_entries && msix_vector == 0)
823			msixbm |= E1000_EIMS_OTHER;
824		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
825		q_vector->eims_value = msixbm;
826		break;
827	case e1000_82576:
828		/*
829		 * 82576 uses a table that essentially consists of 2 columns
830		 * with 8 rows.  The ordering is column-major so we use the
831		 * lower 3 bits as the row index, and the 4th bit as the
832		 * column offset.
833		 */
834		if (rx_queue > IGB_N0_QUEUE)
835			igb_write_ivar(hw, msix_vector,
836				       rx_queue & 0x7,
837				       (rx_queue & 0x8) << 1);
838		if (tx_queue > IGB_N0_QUEUE)
839			igb_write_ivar(hw, msix_vector,
840				       tx_queue & 0x7,
841				       ((tx_queue & 0x8) << 1) + 8);
842		q_vector->eims_value = 1 << msix_vector;
843		break;
844	case e1000_82580:
845	case e1000_i350:
846		/*
847		 * On 82580 and newer adapters the scheme is similar to 82576
848		 * however instead of ordering column-major we have things
849		 * ordered row-major.  So we traverse the table by using
850		 * bit 0 as the column offset, and the remaining bits as the
851		 * row index.
852		 */
853		if (rx_queue > IGB_N0_QUEUE)
854			igb_write_ivar(hw, msix_vector,
855				       rx_queue >> 1,
856				       (rx_queue & 0x1) << 4);
857		if (tx_queue > IGB_N0_QUEUE)
858			igb_write_ivar(hw, msix_vector,
859				       tx_queue >> 1,
860				       ((tx_queue & 0x1) << 4) + 8);
861		q_vector->eims_value = 1 << msix_vector;
862		break;
863	default:
864		BUG();
865		break;
866	}
867
868	/* add q_vector eims value to global eims_enable_mask */
869	adapter->eims_enable_mask |= q_vector->eims_value;
870
871	/* configure q_vector to set itr on first interrupt */
872	q_vector->set_itr = 1;
873}
874
875/**
876 * igb_configure_msix - Configure MSI-X hardware
877 *
878 * igb_configure_msix sets up the hardware to properly
879 * generate MSI-X interrupts.
880 **/
881static void igb_configure_msix(struct igb_adapter *adapter)
882{
883	u32 tmp;
884	int i, vector = 0;
885	struct e1000_hw *hw = &adapter->hw;
886
887	adapter->eims_enable_mask = 0;
888
889	/* set vector for other causes, i.e. link changes */
890	switch (hw->mac.type) {
891	case e1000_82575:
892		tmp = rd32(E1000_CTRL_EXT);
893		/* enable MSI-X PBA support*/
894		tmp |= E1000_CTRL_EXT_PBA_CLR;
895
896		/* Auto-Mask interrupts upon ICR read. */
897		tmp |= E1000_CTRL_EXT_EIAME;
898		tmp |= E1000_CTRL_EXT_IRCA;
899
900		wr32(E1000_CTRL_EXT, tmp);
901
902		/* enable msix_other interrupt */
903		array_wr32(E1000_MSIXBM(0), vector++,
904		                      E1000_EIMS_OTHER);
905		adapter->eims_other = E1000_EIMS_OTHER;
906
907		break;
908
909	case e1000_82576:
910	case e1000_82580:
911	case e1000_i350:
912		/* Turn on MSI-X capability first, or our settings
913		 * won't stick.  And it will take days to debug. */
914		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
915		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
916		                E1000_GPIE_NSICR);
917
918		/* enable msix_other interrupt */
919		adapter->eims_other = 1 << vector;
920		tmp = (vector++ | E1000_IVAR_VALID) << 8;
921
922		wr32(E1000_IVAR_MISC, tmp);
923		break;
924	default:
925		/* do nothing, since nothing else supports MSI-X */
926		break;
927	} /* switch (hw->mac.type) */
928
929	adapter->eims_enable_mask |= adapter->eims_other;
930
931	for (i = 0; i < adapter->num_q_vectors; i++)
932		igb_assign_vector(adapter->q_vector[i], vector++);
933
934	wrfl();
935}
936
937/**
938 * igb_request_msix - Initialize MSI-X interrupts
939 *
940 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
941 * kernel.
942 **/
943static int igb_request_msix(struct igb_adapter *adapter)
944{
945	struct net_device *netdev = adapter->netdev;
946	struct e1000_hw *hw = &adapter->hw;
947	int i, err = 0, vector = 0;
948
949	err = request_irq(adapter->msix_entries[vector].vector,
950	                  igb_msix_other, 0, netdev->name, adapter);
951	if (err)
952		goto out;
953	vector++;
954
955	for (i = 0; i < adapter->num_q_vectors; i++) {
956		struct igb_q_vector *q_vector = adapter->q_vector[i];
957
958		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
959
960		if (q_vector->rx.ring && q_vector->tx.ring)
961			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
962				q_vector->rx.ring->queue_index);
963		else if (q_vector->tx.ring)
964			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
965				q_vector->tx.ring->queue_index);
966		else if (q_vector->rx.ring)
967			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
968				q_vector->rx.ring->queue_index);
969		else
970			sprintf(q_vector->name, "%s-unused", netdev->name);
971
972		err = request_irq(adapter->msix_entries[vector].vector,
973		                  igb_msix_ring, 0, q_vector->name,
974		                  q_vector);
975		if (err)
976			goto out;
977		vector++;
978	}
979
980	igb_configure_msix(adapter);
981	return 0;
982out:
983	return err;
984}
985
986static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
987{
988	if (adapter->msix_entries) {
989		pci_disable_msix(adapter->pdev);
990		kfree(adapter->msix_entries);
991		adapter->msix_entries = NULL;
992	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
993		pci_disable_msi(adapter->pdev);
994	}
995}
996
997/**
998 * igb_free_q_vectors - Free memory allocated for interrupt vectors
999 * @adapter: board private structure to initialize
1000 *
1001 * This function frees the memory allocated to the q_vectors.  In addition if
1002 * NAPI is enabled it will delete any references to the NAPI struct prior
1003 * to freeing the q_vector.
1004 **/
1005static void igb_free_q_vectors(struct igb_adapter *adapter)
1006{
1007	int v_idx;
1008
1009	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1010		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1011		adapter->q_vector[v_idx] = NULL;
1012		if (!q_vector)
1013			continue;
1014		netif_napi_del(&q_vector->napi);
1015		kfree(q_vector);
1016	}
1017	adapter->num_q_vectors = 0;
1018}
1019
/**
 * igb_clear_interrupt_scheme - tear down queues, vectors and MSI/MSI-X
 * @adapter: board private structure
 *
 * Counterpart of igb_init_interrupt_scheme(): frees the queues, then the
 * q_vectors, and finally disables whichever of MSI-X / MSI was enabled.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings go first, then the vectors, then the PCI capability */
	igb_free_queues(adapter);

	igb_free_q_vectors(adapter);

	igb_reset_interrupt_capability(adapter);
}
1032
1033/**
1034 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1035 *
1036 * Attempt to configure interrupts using the best available
1037 * capabilities of the hardware and kernel.
1038 **/
1039static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1040{
1041	int err;
1042	int numvecs, i;
1043
1044	/* Number of supported queues. */
1045	adapter->num_rx_queues = adapter->rss_queues;
1046	if (adapter->vfs_allocated_count)
1047		adapter->num_tx_queues = 1;
1048	else
1049		adapter->num_tx_queues = adapter->rss_queues;
1050
1051	/* start with one vector for every rx queue */
1052	numvecs = adapter->num_rx_queues;
1053
1054	/* if tx handler is separate add 1 for every tx queue */
1055	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1056		numvecs += adapter->num_tx_queues;
1057
1058	/* store the number of vectors reserved for queues */
1059	adapter->num_q_vectors = numvecs;
1060
1061	/* add 1 vector for link status interrupts */
1062	numvecs++;
1063	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1064					GFP_KERNEL);
1065	if (!adapter->msix_entries)
1066		goto msi_only;
1067
1068	for (i = 0; i < numvecs; i++)
1069		adapter->msix_entries[i].entry = i;
1070
1071	err = pci_enable_msix(adapter->pdev,
1072			      adapter->msix_entries,
1073			      numvecs);
1074	if (err == 0)
1075		goto out;
1076
1077	igb_reset_interrupt_capability(adapter);
1078
1079	/* If we can't do MSI-X, try MSI */
1080msi_only:
1081#ifdef CONFIG_PCI_IOV
1082	/* disable SR-IOV for non MSI-X configurations */
1083	if (adapter->vf_data) {
1084		struct e1000_hw *hw = &adapter->hw;
1085		/* disable iov and allow time for transactions to clear */
1086		pci_disable_sriov(adapter->pdev);
1087		msleep(500);
1088
1089		kfree(adapter->vf_data);
1090		adapter->vf_data = NULL;
1091		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1092		wrfl();
1093		msleep(100);
1094		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1095	}
1096#endif
1097	adapter->vfs_allocated_count = 0;
1098	adapter->rss_queues = 1;
1099	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1100	adapter->num_rx_queues = 1;
1101	adapter->num_tx_queues = 1;
1102	adapter->num_q_vectors = 1;
1103	if (!pci_enable_msi(adapter->pdev))
1104		adapter->flags |= IGB_FLAG_HAS_MSI;
1105out:
1106	/* Notify the stack of the (possibly) reduced queue counts. */
1107	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1108	return netif_set_real_num_rx_queues(adapter->netdev,
1109					    adapter->num_rx_queues);
1110}
1111
1112/**
1113 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1114 * @adapter: board private structure to initialize
1115 *
1116 * We allocate one q_vector per queue interrupt.  If allocation fails we
1117 * return -ENOMEM.
1118 **/
1119static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1120{
1121	struct igb_q_vector *q_vector;
1122	struct e1000_hw *hw = &adapter->hw;
1123	int v_idx;
1124	int orig_node = adapter->node;
1125
1126	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1127		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1128						adapter->num_tx_queues)) &&
1129		    (adapter->num_rx_queues == v_idx))
1130			adapter->node = orig_node;
1131		if (orig_node == -1) {
1132			int cur_node = next_online_node(adapter->node);
1133			if (cur_node == MAX_NUMNODES)
1134				cur_node = first_online_node;
1135			adapter->node = cur_node;
1136		}
1137		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1138					adapter->node);
1139		if (!q_vector)
1140			q_vector = kzalloc(sizeof(struct igb_q_vector),
1141					   GFP_KERNEL);
1142		if (!q_vector)
1143			goto err_out;
1144		q_vector->adapter = adapter;
1145		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1146		q_vector->itr_val = IGB_START_ITR;
1147		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1148		adapter->q_vector[v_idx] = q_vector;
1149	}
1150	/* Restore the adapter's original node */
1151	adapter->node = orig_node;
1152
1153	return 0;
1154
1155err_out:
1156	/* Restore the adapter's original node */
1157	adapter->node = orig_node;
1158	igb_free_q_vectors(adapter);
1159	return -ENOMEM;
1160}
1161
1162static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1163                                      int ring_idx, int v_idx)
1164{
1165	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1166
1167	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1168	q_vector->rx.ring->q_vector = q_vector;
1169	q_vector->rx.count++;
1170	q_vector->itr_val = adapter->rx_itr_setting;
1171	if (q_vector->itr_val && q_vector->itr_val <= 3)
1172		q_vector->itr_val = IGB_START_ITR;
1173}
1174
1175static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1176                                      int ring_idx, int v_idx)
1177{
1178	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1179
1180	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1181	q_vector->tx.ring->q_vector = q_vector;
1182	q_vector->tx.count++;
1183	q_vector->itr_val = adapter->tx_itr_setting;
1184	q_vector->tx.work_limit = adapter->tx_work_limit;
1185	if (q_vector->itr_val && q_vector->itr_val <= 3)
1186		q_vector->itr_val = IGB_START_ITR;
1187}
1188
1189/**
1190 * igb_map_ring_to_vector - maps allocated queues to vectors
1191 *
1192 * This function maps the recently allocated queues to vectors.
1193 **/
1194static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1195{
1196	int i;
1197	int v_idx = 0;
1198
1199	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1200	    (adapter->num_q_vectors < adapter->num_tx_queues))
1201		return -ENOMEM;
1202
1203	if (adapter->num_q_vectors >=
1204	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1205		for (i = 0; i < adapter->num_rx_queues; i++)
1206			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1207		for (i = 0; i < adapter->num_tx_queues; i++)
1208			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1209	} else {
1210		for (i = 0; i < adapter->num_rx_queues; i++) {
1211			if (i < adapter->num_tx_queues)
1212				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1213			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1214		}
1215		for (; i < adapter->num_tx_queues; i++)
1216			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1217	}
1218	return 0;
1219}
1220
1221/**
1222 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1223 *
1224 * This function initializes the interrupts and allocates all of the queues.
1225 **/
1226static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1227{
1228	struct pci_dev *pdev = adapter->pdev;
1229	int err;
1230
1231	err = igb_set_interrupt_capability(adapter);
1232	if (err)
1233		return err;
1234
1235	err = igb_alloc_q_vectors(adapter);
1236	if (err) {
1237		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1238		goto err_alloc_q_vectors;
1239	}
1240
1241	err = igb_alloc_queues(adapter);
1242	if (err) {
1243		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1244		goto err_alloc_queues;
1245	}
1246
1247	err = igb_map_ring_to_vector(adapter);
1248	if (err) {
1249		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1250		goto err_map_queues;
1251	}
1252
1253
1254	return 0;
1255err_map_queues:
1256	igb_free_queues(adapter);
1257err_alloc_queues:
1258	igb_free_q_vectors(adapter);
1259err_alloc_q_vectors:
1260	igb_reset_interrupt_capability(adapter);
1261	return err;
1262}
1263
1264/**
1265 * igb_request_irq - initialize interrupts
1266 *
1267 * Attempts to configure interrupts using the best available
1268 * capabilities of the hardware and kernel.
1269 **/
1270static int igb_request_irq(struct igb_adapter *adapter)
1271{
1272	struct net_device *netdev = adapter->netdev;
1273	struct pci_dev *pdev = adapter->pdev;
1274	int err = 0;
1275
1276	if (adapter->msix_entries) {
1277		err = igb_request_msix(adapter);
1278		if (!err)
1279			goto request_done;
1280		/* fall back to MSI */
1281		igb_clear_interrupt_scheme(adapter);
1282		if (!pci_enable_msi(pdev))
1283			adapter->flags |= IGB_FLAG_HAS_MSI;
1284		igb_free_all_tx_resources(adapter);
1285		igb_free_all_rx_resources(adapter);
1286		adapter->num_tx_queues = 1;
1287		adapter->num_rx_queues = 1;
1288		adapter->num_q_vectors = 1;
1289		err = igb_alloc_q_vectors(adapter);
1290		if (err) {
1291			dev_err(&pdev->dev,
1292			        "Unable to allocate memory for vectors\n");
1293			goto request_done;
1294		}
1295		err = igb_alloc_queues(adapter);
1296		if (err) {
1297			dev_err(&pdev->dev,
1298			        "Unable to allocate memory for queues\n");
1299			igb_free_q_vectors(adapter);
1300			goto request_done;
1301		}
1302		igb_setup_all_tx_resources(adapter);
1303		igb_setup_all_rx_resources(adapter);
1304	}
1305
1306	igb_assign_vector(adapter->q_vector[0], 0);
1307
1308	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1309		err = request_irq(pdev->irq, igb_intr_msi, 0,
1310				  netdev->name, adapter);
1311		if (!err)
1312			goto request_done;
1313
1314		/* fall back to legacy interrupts */
1315		igb_reset_interrupt_capability(adapter);
1316		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1317	}
1318
1319	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1320			  netdev->name, adapter);
1321
1322	if (err)
1323		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1324			err);
1325
1326request_done:
1327	return err;
1328}
1329
1330static void igb_free_irq(struct igb_adapter *adapter)
1331{
1332	if (adapter->msix_entries) {
1333		int vector = 0, i;
1334
1335		free_irq(adapter->msix_entries[vector++].vector, adapter);
1336
1337		for (i = 0; i < adapter->num_q_vectors; i++)
1338			free_irq(adapter->msix_entries[vector++].vector,
1339				 adapter->q_vector[i]);
1340	} else {
1341		free_irq(adapter->pdev->irq, adapter);
1342	}
1343}
1344
1345/**
1346 * igb_irq_disable - Mask off interrupt generation on the NIC
1347 * @adapter: board private structure
1348 **/
1349static void igb_irq_disable(struct igb_adapter *adapter)
1350{
1351	struct e1000_hw *hw = &adapter->hw;
1352
1353	/*
1354	 * we need to be careful when disabling interrupts.  The VFs are also
1355	 * mapped into these registers and so clearing the bits can cause
1356	 * issues on the VF drivers so we only need to clear what we set
1357	 */
1358	if (adapter->msix_entries) {
1359		u32 regval = rd32(E1000_EIAM);
1360		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1361		wr32(E1000_EIMC, adapter->eims_enable_mask);
1362		regval = rd32(E1000_EIAC);
1363		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1364	}
1365
1366	wr32(E1000_IAM, 0);
1367	wr32(E1000_IMC, ~0);
1368	wrfl();
1369	if (adapter->msix_entries) {
1370		int i;
1371		for (i = 0; i < adapter->num_q_vectors; i++)
1372			synchronize_irq(adapter->msix_entries[i].vector);
1373	} else {
1374		synchronize_irq(adapter->pdev->irq);
1375	}
1376}
1377
1378/**
1379 * igb_irq_enable - Enable default interrupt generation settings
1380 * @adapter: board private structure
1381 **/
1382static void igb_irq_enable(struct igb_adapter *adapter)
1383{
1384	struct e1000_hw *hw = &adapter->hw;
1385
1386	if (adapter->msix_entries) {
1387		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1388		u32 regval = rd32(E1000_EIAC);
1389		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1390		regval = rd32(E1000_EIAM);
1391		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1392		wr32(E1000_EIMS, adapter->eims_enable_mask);
1393		if (adapter->vfs_allocated_count) {
1394			wr32(E1000_MBVFIMR, 0xFF);
1395			ims |= E1000_IMS_VMMB;
1396		}
1397		wr32(E1000_IMS, ims);
1398	} else {
1399		wr32(E1000_IMS, IMS_ENABLE_MASK |
1400				E1000_IMS_DRSTA);
1401		wr32(E1000_IAM, IMS_ENABLE_MASK |
1402				E1000_IMS_DRSTA);
1403	}
1404}
1405
1406static void igb_update_mng_vlan(struct igb_adapter *adapter)
1407{
1408	struct e1000_hw *hw = &adapter->hw;
1409	u16 vid = adapter->hw.mng_cookie.vlan_id;
1410	u16 old_vid = adapter->mng_vlan_id;
1411
1412	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1413		/* add VID to filter table */
1414		igb_vfta_set(hw, vid, true);
1415		adapter->mng_vlan_id = vid;
1416	} else {
1417		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1418	}
1419
1420	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1421	    (vid != old_vid) &&
1422	    !test_bit(old_vid, adapter->active_vlans)) {
1423		/* remove VID from filter table */
1424		igb_vfta_set(hw, old_vid, false);
1425	}
1426}
1427
1428/**
1429 * igb_release_hw_control - release control of the h/w to f/w
1430 * @adapter: address of board private structure
1431 *
1432 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1433 * For ASF and Pass Through versions of f/w this means that the
1434 * driver is no longer loaded.
1435 *
1436 **/
1437static void igb_release_hw_control(struct igb_adapter *adapter)
1438{
1439	struct e1000_hw *hw = &adapter->hw;
1440	u32 ctrl_ext;
1441
1442	/* Let firmware take over control of h/w */
1443	ctrl_ext = rd32(E1000_CTRL_EXT);
1444	wr32(E1000_CTRL_EXT,
1445			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1446}
1447
1448/**
1449 * igb_get_hw_control - get control of the h/w from f/w
1450 * @adapter: address of board private structure
1451 *
1452 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1453 * For ASF and Pass Through versions of f/w this means that
1454 * the driver is loaded.
1455 *
1456 **/
1457static void igb_get_hw_control(struct igb_adapter *adapter)
1458{
1459	struct e1000_hw *hw = &adapter->hw;
1460	u32 ctrl_ext;
1461
1462	/* Let firmware know the driver has taken over */
1463	ctrl_ext = rd32(E1000_CTRL_EXT);
1464	wr32(E1000_CTRL_EXT,
1465			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1466}
1467
1468/**
1469 * igb_configure - configure the hardware for RX and TX
1470 * @adapter: private board structure
1471 **/
1472static void igb_configure(struct igb_adapter *adapter)
1473{
1474	struct net_device *netdev = adapter->netdev;
1475	int i;
1476
1477	igb_get_hw_control(adapter);
1478	igb_set_rx_mode(netdev);
1479
1480	igb_restore_vlan(adapter);
1481
1482	igb_setup_tctl(adapter);
1483	igb_setup_mrqc(adapter);
1484	igb_setup_rctl(adapter);
1485
1486	igb_configure_tx(adapter);
1487	igb_configure_rx(adapter);
1488
1489	igb_rx_fifo_flush_82575(&adapter->hw);
1490
1491	/* call igb_desc_unused which always leaves
1492	 * at least 1 descriptor unused to make sure
1493	 * next_to_use != next_to_clean */
1494	for (i = 0; i < adapter->num_rx_queues; i++) {
1495		struct igb_ring *ring = adapter->rx_ring[i];
1496		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1497	}
1498}
1499
1500/**
1501 * igb_power_up_link - Power up the phy/serdes link
1502 * @adapter: address of board private structure
1503 **/
1504void igb_power_up_link(struct igb_adapter *adapter)
1505{
1506	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1507		igb_power_up_phy_copper(&adapter->hw);
1508	else
1509		igb_power_up_serdes_link_82575(&adapter->hw);
1510	igb_reset_phy(&adapter->hw);
1511}
1512
1513/**
1514 * igb_power_down_link - Power down the phy/serdes link
1515 * @adapter: address of board private structure
1516 */
1517static void igb_power_down_link(struct igb_adapter *adapter)
1518{
1519	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1520		igb_power_down_phy_copper_82575(&adapter->hw);
1521	else
1522		igb_shutdown_serdes_link_82575(&adapter->hw);
1523}
1524
1525/**
1526 * igb_up - Open the interface and prepare it to handle traffic
1527 * @adapter: board private structure
1528 **/
1529int igb_up(struct igb_adapter *adapter)
1530{
1531	struct e1000_hw *hw = &adapter->hw;
1532	int i;
1533
1534	/* hardware has been reset, we need to reload some things */
1535	igb_configure(adapter);
1536
1537	clear_bit(__IGB_DOWN, &adapter->state);
1538
1539	for (i = 0; i < adapter->num_q_vectors; i++)
1540		napi_enable(&(adapter->q_vector[i]->napi));
1541
1542	if (adapter->msix_entries)
1543		igb_configure_msix(adapter);
1544	else
1545		igb_assign_vector(adapter->q_vector[0], 0);
1546
1547	/* Clear any pending interrupts. */
1548	rd32(E1000_ICR);
1549	igb_irq_enable(adapter);
1550
1551	/* notify VFs that reset has been completed */
1552	if (adapter->vfs_allocated_count) {
1553		u32 reg_data = rd32(E1000_CTRL_EXT);
1554		reg_data |= E1000_CTRL_EXT_PFRSTD;
1555		wr32(E1000_CTRL_EXT, reg_data);
1556	}
1557
1558	netif_tx_start_all_queues(adapter->netdev);
1559
1560	/* start the watchdog. */
1561	hw->mac.get_link_status = 1;
1562	schedule_work(&adapter->watchdog_task);
1563
1564	return 0;
1565}
1566
1567void igb_down(struct igb_adapter *adapter)
1568{
1569	struct net_device *netdev = adapter->netdev;
1570	struct e1000_hw *hw = &adapter->hw;
1571	u32 tctl, rctl;
1572	int i;
1573
1574	/* signal that we're down so the interrupt handler does not
1575	 * reschedule our watchdog timer */
1576	set_bit(__IGB_DOWN, &adapter->state);
1577
1578	/* disable receives in the hardware */
1579	rctl = rd32(E1000_RCTL);
1580	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1581	/* flush and sleep below */
1582
1583	netif_tx_stop_all_queues(netdev);
1584
1585	/* disable transmits in the hardware */
1586	tctl = rd32(E1000_TCTL);
1587	tctl &= ~E1000_TCTL_EN;
1588	wr32(E1000_TCTL, tctl);
1589	/* flush both disables and wait for them to finish */
1590	wrfl();
1591	msleep(10);
1592
1593	for (i = 0; i < adapter->num_q_vectors; i++)
1594		napi_disable(&(adapter->q_vector[i]->napi));
1595
1596	igb_irq_disable(adapter);
1597
1598	del_timer_sync(&adapter->watchdog_timer);
1599	del_timer_sync(&adapter->phy_info_timer);
1600
1601	netif_carrier_off(netdev);
1602
1603	/* record the stats before reset*/
1604	spin_lock(&adapter->stats64_lock);
1605	igb_update_stats(adapter, &adapter->stats64);
1606	spin_unlock(&adapter->stats64_lock);
1607
1608	adapter->link_speed = 0;
1609	adapter->link_duplex = 0;
1610
1611	if (!pci_channel_offline(adapter->pdev))
1612		igb_reset(adapter);
1613	igb_clean_all_tx_rings(adapter);
1614	igb_clean_all_rx_rings(adapter);
1615#ifdef CONFIG_IGB_DCA
1616
1617	/* since we reset the hardware DCA settings were cleared */
1618	igb_setup_dca(adapter);
1619#endif
1620}
1621
1622void igb_reinit_locked(struct igb_adapter *adapter)
1623{
1624	WARN_ON(in_interrupt());
1625	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1626		msleep(1);
1627	igb_down(adapter);
1628	igb_up(adapter);
1629	clear_bit(__IGB_RESETTING, &adapter->state);
1630}
1631
1632void igb_reset(struct igb_adapter *adapter)
1633{
1634	struct pci_dev *pdev = adapter->pdev;
1635	struct e1000_hw *hw = &adapter->hw;
1636	struct e1000_mac_info *mac = &hw->mac;
1637	struct e1000_fc_info *fc = &hw->fc;
1638	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1639	u16 hwm;
1640
1641	/* Repartition Pba for greater than 9k mtu
1642	 * To take effect CTRL.RST is required.
1643	 */
1644	switch (mac->type) {
1645	case e1000_i350:
1646	case e1000_82580:
1647		pba = rd32(E1000_RXPBS);
1648		pba = igb_rxpbs_adjust_82580(pba);
1649		break;
1650	case e1000_82576:
1651		pba = rd32(E1000_RXPBS);
1652		pba &= E1000_RXPBS_SIZE_MASK_82576;
1653		break;
1654	case e1000_82575:
1655	default:
1656		pba = E1000_PBA_34K;
1657		break;
1658	}
1659
1660	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1661	    (mac->type < e1000_82576)) {
1662		/* adjust PBA for jumbo frames */
1663		wr32(E1000_PBA, pba);
1664
1665		/* To maintain wire speed transmits, the Tx FIFO should be
1666		 * large enough to accommodate two full transmit packets,
1667		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1668		 * the Rx FIFO should be large enough to accommodate at least
1669		 * one full receive packet and is similarly rounded up and
1670		 * expressed in KB. */
1671		pba = rd32(E1000_PBA);
1672		/* upper 16 bits has Tx packet buffer allocation size in KB */
1673		tx_space = pba >> 16;
1674		/* lower 16 bits has Rx packet buffer allocation size in KB */
1675		pba &= 0xffff;
1676		/* the tx fifo also stores 16 bytes of information about the tx
1677		 * but don't include ethernet FCS because hardware appends it */
1678		min_tx_space = (adapter->max_frame_size +
1679				sizeof(union e1000_adv_tx_desc) -
1680				ETH_FCS_LEN) * 2;
1681		min_tx_space = ALIGN(min_tx_space, 1024);
1682		min_tx_space >>= 10;
1683		/* software strips receive CRC, so leave room for it */
1684		min_rx_space = adapter->max_frame_size;
1685		min_rx_space = ALIGN(min_rx_space, 1024);
1686		min_rx_space >>= 10;
1687
1688		/* If current Tx allocation is less than the min Tx FIFO size,
1689		 * and the min Tx FIFO size is less than the current Rx FIFO
1690		 * allocation, take space away from current Rx allocation */
1691		if (tx_space < min_tx_space &&
1692		    ((min_tx_space - tx_space) < pba)) {
1693			pba = pba - (min_tx_space - tx_space);
1694
1695			/* if short on rx space, rx wins and must trump tx
1696			 * adjustment */
1697			if (pba < min_rx_space)
1698				pba = min_rx_space;
1699		}
1700		wr32(E1000_PBA, pba);
1701	}
1702
1703	/* flow control settings */
1704	/* The high water mark must be low enough to fit one full frame
1705	 * (or the size used for early receive) above it in the Rx FIFO.
1706	 * Set it to the lower of:
1707	 * - 90% of the Rx FIFO size, or
1708	 * - the full Rx FIFO size minus one full frame */
1709	hwm = min(((pba << 10) * 9 / 10),
1710			((pba << 10) - 2 * adapter->max_frame_size));
1711
1712	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1713	fc->low_water = fc->high_water - 16;
1714	fc->pause_time = 0xFFFF;
1715	fc->send_xon = 1;
1716	fc->current_mode = fc->requested_mode;
1717
1718	/* disable receive for all VFs and wait one second */
1719	if (adapter->vfs_allocated_count) {
1720		int i;
1721		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1722			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1723
1724		/* ping all the active vfs to let them know we are going down */
1725		igb_ping_all_vfs(adapter);
1726
1727		/* disable transmits and receives */
1728		wr32(E1000_VFRE, 0);
1729		wr32(E1000_VFTE, 0);
1730	}
1731
1732	/* Allow time for pending master requests to run */
1733	hw->mac.ops.reset_hw(hw);
1734	wr32(E1000_WUC, 0);
1735
1736	if (hw->mac.ops.init_hw(hw))
1737		dev_err(&pdev->dev, "Hardware Error\n");
1738
1739	igb_init_dmac(adapter, pba);
1740	if (!netif_running(adapter->netdev))
1741		igb_power_down_link(adapter);
1742
1743	igb_update_mng_vlan(adapter);
1744
1745	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747
1748	igb_get_phy_info(hw);
1749}
1750
1751static netdev_features_t igb_fix_features(struct net_device *netdev,
1752	netdev_features_t features)
1753{
1754	/*
1755	 * Since there is no support for separate rx/tx vlan accel
1756	 * enable/disable make sure tx flag is always in same state as rx.
1757	 */
1758	if (features & NETIF_F_HW_VLAN_RX)
1759		features |= NETIF_F_HW_VLAN_TX;
1760	else
1761		features &= ~NETIF_F_HW_VLAN_TX;
1762
1763	return features;
1764}
1765
1766static int igb_set_features(struct net_device *netdev,
1767	netdev_features_t features)
1768{
1769	netdev_features_t changed = netdev->features ^ features;
1770
1771	if (changed & NETIF_F_HW_VLAN_RX)
1772		igb_vlan_mode(netdev, features);
1773
1774	return 0;
1775}
1776
1777static const struct net_device_ops igb_netdev_ops = {
1778	.ndo_open		= igb_open,
1779	.ndo_stop		= igb_close,
1780	.ndo_start_xmit		= igb_xmit_frame,
1781	.ndo_get_stats64	= igb_get_stats64,
1782	.ndo_set_rx_mode	= igb_set_rx_mode,
1783	.ndo_set_mac_address	= igb_set_mac,
1784	.ndo_change_mtu		= igb_change_mtu,
1785	.ndo_do_ioctl		= igb_ioctl,
1786	.ndo_tx_timeout		= igb_tx_timeout,
1787	.ndo_validate_addr	= eth_validate_addr,
1788	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1789	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1790	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1791	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1792	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1793	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1794#ifdef CONFIG_NET_POLL_CONTROLLER
1795	.ndo_poll_controller	= igb_netpoll,
1796#endif
1797	.ndo_fix_features	= igb_fix_features,
1798	.ndo_set_features	= igb_set_features,
1799};
1800
1801/**
1802 * igb_probe - Device Initialization Routine
1803 * @pdev: PCI device information struct
1804 * @ent: entry in igb_pci_tbl
1805 *
1806 * Returns 0 on success, negative on failure
1807 *
1808 * igb_probe initializes an adapter identified by a pci_dev structure.
1809 * The OS initialization, configuring of the adapter private structure,
1810 * and a hardware reset occur.
1811 **/
1812static int __devinit igb_probe(struct pci_dev *pdev,
1813			       const struct pci_device_id *ent)
1814{
1815	struct net_device *netdev;
1816	struct igb_adapter *adapter;
1817	struct e1000_hw *hw;
1818	u16 eeprom_data = 0;
1819	s32 ret_val;
1820	static int global_quad_port_a; /* global quad port a indication */
1821	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1822	unsigned long mmio_start, mmio_len;
1823	int err, pci_using_dac;
1824	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1825	u8 part_str[E1000_PBANUM_LENGTH];
1826
1827	/* Catch broken hardware that put the wrong VF device ID in
1828	 * the PCIe SR-IOV capability.
1829	 */
1830	if (pdev->is_virtfn) {
1831		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1832		     pci_name(pdev), pdev->vendor, pdev->device);
1833		return -EINVAL;
1834	}
1835
1836	err = pci_enable_device_mem(pdev);
1837	if (err)
1838		return err;
1839
1840	pci_using_dac = 0;
1841	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1842	if (!err) {
1843		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1844		if (!err)
1845			pci_using_dac = 1;
1846	} else {
1847		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1848		if (err) {
1849			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1850			if (err) {
1851				dev_err(&pdev->dev, "No usable DMA "
1852					"configuration, aborting\n");
1853				goto err_dma;
1854			}
1855		}
1856	}
1857
1858	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1859	                                   IORESOURCE_MEM),
1860	                                   igb_driver_name);
1861	if (err)
1862		goto err_pci_reg;
1863
1864	pci_enable_pcie_error_reporting(pdev);
1865
1866	pci_set_master(pdev);
1867	pci_save_state(pdev);
1868
1869	err = -ENOMEM;
1870	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1871				   IGB_MAX_TX_QUEUES);
1872	if (!netdev)
1873		goto err_alloc_etherdev;
1874
1875	SET_NETDEV_DEV(netdev, &pdev->dev);
1876
1877	pci_set_drvdata(pdev, netdev);
1878	adapter = netdev_priv(netdev);
1879	adapter->netdev = netdev;
1880	adapter->pdev = pdev;
1881	hw = &adapter->hw;
1882	hw->back = adapter;
1883	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1884
1885	mmio_start = pci_resource_start(pdev, 0);
1886	mmio_len = pci_resource_len(pdev, 0);
1887
1888	err = -EIO;
1889	hw->hw_addr = ioremap(mmio_start, mmio_len);
1890	if (!hw->hw_addr)
1891		goto err_ioremap;
1892
1893	netdev->netdev_ops = &igb_netdev_ops;
1894	igb_set_ethtool_ops(netdev);
1895	netdev->watchdog_timeo = 5 * HZ;
1896
1897	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1898
1899	netdev->mem_start = mmio_start;
1900	netdev->mem_end = mmio_start + mmio_len;
1901
1902	/* PCI config space info */
1903	hw->vendor_id = pdev->vendor;
1904	hw->device_id = pdev->device;
1905	hw->revision_id = pdev->revision;
1906	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1907	hw->subsystem_device_id = pdev->subsystem_device;
1908
1909	/* Copy the default MAC, PHY and NVM function pointers */
1910	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1911	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1912	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1913	/* Initialize skew-specific constants */
1914	err = ei->get_invariants(hw);
1915	if (err)
1916		goto err_sw_init;
1917
1918	/* setup the private structure */
1919	err = igb_sw_init(adapter);
1920	if (err)
1921		goto err_sw_init;
1922
1923	igb_get_bus_info_pcie(hw);
1924
1925	hw->phy.autoneg_wait_to_complete = false;
1926
1927	/* Copper options */
1928	if (hw->phy.media_type == e1000_media_type_copper) {
1929		hw->phy.mdix = AUTO_ALL_MODES;
1930		hw->phy.disable_polarity_correction = false;
1931		hw->phy.ms_type = e1000_ms_hw_default;
1932	}
1933
1934	if (igb_check_reset_block(hw))
1935		dev_info(&pdev->dev,
1936			"PHY reset is blocked due to SOL/IDER session.\n");
1937
1938	/*
1939	 * features is initialized to 0 in allocation, it might have bits
1940	 * set by igb_sw_init so we should use an or instead of an
1941	 * assignment.
1942	 */
1943	netdev->features |= NETIF_F_SG |
1944			    NETIF_F_IP_CSUM |
1945			    NETIF_F_IPV6_CSUM |
1946			    NETIF_F_TSO |
1947			    NETIF_F_TSO6 |
1948			    NETIF_F_RXHASH |
1949			    NETIF_F_RXCSUM |
1950			    NETIF_F_HW_VLAN_RX |
1951			    NETIF_F_HW_VLAN_TX;
1952
1953	/* copy netdev features into list of user selectable features */
1954	netdev->hw_features |= netdev->features;
1955
1956	/* set this bit last since it cannot be part of hw_features */
1957	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1958
1959	netdev->vlan_features |= NETIF_F_TSO |
1960				 NETIF_F_TSO6 |
1961				 NETIF_F_IP_CSUM |
1962				 NETIF_F_IPV6_CSUM |
1963				 NETIF_F_SG;
1964
1965	if (pci_using_dac) {
1966		netdev->features |= NETIF_F_HIGHDMA;
1967		netdev->vlan_features |= NETIF_F_HIGHDMA;
1968	}
1969
1970	if (hw->mac.type >= e1000_82576) {
1971		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1972		netdev->features |= NETIF_F_SCTP_CSUM;
1973	}
1974
1975	netdev->priv_flags |= IFF_UNICAST_FLT;
1976
1977	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1978
1979	/* before reading the NVM, reset the controller to put the device in a
1980	 * known good starting state */
1981	hw->mac.ops.reset_hw(hw);
1982
1983	/* make sure the NVM is good */
1984	if (hw->nvm.ops.validate(hw) < 0) {
1985		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1986		err = -EIO;
1987		goto err_eeprom;
1988	}
1989
1990	/* copy the MAC address out of the NVM */
1991	if (hw->mac.ops.read_mac_addr(hw))
1992		dev_err(&pdev->dev, "NVM Read Error\n");
1993
1994	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1995	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1996
1997	if (!is_valid_ether_addr(netdev->perm_addr)) {
1998		dev_err(&pdev->dev, "Invalid MAC Address\n");
1999		err = -EIO;
2000		goto err_eeprom;
2001	}
2002
2003	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2004	            (unsigned long) adapter);
2005	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2006	            (unsigned long) adapter);
2007
2008	INIT_WORK(&adapter->reset_task, igb_reset_task);
2009	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2010
2011	/* Initialize link properties that are user-changeable */
2012	adapter->fc_autoneg = true;
2013	hw->mac.autoneg = true;
2014	hw->phy.autoneg_advertised = 0x2f;
2015
2016	hw->fc.requested_mode = e1000_fc_default;
2017	hw->fc.current_mode = e1000_fc_default;
2018
2019	igb_validate_mdi_setting(hw);
2020
2021	/* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2022	 * enable the ACPI Magic Packet filter
2023	 */
2024
2025	if (hw->bus.func == 0)
2026		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2027	else if (hw->mac.type >= e1000_82580)
2028		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2029		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2030		                 &eeprom_data);
2031	else if (hw->bus.func == 1)
2032		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2033
2034	if (eeprom_data & eeprom_apme_mask)
2035		adapter->eeprom_wol |= E1000_WUFC_MAG;
2036
2037	/* now that we have the eeprom settings, apply the special cases where
2038	 * the eeprom may be wrong or the board simply won't support wake on
2039	 * lan on a particular port */
2040	switch (pdev->device) {
2041	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2042		adapter->eeprom_wol = 0;
2043		break;
2044	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2045	case E1000_DEV_ID_82576_FIBER:
2046	case E1000_DEV_ID_82576_SERDES:
2047		/* Wake events only supported on port A for dual fiber
2048		 * regardless of eeprom setting */
2049		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2050			adapter->eeprom_wol = 0;
2051		break;
2052	case E1000_DEV_ID_82576_QUAD_COPPER:
2053	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2054		/* if quad port adapter, disable WoL on all but port A */
2055		if (global_quad_port_a != 0)
2056			adapter->eeprom_wol = 0;
2057		else
2058			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2059		/* Reset for multiple quad port adapters */
2060		if (++global_quad_port_a == 4)
2061			global_quad_port_a = 0;
2062		break;
2063	}
2064
2065	/* initialize the wol settings based on the eeprom settings */
2066	adapter->wol = adapter->eeprom_wol;
2067	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2068
2069	/* reset the hardware with the new settings */
2070	igb_reset(adapter);
2071
2072	/* let the f/w know that the h/w is now under the control of the
2073	 * driver. */
2074	igb_get_hw_control(adapter);
2075
2076	strcpy(netdev->name, "eth%d");
2077	err = register_netdev(netdev);
2078	if (err)
2079		goto err_register;
2080
2081	/* carrier off reporting is important to ethtool even BEFORE open */
2082	netif_carrier_off(netdev);
2083
2084#ifdef CONFIG_IGB_DCA
2085	if (dca_add_requester(&pdev->dev) == 0) {
2086		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2087		dev_info(&pdev->dev, "DCA enabled\n");
2088		igb_setup_dca(adapter);
2089	}
2090
2091#endif
2092	/* do hw tstamp init after resetting */
2093	igb_init_hw_timer(adapter);
2094
2095	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2096	/* print bus type/speed/width info */
2097	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2098		 netdev->name,
2099		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2100		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2101		                                            "unknown"),
2102		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2103		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2104		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2105		   "unknown"),
2106		 netdev->dev_addr);
2107
2108	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2109	if (ret_val)
2110		strcpy(part_str, "Unknown");
2111	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2112	dev_info(&pdev->dev,
2113		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2114		adapter->msix_entries ? "MSI-X" :
2115		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2116		adapter->num_rx_queues, adapter->num_tx_queues);
2117	switch (hw->mac.type) {
2118	case e1000_i350:
2119		igb_set_eee_i350(hw);
2120		break;
2121	default:
2122		break;
2123	}
2124
2125	pm_runtime_put_noidle(&pdev->dev);
2126	return 0;
2127
2128err_register:
2129	igb_release_hw_control(adapter);
2130err_eeprom:
2131	if (!igb_check_reset_block(hw))
2132		igb_reset_phy(hw);
2133
2134	if (hw->flash_address)
2135		iounmap(hw->flash_address);
2136err_sw_init:
2137	igb_clear_interrupt_scheme(adapter);
2138	iounmap(hw->hw_addr);
2139err_ioremap:
2140	free_netdev(netdev);
2141err_alloc_etherdev:
2142	pci_release_selected_regions(pdev,
2143	                             pci_select_bars(pdev, IORESOURCE_MEM));
2144err_pci_reg:
2145err_dma:
2146	pci_disable_device(pdev);
2147	return err;
2148}
2149
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 *
 * The routine stops the timers and work items, hands the hardware back
 * to firmware, unregisters the net device, and then releases the
 * interrupt scheme, VF storage, MMIO mappings, PCI regions and the
 * net device itself.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* take a runtime-PM reference without resuming the device */
	pm_runtime_get_noresume(&pdev->dev);

	/*
	 * The watchdog timer may be rescheduled, so explicitly
	 * disable watchdog from being rescheduled.
	 */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	/* wait for any queued reset/watchdog work to finish */
	cancel_work_sync(&adapter->reset_task);
	cancel_work_sync(&adapter->watchdog_task);

#ifdef CONFIG_IGB_DCA
	/* undo the DCA registration if it was enabled for this adapter */
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
		if (!igb_check_vf_assignment(adapter)) {
			pci_disable_sriov(pdev);
			msleep(500);
		} else {
			/* SR-IOV is left enabled in this case */
			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
		}

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");
	}
#endif

	/* no register access is possible past this point */
	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev,
	                             pci_select_bars(pdev, IORESOURCE_MEM));

	/* adapter lives inside netdev's private area: free its members first */
	kfree(adapter->shadow_vfta);
	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}
2228
2229/**
2230 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2231 * @adapter: board private structure to initialize
2232 *
2233 * This function initializes the vf specific data storage and then attempts to
2234 * allocate the VFs.  The reason for ordering it this way is because it is much
2235 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2236 * the memory for the VFs.
2237 **/
2238static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2239{
2240#ifdef CONFIG_PCI_IOV
2241	struct pci_dev *pdev = adapter->pdev;
2242	int old_vfs = igb_find_enabled_vfs(adapter);
2243	int i;
2244
2245	if (old_vfs) {
2246		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2247			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2248		adapter->vfs_allocated_count = old_vfs;
2249	}
2250
2251	if (!adapter->vfs_allocated_count)
2252		return;
2253
2254	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2255				sizeof(struct vf_data_storage), GFP_KERNEL);
2256	/* if allocation failed then we do not support SR-IOV */
2257	if (!adapter->vf_data) {
2258		adapter->vfs_allocated_count = 0;
2259		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2260			"Data Storage\n");
2261		goto out;
2262	}
2263
2264	if (!old_vfs) {
2265		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2266			goto err_out;
2267	}
2268	dev_info(&pdev->dev, "%d VFs allocated\n",
2269		 adapter->vfs_allocated_count);
2270	for (i = 0; i < adapter->vfs_allocated_count; i++)
2271		igb_vf_configure(adapter, i);
2272
2273	/* DMA Coalescing is not supported in IOV mode. */
2274	adapter->flags &= ~IGB_FLAG_DMAC;
2275	goto out;
2276err_out:
2277	kfree(adapter->vf_data);
2278	adapter->vf_data = NULL;
2279	adapter->vfs_allocated_count = 0;
2280out:
2281	return;
2282#endif /* CONFIG_PCI_IOV */
2283}
2284
2285/**
2286 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2287 * @adapter: board private structure to initialize
2288 *
2289 * igb_init_hw_timer initializes the function pointer and values for the hw
2290 * timer found in hardware.
2291 **/
2292static void igb_init_hw_timer(struct igb_adapter *adapter)
2293{
2294	struct e1000_hw *hw = &adapter->hw;
2295
2296	switch (hw->mac.type) {
2297	case e1000_i350:
2298	case e1000_82580:
2299		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2300		adapter->cycles.read = igb_read_clock;
2301		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2302		adapter->cycles.mult = 1;
2303		/*
2304		 * The 82580 timesync updates the system timer every 8ns by 8ns
2305		 * and the value cannot be shifted.  Instead we need to shift
2306		 * the registers to generate a 64bit timer value.  As a result
2307		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2308		 * 24 in order to generate a larger value for synchronization.
2309		 */
2310		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2311		/* disable system timer temporarily by setting bit 31 */
2312		wr32(E1000_TSAUXC, 0x80000000);
2313		wrfl();
2314
2315		/* Set registers so that rollover occurs soon to test this. */
2316		wr32(E1000_SYSTIMR, 0x00000000);
2317		wr32(E1000_SYSTIML, 0x80000000);
2318		wr32(E1000_SYSTIMH, 0x000000FF);
2319		wrfl();
2320
2321		/* enable system timer by clearing bit 31 */
2322		wr32(E1000_TSAUXC, 0x0);
2323		wrfl();
2324
2325		timecounter_init(&adapter->clock,
2326				 &adapter->cycles,
2327				 ktime_to_ns(ktime_get_real()));
2328		/*
2329		 * Synchronize our NIC clock against system wall clock. NIC
2330		 * time stamp reading requires ~3us per sample, each sample
2331		 * was pretty stable even under load => only require 10
2332		 * samples for each offset comparison.
2333		 */
2334		memset(&adapter->compare, 0, sizeof(adapter->compare));
2335		adapter->compare.source = &adapter->clock;
2336		adapter->compare.target = ktime_get_real;
2337		adapter->compare.num_samples = 10;
2338		timecompare_update(&adapter->compare, 0);
2339		break;
2340	case e1000_82576:
2341		/*
2342		 * Initialize hardware timer: we keep it running just in case
2343		 * that some program needs it later on.
2344		 */
2345		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2346		adapter->cycles.read = igb_read_clock;
2347		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2348		adapter->cycles.mult = 1;
2349		/**
2350		 * Scale the NIC clock cycle by a large factor so that
2351		 * relatively small clock corrections can be added or
2352		 * subtracted at each clock tick. The drawbacks of a large
2353		 * factor are a) that the clock register overflows more quickly
2354		 * (not such a big deal) and b) that the increment per tick has
2355		 * to fit into 24 bits.  As a result we need to use a shift of
2356		 * 19 so we can fit a value of 16 into the TIMINCA register.
2357		 */
2358		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2359		wr32(E1000_TIMINCA,
2360		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2361		                (16 << IGB_82576_TSYNC_SHIFT));
2362
2363		/* Set registers so that rollover occurs soon to test this. */
2364		wr32(E1000_SYSTIML, 0x00000000);
2365		wr32(E1000_SYSTIMH, 0xFF800000);
2366		wrfl();
2367
2368		timecounter_init(&adapter->clock,
2369				 &adapter->cycles,
2370				 ktime_to_ns(ktime_get_real()));
2371		/*
2372		 * Synchronize our NIC clock against system wall clock. NIC
2373		 * time stamp reading requires ~3us per sample, each sample
2374		 * was pretty stable even under load => only require 10
2375		 * samples for each offset comparison.
2376		 */
2377		memset(&adapter->compare, 0, sizeof(adapter->compare));
2378		adapter->compare.source = &adapter->clock;
2379		adapter->compare.target = ktime_get_real;
2380		adapter->compare.num_samples = 10;
2381		timecompare_update(&adapter->compare, 0);
2382		break;
2383	case e1000_82575:
2384		/* 82575 does not support timesync */
2385	default:
2386		break;
2387	}
2388
2389}
2390
2391/**
2392 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2393 * @adapter: board private structure to initialize
2394 *
2395 * igb_sw_init initializes the Adapter private data structure.
2396 * Fields are initialized based on PCI device information and
2397 * OS network device settings (MTU size).
2398 **/
2399static int __devinit igb_sw_init(struct igb_adapter *adapter)
2400{
2401	struct e1000_hw *hw = &adapter->hw;
2402	struct net_device *netdev = adapter->netdev;
2403	struct pci_dev *pdev = adapter->pdev;
2404
2405	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2406
2407	/* set default ring sizes */
2408	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2409	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2410
2411	/* set default ITR values */
2412	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2413	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2414
2415	/* set default work limits */
2416	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2417
2418	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2419				  VLAN_HLEN;
2420	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2421
2422	adapter->node = -1;
2423
2424	spin_lock_init(&adapter->stats64_lock);
2425#ifdef CONFIG_PCI_IOV
2426	switch (hw->mac.type) {
2427	case e1000_82576:
2428	case e1000_i350:
2429		if (max_vfs > 7) {
2430			dev_warn(&pdev->dev,
2431				 "Maximum of 7 VFs per PF, using max\n");
2432			adapter->vfs_allocated_count = 7;
2433		} else
2434			adapter->vfs_allocated_count = max_vfs;
2435		break;
2436	default:
2437		break;
2438	}
2439#endif /* CONFIG_PCI_IOV */
2440	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2441	/* i350 cannot do RSS and SR-IOV at the same time */
2442	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2443		adapter->rss_queues = 1;
2444
2445	/*
2446	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2447	 * then we should combine the queues into a queue pair in order to
2448	 * conserve interrupts due to limited supply
2449	 */
2450	if ((adapter->rss_queues > 4) ||
2451	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2452		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2453
2454	/* Setup and initialize a copy of the hw vlan table array */
2455	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2456				E1000_VLAN_FILTER_TBL_SIZE,
2457				GFP_ATOMIC);
2458
2459	/* This call may decrease the number of queues */
2460	if (igb_init_interrupt_scheme(adapter)) {
2461		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2462		return -ENOMEM;
2463	}
2464
2465	igb_probe_vfs(adapter);
2466
2467	/* Explicitly disable IRQ since the NIC can be in any state. */
2468	igb_irq_disable(adapter);
2469
2470	if (hw->mac.type == e1000_i350)
2471		adapter->flags &= ~IGB_FLAG_DMAC;
2472
2473	set_bit(__IGB_DOWN, &adapter->state);
2474	return 0;
2475}
2476
/**
 * __igb_open - Called when a network interface is made active
 * @netdev: network interface device structure
 * @resuming: when true, the runtime-PM get/put calls around the bring-up
 *            are skipped (presumably because the caller is already on the
 *            resume path - confirm against callers)
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog work is scheduled,
 * and the stack is notified that the interface is ready.
 **/
static int __igb_open(struct net_device *netdev, bool resuming)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	int err;
	int i;

	/* disallow open during test */
	if (test_bit(__IGB_TESTING, &adapter->state)) {
		/* a resume is not expected while a test is running */
		WARN_ON(resuming);
		return -EBUSY;
	}

	if (!resuming)
		pm_runtime_get_sync(&pdev->dev);

	netif_carrier_off(netdev);

	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

	igb_power_up_link(adapter);

	/* before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
	 * clean_rx handler before we do so.  */
	igb_configure(adapter);

	err = igb_request_irq(adapter);
	if (err)
		goto err_req_irq;

	/* From here on the code is the same as igb_up() */
	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);

	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(netdev);

	if (!resuming)
		pm_runtime_put(&pdev->dev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;

	/* error unwinding: each label undoes the steps completed before it */
err_req_irq:
	igb_release_hw_control(adapter);
	igb_power_down_link(adapter);
	igb_free_all_rx_resources(adapter);
err_setup_rx:
	igb_free_all_tx_resources(adapter);
err_setup_tx:
	igb_reset(adapter);
	if (!resuming)
		pm_runtime_put(&pdev->dev);

	return err;
}
2572
/**
 * igb_open - open entry point for the network interface
 * @netdev: network interface device structure
 *
 * Thin wrapper that calls __igb_open() with resuming == false.
 *
 * Returns the value returned by __igb_open().
 **/
static int igb_open(struct net_device *netdev)
{
	return __igb_open(netdev, false);
}
2577
2578/**
2579 * igb_close - Disables a network interface
2580 * @netdev: network interface device structure
2581 *
2582 * Returns 0, this is not allowed to fail
2583 *
2584 * The close entry point is called when an interface is de-activated
2585 * by the OS.  The hardware is still under the driver's control, but
2586 * needs to be disabled.  A global MAC reset is issued to stop the
2587 * hardware, and all transmit and receive resources are freed.
2588 **/
2589static int __igb_close(struct net_device *netdev, bool suspending)
2590{
2591	struct igb_adapter *adapter = netdev_priv(netdev);
2592	struct pci_dev *pdev = adapter->pdev;
2593
2594	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2595
2596	if (!suspending)
2597		pm_runtime_get_sync(&pdev->dev);
2598
2599	igb_down(adapter);
2600	igb_free_irq(adapter);
2601
2602	igb_free_all_tx_resources(adapter);
2603	igb_free_all_rx_resources(adapter);
2604
2605	if (!suspending)
2606		pm_runtime_put_sync(&pdev->dev);
2607	return 0;
2608}
2609
/**
 * igb_close - close entry point for the network interface
 * @netdev: network interface device structure
 *
 * Thin wrapper that calls __igb_close() with suspending == false.
 *
 * Returns the value returned by __igb_close().
 **/
static int igb_close(struct net_device *netdev)
{
	return __igb_close(netdev, false);
}
2614
2615/**
2616 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2617 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2618 *
2619 * Return 0 on success, negative on failure
2620 **/
2621int igb_setup_tx_resources(struct igb_ring *tx_ring)
2622{
2623	struct device *dev = tx_ring->dev;
2624	int orig_node = dev_to_node(dev);
2625	int size;
2626
2627	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2628	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2629	if (!tx_ring->tx_buffer_info)
2630		tx_ring->tx_buffer_info = vzalloc(size);
2631	if (!tx_ring->tx_buffer_info)
2632		goto err;
2633
2634	/* round up to nearest 4K */
2635	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2636	tx_ring->size = ALIGN(tx_ring->size, 4096);
2637
2638	set_dev_node(dev, tx_ring->numa_node);
2639	tx_ring->desc = dma_alloc_coherent(dev,
2640					   tx_ring->size,
2641					   &tx_ring->dma,
2642					   GFP_KERNEL);
2643	set_dev_node(dev, orig_node);
2644	if (!tx_ring->desc)
2645		tx_ring->desc = dma_alloc_coherent(dev,
2646						   tx_ring->size,
2647						   &tx_ring->dma,
2648						   GFP_KERNEL);
2649
2650	if (!tx_ring->desc)
2651		goto err;
2652
2653	tx_ring->next_to_use = 0;
2654	tx_ring->next_to_clean = 0;
2655
2656	return 0;
2657
2658err:
2659	vfree(tx_ring->tx_buffer_info);
2660	dev_err(dev,
2661		"Unable to allocate memory for the transmit descriptor ring\n");
2662	return -ENOMEM;
2663}
2664
2665/**
2666 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2667 *				  (Descriptors) for all queues
2668 * @adapter: board private structure
2669 *
2670 * Return 0 on success, negative on failure
2671 **/
2672static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2673{
2674	struct pci_dev *pdev = adapter->pdev;
2675	int i, err = 0;
2676
2677	for (i = 0; i < adapter->num_tx_queues; i++) {
2678		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2679		if (err) {
2680			dev_err(&pdev->dev,
2681				"Allocation for Tx Queue %u failed\n", i);
2682			for (i--; i >= 0; i--)
2683				igb_free_tx_resources(adapter->tx_ring[i]);
2684			break;
2685		}
2686	}
2687
2688	return err;
2689}
2690
2691/**
2692 * igb_setup_tctl - configure the transmit control registers
2693 * @adapter: Board private structure
2694 **/
2695void igb_setup_tctl(struct igb_adapter *adapter)
2696{
2697	struct e1000_hw *hw = &adapter->hw;
2698	u32 tctl;
2699
2700	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2701	wr32(E1000_TXDCTL(0), 0);
2702
2703	/* Program the Transmit Control Register */
2704	tctl = rd32(E1000_TCTL);
2705	tctl &= ~E1000_TCTL_CT;
2706	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2707		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2708
2709	igb_config_collision_dist(hw);
2710
2711	/* Enable transmits */
2712	tctl |= E1000_TCTL_EN;
2713
2714	wr32(E1000_TCTL, tctl);
2715}
2716
2717/**
2718 * igb_configure_tx_ring - Configure transmit ring after Reset
2719 * @adapter: board private structure
2720 * @ring: tx ring to configure
2721 *
2722 * Configure a transmit ring after a reset.
2723 **/
2724void igb_configure_tx_ring(struct igb_adapter *adapter,
2725                           struct igb_ring *ring)
2726{
2727	struct e1000_hw *hw = &adapter->hw;
2728	u32 txdctl = 0;
2729	u64 tdba = ring->dma;
2730	int reg_idx = ring->reg_idx;
2731
2732	/* disable the queue */
2733	wr32(E1000_TXDCTL(reg_idx), 0);
2734	wrfl();
2735	mdelay(10);
2736
2737	wr32(E1000_TDLEN(reg_idx),
2738	                ring->count * sizeof(union e1000_adv_tx_desc));
2739	wr32(E1000_TDBAL(reg_idx),
2740	                tdba & 0x00000000ffffffffULL);
2741	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2742
2743	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2744	wr32(E1000_TDH(reg_idx), 0);
2745	writel(0, ring->tail);
2746
2747	txdctl |= IGB_TX_PTHRESH;
2748	txdctl |= IGB_TX_HTHRESH << 8;
2749	txdctl |= IGB_TX_WTHRESH << 16;
2750
2751	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2752	wr32(E1000_TXDCTL(reg_idx), txdctl);
2753}
2754
2755/**
2756 * igb_configure_tx - Configure transmit Unit after Reset
2757 * @adapter: board private structure
2758 *
2759 * Configure the Tx unit of the MAC after a reset.
2760 **/
2761static void igb_configure_tx(struct igb_adapter *adapter)
2762{
2763	int i;
2764
2765	for (i = 0; i < adapter->num_tx_queues; i++)
2766		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2767}
2768
2769/**
2770 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2771 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2772 *
2773 * Returns 0 on success, negative on failure
2774 **/
2775int igb_setup_rx_resources(struct igb_ring *rx_ring)
2776{
2777	struct device *dev = rx_ring->dev;
2778	int orig_node = dev_to_node(dev);
2779	int size, desc_len;
2780
2781	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2782	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2783	if (!rx_ring->rx_buffer_info)
2784		rx_ring->rx_buffer_info = vzalloc(size);
2785	if (!rx_ring->rx_buffer_info)
2786		goto err;
2787
2788	desc_len = sizeof(union e1000_adv_rx_desc);
2789
2790	/* Round up to nearest 4K */
2791	rx_ring->size = rx_ring->count * desc_len;
2792	rx_ring->size = ALIGN(rx_ring->size, 4096);
2793
2794	set_dev_node(dev, rx_ring->numa_node);
2795	rx_ring->desc = dma_alloc_coherent(dev,
2796					   rx_ring->size,
2797					   &rx_ring->dma,
2798					   GFP_KERNEL);
2799	set_dev_node(dev, orig_node);
2800	if (!rx_ring->desc)
2801		rx_ring->desc = dma_alloc_coherent(dev,
2802						   rx_ring->size,
2803						   &rx_ring->dma,
2804						   GFP_KERNEL);
2805
2806	if (!rx_ring->desc)
2807		goto err;
2808
2809	rx_ring->next_to_clean = 0;
2810	rx_ring->next_to_use = 0;
2811
2812	return 0;
2813
2814err:
2815	vfree(rx_ring->rx_buffer_info);
2816	rx_ring->rx_buffer_info = NULL;
2817	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2818		" ring\n");
2819	return -ENOMEM;
2820}
2821
2822/**
2823 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2824 *				  (Descriptors) for all queues
2825 * @adapter: board private structure
2826 *
2827 * Return 0 on success, negative on failure
2828 **/
2829static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2830{
2831	struct pci_dev *pdev = adapter->pdev;
2832	int i, err = 0;
2833
2834	for (i = 0; i < adapter->num_rx_queues; i++) {
2835		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2836		if (err) {
2837			dev_err(&pdev->dev,
2838				"Allocation for Rx Queue %u failed\n", i);
2839			for (i--; i >= 0; i--)
2840				igb_free_rx_resources(adapter->rx_ring[i]);
2841			break;
2842		}
2843	}
2844
2845	return err;
2846}
2847
2848/**
2849 * igb_setup_mrqc - configure the multiple receive queue control registers
2850 * @adapter: Board private structure
2851 **/
2852static void igb_setup_mrqc(struct igb_adapter *adapter)
2853{
2854	struct e1000_hw *hw = &adapter->hw;
2855	u32 mrqc, rxcsum;
2856	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2857	union e1000_reta {
2858		u32 dword;
2859		u8  bytes[4];
2860	} reta;
2861	static const u8 rsshash[40] = {
2862		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2863		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2864		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2865		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2866
2867	/* Fill out hash function seeds */
2868	for (j = 0; j < 10; j++) {
2869		u32 rsskey = rsshash[(j * 4)];
2870		rsskey |= rsshash[(j * 4) + 1] << 8;
2871		rsskey |= rsshash[(j * 4) + 2] << 16;
2872		rsskey |= rsshash[(j * 4) + 3] << 24;
2873		array_wr32(E1000_RSSRK(0), j, rsskey);
2874	}
2875
2876	num_rx_queues = adapter->rss_queues;
2877
2878	if (adapter->vfs_allocated_count) {
2879		/* 82575 and 82576 supports 2 RSS queues for VMDq */
2880		switch (hw->mac.type) {
2881		case e1000_i350:
2882		case e1000_82580:
2883			num_rx_queues = 1;
2884			shift = 0;
2885			break;
2886		case e1000_82576:
2887			shift = 3;
2888			num_rx_queues = 2;
2889			break;
2890		case e1000_82575:
2891			shift = 2;
2892			shift2 = 6;
2893		default:
2894			break;
2895		}
2896	} else {
2897		if (hw->mac.type == e1000_82575)
2898			shift = 6;
2899	}
2900
2901	for (j = 0; j < (32 * 4); j++) {
2902		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2903		if (shift2)
2904			reta.bytes[j & 3] |= num_rx_queues << shift2;
2905		if ((j & 3) == 3)
2906			wr32(E1000_RETA(j >> 2), reta.dword);
2907	}
2908
2909	/*
2910	 * Disable raw packet checksumming so that RSS hash is placed in
2911	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2912	 * offloads as they are enabled by default
2913	 */
2914	rxcsum = rd32(E1000_RXCSUM);
2915	rxcsum |= E1000_RXCSUM_PCSD;
2916
2917	if (adapter->hw.mac.type >= e1000_82576)
2918		/* Enable Receive Checksum Offload for SCTP */
2919		rxcsum |= E1000_RXCSUM_CRCOFL;
2920
2921	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2922	wr32(E1000_RXCSUM, rxcsum);
2923
2924	/* If VMDq is enabled then we set the appropriate mode for that, else
2925	 * we default to RSS so that an RSS hash is calculated per packet even
2926	 * if we are only using one queue */
2927	if (adapter->vfs_allocated_count) {
2928		if (hw->mac.type > e1000_82575) {
2929			/* Set the default pool for the PF's first queue */
2930			u32 vtctl = rd32(E1000_VT_CTL);
2931			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2932				   E1000_VT_CTL_DISABLE_DEF_POOL);
2933			vtctl |= adapter->vfs_allocated_count <<
2934				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2935			wr32(E1000_VT_CTL, vtctl);
2936		}
2937		if (adapter->rss_queues > 1)
2938			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2939		else
2940			mrqc = E1000_MRQC_ENABLE_VMDQ;
2941	} else {
2942		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2943	}
2944	igb_vmm_control(adapter);
2945
2946	/*
2947	 * Generate RSS hash based on TCP port numbers and/or
2948	 * IPv4/v6 src and dst addresses since UDP cannot be
2949	 * hashed reliably due to IP fragmentation
2950	 */
2951	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2952		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2953		E1000_MRQC_RSS_FIELD_IPV6 |
2954		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2955		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2956
2957	wr32(E1000_MRQC, mrqc);
2958}
2959
2960/**
2961 * igb_setup_rctl - configure the receive control registers
2962 * @adapter: Board private structure
2963 **/
2964void igb_setup_rctl(struct igb_adapter *adapter)
2965{
2966	struct e1000_hw *hw = &adapter->hw;
2967	u32 rctl;
2968
2969	rctl = rd32(E1000_RCTL);
2970
2971	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2972	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2973
2974	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2975		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2976
2977	/*
2978	 * enable stripping of CRC. It's unlikely this will break BMC
2979	 * redirection as it did with e1000. Newer features require
2980	 * that the HW strips the CRC.
2981	 */
2982	rctl |= E1000_RCTL_SECRC;
2983
2984	/* disable store bad packets and clear size bits. */
2985	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2986
2987	/* enable LPE to prevent packets larger than max_frame_size */
2988	rctl |= E1000_RCTL_LPE;
2989
2990	/* disable queue 0 to prevent tail write w/o re-config */
2991	wr32(E1000_RXDCTL(0), 0);
2992
2993	/* Attention!!!  For SR-IOV PF driver operations you must enable
2994	 * queue drop for all VF and PF queues to prevent head of line blocking
2995	 * if an un-trusted VF does not provide descriptors to hardware.
2996	 */
2997	if (adapter->vfs_allocated_count) {
2998		/* set all queue drop enable bits */
2999		wr32(E1000_QDE, ALL_QUEUES);
3000	}
3001
3002	wr32(E1000_RCTL, rctl);
3003}
3004
3005static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3006                                   int vfn)
3007{
3008	struct e1000_hw *hw = &adapter->hw;
3009	u32 vmolr;
3010
3011	/* if it isn't the PF check to see if VFs are enabled and
3012	 * increase the size to support vlan tags */
3013	if (vfn < adapter->vfs_allocated_count &&
3014	    adapter->vf_data[vfn].vlans_enabled)
3015		size += VLAN_TAG_SIZE;
3016
3017	vmolr = rd32(E1000_VMOLR(vfn));
3018	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3019	vmolr |= size | E1000_VMOLR_LPE;
3020	wr32(E1000_VMOLR(vfn), vmolr);
3021
3022	return 0;
3023}
3024
3025/**
3026 * igb_rlpml_set - set maximum receive packet size
3027 * @adapter: board private structure
3028 *
3029 * Configure maximum receivable packet size.
3030 **/
3031static void igb_rlpml_set(struct igb_adapter *adapter)
3032{
3033	u32 max_frame_size = adapter->max_frame_size;
3034	struct e1000_hw *hw = &adapter->hw;
3035	u16 pf_id = adapter->vfs_allocated_count;
3036
3037	if (pf_id) {
3038		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3039		/*
3040		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3041		 * to our max jumbo frame size, in case we need to enable
3042		 * jumbo frames on one of the rings later.
3043		 * This will not pass over-length frames into the default
3044		 * queue because it's gated by the VMOLR.RLPML.
3045		 */
3046		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3047	}
3048
3049	wr32(E1000_RLPML, max_frame_size);
3050}
3051
3052static inline void igb_set_vmolr(struct igb_adapter *adapter,
3053				 int vfn, bool aupe)
3054{
3055	struct e1000_hw *hw = &adapter->hw;
3056	u32 vmolr;
3057
3058	/*
3059	 * This register exists only on 82576 and newer so if we are older then
3060	 * we should exit and do nothing
3061	 */
3062	if (hw->mac.type < e1000_82576)
3063		return;
3064
3065	vmolr = rd32(E1000_VMOLR(vfn));
3066	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3067	if (aupe)
3068		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3069	else
3070		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3071
3072	/* clear all bits that might not be set */
3073	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3074
3075	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3076		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3077	/*
3078	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3079	 * multicast packets
3080	 */
3081	if (vfn <= adapter->vfs_allocated_count)
3082		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3083
3084	wr32(E1000_VMOLR(vfn), vmolr);
3085}
3086
3087/**
3088 * igb_configure_rx_ring - Configure a receive ring after Reset
3089 * @adapter: board private structure
3090 * @ring: receive ring to be configured
3091 *
3092 * Configure the Rx unit of the MAC after a reset.
3093 **/
3094void igb_configure_rx_ring(struct igb_adapter *adapter,
3095                           struct igb_ring *ring)
3096{
3097	struct e1000_hw *hw = &adapter->hw;
3098	u64 rdba = ring->dma;
3099	int reg_idx = ring->reg_idx;
3100	u32 srrctl = 0, rxdctl = 0;
3101
3102	/* disable the queue */
3103	wr32(E1000_RXDCTL(reg_idx), 0);
3104
3105	/* Set DMA base address registers */
3106	wr32(E1000_RDBAL(reg_idx),
3107	     rdba & 0x00000000ffffffffULL);
3108	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3109	wr32(E1000_RDLEN(reg_idx),
3110	               ring->count * sizeof(union e1000_adv_rx_desc));
3111
3112	/* initialize head and tail */
3113	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3114	wr32(E1000_RDH(reg_idx), 0);
3115	writel(0, ring->tail);
3116
3117	/* set descriptor configuration */
3118	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3119#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3120	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3121#else
3122	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3123#endif
3124	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3125	if (hw->mac.type >= e1000_82580)
3126		srrctl |= E1000_SRRCTL_TIMESTAMP;
3127	/* Only set Drop Enable if we are supporting multiple queues */
3128	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3129		srrctl |= E1000_SRRCTL_DROP_EN;
3130
3131	wr32(E1000_SRRCTL(reg_idx), srrctl);
3132
3133	/* set filtering for VMDQ pools */
3134	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3135
3136	rxdctl |= IGB_RX_PTHRESH;
3137	rxdctl |= IGB_RX_HTHRESH << 8;
3138	rxdctl |= IGB_RX_WTHRESH << 16;
3139
3140	/* enable receive descriptor fetching */
3141	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3142	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3143}
3144
3145/**
3146 * igb_configure_rx - Configure receive Unit after Reset
3147 * @adapter: board private structure
3148 *
3149 * Configure the Rx unit of the MAC after a reset.
3150 **/
3151static void igb_configure_rx(struct igb_adapter *adapter)
3152{
3153	int i;
3154
3155	/* set UTA to appropriate mode */
3156	igb_set_uta(adapter);
3157
3158	/* set the correct pool for the PF default MAC address in entry 0 */
3159	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3160	                 adapter->vfs_allocated_count);
3161
3162	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3163	 * the Base and Length of the Rx Descriptor Ring */
3164	for (i = 0; i < adapter->num_rx_queues; i++)
3165		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3166}
3167
3168/**
3169 * igb_free_tx_resources - Free Tx Resources per Queue
3170 * @tx_ring: Tx descriptor ring for a specific queue
3171 *
3172 * Free all transmit software resources
3173 **/
3174void igb_free_tx_resources(struct igb_ring *tx_ring)
3175{
3176	igb_clean_tx_ring(tx_ring);
3177
3178	vfree(tx_ring->tx_buffer_info);
3179	tx_ring->tx_buffer_info = NULL;
3180
3181	/* if not set, then don't free */
3182	if (!tx_ring->desc)
3183		return;
3184
3185	dma_free_coherent(tx_ring->dev, tx_ring->size,
3186			  tx_ring->desc, tx_ring->dma);
3187
3188	tx_ring->desc = NULL;
3189}
3190
3191/**
3192 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3193 * @adapter: board private structure
3194 *
3195 * Free all transmit software resources
3196 **/
3197static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3198{
3199	int i;
3200
3201	for (i = 0; i < adapter->num_tx_queues; i++)
3202		igb_free_tx_resources(adapter->tx_ring[i]);
3203}
3204
3205void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3206				    struct igb_tx_buffer *tx_buffer)
3207{
3208	if (tx_buffer->skb) {
3209		dev_kfree_skb_any(tx_buffer->skb);
3210		if (tx_buffer->dma)
3211			dma_unmap_single(ring->dev,
3212					 tx_buffer->dma,
3213					 tx_buffer->length,
3214					 DMA_TO_DEVICE);
3215	} else if (tx_buffer->dma) {
3216		dma_unmap_page(ring->dev,
3217			       tx_buffer->dma,
3218			       tx_buffer->length,
3219			       DMA_TO_DEVICE);
3220	}
3221	tx_buffer->next_to_watch = NULL;
3222	tx_buffer->skb = NULL;
3223	tx_buffer->dma = 0;
3224	/* buffer_info must be completely set up in the transmit path */
3225}
3226
3227/**
3228 * igb_clean_tx_ring - Free Tx Buffers
3229 * @tx_ring: ring to be cleaned
3230 **/
3231static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3232{
3233	struct igb_tx_buffer *buffer_info;
3234	unsigned long size;
3235	u16 i;
3236
3237	if (!tx_ring->tx_buffer_info)
3238		return;
3239	/* Free all the Tx ring sk_buffs */
3240
3241	for (i = 0; i < tx_ring->count; i++) {
3242		buffer_info = &tx_ring->tx_buffer_info[i];
3243		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3244	}
3245	netdev_tx_reset_queue(txring_txq(tx_ring));
3246
3247	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3248	memset(tx_ring->tx_buffer_info, 0, size);
3249
3250	/* Zero out the descriptor ring */
3251	memset(tx_ring->desc, 0, tx_ring->size);
3252
3253	tx_ring->next_to_use = 0;
3254	tx_ring->next_to_clean = 0;
3255}
3256
3257/**
3258 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3259 * @adapter: board private structure
3260 **/
3261static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3262{
3263	int i;
3264
3265	for (i = 0; i < adapter->num_tx_queues; i++)
3266		igb_clean_tx_ring(adapter->tx_ring[i]);
3267}
3268
3269/**
3270 * igb_free_rx_resources - Free Rx Resources
3271 * @rx_ring: ring to clean the resources from
3272 *
3273 * Free all receive software resources
3274 **/
3275void igb_free_rx_resources(struct igb_ring *rx_ring)
3276{
3277	igb_clean_rx_ring(rx_ring);
3278
3279	vfree(rx_ring->rx_buffer_info);
3280	rx_ring->rx_buffer_info = NULL;
3281
3282	/* if not set, then don't free */
3283	if (!rx_ring->desc)
3284		return;
3285
3286	dma_free_coherent(rx_ring->dev, rx_ring->size,
3287			  rx_ring->desc, rx_ring->dma);
3288
3289	rx_ring->desc = NULL;
3290}
3291
3292/**
3293 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3294 * @adapter: board private structure
3295 *
3296 * Free all receive software resources
3297 **/
3298static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3299{
3300	int i;
3301
3302	for (i = 0; i < adapter->num_rx_queues; i++)
3303		igb_free_rx_resources(adapter->rx_ring[i]);
3304}
3305
3306/**
3307 * igb_clean_rx_ring - Free Rx Buffers per Queue
3308 * @rx_ring: ring to free buffers from
3309 **/
3310static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3311{
3312	unsigned long size;
3313	u16 i;
3314
3315	if (!rx_ring->rx_buffer_info)
3316		return;
3317
3318	/* Free all the Rx ring sk_buffs */
3319	for (i = 0; i < rx_ring->count; i++) {
3320		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3321		if (buffer_info->dma) {
3322			dma_unmap_single(rx_ring->dev,
3323			                 buffer_info->dma,
3324					 IGB_RX_HDR_LEN,
3325					 DMA_FROM_DEVICE);
3326			buffer_info->dma = 0;
3327		}
3328
3329		if (buffer_info->skb) {
3330			dev_kfree_skb(buffer_info->skb);
3331			buffer_info->skb = NULL;
3332		}
3333		if (buffer_info->page_dma) {
3334			dma_unmap_page(rx_ring->dev,
3335			               buffer_info->page_dma,
3336				       PAGE_SIZE / 2,
3337				       DMA_FROM_DEVICE);
3338			buffer_info->page_dma = 0;
3339		}
3340		if (buffer_info->page) {
3341			put_page(buffer_info->page);
3342			buffer_info->page = NULL;
3343			buffer_info->page_offset = 0;
3344		}
3345	}
3346
3347	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3348	memset(rx_ring->rx_buffer_info, 0, size);
3349
3350	/* Zero out the descriptor ring */
3351	memset(rx_ring->desc, 0, rx_ring->size);
3352
3353	rx_ring->next_to_clean = 0;
3354	rx_ring->next_to_use = 0;
3355}
3356
3357/**
3358 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3359 * @adapter: board private structure
3360 **/
3361static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3362{
3363	int i;
3364
3365	for (i = 0; i < adapter->num_rx_queues; i++)
3366		igb_clean_rx_ring(adapter->rx_ring[i]);
3367}
3368
3369/**
3370 * igb_set_mac - Change the Ethernet Address of the NIC
3371 * @netdev: network interface device structure
3372 * @p: pointer to an address structure
3373 *
3374 * Returns 0 on success, negative on failure
3375 **/
3376static int igb_set_mac(struct net_device *netdev, void *p)
3377{
3378	struct igb_adapter *adapter = netdev_priv(netdev);
3379	struct e1000_hw *hw = &adapter->hw;
3380	struct sockaddr *addr = p;
3381
3382	if (!is_valid_ether_addr(addr->sa_data))
3383		return -EADDRNOTAVAIL;
3384
3385	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3386	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3387
3388	/* set the correct pool for the new PF MAC address in entry 0 */
3389	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3390	                 adapter->vfs_allocated_count);
3391
3392	return 0;
3393}
3394
3395/**
3396 * igb_write_mc_addr_list - write multicast addresses to MTA
3397 * @netdev: network interface device structure
3398 *
3399 * Writes multicast address list to the MTA hash table.
3400 * Returns: -ENOMEM on failure
3401 *                0 on no addresses written
3402 *                X on writing X addresses to MTA
3403 **/
3404static int igb_write_mc_addr_list(struct net_device *netdev)
3405{
3406	struct igb_adapter *adapter = netdev_priv(netdev);
3407	struct e1000_hw *hw = &adapter->hw;
3408	struct netdev_hw_addr *ha;
3409	u8  *mta_list;
3410	int i;
3411
3412	if (netdev_mc_empty(netdev)) {
3413		/* nothing to program, so clear mc list */
3414		igb_update_mc_addr_list(hw, NULL, 0);
3415		igb_restore_vf_multicasts(adapter);
3416		return 0;
3417	}
3418
3419	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3420	if (!mta_list)
3421		return -ENOMEM;
3422
3423	/* The shared function expects a packed array of only addresses. */
3424	i = 0;
3425	netdev_for_each_mc_addr(ha, netdev)
3426		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3427
3428	igb_update_mc_addr_list(hw, mta_list, i);
3429	kfree(mta_list);
3430
3431	return netdev_mc_count(netdev);
3432}
3433
3434/**
3435 * igb_write_uc_addr_list - write unicast addresses to RAR table
3436 * @netdev: network interface device structure
3437 *
3438 * Writes unicast address list to the RAR table.
3439 * Returns: -ENOMEM on failure/insufficient address space
3440 *                0 on no addresses written
3441 *                X on writing X addresses to the RAR table
3442 **/
3443static int igb_write_uc_addr_list(struct net_device *netdev)
3444{
3445	struct igb_adapter *adapter = netdev_priv(netdev);
3446	struct e1000_hw *hw = &adapter->hw;
3447	unsigned int vfn = adapter->vfs_allocated_count;
3448	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3449	int count = 0;
3450
3451	/* return ENOMEM indicating insufficient memory for addresses */
3452	if (netdev_uc_count(netdev) > rar_entries)
3453		return -ENOMEM;
3454
3455	if (!netdev_uc_empty(netdev) && rar_entries) {
3456		struct netdev_hw_addr *ha;
3457
3458		netdev_for_each_uc_addr(ha, netdev) {
3459			if (!rar_entries)
3460				break;
3461			igb_rar_set_qsel(adapter, ha->addr,
3462			                 rar_entries--,
3463			                 vfn);
3464			count++;
3465		}
3466	}
3467	/* write the addresses in reverse order to avoid write combining */
3468	for (; rar_entries > 0 ; rar_entries--) {
3469		wr32(E1000_RAH(rar_entries), 0);
3470		wr32(E1000_RAL(rar_entries), 0);
3471	}
3472	wrfl();
3473
3474	return count;
3475}
3476
3477/**
3478 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3479 * @netdev: network interface device structure
3480 *
3481 * The set_rx_mode entry point is called whenever the unicast or multicast
3482 * address lists or the network interface flags are updated.  This routine is
3483 * responsible for configuring the hardware for proper unicast, multicast,
3484 * promiscuous mode, and all-multi behavior.
3485 **/
3486static void igb_set_rx_mode(struct net_device *netdev)
3487{
3488	struct igb_adapter *adapter = netdev_priv(netdev);
3489	struct e1000_hw *hw = &adapter->hw;
3490	unsigned int vfn = adapter->vfs_allocated_count;
3491	u32 rctl, vmolr = 0;
3492	int count;
3493
3494	/* Check for Promiscuous and All Multicast modes */
3495	rctl = rd32(E1000_RCTL);
3496
3497	/* clear the effected bits */
3498	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3499
3500	if (netdev->flags & IFF_PROMISC) {
3501		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3502		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3503	} else {
3504		if (netdev->flags & IFF_ALLMULTI) {
3505			rctl |= E1000_RCTL_MPE;
3506			vmolr |= E1000_VMOLR_MPME;
3507		} else {
3508			/*
3509			 * Write addresses to the MTA, if the attempt fails
3510			 * then we should just turn on promiscuous mode so
3511			 * that we can at least receive multicast traffic
3512			 */
3513			count = igb_write_mc_addr_list(netdev);
3514			if (count < 0) {
3515				rctl |= E1000_RCTL_MPE;
3516				vmolr |= E1000_VMOLR_MPME;
3517			} else if (count) {
3518				vmolr |= E1000_VMOLR_ROMPE;
3519			}
3520		}
3521		/*
3522		 * Write addresses to available RAR registers, if there is not
3523		 * sufficient space to store all the addresses then enable
3524		 * unicast promiscuous mode
3525		 */
3526		count = igb_write_uc_addr_list(netdev);
3527		if (count < 0) {
3528			rctl |= E1000_RCTL_UPE;
3529			vmolr |= E1000_VMOLR_ROPE;
3530		}
3531		rctl |= E1000_RCTL_VFE;
3532	}
3533	wr32(E1000_RCTL, rctl);
3534
3535	/*
3536	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3537	 * the VMOLR to enable the appropriate modes.  Without this workaround
3538	 * we will have issues with VLAN tag stripping not being done for frames
3539	 * that are only arriving because we are the default pool
3540	 */
3541	if (hw->mac.type < e1000_82576)
3542		return;
3543
3544	vmolr |= rd32(E1000_VMOLR(vfn)) &
3545	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3546	wr32(E1000_VMOLR(vfn), vmolr);
3547	igb_restore_vf_multicasts(adapter);
3548}
3549
3550static void igb_check_wvbr(struct igb_adapter *adapter)
3551{
3552	struct e1000_hw *hw = &adapter->hw;
3553	u32 wvbr = 0;
3554
3555	switch (hw->mac.type) {
3556	case e1000_82576:
3557	case e1000_i350:
3558		if (!(wvbr = rd32(E1000_WVBR)))
3559			return;
3560		break;
3561	default:
3562		break;
3563	}
3564
3565	adapter->wvbr |= wvbr;
3566}
3567
3568#define IGB_STAGGERED_QUEUE_OFFSET 8
3569
3570static void igb_spoof_check(struct igb_adapter *adapter)
3571{
3572	int j;
3573
3574	if (!adapter->wvbr)
3575		return;
3576
3577	for(j = 0; j < adapter->vfs_allocated_count; j++) {
3578		if (adapter->wvbr & (1 << j) ||
3579		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3580			dev_warn(&adapter->pdev->dev,
3581				"Spoof event(s) detected on VF %d\n", j);
3582			adapter->wvbr &=
3583				~((1 << j) |
3584				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3585		}
3586	}
3587}
3588
3589/* Need to wait a few seconds after link up to get diagnostic information from
3590 * the phy */
3591static void igb_update_phy_info(unsigned long data)
3592{
3593	struct igb_adapter *adapter = (struct igb_adapter *) data;
3594	igb_get_phy_info(&adapter->hw);
3595}
3596
3597/**
3598 * igb_has_link - check shared code for link and determine up/down
3599 * @adapter: pointer to driver private info
3600 **/
3601bool igb_has_link(struct igb_adapter *adapter)
3602{
3603	struct e1000_hw *hw = &adapter->hw;
3604	bool link_active = false;
3605	s32 ret_val = 0;
3606
3607	/* get_link_status is set on LSC (link status) interrupt or
3608	 * rx sequence error interrupt.  get_link_status will stay
3609	 * false until the e1000_check_for_link establishes link
3610	 * for copper adapters ONLY
3611	 */
3612	switch (hw->phy.media_type) {
3613	case e1000_media_type_copper:
3614		if (hw->mac.get_link_status) {
3615			ret_val = hw->mac.ops.check_for_link(hw);
3616			link_active = !hw->mac.get_link_status;
3617		} else {
3618			link_active = true;
3619		}
3620		break;
3621	case e1000_media_type_internal_serdes:
3622		ret_val = hw->mac.ops.check_for_link(hw);
3623		link_active = hw->mac.serdes_has_link;
3624		break;
3625	default:
3626	case e1000_media_type_unknown:
3627		break;
3628	}
3629
3630	return link_active;
3631}
3632
3633static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3634{
3635	bool ret = false;
3636	u32 ctrl_ext, thstat;
3637
3638	/* check for thermal sensor event on i350, copper only */
3639	if (hw->mac.type == e1000_i350) {
3640		thstat = rd32(E1000_THSTAT);
3641		ctrl_ext = rd32(E1000_CTRL_EXT);
3642
3643		if ((hw->phy.media_type == e1000_media_type_copper) &&
3644		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3645			ret = !!(thstat & event);
3646		}
3647	}
3648
3649	return ret;
3650}
3651
3652/**
3653 * igb_watchdog - Timer Call-back
3654 * @data: pointer to adapter cast into an unsigned long
3655 **/
3656static void igb_watchdog(unsigned long data)
3657{
3658	struct igb_adapter *adapter = (struct igb_adapter *)data;
3659	/* Do the rest outside of interrupt context */
3660	schedule_work(&adapter->watchdog_task);
3661}
3662
3663static void igb_watchdog_task(struct work_struct *work)
3664{
3665	struct igb_adapter *adapter = container_of(work,
3666	                                           struct igb_adapter,
3667                                                   watchdog_task);
3668	struct e1000_hw *hw = &adapter->hw;
3669	struct net_device *netdev = adapter->netdev;
3670	u32 link;
3671	int i;
3672
3673	link = igb_has_link(adapter);
3674	if (link) {
3675		/* Cancel scheduled suspend requests. */
3676		pm_runtime_resume(netdev->dev.parent);
3677
3678		if (!netif_carrier_ok(netdev)) {
3679			u32 ctrl;
3680			hw->mac.ops.get_speed_and_duplex(hw,
3681			                                 &adapter->link_speed,
3682			                                 &adapter->link_duplex);
3683
3684			ctrl = rd32(E1000_CTRL);
3685			/* Links status message must follow this format */
3686			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3687			       "Duplex, Flow Control: %s\n",
3688			       netdev->name,
3689			       adapter->link_speed,
3690			       adapter->link_duplex == FULL_DUPLEX ?
3691			       "Full" : "Half",
3692			       (ctrl & E1000_CTRL_TFCE) &&
3693			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3694			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3695			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3696
3697			/* check for thermal sensor event */
3698			if (igb_thermal_sensor_event(hw,
3699			    E1000_THSTAT_LINK_THROTTLE)) {
3700				netdev_info(netdev, "The network adapter link "
3701					    "speed was downshifted because it "
3702					    "overheated\n");
3703			}
3704
3705			/* adjust timeout factor according to speed/duplex */
3706			adapter->tx_timeout_factor = 1;
3707			switch (adapter->link_speed) {
3708			case SPEED_10:
3709				adapter->tx_timeout_factor = 14;
3710				break;
3711			case SPEED_100:
3712				/* maybe add some timeout factor ? */
3713				break;
3714			}
3715
3716			netif_carrier_on(netdev);
3717
3718			igb_ping_all_vfs(adapter);
3719			igb_check_vf_rate_limit(adapter);
3720
3721			/* link state has changed, schedule phy info update */
3722			if (!test_bit(__IGB_DOWN, &adapter->state))
3723				mod_timer(&adapter->phy_info_timer,
3724					  round_jiffies(jiffies + 2 * HZ));
3725		}
3726	} else {
3727		if (netif_carrier_ok(netdev)) {
3728			adapter->link_speed = 0;
3729			adapter->link_duplex = 0;
3730
3731			/* check for thermal sensor event */
3732			if (igb_thermal_sensor_event(hw,
3733			    E1000_THSTAT_PWR_DOWN)) {
3734				netdev_err(netdev, "The network adapter was "
3735					   "stopped because it overheated\n");
3736			}
3737
3738			/* Links status message must follow this format */
3739			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3740			       netdev->name);
3741			netif_carrier_off(netdev);
3742
3743			igb_ping_all_vfs(adapter);
3744
3745			/* link state has changed, schedule phy info update */
3746			if (!test_bit(__IGB_DOWN, &adapter->state))
3747				mod_timer(&adapter->phy_info_timer,
3748					  round_jiffies(jiffies + 2 * HZ));
3749
3750			pm_schedule_suspend(netdev->dev.parent,
3751					    MSEC_PER_SEC * 5);
3752		}
3753	}
3754
3755	spin_lock(&adapter->stats64_lock);
3756	igb_update_stats(adapter, &adapter->stats64);
3757	spin_unlock(&adapter->stats64_lock);
3758
3759	for (i = 0; i < adapter->num_tx_queues; i++) {
3760		struct igb_ring *tx_ring = adapter->tx_ring[i];
3761		if (!netif_carrier_ok(netdev)) {
3762			/* We've lost link, so the controller stops DMA,
3763			 * but we've got queued Tx work that's never going
3764			 * to get done, so reset controller to flush Tx.
3765			 * (Do the reset outside of interrupt context). */
3766			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3767				adapter->tx_timeout_count++;
3768				schedule_work(&adapter->reset_task);
3769				/* return immediately since reset is imminent */
3770				return;
3771			}
3772		}
3773
3774		/* Force detection of hung controller every watchdog period */
3775		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3776	}
3777
3778	/* Cause software interrupt to ensure rx ring is cleaned */
3779	if (adapter->msix_entries) {
3780		u32 eics = 0;
3781		for (i = 0; i < adapter->num_q_vectors; i++)
3782			eics |= adapter->q_vector[i]->eims_value;
3783		wr32(E1000_EICS, eics);
3784	} else {
3785		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3786	}
3787
3788	igb_spoof_check(adapter);
3789
3790	/* Reset the timer */
3791	if (!test_bit(__IGB_DOWN, &adapter->state))
3792		mod_timer(&adapter->watchdog_timer,
3793			  round_jiffies(jiffies + 2 * HZ));
3794}
3795
/*
 * Latency classes used by the dynamic ITR code; igb_set_itr() maps them
 * to 70,000 / 20,000 / 4,000 ints/sec respectively.
 */
enum latency_range {
	lowest_latency,			/* 0 */
	low_latency,			/* 1 */
	bulk_latency,			/* 2 */
	latency_invalid = 255
};
3802
3803/**
3804 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3805 *
3806 *      Stores a new ITR value based on strictly on packet size.  This
3807 *      algorithm is less sophisticated than that used in igb_update_itr,
3808 *      due to the difficulty of synchronizing statistics across multiple
3809 *      receive rings.  The divisors and thresholds used by this function
3810 *      were determined based on theoretical maximum wire speed and testing
3811 *      data, in order to minimize response time while increasing bulk
3812 *      throughput.
3813 *      This functionality is controlled by the InterruptThrottleRate module
3814 *      parameter (see igb_param.c)
3815 *      NOTE:  This function is called only when operating in a multiqueue
3816 *             receive environment.
3817 * @q_vector: pointer to q_vector
3818 **/
3819static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3820{
3821	int new_val = q_vector->itr_val;
3822	int avg_wire_size = 0;
3823	struct igb_adapter *adapter = q_vector->adapter;
3824	unsigned int packets;
3825
3826	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3827	 * ints/sec - ITR timer value of 120 ticks.
3828	 */
3829	if (adapter->link_speed != SPEED_1000) {
3830		new_val = IGB_4K_ITR;
3831		goto set_itr_val;
3832	}
3833
3834	packets = q_vector->rx.total_packets;
3835	if (packets)
3836		avg_wire_size = q_vector->rx.total_bytes / packets;
3837
3838	packets = q_vector->tx.total_packets;
3839	if (packets)
3840		avg_wire_size = max_t(u32, avg_wire_size,
3841				      q_vector->tx.total_bytes / packets);
3842
3843	/* if avg_wire_size isn't set no work was done */
3844	if (!avg_wire_size)
3845		goto clear_counts;
3846
3847	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3848	avg_wire_size += 24;
3849
3850	/* Don't starve jumbo frames */
3851	avg_wire_size = min(avg_wire_size, 3000);
3852
3853	/* Give a little boost to mid-size frames */
3854	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3855		new_val = avg_wire_size / 3;
3856	else
3857		new_val = avg_wire_size / 2;
3858
3859	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3860	if (new_val < IGB_20K_ITR &&
3861	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3862	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3863		new_val = IGB_20K_ITR;
3864
3865set_itr_val:
3866	if (new_val != q_vector->itr_val) {
3867		q_vector->itr_val = new_val;
3868		q_vector->set_itr = 1;
3869	}
3870clear_counts:
3871	q_vector->rx.total_bytes = 0;
3872	q_vector->rx.total_packets = 0;
3873	q_vector->tx.total_bytes = 0;
3874	q_vector->tx.total_packets = 0;
3875}
3876
3877/**
3878 * igb_update_itr - update the dynamic ITR value based on statistics
3879 *      Stores a new ITR value based on packets and byte
3880 *      counts during the last interrupt.  The advantage of per interrupt
3881 *      computation is faster updates and more accurate ITR for the current
3882 *      traffic pattern.  Constants in this function were computed
3883 *      based on theoretical maximum wire speed and thresholds were set based
3884 *      on testing data as well as attempting to minimize response time
3885 *      while increasing bulk throughput.
3886 *      this functionality is controlled by the InterruptThrottleRate module
3887 *      parameter (see igb_param.c)
3888 *      NOTE:  These calculations are only valid when operating in a single-
3889 *             queue environment.
3890 * @q_vector: pointer to q_vector
3891 * @ring_container: ring info to update the itr for
3892 **/
3893static void igb_update_itr(struct igb_q_vector *q_vector,
3894			   struct igb_ring_container *ring_container)
3895{
3896	unsigned int packets = ring_container->total_packets;
3897	unsigned int bytes = ring_container->total_bytes;
3898	u8 itrval = ring_container->itr;
3899
3900	/* no packets, exit with status unchanged */
3901	if (packets == 0)
3902		return;
3903
3904	switch (itrval) {
3905	case lowest_latency:
3906		/* handle TSO and jumbo frames */
3907		if (bytes/packets > 8000)
3908			itrval = bulk_latency;
3909		else if ((packets < 5) && (bytes > 512))
3910			itrval = low_latency;
3911		break;
3912	case low_latency:  /* 50 usec aka 20000 ints/s */
3913		if (bytes > 10000) {
3914			/* this if handles the TSO accounting */
3915			if (bytes/packets > 8000) {
3916				itrval = bulk_latency;
3917			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3918				itrval = bulk_latency;
3919			} else if ((packets > 35)) {
3920				itrval = lowest_latency;
3921			}
3922		} else if (bytes/packets > 2000) {
3923			itrval = bulk_latency;
3924		} else if (packets <= 2 && bytes < 512) {
3925			itrval = lowest_latency;
3926		}
3927		break;
3928	case bulk_latency: /* 250 usec aka 4000 ints/s */
3929		if (bytes > 25000) {
3930			if (packets > 35)
3931				itrval = low_latency;
3932		} else if (bytes < 1500) {
3933			itrval = low_latency;
3934		}
3935		break;
3936	}
3937
3938	/* clear work counters since we have the values we need */
3939	ring_container->total_bytes = 0;
3940	ring_container->total_packets = 0;
3941
3942	/* write updated itr to ring container */
3943	ring_container->itr = itrval;
3944}
3945
3946static void igb_set_itr(struct igb_q_vector *q_vector)
3947{
3948	struct igb_adapter *adapter = q_vector->adapter;
3949	u32 new_itr = q_vector->itr_val;
3950	u8 current_itr = 0;
3951
3952	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3953	if (adapter->link_speed != SPEED_1000) {
3954		current_itr = 0;
3955		new_itr = IGB_4K_ITR;
3956		goto set_itr_now;
3957	}
3958
3959	igb_update_itr(q_vector, &q_vector->tx);
3960	igb_update_itr(q_vector, &q_vector->rx);
3961
3962	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3963
3964	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3965	if (current_itr == lowest_latency &&
3966	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3967	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3968		current_itr = low_latency;
3969
3970	switch (current_itr) {
3971	/* counts and packets in update_itr are dependent on these numbers */
3972	case lowest_latency:
3973		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3974		break;
3975	case low_latency:
3976		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3977		break;
3978	case bulk_latency:
3979		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3980		break;
3981	default:
3982		break;
3983	}
3984
3985set_itr_now:
3986	if (new_itr != q_vector->itr_val) {
3987		/* this attempts to bias the interrupt rate towards Bulk
3988		 * by adding intermediate steps when interrupt rate is
3989		 * increasing */
3990		new_itr = new_itr > q_vector->itr_val ?
3991		             max((new_itr * q_vector->itr_val) /
3992		                 (new_itr + (q_vector->itr_val >> 2)),
3993				 new_itr) :
3994			     new_itr;
3995		/* Don't write the value here; it resets the adapter's
3996		 * internal timer, and causes us to delay far longer than
3997		 * we should between interrupts.  Instead, we write the ITR
3998		 * value at the beginning of the next interrupt so the timing
3999		 * ends up being correct.
4000		 */
4001		q_vector->itr_val = new_itr;
4002		q_vector->set_itr = 1;
4003	}
4004}
4005
4006static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4007			    u32 type_tucmd, u32 mss_l4len_idx)
4008{
4009	struct e1000_adv_tx_context_desc *context_desc;
4010	u16 i = tx_ring->next_to_use;
4011
4012	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4013
4014	i++;
4015	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4016
4017	/* set bits to identify this as an advanced context descriptor */
4018	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4019
4020	/* For 82575, context index must be unique per ring. */
4021	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4022		mss_l4len_idx |= tx_ring->reg_idx << 4;
4023
4024	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4025	context_desc->seqnum_seed	= 0;
4026	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4027	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4028}
4029
4030static int igb_tso(struct igb_ring *tx_ring,
4031		   struct igb_tx_buffer *first,
4032		   u8 *hdr_len)
4033{
4034	struct sk_buff *skb = first->skb;
4035	u32 vlan_macip_lens, type_tucmd;
4036	u32 mss_l4len_idx, l4len;
4037
4038	if (!skb_is_gso(skb))
4039		return 0;
4040
4041	if (skb_header_cloned(skb)) {
4042		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4043		if (err)
4044			return err;
4045	}
4046
4047	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4048	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4049
4050	if (first->protocol == __constant_htons(ETH_P_IP)) {
4051		struct iphdr *iph = ip_hdr(skb);
4052		iph->tot_len = 0;
4053		iph->check = 0;
4054		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4055							 iph->daddr, 0,
4056							 IPPROTO_TCP,
4057							 0);
4058		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4059		first->tx_flags |= IGB_TX_FLAGS_TSO |
4060				   IGB_TX_FLAGS_CSUM |
4061				   IGB_TX_FLAGS_IPV4;
4062	} else if (skb_is_gso_v6(skb)) {
4063		ipv6_hdr(skb)->payload_len = 0;
4064		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4065						       &ipv6_hdr(skb)->daddr,
4066						       0, IPPROTO_TCP, 0);
4067		first->tx_flags |= IGB_TX_FLAGS_TSO |
4068				   IGB_TX_FLAGS_CSUM;
4069	}
4070
4071	/* compute header lengths */
4072	l4len = tcp_hdrlen(skb);
4073	*hdr_len = skb_transport_offset(skb) + l4len;
4074
4075	/* update gso size and bytecount with header size */
4076	first->gso_segs = skb_shinfo(skb)->gso_segs;
4077	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4078
4079	/* MSS L4LEN IDX */
4080	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4081	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4082
4083	/* VLAN MACLEN IPLEN */
4084	vlan_macip_lens = skb_network_header_len(skb);
4085	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4086	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4087
4088	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4089
4090	return 1;
4091}
4092
4093static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4094{
4095	struct sk_buff *skb = first->skb;
4096	u32 vlan_macip_lens = 0;
4097	u32 mss_l4len_idx = 0;
4098	u32 type_tucmd = 0;
4099
4100	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4101		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4102			return;
4103	} else {
4104		u8 l4_hdr = 0;
4105		switch (first->protocol) {
4106		case __constant_htons(ETH_P_IP):
4107			vlan_macip_lens |= skb_network_header_len(skb);
4108			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4109			l4_hdr = ip_hdr(skb)->protocol;
4110			break;
4111		case __constant_htons(ETH_P_IPV6):
4112			vlan_macip_lens |= skb_network_header_len(skb);
4113			l4_hdr = ipv6_hdr(skb)->nexthdr;
4114			break;
4115		default:
4116			if (unlikely(net_ratelimit())) {
4117				dev_warn(tx_ring->dev,
4118				 "partial checksum but proto=%x!\n",
4119				 first->protocol);
4120			}
4121			break;
4122		}
4123
4124		switch (l4_hdr) {
4125		case IPPROTO_TCP:
4126			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4127			mss_l4len_idx = tcp_hdrlen(skb) <<
4128					E1000_ADVTXD_L4LEN_SHIFT;
4129			break;
4130		case IPPROTO_SCTP:
4131			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4132			mss_l4len_idx = sizeof(struct sctphdr) <<
4133					E1000_ADVTXD_L4LEN_SHIFT;
4134			break;
4135		case IPPROTO_UDP:
4136			mss_l4len_idx = sizeof(struct udphdr) <<
4137					E1000_ADVTXD_L4LEN_SHIFT;
4138			break;
4139		default:
4140			if (unlikely(net_ratelimit())) {
4141				dev_warn(tx_ring->dev,
4142				 "partial checksum but l4 proto=%x!\n",
4143				 l4_hdr);
4144			}
4145			break;
4146		}
4147
4148		/* update TX checksum flag */
4149		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4150	}
4151
4152	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4153	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4154
4155	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4156}
4157
4158static __le32 igb_tx_cmd_type(u32 tx_flags)
4159{
4160	/* set type for advanced descriptor with frame checksum insertion */
4161	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4162				      E1000_ADVTXD_DCMD_IFCS |
4163				      E1000_ADVTXD_DCMD_DEXT);
4164
4165	/* set HW vlan bit if vlan is present */
4166	if (tx_flags & IGB_TX_FLAGS_VLAN)
4167		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4168
4169	/* set timestamp bit if present */
4170	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4171		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4172
4173	/* set segmentation bits for TSO */
4174	if (tx_flags & IGB_TX_FLAGS_TSO)
4175		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4176
4177	return cmd_type;
4178}
4179
4180static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4181				 union e1000_adv_tx_desc *tx_desc,
4182				 u32 tx_flags, unsigned int paylen)
4183{
4184	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4185
4186	/* 82575 requires a unique index per ring if any offload is enabled */
4187	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4188	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4189		olinfo_status |= tx_ring->reg_idx << 4;
4190
4191	/* insert L4 checksum */
4192	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4193		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4194
4195		/* insert IPv4 checksum */
4196		if (tx_flags & IGB_TX_FLAGS_IPV4)
4197			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4198	}
4199
4200	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4201}
4202
4203/*
4204 * The largest size we can write to the descriptor is 65535.  In order to
4205 * maintain a power of two alignment we have to limit ourselves to 32K.
4206 */
4207#define IGB_MAX_TXD_PWR	15
4208#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
4209
4210static void igb_tx_map(struct igb_ring *tx_ring,
4211		       struct igb_tx_buffer *first,
4212		       const u8 hdr_len)
4213{
4214	struct sk_buff *skb = first->skb;
4215	struct igb_tx_buffer *tx_buffer_info;
4216	union e1000_adv_tx_desc *tx_desc;
4217	dma_addr_t dma;
4218	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4219	unsigned int data_len = skb->data_len;
4220	unsigned int size = skb_headlen(skb);
4221	unsigned int paylen = skb->len - hdr_len;
4222	__le32 cmd_type;
4223	u32 tx_flags = first->tx_flags;
4224	u16 i = tx_ring->next_to_use;
4225
4226	tx_desc = IGB_TX_DESC(tx_ring, i);
4227
4228	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4229	cmd_type = igb_tx_cmd_type(tx_flags);
4230
4231	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4232	if (dma_mapping_error(tx_ring->dev, dma))
4233		goto dma_error;
4234
4235	/* record length, and DMA address */
4236	first->length = size;
4237	first->dma = dma;
4238	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4239
4240	for (;;) {
4241		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4242			tx_desc->read.cmd_type_len =
4243				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4244
4245			i++;
4246			tx_desc++;
4247			if (i == tx_ring->count) {
4248				tx_desc = IGB_TX_DESC(tx_ring, 0);
4249				i = 0;
4250			}
4251
4252			dma += IGB_MAX_DATA_PER_TXD;
4253			size -= IGB_MAX_DATA_PER_TXD;
4254
4255			tx_desc->read.olinfo_status = 0;
4256			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4257		}
4258
4259		if (likely(!data_len))
4260			break;
4261
4262		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4263
4264		i++;
4265		tx_desc++;
4266		if (i == tx_ring->count) {
4267			tx_desc = IGB_TX_DESC(tx_ring, 0);
4268			i = 0;
4269		}
4270
4271		size = skb_frag_size(frag);
4272		data_len -= size;
4273
4274		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4275				   size, DMA_TO_DEVICE);
4276		if (dma_mapping_error(tx_ring->dev, dma))
4277			goto dma_error;
4278
4279		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4280		tx_buffer_info->length = size;
4281		tx_buffer_info->dma = dma;
4282
4283		tx_desc->read.olinfo_status = 0;
4284		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4285
4286		frag++;
4287	}
4288
4289	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4290
4291	/* write last descriptor with RS and EOP bits */
4292	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4293	tx_desc->read.cmd_type_len = cmd_type;
4294
4295	/* set the timestamp */
4296	first->time_stamp = jiffies;
4297
4298	/*
4299	 * Force memory writes to complete before letting h/w know there
4300	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4301	 * memory model archs, such as IA-64).
4302	 *
4303	 * We also need this memory barrier to make certain all of the
4304	 * status bits have been updated before next_to_watch is written.
4305	 */
4306	wmb();
4307
4308	/* set next_to_watch value indicating a packet is present */
4309	first->next_to_watch = tx_desc;
4310
4311	i++;
4312	if (i == tx_ring->count)
4313		i = 0;
4314
4315	tx_ring->next_to_use = i;
4316
4317	writel(i, tx_ring->tail);
4318
4319	/* we need this if more than one processor can write to our tail
4320	 * at a time, it syncronizes IO on IA64/Altix systems */
4321	mmiowb();
4322
4323	return;
4324
4325dma_error:
4326	dev_err(tx_ring->dev, "TX DMA map failed\n");
4327
4328	/* clear dma mappings for failed tx_buffer_info map */
4329	for (;;) {
4330		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4331		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4332		if (tx_buffer_info == first)
4333			break;
4334		if (i == 0)
4335			i = tx_ring->count;
4336		i--;
4337	}
4338
4339	tx_ring->next_to_use = i;
4340}
4341
4342static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4343{
4344	struct net_device *netdev = tx_ring->netdev;
4345
4346	netif_stop_subqueue(netdev, tx_ring->queue_index);
4347
4348	/* Herbert's original patch had:
4349	 *  smp_mb__after_netif_stop_queue();
4350	 * but since that doesn't exist yet, just open code it. */
4351	smp_mb();
4352
4353	/* We need to check again in a case another CPU has just
4354	 * made room available. */
4355	if (igb_desc_unused(tx_ring) < size)
4356		return -EBUSY;
4357
4358	/* A reprieve! */
4359	netif_wake_subqueue(netdev, tx_ring->queue_index);
4360
4361	u64_stats_update_begin(&tx_ring->tx_syncp2);
4362	tx_ring->tx_stats.restart_queue2++;
4363	u64_stats_update_end(&tx_ring->tx_syncp2);
4364
4365	return 0;
4366}
4367
4368static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4369{
4370	if (igb_desc_unused(tx_ring) >= size)
4371		return 0;
4372	return __igb_maybe_stop_tx(tx_ring, size);
4373}
4374
4375netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4376				struct igb_ring *tx_ring)
4377{
4378	struct igb_tx_buffer *first;
4379	int tso;
4380	u32 tx_flags = 0;
4381	__be16 protocol = vlan_get_protocol(skb);
4382	u8 hdr_len = 0;
4383
4384	/* need: 1 descriptor per page,
4385	 *       + 2 desc gap to keep tail from touching head,
4386	 *       + 1 desc for skb->data,
4387	 *       + 1 desc for context descriptor,
4388	 * otherwise try next time */
4389	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4390		/* this is a hard error */
4391		return NETDEV_TX_BUSY;
4392	}
4393
4394	/* record the location of the first descriptor for this packet */
4395	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4396	first->skb = skb;
4397	first->bytecount = skb->len;
4398	first->gso_segs = 1;
4399
4400	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4401		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4402		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4403	}
4404
4405	if (vlan_tx_tag_present(skb)) {
4406		tx_flags |= IGB_TX_FLAGS_VLAN;
4407		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4408	}
4409
4410	/* record initial flags and protocol */
4411	first->tx_flags = tx_flags;
4412	first->protocol = protocol;
4413
4414	tso = igb_tso(tx_ring, first, &hdr_len);
4415	if (tso < 0)
4416		goto out_drop;
4417	else if (!tso)
4418		igb_tx_csum(tx_ring, first);
4419
4420	igb_tx_map(tx_ring, first, hdr_len);
4421
4422	/* Make sure there is space in the ring for the next send. */
4423	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4424
4425	return NETDEV_TX_OK;
4426
4427out_drop:
4428	igb_unmap_and_free_tx_resource(tx_ring, first);
4429
4430	return NETDEV_TX_OK;
4431}
4432
4433static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4434						    struct sk_buff *skb)
4435{
4436	unsigned int r_idx = skb->queue_mapping;
4437
4438	if (r_idx >= adapter->num_tx_queues)
4439		r_idx = r_idx % adapter->num_tx_queues;
4440
4441	return adapter->tx_ring[r_idx];
4442}
4443
4444static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4445				  struct net_device *netdev)
4446{
4447	struct igb_adapter *adapter = netdev_priv(netdev);
4448
4449	if (test_bit(__IGB_DOWN, &adapter->state)) {
4450		dev_kfree_skb_any(skb);
4451		return NETDEV_TX_OK;
4452	}
4453
4454	if (skb->len <= 0) {
4455		dev_kfree_skb_any(skb);
4456		return NETDEV_TX_OK;
4457	}
4458
4459	/*
4460	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4461	 * in order to meet this minimum size requirement.
4462	 */
4463	if (skb->len < 17) {
4464		if (skb_padto(skb, 17))
4465			return NETDEV_TX_OK;
4466		skb->len = 17;
4467	}
4468
4469	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4470}
4471
4472/**
4473 * igb_tx_timeout - Respond to a Tx Hang
4474 * @netdev: network interface device structure
4475 **/
4476static void igb_tx_timeout(struct net_device *netdev)
4477{
4478	struct igb_adapter *adapter = netdev_priv(netdev);
4479	struct e1000_hw *hw = &adapter->hw;
4480
4481	/* Do the reset outside of interrupt context */
4482	adapter->tx_timeout_count++;
4483
4484	if (hw->mac.type >= e1000_82580)
4485		hw->dev_spec._82575.global_device_reset = true;
4486
4487	schedule_work(&adapter->reset_task);
4488	wr32(E1000_EICS,
4489	     (adapter->eims_enable_mask & ~adapter->eims_other));
4490}
4491
4492static void igb_reset_task(struct work_struct *work)
4493{
4494	struct igb_adapter *adapter;
4495	adapter = container_of(work, struct igb_adapter, reset_task);
4496
4497	igb_dump(adapter);
4498	netdev_err(adapter->netdev, "Reset adapter\n");
4499	igb_reinit_locked(adapter);
4500}
4501
4502/**
4503 * igb_get_stats64 - Get System Network Statistics
4504 * @netdev: network interface device structure
4505 * @stats: rtnl_link_stats64 pointer
4506 *
4507 **/
4508static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4509						 struct rtnl_link_stats64 *stats)
4510{
4511	struct igb_adapter *adapter = netdev_priv(netdev);
4512
4513	spin_lock(&adapter->stats64_lock);
4514	igb_update_stats(adapter, &adapter->stats64);
4515	memcpy(stats, &adapter->stats64, sizeof(*stats));
4516	spin_unlock(&adapter->stats64_lock);
4517
4518	return stats;
4519}
4520
4521/**
4522 * igb_change_mtu - Change the Maximum Transfer Unit
4523 * @netdev: network interface device structure
4524 * @new_mtu: new value for maximum frame size
4525 *
4526 * Returns 0 on success, negative on failure
4527 **/
4528static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4529{
4530	struct igb_adapter *adapter = netdev_priv(netdev);
4531	struct pci_dev *pdev = adapter->pdev;
4532	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4533
4534	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4535		dev_err(&pdev->dev, "Invalid MTU setting\n");
4536		return -EINVAL;
4537	}
4538
4539#define MAX_STD_JUMBO_FRAME_SIZE 9238
4540	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4541		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4542		return -EINVAL;
4543	}
4544
4545	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4546		msleep(1);
4547
4548	/* igb_down has a dependency on max_frame_size */
4549	adapter->max_frame_size = max_frame;
4550
4551	if (netif_running(netdev))
4552		igb_down(adapter);
4553
4554	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4555		 netdev->mtu, new_mtu);
4556	netdev->mtu = new_mtu;
4557
4558	if (netif_running(netdev))
4559		igb_up(adapter);
4560	else
4561		igb_reset(adapter);
4562
4563	clear_bit(__IGB_RESETTING, &adapter->state);
4564
4565	return 0;
4566}
4567
4568/**
4569 * igb_update_stats - Update the board statistics counters
4570 * @adapter: board private structure
4571 **/
4572
4573void igb_update_stats(struct igb_adapter *adapter,
4574		      struct rtnl_link_stats64 *net_stats)
4575{
4576	struct e1000_hw *hw = &adapter->hw;
4577	struct pci_dev *pdev = adapter->pdev;
4578	u32 reg, mpc;
4579	u16 phy_tmp;
4580	int i;
4581	u64 bytes, packets;
4582	unsigned int start;
4583	u64 _bytes, _packets;
4584
4585#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4586
4587	/*
4588	 * Prevent stats update while adapter is being reset, or if the pci
4589	 * connection is down.
4590	 */
4591	if (adapter->link_speed == 0)
4592		return;
4593	if (pci_channel_offline(pdev))
4594		return;
4595
4596	bytes = 0;
4597	packets = 0;
4598	for (i = 0; i < adapter->num_rx_queues; i++) {
4599		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4600		struct igb_ring *ring = adapter->rx_ring[i];
4601
4602		ring->rx_stats.drops += rqdpc_tmp;
4603		net_stats->rx_fifo_errors += rqdpc_tmp;
4604
4605		do {
4606			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4607			_bytes = ring->rx_stats.bytes;
4608			_packets = ring->rx_stats.packets;
4609		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4610		bytes += _bytes;
4611		packets += _packets;
4612	}
4613
4614	net_stats->rx_bytes = bytes;
4615	net_stats->rx_packets = packets;
4616
4617	bytes = 0;
4618	packets = 0;
4619	for (i = 0; i < adapter->num_tx_queues; i++) {
4620		struct igb_ring *ring = adapter->tx_ring[i];
4621		do {
4622			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4623			_bytes = ring->tx_stats.bytes;
4624			_packets = ring->tx_stats.packets;
4625		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4626		bytes += _bytes;
4627		packets += _packets;
4628	}
4629	net_stats->tx_bytes = bytes;
4630	net_stats->tx_packets = packets;
4631
4632	/* read stats registers */
4633	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4634	adapter->stats.gprc += rd32(E1000_GPRC);
4635	adapter->stats.gorc += rd32(E1000_GORCL);
4636	rd32(E1000_GORCH); /* clear GORCL */
4637	adapter->stats.bprc += rd32(E1000_BPRC);
4638	adapter->stats.mprc += rd32(E1000_MPRC);
4639	adapter->stats.roc += rd32(E1000_ROC);
4640
4641	adapter->stats.prc64 += rd32(E1000_PRC64);
4642	adapter->stats.prc127 += rd32(E1000_PRC127);
4643	adapter->stats.prc255 += rd32(E1000_PRC255);
4644	adapter->stats.prc511 += rd32(E1000_PRC511);
4645	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4646	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4647	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4648	adapter->stats.sec += rd32(E1000_SEC);
4649
4650	mpc = rd32(E1000_MPC);
4651	adapter->stats.mpc += mpc;
4652	net_stats->rx_fifo_errors += mpc;
4653	adapter->stats.scc += rd32(E1000_SCC);
4654	adapter->stats.ecol += rd32(E1000_ECOL);
4655	adapter->stats.mcc += rd32(E1000_MCC);
4656	adapter->stats.latecol += rd32(E1000_LATECOL);
4657	adapter->stats.dc += rd32(E1000_DC);
4658	adapter->stats.rlec += rd32(E1000_RLEC);
4659	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4660	adapter->stats.xontxc += rd32(E1000_XONTXC);
4661	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4662	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4663	adapter->stats.fcruc += rd32(E1000_FCRUC);
4664	adapter->stats.gptc += rd32(E1000_GPTC);
4665	adapter->stats.gotc += rd32(E1000_GOTCL);
4666	rd32(E1000_GOTCH); /* clear GOTCL */
4667	adapter->stats.rnbc += rd32(E1000_RNBC);
4668	adapter->stats.ruc += rd32(E1000_RUC);
4669	adapter->stats.rfc += rd32(E1000_RFC);
4670	adapter->stats.rjc += rd32(E1000_RJC);
4671	adapter->stats.tor += rd32(E1000_TORH);
4672	adapter->stats.tot += rd32(E1000_TOTH);
4673	adapter->stats.tpr += rd32(E1000_TPR);
4674
4675	adapter->stats.ptc64 += rd32(E1000_PTC64);
4676	adapter->stats.ptc127 += rd32(E1000_PTC127);
4677	adapter->stats.ptc255 += rd32(E1000_PTC255);
4678	adapter->stats.ptc511 += rd32(E1000_PTC511);
4679	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4680	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4681
4682	adapter->stats.mptc += rd32(E1000_MPTC);
4683	adapter->stats.bptc += rd32(E1000_BPTC);
4684
4685	adapter->stats.tpt += rd32(E1000_TPT);
4686	adapter->stats.colc += rd32(E1000_COLC);
4687
4688	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4689	/* read internal phy specific stats */
4690	reg = rd32(E1000_CTRL_EXT);
4691	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4692		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4693		adapter->stats.tncrs += rd32(E1000_TNCRS);
4694	}
4695
4696	adapter->stats.tsctc += rd32(E1000_TSCTC);
4697	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4698
4699	adapter->stats.iac += rd32(E1000_IAC);
4700	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4701	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4702	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4703	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4704	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4705	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4706	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4707	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4708
4709	/* Fill out the OS statistics structure */
4710	net_stats->multicast = adapter->stats.mprc;
4711	net_stats->collisions = adapter->stats.colc;
4712
4713	/* Rx Errors */
4714
4715	/* RLEC on some newer hardware can be incorrect so build
4716	 * our own version based on RUC and ROC */
4717	net_stats->rx_errors = adapter->stats.rxerrc +
4718		adapter->stats.crcerrs + adapter->stats.algnerrc +
4719		adapter->stats.ruc + adapter->stats.roc +
4720		adapter->stats.cexterr;
4721	net_stats->rx_length_errors = adapter->stats.ruc +
4722				      adapter->stats.roc;
4723	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4724	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4725	net_stats->rx_missed_errors = adapter->stats.mpc;
4726
4727	/* Tx Errors */
4728	net_stats->tx_errors = adapter->stats.ecol +
4729			       adapter->stats.latecol;
4730	net_stats->tx_aborted_errors = adapter->stats.ecol;
4731	net_stats->tx_window_errors = adapter->stats.latecol;
4732	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4733
4734	/* Tx Dropped needs to be maintained elsewhere */
4735
4736	/* Phy Stats */
4737	if (hw->phy.media_type == e1000_media_type_copper) {
4738		if ((adapter->link_speed == SPEED_1000) &&
4739		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4740			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4741			adapter->phy_stats.idle_errors += phy_tmp;
4742		}
4743	}
4744
4745	/* Management Stats */
4746	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4747	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4748	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4749
4750	/* OS2BMC Stats */
4751	reg = rd32(E1000_MANC);
4752	if (reg & E1000_MANC_EN_BMC2OS) {
4753		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4754		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4755		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4756		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4757	}
4758}
4759
4760static irqreturn_t igb_msix_other(int irq, void *data)
4761{
4762	struct igb_adapter *adapter = data;
4763	struct e1000_hw *hw = &adapter->hw;
4764	u32 icr = rd32(E1000_ICR);
4765	/* reading ICR causes bit 31 of EICR to be cleared */
4766
4767	if (icr & E1000_ICR_DRSTA)
4768		schedule_work(&adapter->reset_task);
4769
4770	if (icr & E1000_ICR_DOUTSYNC) {
4771		/* HW is reporting DMA is out of sync */
4772		adapter->stats.doosync++;
4773		/* The DMA Out of Sync is also indication of a spoof event
4774		 * in IOV mode. Check the Wrong VM Behavior register to
4775		 * see if it is really a spoof event. */
4776		igb_check_wvbr(adapter);
4777	}
4778
4779	/* Check for a mailbox event */
4780	if (icr & E1000_ICR_VMMB)
4781		igb_msg_task(adapter);
4782
4783	if (icr & E1000_ICR_LSC) {
4784		hw->mac.get_link_status = 1;
4785		/* guard against interrupt when we're going down */
4786		if (!test_bit(__IGB_DOWN, &adapter->state))
4787			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4788	}
4789
4790	wr32(E1000_EIMS, adapter->eims_other);
4791
4792	return IRQ_HANDLED;
4793}
4794
4795static void igb_write_itr(struct igb_q_vector *q_vector)
4796{
4797	struct igb_adapter *adapter = q_vector->adapter;
4798	u32 itr_val = q_vector->itr_val & 0x7FFC;
4799
4800	if (!q_vector->set_itr)
4801		return;
4802
4803	if (!itr_val)
4804		itr_val = 0x4;
4805
4806	if (adapter->hw.mac.type == e1000_82575)
4807		itr_val |= itr_val << 16;
4808	else
4809		itr_val |= E1000_EITR_CNT_IGNR;
4810
4811	writel(itr_val, q_vector->itr_register);
4812	q_vector->set_itr = 0;
4813}
4814
4815static irqreturn_t igb_msix_ring(int irq, void *data)
4816{
4817	struct igb_q_vector *q_vector = data;
4818
4819	/* Write the ITR value calculated from the previous interrupt. */
4820	igb_write_itr(q_vector);
4821
4822	napi_schedule(&q_vector->napi);
4823
4824	return IRQ_HANDLED;
4825}
4826
4827#ifdef CONFIG_IGB_DCA
4828static void igb_update_dca(struct igb_q_vector *q_vector)
4829{
4830	struct igb_adapter *adapter = q_vector->adapter;
4831	struct e1000_hw *hw = &adapter->hw;
4832	int cpu = get_cpu();
4833
4834	if (q_vector->cpu == cpu)
4835		goto out_no_update;
4836
4837	if (q_vector->tx.ring) {
4838		int q = q_vector->tx.ring->reg_idx;
4839		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4840		if (hw->mac.type == e1000_82575) {
4841			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4842			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4843		} else {
4844			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4845			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4846			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4847		}
4848		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4849		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4850	}
4851	if (q_vector->rx.ring) {
4852		int q = q_vector->rx.ring->reg_idx;
4853		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4854		if (hw->mac.type == e1000_82575) {
4855			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4856			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4857		} else {
4858			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4859			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4860			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4861		}
4862		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4863		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4864		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4865		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4866	}
4867	q_vector->cpu = cpu;
4868out_no_update:
4869	put_cpu();
4870}
4871
4872static void igb_setup_dca(struct igb_adapter *adapter)
4873{
4874	struct e1000_hw *hw = &adapter->hw;
4875	int i;
4876
4877	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4878		return;
4879
4880	/* Always use CB2 mode, difference is masked in the CB driver. */
4881	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4882
4883	for (i = 0; i < adapter->num_q_vectors; i++) {
4884		adapter->q_vector[i]->cpu = -1;
4885		igb_update_dca(adapter->q_vector[i]);
4886	}
4887}
4888
4889static int __igb_notify_dca(struct device *dev, void *data)
4890{
4891	struct net_device *netdev = dev_get_drvdata(dev);
4892	struct igb_adapter *adapter = netdev_priv(netdev);
4893	struct pci_dev *pdev = adapter->pdev;
4894	struct e1000_hw *hw = &adapter->hw;
4895	unsigned long event = *(unsigned long *)data;
4896
4897	switch (event) {
4898	case DCA_PROVIDER_ADD:
4899		/* if already enabled, don't do it again */
4900		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4901			break;
4902		if (dca_add_requester(dev) == 0) {
4903			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4904			dev_info(&pdev->dev, "DCA enabled\n");
4905			igb_setup_dca(adapter);
4906			break;
4907		}
4908		/* Fall Through since DCA is disabled. */
4909	case DCA_PROVIDER_REMOVE:
4910		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4911			/* without this a class_device is left
4912			 * hanging around in the sysfs model */
4913			dca_remove_requester(dev);
4914			dev_info(&pdev->dev, "DCA disabled\n");
4915			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4916			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4917		}
4918		break;
4919	}
4920
4921	return 0;
4922}
4923
4924static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4925                          void *p)
4926{
4927	int ret_val;
4928
4929	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4930	                                 __igb_notify_dca);
4931
4932	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4933}
4934#endif /* CONFIG_IGB_DCA */
4935
4936#ifdef CONFIG_PCI_IOV
4937static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4938{
4939	unsigned char mac_addr[ETH_ALEN];
4940	struct pci_dev *pdev = adapter->pdev;
4941	struct e1000_hw *hw = &adapter->hw;
4942	struct pci_dev *pvfdev;
4943	unsigned int device_id;
4944	u16 thisvf_devfn;
4945
4946	random_ether_addr(mac_addr);
4947	igb_set_vf_mac(adapter, vf, mac_addr);
4948
4949	switch (adapter->hw.mac.type) {
4950	case e1000_82576:
4951		device_id = IGB_82576_VF_DEV_ID;
4952		/* VF Stride for 82576 is 2 */
4953		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4954			(pdev->devfn & 1);
4955		break;
4956	case e1000_i350:
4957		device_id = IGB_I350_VF_DEV_ID;
4958		/* VF Stride for I350 is 4 */
4959		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4960				(pdev->devfn & 3);
4961		break;
4962	default:
4963		device_id = 0;
4964		thisvf_devfn = 0;
4965		break;
4966	}
4967
4968	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4969	while (pvfdev) {
4970		if (pvfdev->devfn == thisvf_devfn)
4971			break;
4972		pvfdev = pci_get_device(hw->vendor_id,
4973					device_id, pvfdev);
4974	}
4975
4976	if (pvfdev)
4977		adapter->vf_data[vf].vfdev = pvfdev;
4978	else
4979		dev_err(&pdev->dev,
4980			"Couldn't find pci dev ptr for VF %4.4x\n",
4981			thisvf_devfn);
4982	return pvfdev != NULL;
4983}
4984
4985static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4986{
4987	struct e1000_hw *hw = &adapter->hw;
4988	struct pci_dev *pdev = adapter->pdev;
4989	struct pci_dev *pvfdev;
4990	u16 vf_devfn = 0;
4991	u16 vf_stride;
4992	unsigned int device_id;
4993	int vfs_found = 0;
4994
4995	switch (adapter->hw.mac.type) {
4996	case e1000_82576:
4997		device_id = IGB_82576_VF_DEV_ID;
4998		/* VF Stride for 82576 is 2 */
4999		vf_stride = 2;
5000		break;
5001	case e1000_i350:
5002		device_id = IGB_I350_VF_DEV_ID;
5003		/* VF Stride for I350 is 4 */
5004		vf_stride = 4;
5005		break;
5006	default:
5007		device_id = 0;
5008		vf_stride = 0;
5009		break;
5010	}
5011
5012	vf_devfn = pdev->devfn + 0x80;
5013	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5014	while (pvfdev) {
5015		if (pvfdev->devfn == vf_devfn)
5016			vfs_found++;
5017		vf_devfn += vf_stride;
5018		pvfdev = pci_get_device(hw->vendor_id,
5019					device_id, pvfdev);
5020	}
5021
5022	return vfs_found;
5023}
5024
5025static int igb_check_vf_assignment(struct igb_adapter *adapter)
5026{
5027	int i;
5028	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5029		if (adapter->vf_data[i].vfdev) {
5030			if (adapter->vf_data[i].vfdev->dev_flags &
5031			    PCI_DEV_FLAGS_ASSIGNED)
5032				return true;
5033		}
5034	}
5035	return false;
5036}
5037
5038#endif
5039static void igb_ping_all_vfs(struct igb_adapter *adapter)
5040{
5041	struct e1000_hw *hw = &adapter->hw;
5042	u32 ping;
5043	int i;
5044
5045	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5046		ping = E1000_PF_CONTROL_MSG;
5047		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5048			ping |= E1000_VT_MSGTYPE_CTS;
5049		igb_write_mbx(hw, &ping, 1, i);
5050	}
5051}
5052
5053static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5054{
5055	struct e1000_hw *hw = &adapter->hw;
5056	u32 vmolr = rd32(E1000_VMOLR(vf));
5057	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5058
5059	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5060	                    IGB_VF_FLAG_MULTI_PROMISC);
5061	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5062
5063	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5064		vmolr |= E1000_VMOLR_MPME;
5065		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5066		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5067	} else {
5068		/*
5069		 * if we have hashes and we are clearing a multicast promisc
5070		 * flag we need to write the hashes to the MTA as this step
5071		 * was previously skipped
5072		 */
5073		if (vf_data->num_vf_mc_hashes > 30) {
5074			vmolr |= E1000_VMOLR_MPME;
5075		} else if (vf_data->num_vf_mc_hashes) {
5076			int j;
5077			vmolr |= E1000_VMOLR_ROMPE;
5078			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5079				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5080		}
5081	}
5082
5083	wr32(E1000_VMOLR(vf), vmolr);
5084
5085	/* there are flags left unprocessed, likely not supported */
5086	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5087		return -EINVAL;
5088
5089	return 0;
5090
5091}
5092
5093static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5094				  u32 *msgbuf, u32 vf)
5095{
5096	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5097	u16 *hash_list = (u16 *)&msgbuf[1];
5098	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5099	int i;
5100
5101	/* salt away the number of multicast addresses assigned
5102	 * to this VF for later use to restore when the PF multi cast
5103	 * list changes
5104	 */
5105	vf_data->num_vf_mc_hashes = n;
5106
5107	/* only up to 30 hash values supported */
5108	if (n > 30)
5109		n = 30;
5110
5111	/* store the hashes for later use */
5112	for (i = 0; i < n; i++)
5113		vf_data->vf_mc_hashes[i] = hash_list[i];
5114
5115	/* Flush and reset the mta with the new values */
5116	igb_set_rx_mode(adapter->netdev);
5117
5118	return 0;
5119}
5120
5121static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5122{
5123	struct e1000_hw *hw = &adapter->hw;
5124	struct vf_data_storage *vf_data;
5125	int i, j;
5126
5127	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5128		u32 vmolr = rd32(E1000_VMOLR(i));
5129		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5130
5131		vf_data = &adapter->vf_data[i];
5132
5133		if ((vf_data->num_vf_mc_hashes > 30) ||
5134		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5135			vmolr |= E1000_VMOLR_MPME;
5136		} else if (vf_data->num_vf_mc_hashes) {
5137			vmolr |= E1000_VMOLR_ROMPE;
5138			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5139				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5140		}
5141		wr32(E1000_VMOLR(i), vmolr);
5142	}
5143}
5144
5145static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5146{
5147	struct e1000_hw *hw = &adapter->hw;
5148	u32 pool_mask, reg, vid;
5149	int i;
5150
5151	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5152
5153	/* Find the vlan filter for this id */
5154	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5155		reg = rd32(E1000_VLVF(i));
5156
5157		/* remove the vf from the pool */
5158		reg &= ~pool_mask;
5159
5160		/* if pool is empty then remove entry from vfta */
5161		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5162		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5163			reg = 0;
5164			vid = reg & E1000_VLVF_VLANID_MASK;
5165			igb_vfta_set(hw, vid, false);
5166		}
5167
5168		wr32(E1000_VLVF(i), reg);
5169	}
5170
5171	adapter->vf_data[vf].vlans_enabled = 0;
5172}
5173
/**
 * igb_vlvf_set - add a pool to, or remove it from, the VLVF entry of a VLAN
 * @adapter: board private structure
 * @vid: VLAN id to look up in the VLVF table
 * @add: true to add pool @vf to the entry, false to remove it
 * @vf: pool index; values >= vfs_allocated_count skip the RLPML adjustment
 *
 * Returns -1 when the MAC is older than 82576 or when no VFs are allocated.
 * Returns 0 in every other case, including an add that found neither a
 * matching nor a free VLVF entry and a remove that found no matching entry.
 **/
static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg, i;

	/* The vlvf table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return -1;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return -1;

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));
		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (reg & E1000_VLVF_VLANID_MASK))
			break;
	}

	/*
	 * From here on, i < E1000_VLVF_ARRAY_SIZE means an entry was selected
	 * and reg still holds the value read from that entry.
	 */
	if (add) {
		if (i == E1000_VLVF_ARRAY_SIZE) {
			/* Did not find a matching VLAN ID entry that was
			 * enabled.  Search for a free filter entry, i.e.
			 * one without the enable bit set
			 */
			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
				reg = rd32(E1000_VLVF(i));
				if (!(reg & E1000_VLVF_VLANID_ENABLE))
					break;
			}
		}
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* Found an enabled/available entry */
			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

			/* if !enabled we need to set this up in vfta */
			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
				/* add VID to filter table */
				igb_vfta_set(hw, vid, true);
				reg |= E1000_VLVF_VLANID_ENABLE;
			}
			/* replace whatever VLAN id the entry held with vid */
			reg &= ~E1000_VLVF_VLANID_MASK;
			reg |= vid;
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/*
			 * First VLAN for this VF: grow the RLPML field of
			 * VMOLR by 4 (presumably room for the VLAN tag).
			 */
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size += 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}

			adapter->vf_data[vf].vlans_enabled++;
		}
	} else {
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* remove vf from the pool */
			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
			/* if pool is empty then remove entry from vfta */
			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
				reg = 0;
				igb_vfta_set(hw, vid, false);
			}
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/*
			 * Last VLAN of this VF removed: shrink the RLPML
			 * field again by the 4 added on the first add.
			 */
			adapter->vf_data[vf].vlans_enabled--;
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size -= 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}
		}
	}
	return 0;
}
5266
5267static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5268{
5269	struct e1000_hw *hw = &adapter->hw;
5270
5271	if (vid)
5272		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5273	else
5274		wr32(E1000_VMVIR(vf), 0);
5275}
5276
5277static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5278			       int vf, u16 vlan, u8 qos)
5279{
5280	int err = 0;
5281	struct igb_adapter *adapter = netdev_priv(netdev);
5282
5283	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5284		return -EINVAL;
5285	if (vlan || qos) {
5286		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5287		if (err)
5288			goto out;
5289		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5290		igb_set_vmolr(adapter, vf, !vlan);
5291		adapter->vf_data[vf].pf_vlan = vlan;
5292		adapter->vf_data[vf].pf_qos = qos;
5293		dev_info(&adapter->pdev->dev,
5294			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5295		if (test_bit(__IGB_DOWN, &adapter->state)) {
5296			dev_warn(&adapter->pdev->dev,
5297				 "The VF VLAN has been set,"
5298				 " but the PF device is not up.\n");
5299			dev_warn(&adapter->pdev->dev,
5300				 "Bring the PF device up before"
5301				 " attempting to use the VF device.\n");
5302		}
5303	} else {
5304		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5305				   false, vf);
5306		igb_set_vmvir(adapter, vlan, vf);
5307		igb_set_vmolr(adapter, vf, true);
5308		adapter->vf_data[vf].pf_vlan = 0;
5309		adapter->vf_data[vf].pf_qos = 0;
5310       }
5311out:
5312       return err;
5313}
5314
5315static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5316{
5317	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5318	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5319
5320	return igb_vlvf_set(adapter, vid, add, vf);
5321}
5322
5323static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5324{
5325	/* clear flags - except flag that indicates PF has set the MAC */
5326	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5327	adapter->vf_data[vf].last_nack = jiffies;
5328
5329	/* reset offloads to defaults */
5330	igb_set_vmolr(adapter, vf, true);
5331
5332	/* reset vlans for device */
5333	igb_clear_vf_vfta(adapter, vf);
5334	if (adapter->vf_data[vf].pf_vlan)
5335		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5336				    adapter->vf_data[vf].pf_vlan,
5337				    adapter->vf_data[vf].pf_qos);
5338	else
5339		igb_clear_vf_vfta(adapter, vf);
5340
5341	/* reset multicast table array for vf */
5342	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5343
5344	/* Flush and reset the mta with the new values */
5345	igb_set_rx_mode(adapter->netdev);
5346}
5347
5348static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5349{
5350	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5351
5352	/* generate a new mac address as we were hotplug removed/added */
5353	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5354		random_ether_addr(vf_mac);
5355
5356	/* process remaining reset events */
5357	igb_vf_reset(adapter, vf);
5358}
5359
5360static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5361{
5362	struct e1000_hw *hw = &adapter->hw;
5363	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5364	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5365	u32 reg, msgbuf[3];
5366	u8 *addr = (u8 *)(&msgbuf[1]);
5367
5368	/* process all the same items cleared in a function level reset */
5369	igb_vf_reset(adapter, vf);
5370
5371	/* set vf mac address */
5372	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5373
5374	/* enable transmit and receive for vf */
5375	reg = rd32(E1000_VFTE);
5376	wr32(E1000_VFTE, reg | (1 << vf));
5377	reg = rd32(E1000_VFRE);
5378	wr32(E1000_VFRE, reg | (1 << vf));
5379
5380	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5381
5382	/* reply to reset with ack and vf mac address */
5383	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5384	memcpy(addr, vf_mac, 6);
5385	igb_write_mbx(hw, msgbuf, 3, vf);
5386}
5387
5388static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5389{
5390	/*
5391	 * The VF MAC Address is stored in a packed array of bytes
5392	 * starting at the second 32 bit word of the msg array
5393	 */
5394	unsigned char *addr = (char *)&msg[1];
5395	int err = -1;
5396
5397	if (is_valid_ether_addr(addr))
5398		err = igb_set_vf_mac(adapter, vf, addr);
5399
5400	return err;
5401}
5402
5403static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5404{
5405	struct e1000_hw *hw = &adapter->hw;
5406	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5407	u32 msg = E1000_VT_MSGTYPE_NACK;
5408
5409	/* if device isn't clear to send it shouldn't be reading either */
5410	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5411	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5412		igb_write_mbx(hw, &msg, 1, vf);
5413		vf_data->last_nack = jiffies;
5414	}
5415}
5416
/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: index of the VF whose mailbox is read
 *
 * Reads the mailbox, dispatches on the low 16 bits of the first word and
 * writes that word back with ACK or NACK set according to the handler
 * result.  A reset request is handled before anything else and is answered
 * by igb_vf_reset_msg() itself.  Until the VF is clear-to-send every other
 * request is NACKed, at most once per two seconds.
 **/
static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct pci_dev *pdev = adapter->pdev;
	u32 msgbuf[E1000_VFMAILBOX_SIZE];
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	s32 retval;

	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);

	if (retval) {
		/* if receive failed revoke VF CTS stats and restart init */
		dev_err(&pdev->dev, "Error receiving message from VF\n");
		vf_data->flags &= ~IGB_VF_FLAG_CTS;
		/* stay silent if a NACK went out within the last 2 seconds */
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		/*
		 * NOTE(review): msgbuf[0] is OR-ed with NACK at "out" even
		 * though the read failed - confirm igb_read_mbx() leaves it
		 * in a defined state on error.
		 */
		goto out;
	}

	/* this is a message we already processed, do nothing */
	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
		return;

	/*
	 * until the vf completes a reset it should not be
	 * allowed to start any configuration.
	 */

	if (msgbuf[0] == E1000_VF_RESET) {
		igb_vf_reset_msg(adapter, vf);
		return;
	}

	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
		/* not clear to send: NACK, but at most once per 2 seconds */
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		retval = -1;
		goto out;
	}

	/* the low 16 bits of the first word select the request */
	switch ((msgbuf[0] & 0xFFFF)) {
	case E1000_VF_SET_MAC_ADDR:
		/* refused when the PF has set the MAC administratively */
		retval = -EINVAL;
		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
		else
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively "
				 "set MAC address\nReload the VF driver to "
				 "resume operations\n", vf);
		break;
	case E1000_VF_SET_PROMISC:
		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_MULTICAST:
		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_LPE:
		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
		break;
	case E1000_VF_SET_VLAN:
		/* refused when the PF has set a VLAN administratively */
		retval = -1;
		if (vf_data->pf_vlan)
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively "
				 "set VLAN tag\nReload the VF driver to "
				 "resume operations\n", vf);
		else
			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
		break;
	default:
		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
		retval = -1;
		break;
	}

	/* only replies that went through the switch carry the CTS bit */
	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
out:
	/* notify the VF of the results of what it sent us */
	if (retval)
		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
	else
		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;

	igb_write_mbx(hw, msgbuf, 1, vf);
}
5503
5504static void igb_msg_task(struct igb_adapter *adapter)
5505{
5506	struct e1000_hw *hw = &adapter->hw;
5507	u32 vf;
5508
5509	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5510		/* process any reset requests */
5511		if (!igb_check_for_rst(hw, vf))
5512			igb_vf_reset_event(adapter, vf);
5513
5514		/* process any messages pending */
5515		if (!igb_check_for_msg(hw, vf))
5516			igb_rcv_msg_from_vf(adapter, vf);
5517
5518		/* process any acks */
5519		if (!igb_check_for_ack(hw, vf))
5520			igb_rcv_ack_from_vf(adapter, vf);
5521	}
5522}
5523
5524/**
5525 *  igb_set_uta - Set unicast filter table address
5526 *  @adapter: board private structure
5527 *
5528 *  The unicast table address is a register array of 32-bit registers.
5529 *  The table is meant to be used in a way similar to how the MTA is used
5530 *  however due to certain limitations in the hardware it is necessary to
5531 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5532 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5533 **/
5534static void igb_set_uta(struct igb_adapter *adapter)
5535{
5536	struct e1000_hw *hw = &adapter->hw;
5537	int i;
5538
5539	/* The UTA table only exists on 82576 hardware and newer */
5540	if (hw->mac.type < e1000_82576)
5541		return;
5542
5543	/* we only need to do this if VMDq is enabled */
5544	if (!adapter->vfs_allocated_count)
5545		return;
5546
5547	for (i = 0; i < hw->mac.uta_reg_count; i++)
5548		array_wr32(E1000_UTA, i, ~0);
5549}
5550
5551/**
5552 * igb_intr_msi - Interrupt Handler
5553 * @irq: interrupt number
5554 * @data: pointer to a network interface device structure
5555 **/
5556static irqreturn_t igb_intr_msi(int irq, void *data)
5557{
5558	struct igb_adapter *adapter = data;
5559	struct igb_q_vector *q_vector = adapter->q_vector[0];
5560	struct e1000_hw *hw = &adapter->hw;
5561	/* read ICR disables interrupts using IAM */
5562	u32 icr = rd32(E1000_ICR);
5563
5564	igb_write_itr(q_vector);
5565
5566	if (icr & E1000_ICR_DRSTA)
5567		schedule_work(&adapter->reset_task);
5568
5569	if (icr & E1000_ICR_DOUTSYNC) {
5570		/* HW is reporting DMA is out of sync */
5571		adapter->stats.doosync++;
5572	}
5573
5574	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5575		hw->mac.get_link_status = 1;
5576		if (!test_bit(__IGB_DOWN, &adapter->state))
5577			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5578	}
5579
5580	napi_schedule(&q_vector->napi);
5581
5582	return IRQ_HANDLED;
5583}
5584
5585/**
5586 * igb_intr - Legacy Interrupt Handler
5587 * @irq: interrupt number
5588 * @data: pointer to a network interface device structure
5589 **/
5590static irqreturn_t igb_intr(int irq, void *data)
5591{
5592	struct igb_adapter *adapter = data;
5593	struct igb_q_vector *q_vector = adapter->q_vector[0];
5594	struct e1000_hw *hw = &adapter->hw;
5595	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5596	 * need for the IMC write */
5597	u32 icr = rd32(E1000_ICR);
5598
5599	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5600	 * not set, then the adapter didn't send an interrupt */
5601	if (!(icr & E1000_ICR_INT_ASSERTED))
5602		return IRQ_NONE;
5603
5604	igb_write_itr(q_vector);
5605
5606	if (icr & E1000_ICR_DRSTA)
5607		schedule_work(&adapter->reset_task);
5608
5609	if (icr & E1000_ICR_DOUTSYNC) {
5610		/* HW is reporting DMA is out of sync */
5611		adapter->stats.doosync++;
5612	}
5613
5614	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5615		hw->mac.get_link_status = 1;
5616		/* guard against interrupt when we're going down */
5617		if (!test_bit(__IGB_DOWN, &adapter->state))
5618			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5619	}
5620
5621	napi_schedule(&q_vector->napi);
5622
5623	return IRQ_HANDLED;
5624}
5625
5626static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5627{
5628	struct igb_adapter *adapter = q_vector->adapter;
5629	struct e1000_hw *hw = &adapter->hw;
5630
5631	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5632	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5633		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5634			igb_set_itr(q_vector);
5635		else
5636			igb_update_ring_itr(q_vector);
5637	}
5638
5639	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5640		if (adapter->msix_entries)
5641			wr32(E1000_EIMS, q_vector->eims_value);
5642		else
5643			igb_irq_enable(adapter);
5644	}
5645}
5646
5647/**
5648 * igb_poll - NAPI Rx polling callback
5649 * @napi: napi polling structure
5650 * @budget: count of how many packets we should handle
5651 **/
5652static int igb_poll(struct napi_struct *napi, int budget)
5653{
5654	struct igb_q_vector *q_vector = container_of(napi,
5655	                                             struct igb_q_vector,
5656	                                             napi);
5657	bool clean_complete = true;
5658
5659#ifdef CONFIG_IGB_DCA
5660	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5661		igb_update_dca(q_vector);
5662#endif
5663	if (q_vector->tx.ring)
5664		clean_complete = igb_clean_tx_irq(q_vector);
5665
5666	if (q_vector->rx.ring)
5667		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5668
5669	/* If all work not completed, return budget and keep polling */
5670	if (!clean_complete)
5671		return budget;
5672
5673	/* If not enough Rx work done, exit the polling mode */
5674	napi_complete(napi);
5675	igb_ring_irq_enable(q_vector);
5676
5677	return 0;
5678}
5679
5680/**
5681 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5682 * @adapter: board private structure
5683 * @shhwtstamps: timestamp structure to update
5684 * @regval: unsigned 64bit system time value.
5685 *
5686 * We need to convert the system time value stored in the RX/TXSTMP registers
5687 * into a hwtstamp which can be used by the upper level timestamping functions
5688 */
5689static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5690                                   struct skb_shared_hwtstamps *shhwtstamps,
5691                                   u64 regval)
5692{
5693	u64 ns;
5694
5695	/*
5696	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5697	 * 24 to match clock shift we setup earlier.
5698	 */
5699	if (adapter->hw.mac.type >= e1000_82580)
5700		regval <<= IGB_82580_TSYNC_SHIFT;
5701
5702	ns = timecounter_cyc2time(&adapter->clock, regval);
5703	timecompare_update(&adapter->compare, ns);
5704	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5705	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5706	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5707}
5708
5709/**
5710 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5711 * @q_vector: pointer to q_vector containing needed info
5712 * @buffer: pointer to igb_tx_buffer structure
5713 *
5714 * If we were asked to do hardware stamping and such a time stamp is
5715 * available, then it must have been for this skb here because we only
5716 * allow only one such packet into the queue.
5717 */
5718static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5719			    struct igb_tx_buffer *buffer_info)
5720{
5721	struct igb_adapter *adapter = q_vector->adapter;
5722	struct e1000_hw *hw = &adapter->hw;
5723	struct skb_shared_hwtstamps shhwtstamps;
5724	u64 regval;
5725
5726	/* if skb does not support hw timestamp or TX stamp not valid exit */
5727	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5728	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5729		return;
5730
5731	regval = rd32(E1000_TXSTMPL);
5732	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5733
5734	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5735	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5736}
5737
/**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: pointer to q_vector containing needed info
 *
 * Walks the Tx ring from next_to_clean, one packet per iteration, for at
 * most q_vector->tx.work_limit packets.  For every packet whose end-of-packet
 * descriptor has the DD bit set, the skb is freed, its buffers are DMA
 * unmapped and the byte/packet statistics are updated.  Afterwards the
 * optional Tx hang check runs and a stopped subqueue is woken when enough
 * descriptors are free again.
 *
 * returns true if ring is completely cleaned, i.e. the walk stopped before
 * the work limit was used up.  Also returns true when the adapter is down
 * or a Tx hang was detected.
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct igb_ring *tx_ring = q_vector->tx.ring;
	struct igb_tx_buffer *tx_buffer;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__IGB_DOWN, &adapter->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer_info[i];
	tx_desc = IGB_TX_DESC(tx_ring, i);
	/*
	 * Bias the unsigned index by -count so that it reaches 0 exactly
	 * when the walk passes the end of the ring; the wrap test below is
	 * then simply !i.  The bias is removed again after the loop.
	 */
	i -= tx_ring->count;

	for (; budget; budget--) {
		eop_desc = tx_buffer->next_to_watch;

		/* prevent any other reads prior to eop_desc */
		rmb();

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* retrieve hardware timestamp */
		igb_tx_hwtstamp(q_vector, tx_buffer);

		/* free the skb */
		dev_kfree_skb_any(tx_buffer->skb);
		tx_buffer->skb = NULL;

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 tx_buffer->dma,
				 tx_buffer->length,
				 DMA_TO_DEVICE);

		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer->dma = 0;

			tx_buffer++;
			tx_desc++;
			i++;
			/* biased index hit 0: wrap to the start of the ring */
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (tx_buffer->dma) {
				dma_unmap_page(tx_ring->dev,
					       tx_buffer->dma,
					       tx_buffer->length,
					       DMA_TO_DEVICE);
			}
		}

		/* clear last DMA location */
		tx_buffer->dma = 0;

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}
	}

	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
	/* undo the bias to get a real ring index again */
	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
		struct e1000_hw *hw = &adapter->hw;

		/* tx_buffer now refers to the first buffer not yet cleaned */
		eop_desc = tx_buffer->next_to_watch;

		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
		/*
		 * A hang is reported when work is still pending on that
		 * buffer, it is older than tx_timeout_factor seconds and the
		 * STATUS register does not show TXOFF.
		 */
		if (eop_desc &&
		    time_after(jiffies, tx_buffer->time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				"  Tx Queue             <%d>\n"
				"  TDH                  <%x>\n"
				"  TDT                  <%x>\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
				"  next_to_watch        <%p>\n"
				"  jiffies              <%lx>\n"
				"  desc.status          <%x>\n",
				tx_ring->queue_index,
				rd32(E1000_TDH(tx_ring->reg_idx)),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_buffer->time_stamp,
				eop_desc,
				jiffies,
				eop_desc->wb.status);
			netif_stop_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			/* we are about to reset, no point in enabling stuff */
			return true;
		}
	}

	/*
	 * Wake the subqueue only if something was cleaned, the carrier is
	 * up and at least IGB_TX_QUEUE_WAKE descriptors are unused.
	 */
	if (unlikely(total_packets &&
		     netif_carrier_ok(tx_ring->netdev) &&
		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);
		}
	}

	/* budget left over means the loop stopped because work ran out */
	return !!budget;
}
5905
5906static inline void igb_rx_checksum(struct igb_ring *ring,
5907				   union e1000_adv_rx_desc *rx_desc,
5908				   struct sk_buff *skb)
5909{
5910	skb_checksum_none_assert(skb);
5911
5912	/* Ignore Checksum bit is set */
5913	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5914		return;
5915
5916	/* Rx checksum disabled via ethtool */
5917	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5918		return;
5919
5920	/* TCP/UDP checksum error bit is set */
5921	if (igb_test_staterr(rx_desc,
5922			     E1000_RXDEXT_STATERR_TCPE |
5923			     E1000_RXDEXT_STATERR_IPE)) {
5924		/*
5925		 * work around errata with sctp packets where the TCPE aka
5926		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5927		 * packets, (aka let the stack check the crc32c)
5928		 */
5929		if (!((skb->len == 60) &&
5930		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5931			u64_stats_update_begin(&ring->rx_syncp);
5932			ring->rx_stats.csum_err++;
5933			u64_stats_update_end(&ring->rx_syncp);
5934		}
5935		/* let the stack verify checksum errors */
5936		return;
5937	}
5938	/* It must be a TCP or UDP packet with a valid checksum */
5939	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5940				      E1000_RXD_STAT_UDPCS))
5941		skb->ip_summed = CHECKSUM_UNNECESSARY;
5942
5943	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5944		le32_to_cpu(rx_desc->wb.upper.status_error));
5945}
5946
5947static inline void igb_rx_hash(struct igb_ring *ring,
5948			       union e1000_adv_rx_desc *rx_desc,
5949			       struct sk_buff *skb)
5950{
5951	if (ring->netdev->features & NETIF_F_RXHASH)
5952		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5953}
5954
5955static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5956			    union e1000_adv_rx_desc *rx_desc,
5957			    struct sk_buff *skb)
5958{
5959	struct igb_adapter *adapter = q_vector->adapter;
5960	struct e1000_hw *hw = &adapter->hw;
5961	u64 regval;
5962
5963	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5964				       E1000_RXDADV_STAT_TS))
5965		return;
5966
5967	/*
5968	 * If this bit is set, then the RX registers contain the time stamp. No
5969	 * other packet will be time stamped until we read these registers, so
5970	 * read the registers to make them available again. Because only one
5971	 * packet can be time stamped at a time, we know that the register
5972	 * values must belong to this one here and therefore we don't need to
5973	 * compare any of the additional attributes stored for it.
5974	 *
5975	 * If nothing went wrong, then it should have a shared tx_flags that we
5976	 * can turn into a skb_shared_hwtstamps.
5977	 */
5978	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5979		u32 *stamp = (u32 *)skb->data;
5980		regval = le32_to_cpu(*(stamp + 2));
5981		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5982		skb_pull(skb, IGB_TS_HDR_LEN);
5983	} else {
5984		if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5985			return;
5986
5987		regval = rd32(E1000_RXSTMPL);
5988		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5989	}
5990
5991	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5992}
5993
5994static void igb_rx_vlan(struct igb_ring *ring,
5995			union e1000_adv_rx_desc *rx_desc,
5996			struct sk_buff *skb)
5997{
5998	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5999		u16 vid;
6000		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6001		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6002			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6003		else
6004			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6005
6006		__vlan_hwaccel_put_tag(skb, vid);
6007	}
6008}
6009
6010static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6011{
6012	/* HW will not DMA in data larger than the given buffer, even if it
6013	 * parses the (NFS, of course) header to be larger.  In that case, it
6014	 * fills the header buffer and spills the rest into the page.
6015	 */
6016	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6017	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6018	if (hlen > IGB_RX_HDR_LEN)
6019		hlen = IGB_RX_HDR_LEN;
6020	return hlen;
6021}
6022
/**
 * igb_clean_rx_irq - process completed Rx descriptors of a queue vector
 * @q_vector: queue vector whose Rx ring is cleaned
 * @budget: maximum number of packets to pass up the stack
 *
 * Walks the ring from next_to_clean while descriptors have the DD bit
 * set, assembles each packet from its header buffer and half-page
 * buffers, and hands complete error-free packets to napi_gro_receive().
 * Buffers are returned to hardware in batches of IGB_RX_BUFFER_WRITE.
 *
 * Returns true when budget is still non-zero on exit, false when the
 * budget was used up.
 **/
static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
{
	struct igb_ring *rx_ring = q_vector->rx.ring;
	union e1000_adv_rx_desc *rx_desc;
	const int current_node = numa_node_id();
	unsigned int total_bytes = 0, total_packets = 0;
	u16 cleaned_count = igb_desc_unused(rx_ring);
	u16 i = rx_ring->next_to_clean;

	rx_desc = IGB_RX_DESC(rx_ring, i);

	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
		struct sk_buff *skb = buffer_info->skb;
		union e1000_adv_rx_desc *next_rxd;

		/* the skb is now owned by this loop, not by the ring slot */
		buffer_info->skb = NULL;
		prefetch(skb->data);

		/* advance the index, wrapping at the end of the ring */
		i++;
		if (i == rx_ring->count)
			i = 0;

		next_rxd = IGB_RX_DESC(rx_ring, i);
		prefetch(next_rxd);

		/*
		 * This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * RXD_STAT_DD bit is set
		 */
		rmb();

		/*
		 * An skb without paged data has not had its header buffer
		 * accounted for yet: set the linear length from the
		 * descriptor and release the header DMA mapping.
		 */
		if (!skb_is_nonlinear(skb)) {
			__skb_put(skb, igb_get_hlen(rx_desc));
			dma_unmap_single(rx_ring->dev, buffer_info->dma,
					 IGB_RX_HDR_LEN,
					 DMA_FROM_DEVICE);
			buffer_info->dma = 0;
		}

		/* a non-zero length means data was placed in the half page */
		if (rx_desc->wb.upper.length) {
			u16 length = le16_to_cpu(rx_desc->wb.upper.length);

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
						buffer_info->page,
						buffer_info->page_offset,
						length);

			skb->len += length;
			skb->data_len += length;
			skb->truesize += PAGE_SIZE / 2;

			/*
			 * Keep the page in the ring slot (taking an extra
			 * reference) only when the page count is exactly one
			 * and the page is on the current NUMA node;
			 * otherwise forget it so a new page is allocated.
			 */
			if ((page_count(buffer_info->page) != 1) ||
			    (page_to_nid(buffer_info->page) != current_node))
				buffer_info->page = NULL;
			else
				get_page(buffer_info->page);

			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;
		}

		/*
		 * Not the last descriptor of the packet: park the partially
		 * built skb in the next slot and move that slot's unused
		 * skb and header mapping into the current slot.
		 */
		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
			struct igb_rx_buffer *next_buffer;
			next_buffer = &rx_ring->rx_buffer_info[i];
			buffer_info->skb = next_buffer->skb;
			buffer_info->dma = next_buffer->dma;
			next_buffer->skb = skb;
			next_buffer->dma = 0;
			goto next_desc;
		}

		/* drop frames the hardware flagged with a frame error */
		if (igb_test_staterr(rx_desc,
				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
			dev_kfree_skb_any(skb);
			goto next_desc;
		}

		igb_rx_hwtstamp(q_vector, rx_desc, skb);
		igb_rx_hash(rx_ring, rx_desc, skb);
		igb_rx_checksum(rx_ring, rx_desc, skb);
		igb_rx_vlan(rx_ring, rx_desc, skb);

		total_bytes += skb->len;
		total_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		napi_gro_receive(&q_vector->napi, skb);

		/* only packets handed to the stack consume budget */
		budget--;
next_desc:
		if (!budget)
			break;

		cleaned_count++;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* use prefetched values */
		rx_desc = next_rxd;
	}

	rx_ring->next_to_clean = i;
	/* ring statistics are updated inside the rx_syncp update section */
	u64_stats_update_begin(&rx_ring->rx_syncp);
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->rx_syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

	/* hand any remaining cleaned buffers back to hardware */
	if (cleaned_count)
		igb_alloc_rx_buffers(rx_ring, cleaned_count);

	return !!budget;
}
6144
6145static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6146				 struct igb_rx_buffer *bi)
6147{
6148	struct sk_buff *skb = bi->skb;
6149	dma_addr_t dma = bi->dma;
6150
6151	if (dma)
6152		return true;
6153
6154	if (likely(!skb)) {
6155		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6156						IGB_RX_HDR_LEN);
6157		bi->skb = skb;
6158		if (!skb) {
6159			rx_ring->rx_stats.alloc_failed++;
6160			return false;
6161		}
6162
6163		/* initialize skb for ring */
6164		skb_record_rx_queue(skb, rx_ring->queue_index);
6165	}
6166
6167	dma = dma_map_single(rx_ring->dev, skb->data,
6168			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6169
6170	if (dma_mapping_error(rx_ring->dev, dma)) {
6171		rx_ring->rx_stats.alloc_failed++;
6172		return false;
6173	}
6174
6175	bi->dma = dma;
6176	return true;
6177}
6178
6179static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6180				  struct igb_rx_buffer *bi)
6181{
6182	struct page *page = bi->page;
6183	dma_addr_t page_dma = bi->page_dma;
6184	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6185
6186	if (page_dma)
6187		return true;
6188
6189	if (!page) {
6190		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6191		bi->page = page;
6192		if (unlikely(!page)) {
6193			rx_ring->rx_stats.alloc_failed++;
6194			return false;
6195		}
6196	}
6197
6198	page_dma = dma_map_page(rx_ring->dev, page,
6199				page_offset, PAGE_SIZE / 2,
6200				DMA_FROM_DEVICE);
6201
6202	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6203		rx_ring->rx_stats.alloc_failed++;
6204		return false;
6205	}
6206
6207	bi->page_dma = page_dma;
6208	bi->page_offset = page_offset;
6209	return true;
6210}
6211
/**
 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
 * @rx_ring: Rx ring whose descriptors are refilled
 * @cleaned_count: maximum number of descriptors to refill
 *
 * Starting at next_to_use, gives each descriptor a mapped header buffer
 * and a mapped half page.  Stops early when either allocation fails.
 * When at least one descriptor was refilled, next_to_use is updated and
 * the new value is written to the ring's tail register.
 **/
void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
{
	union e1000_adv_rx_desc *rx_desc;
	struct igb_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;

	rx_desc = IGB_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer_info[i];
	/*
	 * Bias the u16 index by -count so that it reaches zero exactly
	 * when the end of the ring is hit; the wrap test below then is a
	 * simple check for zero.  The bias is removed after the loop.
	 */
	i -= rx_ring->count;

	while (cleaned_count--) {
		if (!igb_alloc_mapped_skb(rx_ring, bi))
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info. */
		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);

		if (!igb_alloc_mapped_page(rx_ring, bi))
			break;

		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);

		rx_desc++;
		bi++;
		i++;
		/* biased index hit zero: wrap back to the start of the ring */
		if (unlikely(!i)) {
			rx_desc = IGB_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer_info;
			i -= rx_ring->count;
		}

		/* clear the hdr_addr for the next_to_use descriptor */
		rx_desc->read.hdr_addr = 0;
	}

	/* undo the bias to get back a real ring index */
	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64). */
		wmb();
		writel(i, rx_ring->tail);
	}
}
6265
6266/**
6267 * igb_mii_ioctl -
6268 * @netdev:
6269 * @ifreq:
6270 * @cmd:
6271 **/
6272static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6273{
6274	struct igb_adapter *adapter = netdev_priv(netdev);
6275	struct mii_ioctl_data *data = if_mii(ifr);
6276
6277	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6278		return -EOPNOTSUPP;
6279
6280	switch (cmd) {
6281	case SIOCGMIIPHY:
6282		data->phy_id = adapter->hw.phy.addr;
6283		break;
6284	case SIOCGMIIREG:
6285		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6286		                     &data->val_out))
6287			return -EIO;
6288		break;
6289	case SIOCSMIIREG:
6290	default:
6291		return -EOPNOTSUPP;
6292	}
6293	return 0;
6294}
6295
/**
 * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: network interface device structure
 * @ifr: interface request pointing at a user space hwtstamp_config
 * @cmd: ioctl command (not examined here)
 *
 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
 * when no packet needs it. At most one packet in the queue may be
 * marked for time stamping, otherwise it would be impossible to tell
 * for sure to which packet the hardware time stamp belongs.
 *
 * Incoming time stamping has to be configured via the hardware
 * filters. Not all combinations are supported, in particular event
 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 * level 2 or 4".
 *
 * Returns 0 on success, -EFAULT when the configuration cannot be copied
 * from or to user space, -EINVAL for non-zero flags or when an 82575 is
 * asked to enable time stamping, and -ERANGE for an unknown tx_type or
 * rx_filter.
 **/
static int igb_hwtstamp_ioctl(struct net_device *netdev,
			      struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct hwtstamp_config config;
	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
	u32 tsync_rx_cfg = 0;
	bool is_l4 = false;
	bool is_l2 = false;
	u32 regval;

	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
		return -EFAULT;

	/* reserved for future extensions */
	if (config.flags)
		return -EINVAL;

	switch (config.tx_type) {
	case HWTSTAMP_TX_OFF:
		tsync_tx_ctl = 0;
		/* fall through */
	case HWTSTAMP_TX_ON:
		break;
	default:
		return -ERANGE;
	}

	/*
	 * Translate the requested filter into control/config register
	 * values.  config.rx_filter is rewritten where the hardware stamps
	 * more than was asked for, so the caller learns what is in effect.
	 */
	switch (config.rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		tsync_rx_ctl = 0;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_ALL:
		/*
		 * register TSYNCRXCFG must be set, therefore it is not
		 * possible to time stamp both Sync and Delay_Req messages
		 * => fall back to time stamping all packets
		 */
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
		config.rx_filter = HWTSTAMP_FILTER_ALL;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
		is_l2 = true;
		is_l4 = true;
		break;
	default:
		return -ERANGE;
	}

	/*
	 * On 82575 no register is written: a request that leaves both Tx
	 * and Rx stamping off succeeds, anything else is rejected.
	 */
	if (hw->mac.type == e1000_82575) {
		if (tsync_rx_ctl | tsync_tx_ctl)
			return -EINVAL;
		return 0;
	}

	/*
	 * Per-packet timestamping only works if all packets are
	 * timestamped, so enable timestamping in all packets as
	 * long as one rx filter was configured.
	 */
	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
	}

	/* enable/disable TX */
	regval = rd32(E1000_TSYNCTXCTL);
	regval &= ~E1000_TSYNCTXCTL_ENABLED;
	regval |= tsync_tx_ctl;
	wr32(E1000_TSYNCTXCTL, regval);

	/* enable/disable RX */
	regval = rd32(E1000_TSYNCRXCTL);
	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
	regval |= tsync_rx_ctl;
	wr32(E1000_TSYNCRXCTL, regval);

	/* define which PTP packets are time stamped */
	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);

	/* define ethertype filter for timestamped packets */
	if (is_l2)
		wr32(E1000_ETQF(3),
		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
		                 E1000_ETQF_1588 | /* enable timestamping */
		                 ETH_P_1588));     /* 1588 eth protocol type */
	else
		wr32(E1000_ETQF(3), 0);

#define PTP_PORT 319
	/* L4 Queue Filter[3]: filter by destination port and protocol */
	if (is_l4) {
		u32 ftqf = (IPPROTO_UDP /* UDP */
			| E1000_FTQF_VF_BP /* VF not compared */
			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
			| E1000_FTQF_MASK); /* mask all inputs */
		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */

		wr32(E1000_IMIR(3), htons(PTP_PORT));
		wr32(E1000_IMIREXT(3),
		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
		if (hw->mac.type == e1000_82576) {
			/* enable source port check */
			wr32(E1000_SPQF(3), htons(PTP_PORT));
			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
		}
		wr32(E1000_FTQF(3), ftqf);
	} else {
		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
	}
	wrfl();

	/* remember the configuration that is now in effect */
	adapter->hwtstamp_config = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = rd32(E1000_TXSTMPH);
	regval = rd32(E1000_RXSTMPH);

	/* report the (possibly adjusted) configuration back to the caller */
	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}
6470
6471/**
6472 * igb_ioctl -
6473 * @netdev:
6474 * @ifreq:
6475 * @cmd:
6476 **/
6477static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6478{
6479	switch (cmd) {
6480	case SIOCGMIIPHY:
6481	case SIOCGMIIREG:
6482	case SIOCSMIIREG:
6483		return igb_mii_ioctl(netdev, ifr, cmd);
6484	case SIOCSHWTSTAMP:
6485		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6486	default:
6487		return -EOPNOTSUPP;
6488	}
6489}
6490
6491s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6492{
6493	struct igb_adapter *adapter = hw->back;
6494	u16 cap_offset;
6495
6496	cap_offset = adapter->pdev->pcie_cap;
6497	if (!cap_offset)
6498		return -E1000_ERR_CONFIG;
6499
6500	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6501
6502	return 0;
6503}
6504
6505s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6506{
6507	struct igb_adapter *adapter = hw->back;
6508	u16 cap_offset;
6509
6510	cap_offset = adapter->pdev->pcie_cap;
6511	if (!cap_offset)
6512		return -E1000_ERR_CONFIG;
6513
6514	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6515
6516	return 0;
6517}
6518
6519static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6520{
6521	struct igb_adapter *adapter = netdev_priv(netdev);
6522	struct e1000_hw *hw = &adapter->hw;
6523	u32 ctrl, rctl;
6524	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6525
6526	if (enable) {
6527		/* enable VLAN tag insert/strip */
6528		ctrl = rd32(E1000_CTRL);
6529		ctrl |= E1000_CTRL_VME;
6530		wr32(E1000_CTRL, ctrl);
6531
6532		/* Disable CFI check */
6533		rctl = rd32(E1000_RCTL);
6534		rctl &= ~E1000_RCTL_CFIEN;
6535		wr32(E1000_RCTL, rctl);
6536	} else {
6537		/* disable VLAN tag insert/strip */
6538		ctrl = rd32(E1000_CTRL);
6539		ctrl &= ~E1000_CTRL_VME;
6540		wr32(E1000_CTRL, ctrl);
6541	}
6542
6543	igb_rlpml_set(adapter);
6544}
6545
6546static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6547{
6548	struct igb_adapter *adapter = netdev_priv(netdev);
6549	struct e1000_hw *hw = &adapter->hw;
6550	int pf_id = adapter->vfs_allocated_count;
6551
6552	/* attempt to add filter to vlvf array */
6553	igb_vlvf_set(adapter, vid, true, pf_id);
6554
6555	/* add the filter since PF can receive vlans w/o entry in vlvf */
6556	igb_vfta_set(hw, vid, true);
6557
6558	set_bit(vid, adapter->active_vlans);
6559
6560	return 0;
6561}
6562
6563static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6564{
6565	struct igb_adapter *adapter = netdev_priv(netdev);
6566	struct e1000_hw *hw = &adapter->hw;
6567	int pf_id = adapter->vfs_allocated_count;
6568	s32 err;
6569
6570	/* remove vlan from VLVF table array */
6571	err = igb_vlvf_set(adapter, vid, false, pf_id);
6572
6573	/* if vid was not present in VLVF just remove it from table */
6574	if (err)
6575		igb_vfta_set(hw, vid, false);
6576
6577	clear_bit(vid, adapter->active_vlans);
6578
6579	return 0;
6580}
6581
6582static void igb_restore_vlan(struct igb_adapter *adapter)
6583{
6584	u16 vid;
6585
6586	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6587
6588	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6589		igb_vlan_rx_add_vid(adapter->netdev, vid);
6590}
6591
6592int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6593{
6594	struct pci_dev *pdev = adapter->pdev;
6595	struct e1000_mac_info *mac = &adapter->hw.mac;
6596
6597	mac->autoneg = 0;
6598
6599	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6600	 * for the switch() below to work */
6601	if ((spd & 1) || (dplx & ~1))
6602		goto err_inval;
6603
6604	/* Fiber NIC's only allow 1000 Gbps Full duplex */
6605	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6606	    spd != SPEED_1000 &&
6607	    dplx != DUPLEX_FULL)
6608		goto err_inval;
6609
6610	switch (spd + dplx) {
6611	case SPEED_10 + DUPLEX_HALF:
6612		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6613		break;
6614	case SPEED_10 + DUPLEX_FULL:
6615		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6616		break;
6617	case SPEED_100 + DUPLEX_HALF:
6618		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6619		break;
6620	case SPEED_100 + DUPLEX_FULL:
6621		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6622		break;
6623	case SPEED_1000 + DUPLEX_FULL:
6624		mac->autoneg = 1;
6625		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6626		break;
6627	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6628	default:
6629		goto err_inval;
6630	}
6631	return 0;
6632
6633err_inval:
6634	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6635	return -EINVAL;
6636}
6637
/**
 * __igb_shutdown - common device quiesce path for suspend and shutdown
 * @pdev: PCI device being shut down
 * @enable_wake: set to true when wake-up filters are armed or
 *               adapter->en_mng_pt is set
 * @runtime: true for runtime suspend, where only link change wake-up
 *           is requested; false to use the adapter's WoL settings
 *
 * Detaches the net device, closes it when running, tears down the
 * interrupt scheme, programs the wake-up registers, powers the link up
 * or down, releases hardware control and disables the PCI device.
 *
 * Returns 0 on success.  With CONFIG_PM, returns the pci_save_state()
 * error instead; in that case *@enable_wake has not been written.
 **/
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
			  bool runtime)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		__igb_close(netdev, true);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	/* no point in waking on link change when the link is already up */
	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		/* arm PME and the selected wake-up filters */
		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		/* no wake-up requested: clear wake-up control and filters */
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	/* the link has to stay powered when wake-up or en_mng_pt is set */
	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
6710
6711#ifdef CONFIG_PM
6712static int igb_suspend(struct device *dev)
6713{
6714	int retval;
6715	bool wake;
6716	struct pci_dev *pdev = to_pci_dev(dev);
6717
6718	retval = __igb_shutdown(pdev, &wake, 0);
6719	if (retval)
6720		return retval;
6721
6722	if (wake) {
6723		pci_prepare_to_sleep(pdev);
6724	} else {
6725		pci_wake_from_d3(pdev, false);
6726		pci_set_power_state(pdev, PCI_D3hot);
6727	}
6728
6729	return 0;
6730}
6731
6732static int igb_resume(struct device *dev)
6733{
6734	struct pci_dev *pdev = to_pci_dev(dev);
6735	struct net_device *netdev = pci_get_drvdata(pdev);
6736	struct igb_adapter *adapter = netdev_priv(netdev);
6737	struct e1000_hw *hw = &adapter->hw;
6738	u32 err;
6739
6740	pci_set_power_state(pdev, PCI_D0);
6741	pci_restore_state(pdev);
6742	pci_save_state(pdev);
6743
6744	err = pci_enable_device_mem(pdev);
6745	if (err) {
6746		dev_err(&pdev->dev,
6747			"igb: Cannot enable PCI device from suspend\n");
6748		return err;
6749	}
6750	pci_set_master(pdev);
6751
6752	pci_enable_wake(pdev, PCI_D3hot, 0);
6753	pci_enable_wake(pdev, PCI_D3cold, 0);
6754
6755	if (!rtnl_is_locked()) {
6756		/*
6757		 * shut up ASSERT_RTNL() warning in
6758		 * netif_set_real_num_tx/rx_queues.
6759		 */
6760		rtnl_lock();
6761		err = igb_init_interrupt_scheme(adapter);
6762		rtnl_unlock();
6763	} else {
6764		err = igb_init_interrupt_scheme(adapter);
6765	}
6766	if (err) {
6767		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6768		return -ENOMEM;
6769	}
6770
6771	igb_reset(adapter);
6772
6773	/* let the f/w know that the h/w is now under the control of the
6774	 * driver. */
6775	igb_get_hw_control(adapter);
6776
6777	wr32(E1000_WUS, ~0);
6778
6779	if (netdev->flags & IFF_UP) {
6780		err = __igb_open(netdev, true);
6781		if (err)
6782			return err;
6783	}
6784
6785	netif_device_attach(netdev);
6786	return 0;
6787}
6788
6789#ifdef CONFIG_PM_RUNTIME
6790static int igb_runtime_idle(struct device *dev)
6791{
6792	struct pci_dev *pdev = to_pci_dev(dev);
6793	struct net_device *netdev = pci_get_drvdata(pdev);
6794	struct igb_adapter *adapter = netdev_priv(netdev);
6795
6796	if (!igb_has_link(adapter))
6797		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6798
6799	return -EBUSY;
6800}
6801
6802static int igb_runtime_suspend(struct device *dev)
6803{
6804	struct pci_dev *pdev = to_pci_dev(dev);
6805	int retval;
6806	bool wake;
6807
6808	retval = __igb_shutdown(pdev, &wake, 1);
6809	if (retval)
6810		return retval;
6811
6812	if (wake) {
6813		pci_prepare_to_sleep(pdev);
6814	} else {
6815		pci_wake_from_d3(pdev, false);
6816		pci_set_power_state(pdev, PCI_D3hot);
6817	}
6818
6819	return 0;
6820}
6821
/**
 * igb_runtime_resume - runtime PM resume callback
 * @dev: generic device embedded in the adapter's pci_dev
 *
 * Runtime resume is identical to system resume; returns whatever
 * igb_resume() returns.
 **/
static int igb_runtime_resume(struct device *dev)
{
	int status = igb_resume(dev);

	return status;
}
6826#endif /* CONFIG_PM_RUNTIME */
6827#endif
6828
6829static void igb_shutdown(struct pci_dev *pdev)
6830{
6831	bool wake;
6832
6833	__igb_shutdown(pdev, &wake, 0);
6834
6835	if (system_state == SYSTEM_POWER_OFF) {
6836		pci_wake_from_d3(pdev, wake);
6837		pci_set_power_state(pdev, PCI_D3hot);
6838	}
6839}
6840
6841#ifdef CONFIG_NET_POLL_CONTROLLER
6842/*
6843 * Polling 'interrupt' - used by things like netconsole to send skbs
6844 * without having to re-enable interrupts. It's not called while
6845 * the interrupt routine is executing.
6846 */
6847static void igb_netpoll(struct net_device *netdev)
6848{
6849	struct igb_adapter *adapter = netdev_priv(netdev);
6850	struct e1000_hw *hw = &adapter->hw;
6851	struct igb_q_vector *q_vector;
6852	int i;
6853
6854	for (i = 0; i < adapter->num_q_vectors; i++) {
6855		q_vector = adapter->q_vector[i];
6856		if (adapter->msix_entries)
6857			wr32(E1000_EIMC, q_vector->eims_value);
6858		else
6859			igb_irq_disable(adapter);
6860		napi_schedule(&q_vector->napi);
6861	}
6862}
6863#endif /* CONFIG_NET_POLL_CONTROLLER */
6864
6865/**
6866 * igb_io_error_detected - called when PCI error is detected
6867 * @pdev: Pointer to PCI device
6868 * @state: The current pci connection state
6869 *
6870 * This function is called after a PCI bus error affecting
6871 * this device has been detected.
6872 */
6873static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6874					      pci_channel_state_t state)
6875{
6876	struct net_device *netdev = pci_get_drvdata(pdev);
6877	struct igb_adapter *adapter = netdev_priv(netdev);
6878
6879	netif_device_detach(netdev);
6880
6881	if (state == pci_channel_io_perm_failure)
6882		return PCI_ERS_RESULT_DISCONNECT;
6883
6884	if (netif_running(netdev))
6885		igb_down(adapter);
6886	pci_disable_device(pdev);
6887
6888	/* Request a slot slot reset. */
6889	return PCI_ERS_RESULT_NEED_RESET;
6890}
6891
6892/**
6893 * igb_io_slot_reset - called after the pci bus has been reset.
6894 * @pdev: Pointer to PCI device
6895 *
6896 * Restart the card from scratch, as if from a cold-boot. Implementation
6897 * resembles the first-half of the igb_resume routine.
6898 */
6899static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6900{
6901	struct net_device *netdev = pci_get_drvdata(pdev);
6902	struct igb_adapter *adapter = netdev_priv(netdev);
6903	struct e1000_hw *hw = &adapter->hw;
6904	pci_ers_result_t result;
6905	int err;
6906
6907	if (pci_enable_device_mem(pdev)) {
6908		dev_err(&pdev->dev,
6909			"Cannot re-enable PCI device after reset.\n");
6910		result = PCI_ERS_RESULT_DISCONNECT;
6911	} else {
6912		pci_set_master(pdev);
6913		pci_restore_state(pdev);
6914		pci_save_state(pdev);
6915
6916		pci_enable_wake(pdev, PCI_D3hot, 0);
6917		pci_enable_wake(pdev, PCI_D3cold, 0);
6918
6919		igb_reset(adapter);
6920		wr32(E1000_WUS, ~0);
6921		result = PCI_ERS_RESULT_RECOVERED;
6922	}
6923
6924	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6925	if (err) {
6926		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6927		        "failed 0x%0x\n", err);
6928		/* non-fatal, continue */
6929	}
6930
6931	return result;
6932}
6933
6934/**
6935 * igb_io_resume - called when traffic can start flowing again.
6936 * @pdev: Pointer to PCI device
6937 *
6938 * This callback is called when the error recovery driver tells us that
6939 * its OK to resume normal operation. Implementation resembles the
6940 * second-half of the igb_resume routine.
6941 */
6942static void igb_io_resume(struct pci_dev *pdev)
6943{
6944	struct net_device *netdev = pci_get_drvdata(pdev);
6945	struct igb_adapter *adapter = netdev_priv(netdev);
6946
6947	if (netif_running(netdev)) {
6948		if (igb_up(adapter)) {
6949			dev_err(&pdev->dev, "igb_up failed after reset\n");
6950			return;
6951		}
6952	}
6953
6954	netif_device_attach(netdev);
6955
6956	/* let the f/w know that the h/w is now under the control of the
6957	 * driver. */
6958	igb_get_hw_control(adapter);
6959}
6960
6961static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6962                             u8 qsel)
6963{
6964	u32 rar_low, rar_high;
6965	struct e1000_hw *hw = &adapter->hw;
6966
6967	/* HW expects these in little endian so we reverse the byte order
6968	 * from network order (big endian) to little endian
6969	 */
6970	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6971	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6972	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6973
6974	/* Indicate to hardware the Address is Valid. */
6975	rar_high |= E1000_RAH_AV;
6976
6977	if (hw->mac.type == e1000_82575)
6978		rar_high |= E1000_RAH_POOL_1 * qsel;
6979	else
6980		rar_high |= E1000_RAH_POOL_1 << qsel;
6981
6982	wr32(E1000_RAL(index), rar_low);
6983	wrfl();
6984	wr32(E1000_RAH(index), rar_high);
6985	wrfl();
6986}
6987
6988static int igb_set_vf_mac(struct igb_adapter *adapter,
6989                          int vf, unsigned char *mac_addr)
6990{
6991	struct e1000_hw *hw = &adapter->hw;
6992	/* VF MAC addresses start at end of receive addresses and moves
6993	 * torwards the first, as a result a collision should not be possible */
6994	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6995
6996	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6997
6998	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6999
7000	return 0;
7001}
7002
7003static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7004{
7005	struct igb_adapter *adapter = netdev_priv(netdev);
7006	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7007		return -EINVAL;
7008	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7009	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7010	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7011				      " change effective.");
7012	if (test_bit(__IGB_DOWN, &adapter->state)) {
7013		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7014			 " but the PF device is not up.\n");
7015		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7016			 " attempting to use the VF device.\n");
7017	}
7018	return igb_set_vf_mac(adapter, vf, mac);
7019}
7020
7021static int igb_link_mbps(int internal_link_speed)
7022{
7023	switch (internal_link_speed) {
7024	case SPEED_100:
7025		return 100;
7026	case SPEED_1000:
7027		return 1000;
7028	default:
7029		return 0;
7030	}
7031}
7032
7033static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7034				  int link_speed)
7035{
7036	int rf_dec, rf_int;
7037	u32 bcnrc_val;
7038
7039	if (tx_rate != 0) {
7040		/* Calculate the rate factor values to set */
7041		rf_int = link_speed / tx_rate;
7042		rf_dec = (link_speed - (rf_int * tx_rate));
7043		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7044
7045		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7046		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7047		               E1000_RTTBCNRC_RF_INT_MASK);
7048		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7049	} else {
7050		bcnrc_val = 0;
7051	}
7052
7053	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7054	wr32(E1000_RTTBCNRC, bcnrc_val);
7055}
7056
7057static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7058{
7059	int actual_link_speed, i;
7060	bool reset_rate = false;
7061
7062	/* VF TX rate limit was not set or not supported */
7063	if ((adapter->vf_rate_link_speed == 0) ||
7064	    (adapter->hw.mac.type != e1000_82576))
7065		return;
7066
7067	actual_link_speed = igb_link_mbps(adapter->link_speed);
7068	if (actual_link_speed != adapter->vf_rate_link_speed) {
7069		reset_rate = true;
7070		adapter->vf_rate_link_speed = 0;
7071		dev_info(&adapter->pdev->dev,
7072		         "Link speed has been changed. VF Transmit "
7073		         "rate is disabled\n");
7074	}
7075
7076	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7077		if (reset_rate)
7078			adapter->vf_data[i].tx_rate = 0;
7079
7080		igb_set_vf_rate_limit(&adapter->hw, i,
7081		                      adapter->vf_data[i].tx_rate,
7082		                      actual_link_speed);
7083	}
7084}
7085
7086static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7087{
7088	struct igb_adapter *adapter = netdev_priv(netdev);
7089	struct e1000_hw *hw = &adapter->hw;
7090	int actual_link_speed;
7091
7092	if (hw->mac.type != e1000_82576)
7093		return -EOPNOTSUPP;
7094
7095	actual_link_speed = igb_link_mbps(adapter->link_speed);
7096	if ((vf >= adapter->vfs_allocated_count) ||
7097	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7098	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7099		return -EINVAL;
7100
7101	adapter->vf_rate_link_speed = actual_link_speed;
7102	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7103	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7104
7105	return 0;
7106}
7107
7108static int igb_ndo_get_vf_config(struct net_device *netdev,
7109				 int vf, struct ifla_vf_info *ivi)
7110{
7111	struct igb_adapter *adapter = netdev_priv(netdev);
7112	if (vf >= adapter->vfs_allocated_count)
7113		return -EINVAL;
7114	ivi->vf = vf;
7115	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7116	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7117	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7118	ivi->qos = adapter->vf_data[vf].pf_qos;
7119	return 0;
7120}
7121
7122static void igb_vmm_control(struct igb_adapter *adapter)
7123{
7124	struct e1000_hw *hw = &adapter->hw;
7125	u32 reg;
7126
7127	switch (hw->mac.type) {
7128	case e1000_82575:
7129	default:
7130		/* replication is not supported for 82575 */
7131		return;
7132	case e1000_82576:
7133		/* notify HW that the MAC is adding vlan tags */
7134		reg = rd32(E1000_DTXCTL);
7135		reg |= E1000_DTXCTL_VLAN_ADDED;
7136		wr32(E1000_DTXCTL, reg);
7137	case e1000_82580:
7138		/* enable replication vlan tag stripping */
7139		reg = rd32(E1000_RPLOLR);
7140		reg |= E1000_RPLOLR_STRVLAN;
7141		wr32(E1000_RPLOLR, reg);
7142	case e1000_i350:
7143		/* none of the above registers are supported by i350 */
7144		break;
7145	}
7146
7147	if (adapter->vfs_allocated_count) {
7148		igb_vmdq_set_loopback_pf(hw, true);
7149		igb_vmdq_set_replication_pf(hw, true);
7150		igb_vmdq_set_anti_spoofing_pf(hw, true,
7151						adapter->vfs_allocated_count);
7152	} else {
7153		igb_vmdq_set_loopback_pf(hw, false);
7154		igb_vmdq_set_replication_pf(hw, false);
7155	}
7156}
7157
7158static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7159{
7160	struct e1000_hw *hw = &adapter->hw;
7161	u32 dmac_thr;
7162	u16 hwm;
7163
7164	if (hw->mac.type > e1000_82580) {
7165		if (adapter->flags & IGB_FLAG_DMAC) {
7166			u32 reg;
7167
7168			/* force threshold to 0. */
7169			wr32(E1000_DMCTXTH, 0);
7170
7171			/*
7172			 * DMA Coalescing high water mark needs to be greater
7173			 * than the Rx threshold. Set hwm to PBA - max frame
7174			 * size in 16B units, capping it at PBA - 6KB.
7175			 */
7176			hwm = 64 * pba - adapter->max_frame_size / 16;
7177			if (hwm < 64 * (pba - 6))
7178				hwm = 64 * (pba - 6);
7179			reg = rd32(E1000_FCRTC);
7180			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7181			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7182				& E1000_FCRTC_RTH_COAL_MASK);
7183			wr32(E1000_FCRTC, reg);
7184
7185			/*
7186			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7187			 * frame size, capping it at PBA - 10KB.
7188			 */
7189			dmac_thr = pba - adapter->max_frame_size / 512;
7190			if (dmac_thr < pba - 10)
7191				dmac_thr = pba - 10;
7192			reg = rd32(E1000_DMACR);
7193			reg &= ~E1000_DMACR_DMACTHR_MASK;
7194			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7195				& E1000_DMACR_DMACTHR_MASK);
7196
7197			/* transition to L0x or L1 if available..*/
7198			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7199
7200			/* watchdog timer= +-1000 usec in 32usec intervals */
7201			reg |= (1000 >> 5);
7202			wr32(E1000_DMACR, reg);
7203
7204			/*
7205			 * no lower threshold to disable
7206			 * coalescing(smart fifb)-UTRESH=0
7207			 */
7208			wr32(E1000_DMCRTRH, 0);
7209
7210			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7211
7212			wr32(E1000_DMCTLX, reg);
7213
7214			/*
7215			 * free space in tx packet buffer to wake from
7216			 * DMA coal
7217			 */
7218			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7219			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7220
7221			/*
7222			 * make low power state decision controlled
7223			 * by DMA coal
7224			 */
7225			reg = rd32(E1000_PCIEMISC);
7226			reg &= ~E1000_PCIEMISC_LX_DECISION;
7227			wr32(E1000_PCIEMISC, reg);
7228		} /* endif adapter->dmac is not disabled */
7229	} else if (hw->mac.type == e1000_82580) {
7230		u32 reg = rd32(E1000_PCIEMISC);
7231		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7232		wr32(E1000_DMACR, 0);
7233	}
7234}
7235
7236/* igb_main.c */
7237