igb_main.c revision c74d588e2addd9a13cca49a4d9172e0e2948448f
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2011 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/bitops.h>
32#include <linux/vmalloc.h>
33#include <linux/pagemap.h>
34#include <linux/netdevice.h>
35#include <linux/ipv6.h>
36#include <linux/slab.h>
37#include <net/checksum.h>
38#include <net/ip6_checksum.h>
39#include <linux/net_tstamp.h>
40#include <linux/mii.h>
41#include <linux/ethtool.h>
42#include <linux/if.h>
43#include <linux/if_vlan.h>
44#include <linux/pci.h>
45#include <linux/pci-aspm.h>
46#include <linux/delay.h>
47#include <linux/interrupt.h>
48#include <linux/ip.h>
49#include <linux/tcp.h>
50#include <linux/sctp.h>
51#include <linux/if_ether.h>
52#include <linux/aer.h>
53#include <linux/prefetch.h>
54#ifdef CONFIG_IGB_DCA
55#include <linux/dca.h>
56#endif
57#include "igb.h"
58
/*
 * Driver version components.  DRV_VERSION stringifies them into
 * "MAJ.MIN.BUILD-k"; it initialises igb_driver_version and is also passed
 * to MODULE_VERSION().
 */
59#define MAJ 3
60#define MIN 0
61#define BUILD 6
62#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63__stringify(BUILD) "-k"
/* Driver name (used as igb_driver.name) and version; neither is static. */
64char igb_driver_name[] = "igb";
65char igb_driver_version[] = DRV_VERSION;
/* Banner and copyright lines printed by igb_init_module(). */
66static const char igb_driver_string[] =
67				"Intel(R) Gigabit Ethernet Network Driver";
68static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
/*
 * Hardware info descriptors indexed by board type.  Only board_82575 is
 * populated; every entry of igb_pci_tbl uses that board type.
 */
70static const struct e1000_info *igb_info_tbl[] = {
71	[board_82575] = &e1000_82575_info,
72};
73
/*
 * PCI device IDs claimed by this driver (all Intel vendor IDs).
 * NOTE(review): the trailing board_82575 value presumably ends up in
 * driver_data and is used to index igb_info_tbl -- confirm in igb_probe().
 */
74static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100	/* required last entry: all-zero terminator */
101	{0, }
102};
103
/* Export the ID table in the module image. */
104MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
/*
 * Forward declarations.  igb_reset() is the only one declared without
 * static; the rest are file-local.
 */
106void igb_reset(struct igb_adapter *);
107static int igb_setup_all_tx_resources(struct igb_adapter *);
108static int igb_setup_all_rx_resources(struct igb_adapter *);
109static void igb_free_all_tx_resources(struct igb_adapter *);
110static void igb_free_all_rx_resources(struct igb_adapter *);
111static void igb_setup_mrqc(struct igb_adapter *);
112static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113static void __devexit igb_remove(struct pci_dev *pdev);
114static void igb_init_hw_timer(struct igb_adapter *adapter);
115static int igb_sw_init(struct igb_adapter *);
116static int igb_open(struct net_device *);
117static int igb_close(struct net_device *);
118static void igb_configure_tx(struct igb_adapter *);
119static void igb_configure_rx(struct igb_adapter *);
120static void igb_clean_all_tx_rings(struct igb_adapter *);
121static void igb_clean_all_rx_rings(struct igb_adapter *);
122static void igb_clean_tx_ring(struct igb_ring *);
123static void igb_clean_rx_ring(struct igb_ring *);
124static void igb_set_rx_mode(struct net_device *);
125static void igb_update_phy_info(unsigned long);
126static void igb_watchdog(unsigned long);
127static void igb_watchdog_task(struct work_struct *);
128static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130						 struct rtnl_link_stats64 *stats);
131static int igb_change_mtu(struct net_device *, int);
132static int igb_set_mac(struct net_device *, void *);
133static void igb_set_uta(struct igb_adapter *adapter);
/* Interrupt handlers: legacy, MSI, and the two MSI-X flavours. */
134static irqreturn_t igb_intr(int irq, void *);
135static irqreturn_t igb_intr_msi(int irq, void *);
136static irqreturn_t igb_msix_other(int irq, void *);
137static irqreturn_t igb_msix_ring(int irq, void *);
138#ifdef CONFIG_IGB_DCA
139static void igb_update_dca(struct igb_q_vector *);
140static void igb_setup_dca(struct igb_adapter *);
141#endif /* CONFIG_IGB_DCA */
142static int igb_poll(struct napi_struct *, int);
143static bool igb_clean_tx_irq(struct igb_q_vector *);
144static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146static void igb_tx_timeout(struct net_device *);
147static void igb_reset_task(struct work_struct *);
148static void igb_vlan_mode(struct net_device *netdev, u32 features);
149static void igb_vlan_rx_add_vid(struct net_device *, u16);
150static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151static void igb_restore_vlan(struct igb_adapter *);
152static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
/* Virtual-function (VF) support. */
153static void igb_ping_all_vfs(struct igb_adapter *);
154static void igb_msg_task(struct igb_adapter *);
155static void igb_vmm_control(struct igb_adapter *);
156static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160			       int vf, u16 vlan, u8 qos);
161static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163				 struct ifla_vf_info *ivi);
164static void igb_check_vf_rate_limit(struct igb_adapter *);
165
/* Power-management hooks referenced by igb_driver. */
166#ifdef CONFIG_PM
167static int igb_suspend(struct pci_dev *, pm_message_t);
168static int igb_resume(struct pci_dev *);
169#endif
170static void igb_shutdown(struct pci_dev *);
171#ifdef CONFIG_IGB_DCA
/*
 * DCA notifier: registered in igb_init_module() and unregistered in
 * igb_exit_module(); events are delivered to igb_notify_dca().
 */
172static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
173static struct notifier_block dca_notifier = {
174	.notifier_call	= igb_notify_dca,
175	.next		= NULL,
176	.priority	= 0
177};
178#endif
179#ifdef CONFIG_NET_POLL_CONTROLLER
180/* for netdump / net console */
181static void igb_netpoll(struct net_device *);
182#endif
183#ifdef CONFIG_PCI_IOV
/* max_vfs module parameter; defaults to 0, permission argument is 0. */
184static unsigned int max_vfs = 0;
185module_param(max_vfs, uint, 0);
186MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
187                 "per physical function");
188#endif /* CONFIG_PCI_IOV */
189
/* PCI error-recovery callbacks wired into igb_err_handler. */
190static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191		     pci_channel_state_t);
192static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193static void igb_io_resume(struct pci_dev *);
194
/* PCI error-recovery callbacks; attached to igb_driver.err_handler. */
195static struct pci_error_handlers igb_err_handler = {
196	.error_detected = igb_io_error_detected,
197	.slot_reset = igb_io_slot_reset,
198	.resume = igb_io_resume,
199};
200
201
/*
 * PCI driver descriptor.  Registered by igb_init_module() and unregistered
 * by igb_exit_module(); binds to the devices listed in igb_pci_tbl.
 */
202static struct pci_driver igb_driver = {
203	.name     = igb_driver_name,
204	.id_table = igb_pci_tbl,
205	.probe    = igb_probe,
206	.remove   = __devexit_p(igb_remove),
207#ifdef CONFIG_PM
208	/* Power Management Hooks */
209	.suspend  = igb_suspend,
210	.resume   = igb_resume,
211#endif
212	.shutdown = igb_shutdown,
213	.err_handler = &igb_err_handler
214};
215
/* Module metadata; the version string is DRV_VERSION ("MAJ.MIN.BUILD-k"). */
216MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218MODULE_LICENSE("GPL");
219MODULE_VERSION(DRV_VERSION);
220
221struct igb_reg_info {
222	u32 ofs;
223	char *name;
224};
225
/*
 * Registers printed by igb_dump().  Per-queue registers are listed once
 * with queue index 0; igb_regdump() recognises those offsets and prints
 * the values for queues 0-3.  The empty entry (NULL name) ends the table.
 */
226static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228	/* General Registers */
229	{E1000_CTRL, "CTRL"},
230	{E1000_STATUS, "STATUS"},
231	{E1000_CTRL_EXT, "CTRL_EXT"},
232
233	/* Interrupt Registers */
234	{E1000_ICR, "ICR"},
235
236	/* RX Registers */
237	{E1000_RCTL, "RCTL"},
238	{E1000_RDLEN(0), "RDLEN"},
239	{E1000_RDH(0), "RDH"},
240	{E1000_RDT(0), "RDT"},
241	{E1000_RXDCTL(0), "RXDCTL"},
242	{E1000_RDBAL(0), "RDBAL"},
243	{E1000_RDBAH(0), "RDBAH"},
244
245	/* TX Registers */
246	{E1000_TCTL, "TCTL"},
247	{E1000_TDBAL(0), "TDBAL"},
248	{E1000_TDBAH(0), "TDBAH"},
249	{E1000_TDLEN(0), "TDLEN"},
250	{E1000_TDH(0), "TDH"},
251	{E1000_TDT(0), "TDT"},
252	{E1000_TXDCTL(0), "TXDCTL"},
253	{E1000_TDFH, "TDFH"},
254	{E1000_TDFT, "TDFT"},
255	{E1000_TDFHS, "TDFHS"},
256	{E1000_TDFPC, "TDFPC"},
257
258	/* List Terminator */
259	{}
260};
261
262/*
263 * igb_regdump - register printout routine
264 */
265static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266{
267	int n = 0;
268	char rname[16];
269	u32 regs[8];
270
271	switch (reginfo->ofs) {
272	case E1000_RDLEN(0):
273		for (n = 0; n < 4; n++)
274			regs[n] = rd32(E1000_RDLEN(n));
275		break;
276	case E1000_RDH(0):
277		for (n = 0; n < 4; n++)
278			regs[n] = rd32(E1000_RDH(n));
279		break;
280	case E1000_RDT(0):
281		for (n = 0; n < 4; n++)
282			regs[n] = rd32(E1000_RDT(n));
283		break;
284	case E1000_RXDCTL(0):
285		for (n = 0; n < 4; n++)
286			regs[n] = rd32(E1000_RXDCTL(n));
287		break;
288	case E1000_RDBAL(0):
289		for (n = 0; n < 4; n++)
290			regs[n] = rd32(E1000_RDBAL(n));
291		break;
292	case E1000_RDBAH(0):
293		for (n = 0; n < 4; n++)
294			regs[n] = rd32(E1000_RDBAH(n));
295		break;
296	case E1000_TDBAL(0):
297		for (n = 0; n < 4; n++)
298			regs[n] = rd32(E1000_RDBAL(n));
299		break;
300	case E1000_TDBAH(0):
301		for (n = 0; n < 4; n++)
302			regs[n] = rd32(E1000_TDBAH(n));
303		break;
304	case E1000_TDLEN(0):
305		for (n = 0; n < 4; n++)
306			regs[n] = rd32(E1000_TDLEN(n));
307		break;
308	case E1000_TDH(0):
309		for (n = 0; n < 4; n++)
310			regs[n] = rd32(E1000_TDH(n));
311		break;
312	case E1000_TDT(0):
313		for (n = 0; n < 4; n++)
314			regs[n] = rd32(E1000_TDT(n));
315		break;
316	case E1000_TXDCTL(0):
317		for (n = 0; n < 4; n++)
318			regs[n] = rd32(E1000_TXDCTL(n));
319		break;
320	default:
321		printk(KERN_INFO "%-15s %08x\n",
322			reginfo->name, rd32(reginfo->ofs));
323		return;
324	}
325
326	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327	printk(KERN_INFO "%-15s ", rname);
328	for (n = 0; n < 4; n++)
329		printk(KERN_CONT "%08x ", regs[n]);
330	printk(KERN_CONT "\n");
331}
332
333/*
334 * igb_dump - Print registers, tx-rings and rx-rings
335 */
336static void igb_dump(struct igb_adapter *adapter)
337{
338	struct net_device *netdev = adapter->netdev;
339	struct e1000_hw *hw = &adapter->hw;
340	struct igb_reg_info *reginfo;
341	struct igb_ring *tx_ring;
342	union e1000_adv_tx_desc *tx_desc;
343	struct my_u0 { u64 a; u64 b; } *u0;
344	struct igb_ring *rx_ring;
345	union e1000_adv_rx_desc *rx_desc;
346	u32 staterr;
347	u16 i, n;
348
349	if (!netif_msg_hw(adapter))
350		return;
351
352	/* Print netdevice Info */
353	if (netdev) {
354		dev_info(&adapter->pdev->dev, "Net device Info\n");
355		printk(KERN_INFO "Device Name     state            "
356			"trans_start      last_rx\n");
357		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
358		netdev->name,
359		netdev->state,
360		netdev->trans_start,
361		netdev->last_rx);
362	}
363
364	/* Print Registers */
365	dev_info(&adapter->pdev->dev, "Register Dump\n");
366	printk(KERN_INFO " Register Name   Value\n");
367	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
368	     reginfo->name; reginfo++) {
369		igb_regdump(hw, reginfo);
370	}
371
372	/* Print TX Ring Summary */
373	if (!netdev || !netif_running(netdev))
374		goto exit;
375
376	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
377	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
378		" leng ntw timestamp\n");
379	for (n = 0; n < adapter->num_tx_queues; n++) {
380		struct igb_tx_buffer *buffer_info;
381		tx_ring = adapter->tx_ring[n];
382		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
383		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
384			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
385			   (u64)buffer_info->dma,
386			   buffer_info->length,
387			   buffer_info->next_to_watch,
388			   (u64)buffer_info->time_stamp);
389	}
390
391	/* Print TX Rings */
392	if (!netif_msg_tx_done(adapter))
393		goto rx_ring_summary;
394
395	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
396
397	/* Transmit Descriptor Formats
398	 *
399	 * Advanced Transmit Descriptor
400	 *   +--------------------------------------------------------------+
401	 * 0 |         Buffer Address [63:0]                                |
402	 *   +--------------------------------------------------------------+
403	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
404	 *   +--------------------------------------------------------------+
405	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
406	 */
407
408	for (n = 0; n < adapter->num_tx_queues; n++) {
409		tx_ring = adapter->tx_ring[n];
410		printk(KERN_INFO "------------------------------------\n");
411		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
412		printk(KERN_INFO "------------------------------------\n");
413		printk(KERN_INFO "T [desc]     [address 63:0  ] "
414			"[PlPOCIStDDM Ln] [bi->dma       ] "
415			"leng  ntw timestamp        bi->skb\n");
416
417		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
418			struct igb_tx_buffer *buffer_info;
419			tx_desc = IGB_TX_DESC(tx_ring, i);
420			buffer_info = &tx_ring->tx_buffer_info[i];
421			u0 = (struct my_u0 *)tx_desc;
422			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
423				" %04X  %p %016llX %p", i,
424				le64_to_cpu(u0->a),
425				le64_to_cpu(u0->b),
426				(u64)buffer_info->dma,
427				buffer_info->length,
428				buffer_info->next_to_watch,
429				(u64)buffer_info->time_stamp,
430				buffer_info->skb);
431			if (i == tx_ring->next_to_use &&
432				i == tx_ring->next_to_clean)
433				printk(KERN_CONT " NTC/U\n");
434			else if (i == tx_ring->next_to_use)
435				printk(KERN_CONT " NTU\n");
436			else if (i == tx_ring->next_to_clean)
437				printk(KERN_CONT " NTC\n");
438			else
439				printk(KERN_CONT "\n");
440
441			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
442				print_hex_dump(KERN_INFO, "",
443					DUMP_PREFIX_ADDRESS,
444					16, 1, phys_to_virt(buffer_info->dma),
445					buffer_info->length, true);
446		}
447	}
448
449	/* Print RX Rings Summary */
450rx_ring_summary:
451	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
452	printk(KERN_INFO "Queue [NTU] [NTC]\n");
453	for (n = 0; n < adapter->num_rx_queues; n++) {
454		rx_ring = adapter->rx_ring[n];
455		printk(KERN_INFO " %5d %5X %5X\n", n,
456			   rx_ring->next_to_use, rx_ring->next_to_clean);
457	}
458
459	/* Print RX Rings */
460	if (!netif_msg_rx_status(adapter))
461		goto exit;
462
463	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
464
465	/* Advanced Receive Descriptor (Read) Format
466	 *    63                                           1        0
467	 *    +-----------------------------------------------------+
468	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
469	 *    +----------------------------------------------+------+
470	 *  8 |       Header Buffer Address [63:1]           |  DD  |
471	 *    +-----------------------------------------------------+
472	 *
473	 *
474	 * Advanced Receive Descriptor (Write-Back) Format
475	 *
476	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
477	 *   +------------------------------------------------------+
478	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
479	 *   | Checksum   Ident  |   |           |    | Type | Type |
480	 *   +------------------------------------------------------+
481	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
482	 *   +------------------------------------------------------+
483	 *   63       48 47    32 31            20 19               0
484	 */
485
486	for (n = 0; n < adapter->num_rx_queues; n++) {
487		rx_ring = adapter->rx_ring[n];
488		printk(KERN_INFO "------------------------------------\n");
489		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
490		printk(KERN_INFO "------------------------------------\n");
491		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
492			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
493			"<-- Adv Rx Read format\n");
494		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
495			"[vl er S cks ln] ---------------- [bi->skb] "
496			"<-- Adv Rx Write-Back format\n");
497
498		for (i = 0; i < rx_ring->count; i++) {
499			struct igb_rx_buffer *buffer_info;
500			buffer_info = &rx_ring->rx_buffer_info[i];
501			rx_desc = IGB_RX_DESC(rx_ring, i);
502			u0 = (struct my_u0 *)rx_desc;
503			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
504			if (staterr & E1000_RXD_STAT_DD) {
505				/* Descriptor Done */
506				printk(KERN_INFO "RWB[0x%03X]     %016llX "
507					"%016llX ---------------- %p", i,
508					le64_to_cpu(u0->a),
509					le64_to_cpu(u0->b),
510					buffer_info->skb);
511			} else {
512				printk(KERN_INFO "R  [0x%03X]     %016llX "
513					"%016llX %016llX %p", i,
514					le64_to_cpu(u0->a),
515					le64_to_cpu(u0->b),
516					(u64)buffer_info->dma,
517					buffer_info->skb);
518
519				if (netif_msg_pktdata(adapter)) {
520					print_hex_dump(KERN_INFO, "",
521						DUMP_PREFIX_ADDRESS,
522						16, 1,
523						phys_to_virt(buffer_info->dma),
524						IGB_RX_HDR_LEN, true);
525					print_hex_dump(KERN_INFO, "",
526					  DUMP_PREFIX_ADDRESS,
527					  16, 1,
528					  phys_to_virt(
529					    buffer_info->page_dma +
530					    buffer_info->page_offset),
531					  PAGE_SIZE/2, true);
532				}
533			}
534
535			if (i == rx_ring->next_to_use)
536				printk(KERN_CONT " NTU\n");
537			else if (i == rx_ring->next_to_clean)
538				printk(KERN_CONT " NTC\n");
539			else
540				printk(KERN_CONT "\n");
541
542		}
543	}
544
545exit:
546	return;
547}
548
549
550/**
551 * igb_read_clock - read raw cycle counter (to be used by time counter)
552 */
553static cycle_t igb_read_clock(const struct cyclecounter *tc)
554{
555	struct igb_adapter *adapter =
556		container_of(tc, struct igb_adapter, cycles);
557	struct e1000_hw *hw = &adapter->hw;
558	u64 stamp = 0;
559	int shift = 0;
560
561	/*
562	 * The timestamp latches on lowest register read. For the 82580
563	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
564	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
565	 */
566	if (hw->mac.type == e1000_82580) {
567		stamp = rd32(E1000_SYSTIMR) >> 8;
568		shift = IGB_82580_TSYNC_SHIFT;
569	}
570
571	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
572	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
573	return stamp;
574}
575
576/**
577 * igb_get_hw_dev - return device
578 * used by hardware layer to print debugging information
579 **/
580struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
581{
582	struct igb_adapter *adapter = hw->back;
583	return adapter->netdev;
584}
585
586/**
587 * igb_init_module - Driver Registration Routine
588 *
589 * igb_init_module is the first routine called when the driver is
590 * loaded. All it does is register with the PCI subsystem.
591 **/
592static int __init igb_init_module(void)
593{
594	int ret;
595	printk(KERN_INFO "%s - version %s\n",
596	       igb_driver_string, igb_driver_version);
597
598	printk(KERN_INFO "%s\n", igb_copyright);
599
600#ifdef CONFIG_IGB_DCA
601	dca_register_notify(&dca_notifier);
602#endif
603	ret = pci_register_driver(&igb_driver);
604	return ret;
605}
606
607module_init(igb_init_module);
608
609/**
610 * igb_exit_module - Driver Exit Cleanup Routine
611 *
612 * igb_exit_module is called just before the driver is removed
613 * from memory.
614 **/
615static void __exit igb_exit_module(void)
616{
617#ifdef CONFIG_IGB_DCA
618	dca_unregister_notify(&dca_notifier);
619#endif
620	pci_unregister_driver(&igb_driver);
621}
622
623module_exit(igb_exit_module);
624
/*
 * Map queue number i to its 82576 register index when VFs are in use:
 * bit 0 of i moves to bit 3 and the remaining bits shift down by one
 * (0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, ...).  The argument is parenthesized so
 * that expressions such as Q_IDX_82576(a + b) expand correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
626/**
627 * igb_cache_ring_register - Descriptor ring to register mapping
628 * @adapter: board private structure to initialize
629 *
630 * Once we know the feature-set enabled for the device, we'll cache
631 * the register offset the descriptor ring is assigned to.
632 **/
633static void igb_cache_ring_register(struct igb_adapter *adapter)
634{
635	int i = 0, j = 0;
636	u32 rbase_offset = adapter->vfs_allocated_count;
637
638	switch (adapter->hw.mac.type) {
639	case e1000_82576:
640		/* The queues are allocated for virtualization such that VF 0
641		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
642		 * In order to avoid collision we start at the first free queue
643		 * and continue consuming queues in the same sequence
644		 */
645		if (adapter->vfs_allocated_count) {
646			for (; i < adapter->rss_queues; i++)
647				adapter->rx_ring[i]->reg_idx = rbase_offset +
648				                               Q_IDX_82576(i);
649		}
650	case e1000_82575:
651	case e1000_82580:
652	case e1000_i350:
653	default:
654		for (; i < adapter->num_rx_queues; i++)
655			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
656		for (; j < adapter->num_tx_queues; j++)
657			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
658		break;
659	}
660}
661
662static void igb_free_queues(struct igb_adapter *adapter)
663{
664	int i;
665
666	for (i = 0; i < adapter->num_tx_queues; i++) {
667		kfree(adapter->tx_ring[i]);
668		adapter->tx_ring[i] = NULL;
669	}
670	for (i = 0; i < adapter->num_rx_queues; i++) {
671		kfree(adapter->rx_ring[i]);
672		adapter->rx_ring[i] = NULL;
673	}
674	adapter->num_rx_queues = 0;
675	adapter->num_tx_queues = 0;
676}
677
678/**
679 * igb_alloc_queues - Allocate memory for all rings
680 * @adapter: board private structure to initialize
681 *
682 * We allocate one ring per queue at run-time since we don't know the
683 * number of queues at compile-time.
684 **/
685static int igb_alloc_queues(struct igb_adapter *adapter)
686{
687	struct igb_ring *ring;
688	int i;
689	int orig_node = adapter->node;
690
691	for (i = 0; i < adapter->num_tx_queues; i++) {
692		if (orig_node == -1) {
693			int cur_node = next_online_node(adapter->node);
694			if (cur_node == MAX_NUMNODES)
695				cur_node = first_online_node;
696			adapter->node = cur_node;
697		}
698		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699				    adapter->node);
700		if (!ring)
701			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702		if (!ring)
703			goto err;
704		ring->count = adapter->tx_ring_count;
705		ring->queue_index = i;
706		ring->dev = &adapter->pdev->dev;
707		ring->netdev = adapter->netdev;
708		ring->numa_node = adapter->node;
709		/* For 82575, context index must be unique per ring. */
710		if (adapter->hw.mac.type == e1000_82575)
711			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
712		adapter->tx_ring[i] = ring;
713	}
714	/* Restore the adapter's original node */
715	adapter->node = orig_node;
716
717	for (i = 0; i < adapter->num_rx_queues; i++) {
718		if (orig_node == -1) {
719			int cur_node = next_online_node(adapter->node);
720			if (cur_node == MAX_NUMNODES)
721				cur_node = first_online_node;
722			adapter->node = cur_node;
723		}
724		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725				    adapter->node);
726		if (!ring)
727			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728		if (!ring)
729			goto err;
730		ring->count = adapter->rx_ring_count;
731		ring->queue_index = i;
732		ring->dev = &adapter->pdev->dev;
733		ring->netdev = adapter->netdev;
734		ring->numa_node = adapter->node;
735		/* set flag indicating ring supports SCTP checksum offload */
736		if (adapter->hw.mac.type >= e1000_82576)
737			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
738		adapter->rx_ring[i] = ring;
739	}
740	/* Restore the adapter's original node */
741	adapter->node = orig_node;
742
743	igb_cache_ring_register(adapter);
744
745	return 0;
746
747err:
748	/* Restore the adapter's original node */
749	adapter->node = orig_node;
750	igb_free_queues(adapter);
751
752	return -ENOMEM;
753}
754
755/**
756 *  igb_write_ivar - configure ivar for given MSI-X vector
757 *  @hw: pointer to the HW structure
758 *  @msix_vector: vector number we are allocating to a given ring
759 *  @index: row index of IVAR register to write within IVAR table
760 *  @offset: column offset of in IVAR, should be multiple of 8
761 *
762 *  This function is intended to handle the writing of the IVAR register
763 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
764 *  each containing an cause allocation for an Rx and Tx ring, and a
765 *  variable number of rows depending on the number of queues supported.
766 **/
767static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
768			   int index, int offset)
769{
770	u32 ivar = array_rd32(E1000_IVAR0, index);
771
772	/* clear any bits that are currently set */
773	ivar &= ~((u32)0xFF << offset);
774
775	/* write vector and valid bit */
776	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
777
778	array_wr32(E1000_IVAR0, index, ivar);
779}
780
781#define IGB_N0_QUEUE -1
782static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
783{
784	struct igb_adapter *adapter = q_vector->adapter;
785	struct e1000_hw *hw = &adapter->hw;
786	int rx_queue = IGB_N0_QUEUE;
787	int tx_queue = IGB_N0_QUEUE;
788	u32 msixbm = 0;
789
790	if (q_vector->rx.ring)
791		rx_queue = q_vector->rx.ring->reg_idx;
792	if (q_vector->tx.ring)
793		tx_queue = q_vector->tx.ring->reg_idx;
794
795	switch (hw->mac.type) {
796	case e1000_82575:
797		/* The 82575 assigns vectors using a bitmask, which matches the
798		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
799		   or more queues to a vector, we write the appropriate bits
800		   into the MSIXBM register for that vector. */
801		if (rx_queue > IGB_N0_QUEUE)
802			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
803		if (tx_queue > IGB_N0_QUEUE)
804			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
805		if (!adapter->msix_entries && msix_vector == 0)
806			msixbm |= E1000_EIMS_OTHER;
807		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
808		q_vector->eims_value = msixbm;
809		break;
810	case e1000_82576:
811		/*
812		 * 82576 uses a table that essentially consists of 2 columns
813		 * with 8 rows.  The ordering is column-major so we use the
814		 * lower 3 bits as the row index, and the 4th bit as the
815		 * column offset.
816		 */
817		if (rx_queue > IGB_N0_QUEUE)
818			igb_write_ivar(hw, msix_vector,
819				       rx_queue & 0x7,
820				       (rx_queue & 0x8) << 1);
821		if (tx_queue > IGB_N0_QUEUE)
822			igb_write_ivar(hw, msix_vector,
823				       tx_queue & 0x7,
824				       ((tx_queue & 0x8) << 1) + 8);
825		q_vector->eims_value = 1 << msix_vector;
826		break;
827	case e1000_82580:
828	case e1000_i350:
829		/*
830		 * On 82580 and newer adapters the scheme is similar to 82576
831		 * however instead of ordering column-major we have things
832		 * ordered row-major.  So we traverse the table by using
833		 * bit 0 as the column offset, and the remaining bits as the
834		 * row index.
835		 */
836		if (rx_queue > IGB_N0_QUEUE)
837			igb_write_ivar(hw, msix_vector,
838				       rx_queue >> 1,
839				       (rx_queue & 0x1) << 4);
840		if (tx_queue > IGB_N0_QUEUE)
841			igb_write_ivar(hw, msix_vector,
842				       tx_queue >> 1,
843				       ((tx_queue & 0x1) << 4) + 8);
844		q_vector->eims_value = 1 << msix_vector;
845		break;
846	default:
847		BUG();
848		break;
849	}
850
851	/* add q_vector eims value to global eims_enable_mask */
852	adapter->eims_enable_mask |= q_vector->eims_value;
853
854	/* configure q_vector to set itr on first interrupt */
855	q_vector->set_itr = 1;
856}
857
858/**
859 * igb_configure_msix - Configure MSI-X hardware
860 *
861 * igb_configure_msix sets up the hardware to properly
862 * generate MSI-X interrupts.
863 **/
864static void igb_configure_msix(struct igb_adapter *adapter)
865{
866	u32 tmp;
867	int i, vector = 0;
868	struct e1000_hw *hw = &adapter->hw;
869
870	adapter->eims_enable_mask = 0;
871
872	/* set vector for other causes, i.e. link changes */
873	switch (hw->mac.type) {
874	case e1000_82575:
875		tmp = rd32(E1000_CTRL_EXT);
876		/* enable MSI-X PBA support*/
877		tmp |= E1000_CTRL_EXT_PBA_CLR;
878
879		/* Auto-Mask interrupts upon ICR read. */
880		tmp |= E1000_CTRL_EXT_EIAME;
881		tmp |= E1000_CTRL_EXT_IRCA;
882
883		wr32(E1000_CTRL_EXT, tmp);
884
885		/* enable msix_other interrupt */
886		array_wr32(E1000_MSIXBM(0), vector++,
887		                      E1000_EIMS_OTHER);
888		adapter->eims_other = E1000_EIMS_OTHER;
889
890		break;
891
892	case e1000_82576:
893	case e1000_82580:
894	case e1000_i350:
895		/* Turn on MSI-X capability first, or our settings
896		 * won't stick.  And it will take days to debug. */
897		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
898		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
899		                E1000_GPIE_NSICR);
900
901		/* enable msix_other interrupt */
902		adapter->eims_other = 1 << vector;
903		tmp = (vector++ | E1000_IVAR_VALID) << 8;
904
905		wr32(E1000_IVAR_MISC, tmp);
906		break;
907	default:
908		/* do nothing, since nothing else supports MSI-X */
909		break;
910	} /* switch (hw->mac.type) */
911
912	adapter->eims_enable_mask |= adapter->eims_other;
913
914	for (i = 0; i < adapter->num_q_vectors; i++)
915		igb_assign_vector(adapter->q_vector[i], vector++);
916
917	wrfl();
918}
919
920/**
921 * igb_request_msix - Initialize MSI-X interrupts
922 *
923 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
924 * kernel.
925 **/
926static int igb_request_msix(struct igb_adapter *adapter)
927{
928	struct net_device *netdev = adapter->netdev;
929	struct e1000_hw *hw = &adapter->hw;
930	int i, err = 0, vector = 0;
931
932	err = request_irq(adapter->msix_entries[vector].vector,
933	                  igb_msix_other, 0, netdev->name, adapter);
934	if (err)
935		goto out;
936	vector++;
937
938	for (i = 0; i < adapter->num_q_vectors; i++) {
939		struct igb_q_vector *q_vector = adapter->q_vector[i];
940
941		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
942
943		if (q_vector->rx.ring && q_vector->tx.ring)
944			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
945				q_vector->rx.ring->queue_index);
946		else if (q_vector->tx.ring)
947			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
948				q_vector->tx.ring->queue_index);
949		else if (q_vector->rx.ring)
950			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
951				q_vector->rx.ring->queue_index);
952		else
953			sprintf(q_vector->name, "%s-unused", netdev->name);
954
955		err = request_irq(adapter->msix_entries[vector].vector,
956		                  igb_msix_ring, 0, q_vector->name,
957		                  q_vector);
958		if (err)
959			goto out;
960		vector++;
961	}
962
963	igb_configure_msix(adapter);
964	return 0;
965out:
966	return err;
967}
968
969static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
970{
971	if (adapter->msix_entries) {
972		pci_disable_msix(adapter->pdev);
973		kfree(adapter->msix_entries);
974		adapter->msix_entries = NULL;
975	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
976		pci_disable_msi(adapter->pdev);
977	}
978}
979
980/**
981 * igb_free_q_vectors - Free memory allocated for interrupt vectors
982 * @adapter: board private structure to initialize
983 *
984 * This function frees the memory allocated to the q_vectors.  In addition if
985 * NAPI is enabled it will delete any references to the NAPI struct prior
986 * to freeing the q_vector.
987 **/
988static void igb_free_q_vectors(struct igb_adapter *adapter)
989{
990	int v_idx;
991
992	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
993		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
994		adapter->q_vector[v_idx] = NULL;
995		if (!q_vector)
996			continue;
997		netif_napi_del(&q_vector->napi);
998		kfree(q_vector);
999	}
1000	adapter->num_q_vectors = 0;
1001}
1002
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Undoes the interrupt scheme in three steps: the queues are freed, then
 * the q_vectors, and finally MSI-X/MSI is switched off through
 * igb_reset_interrupt_capability.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first, then the vectors that reference them */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);

	/* finally give the MSI-X/MSI resources back */
	igb_reset_interrupt_capability(adapter);
}
1015
1016/**
1017 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1018 *
1019 * Attempt to configure interrupts using the best available
1020 * capabilities of the hardware and kernel.
1021 **/
1022static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1023{
1024	int err;
1025	int numvecs, i;
1026
1027	/* Number of supported queues. */
1028	adapter->num_rx_queues = adapter->rss_queues;
1029	if (adapter->vfs_allocated_count)
1030		adapter->num_tx_queues = 1;
1031	else
1032		adapter->num_tx_queues = adapter->rss_queues;
1033
1034	/* start with one vector for every rx queue */
1035	numvecs = adapter->num_rx_queues;
1036
1037	/* if tx handler is separate add 1 for every tx queue */
1038	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1039		numvecs += adapter->num_tx_queues;
1040
1041	/* store the number of vectors reserved for queues */
1042	adapter->num_q_vectors = numvecs;
1043
1044	/* add 1 vector for link status interrupts */
1045	numvecs++;
1046	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1047					GFP_KERNEL);
1048	if (!adapter->msix_entries)
1049		goto msi_only;
1050
1051	for (i = 0; i < numvecs; i++)
1052		adapter->msix_entries[i].entry = i;
1053
1054	err = pci_enable_msix(adapter->pdev,
1055			      adapter->msix_entries,
1056			      numvecs);
1057	if (err == 0)
1058		goto out;
1059
1060	igb_reset_interrupt_capability(adapter);
1061
1062	/* If we can't do MSI-X, try MSI */
1063msi_only:
1064#ifdef CONFIG_PCI_IOV
1065	/* disable SR-IOV for non MSI-X configurations */
1066	if (adapter->vf_data) {
1067		struct e1000_hw *hw = &adapter->hw;
1068		/* disable iov and allow time for transactions to clear */
1069		pci_disable_sriov(adapter->pdev);
1070		msleep(500);
1071
1072		kfree(adapter->vf_data);
1073		adapter->vf_data = NULL;
1074		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1075		wrfl();
1076		msleep(100);
1077		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1078	}
1079#endif
1080	adapter->vfs_allocated_count = 0;
1081	adapter->rss_queues = 1;
1082	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1083	adapter->num_rx_queues = 1;
1084	adapter->num_tx_queues = 1;
1085	adapter->num_q_vectors = 1;
1086	if (!pci_enable_msi(adapter->pdev))
1087		adapter->flags |= IGB_FLAG_HAS_MSI;
1088out:
1089	/* Notify the stack of the (possibly) reduced queue counts. */
1090	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1091	return netif_set_real_num_rx_queues(adapter->netdev,
1092					    adapter->num_rx_queues);
1093}
1094
1095/**
1096 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1097 * @adapter: board private structure to initialize
1098 *
1099 * We allocate one q_vector per queue interrupt.  If allocation fails we
1100 * return -ENOMEM.
1101 **/
1102static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1103{
1104	struct igb_q_vector *q_vector;
1105	struct e1000_hw *hw = &adapter->hw;
1106	int v_idx;
1107	int orig_node = adapter->node;
1108
1109	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1110		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1111						adapter->num_tx_queues)) &&
1112		    (adapter->num_rx_queues == v_idx))
1113			adapter->node = orig_node;
1114		if (orig_node == -1) {
1115			int cur_node = next_online_node(adapter->node);
1116			if (cur_node == MAX_NUMNODES)
1117				cur_node = first_online_node;
1118			adapter->node = cur_node;
1119		}
1120		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1121					adapter->node);
1122		if (!q_vector)
1123			q_vector = kzalloc(sizeof(struct igb_q_vector),
1124					   GFP_KERNEL);
1125		if (!q_vector)
1126			goto err_out;
1127		q_vector->adapter = adapter;
1128		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1129		q_vector->itr_val = IGB_START_ITR;
1130		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1131		adapter->q_vector[v_idx] = q_vector;
1132	}
1133	/* Restore the adapter's original node */
1134	adapter->node = orig_node;
1135
1136	return 0;
1137
1138err_out:
1139	/* Restore the adapter's original node */
1140	adapter->node = orig_node;
1141	igb_free_q_vectors(adapter);
1142	return -ENOMEM;
1143}
1144
1145static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1146                                      int ring_idx, int v_idx)
1147{
1148	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1149
1150	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1151	q_vector->rx.ring->q_vector = q_vector;
1152	q_vector->rx.count++;
1153	q_vector->itr_val = adapter->rx_itr_setting;
1154	if (q_vector->itr_val && q_vector->itr_val <= 3)
1155		q_vector->itr_val = IGB_START_ITR;
1156}
1157
1158static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1159                                      int ring_idx, int v_idx)
1160{
1161	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1162
1163	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1164	q_vector->tx.ring->q_vector = q_vector;
1165	q_vector->tx.count++;
1166	q_vector->itr_val = adapter->tx_itr_setting;
1167	q_vector->tx.work_limit = adapter->tx_work_limit;
1168	if (q_vector->itr_val && q_vector->itr_val <= 3)
1169		q_vector->itr_val = IGB_START_ITR;
1170}
1171
1172/**
1173 * igb_map_ring_to_vector - maps allocated queues to vectors
1174 *
1175 * This function maps the recently allocated queues to vectors.
1176 **/
1177static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1178{
1179	int i;
1180	int v_idx = 0;
1181
1182	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1183	    (adapter->num_q_vectors < adapter->num_tx_queues))
1184		return -ENOMEM;
1185
1186	if (adapter->num_q_vectors >=
1187	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1188		for (i = 0; i < adapter->num_rx_queues; i++)
1189			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1190		for (i = 0; i < adapter->num_tx_queues; i++)
1191			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1192	} else {
1193		for (i = 0; i < adapter->num_rx_queues; i++) {
1194			if (i < adapter->num_tx_queues)
1195				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1196			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1197		}
1198		for (; i < adapter->num_tx_queues; i++)
1199			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1200	}
1201	return 0;
1202}
1203
1204/**
1205 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1206 *
1207 * This function initializes the interrupts and allocates all of the queues.
1208 **/
1209static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1210{
1211	struct pci_dev *pdev = adapter->pdev;
1212	int err;
1213
1214	err = igb_set_interrupt_capability(adapter);
1215	if (err)
1216		return err;
1217
1218	err = igb_alloc_q_vectors(adapter);
1219	if (err) {
1220		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1221		goto err_alloc_q_vectors;
1222	}
1223
1224	err = igb_alloc_queues(adapter);
1225	if (err) {
1226		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1227		goto err_alloc_queues;
1228	}
1229
1230	err = igb_map_ring_to_vector(adapter);
1231	if (err) {
1232		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1233		goto err_map_queues;
1234	}
1235
1236
1237	return 0;
1238err_map_queues:
1239	igb_free_queues(adapter);
1240err_alloc_queues:
1241	igb_free_q_vectors(adapter);
1242err_alloc_q_vectors:
1243	igb_reset_interrupt_capability(adapter);
1244	return err;
1245}
1246
1247/**
1248 * igb_request_irq - initialize interrupts
1249 *
1250 * Attempts to configure interrupts using the best available
1251 * capabilities of the hardware and kernel.
1252 **/
1253static int igb_request_irq(struct igb_adapter *adapter)
1254{
1255	struct net_device *netdev = adapter->netdev;
1256	struct pci_dev *pdev = adapter->pdev;
1257	int err = 0;
1258
1259	if (adapter->msix_entries) {
1260		err = igb_request_msix(adapter);
1261		if (!err)
1262			goto request_done;
1263		/* fall back to MSI */
1264		igb_clear_interrupt_scheme(adapter);
1265		if (!pci_enable_msi(pdev))
1266			adapter->flags |= IGB_FLAG_HAS_MSI;
1267		igb_free_all_tx_resources(adapter);
1268		igb_free_all_rx_resources(adapter);
1269		adapter->num_tx_queues = 1;
1270		adapter->num_rx_queues = 1;
1271		adapter->num_q_vectors = 1;
1272		err = igb_alloc_q_vectors(adapter);
1273		if (err) {
1274			dev_err(&pdev->dev,
1275			        "Unable to allocate memory for vectors\n");
1276			goto request_done;
1277		}
1278		err = igb_alloc_queues(adapter);
1279		if (err) {
1280			dev_err(&pdev->dev,
1281			        "Unable to allocate memory for queues\n");
1282			igb_free_q_vectors(adapter);
1283			goto request_done;
1284		}
1285		igb_setup_all_tx_resources(adapter);
1286		igb_setup_all_rx_resources(adapter);
1287	}
1288
1289	igb_assign_vector(adapter->q_vector[0], 0);
1290
1291	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1292		err = request_irq(pdev->irq, igb_intr_msi, 0,
1293				  netdev->name, adapter);
1294		if (!err)
1295			goto request_done;
1296
1297		/* fall back to legacy interrupts */
1298		igb_reset_interrupt_capability(adapter);
1299		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1300	}
1301
1302	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1303			  netdev->name, adapter);
1304
1305	if (err)
1306		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1307			err);
1308
1309request_done:
1310	return err;
1311}
1312
1313static void igb_free_irq(struct igb_adapter *adapter)
1314{
1315	if (adapter->msix_entries) {
1316		int vector = 0, i;
1317
1318		free_irq(adapter->msix_entries[vector++].vector, adapter);
1319
1320		for (i = 0; i < adapter->num_q_vectors; i++)
1321			free_irq(adapter->msix_entries[vector++].vector,
1322				 adapter->q_vector[i]);
1323	} else {
1324		free_irq(adapter->pdev->irq, adapter);
1325	}
1326}
1327
1328/**
1329 * igb_irq_disable - Mask off interrupt generation on the NIC
1330 * @adapter: board private structure
1331 **/
1332static void igb_irq_disable(struct igb_adapter *adapter)
1333{
1334	struct e1000_hw *hw = &adapter->hw;
1335
1336	/*
1337	 * we need to be careful when disabling interrupts.  The VFs are also
1338	 * mapped into these registers and so clearing the bits can cause
1339	 * issues on the VF drivers so we only need to clear what we set
1340	 */
1341	if (adapter->msix_entries) {
1342		u32 regval = rd32(E1000_EIAM);
1343		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1344		wr32(E1000_EIMC, adapter->eims_enable_mask);
1345		regval = rd32(E1000_EIAC);
1346		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1347	}
1348
1349	wr32(E1000_IAM, 0);
1350	wr32(E1000_IMC, ~0);
1351	wrfl();
1352	if (adapter->msix_entries) {
1353		int i;
1354		for (i = 0; i < adapter->num_q_vectors; i++)
1355			synchronize_irq(adapter->msix_entries[i].vector);
1356	} else {
1357		synchronize_irq(adapter->pdev->irq);
1358	}
1359}
1360
1361/**
1362 * igb_irq_enable - Enable default interrupt generation settings
1363 * @adapter: board private structure
1364 **/
1365static void igb_irq_enable(struct igb_adapter *adapter)
1366{
1367	struct e1000_hw *hw = &adapter->hw;
1368
1369	if (adapter->msix_entries) {
1370		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1371		u32 regval = rd32(E1000_EIAC);
1372		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1373		regval = rd32(E1000_EIAM);
1374		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1375		wr32(E1000_EIMS, adapter->eims_enable_mask);
1376		if (adapter->vfs_allocated_count) {
1377			wr32(E1000_MBVFIMR, 0xFF);
1378			ims |= E1000_IMS_VMMB;
1379		}
1380		if (adapter->hw.mac.type == e1000_82580)
1381			ims |= E1000_IMS_DRSTA;
1382
1383		wr32(E1000_IMS, ims);
1384	} else {
1385		wr32(E1000_IMS, IMS_ENABLE_MASK |
1386				E1000_IMS_DRSTA);
1387		wr32(E1000_IAM, IMS_ENABLE_MASK |
1388				E1000_IMS_DRSTA);
1389	}
1390}
1391
1392static void igb_update_mng_vlan(struct igb_adapter *adapter)
1393{
1394	struct e1000_hw *hw = &adapter->hw;
1395	u16 vid = adapter->hw.mng_cookie.vlan_id;
1396	u16 old_vid = adapter->mng_vlan_id;
1397
1398	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1399		/* add VID to filter table */
1400		igb_vfta_set(hw, vid, true);
1401		adapter->mng_vlan_id = vid;
1402	} else {
1403		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1404	}
1405
1406	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1407	    (vid != old_vid) &&
1408	    !test_bit(old_vid, adapter->active_vlans)) {
1409		/* remove VID from filter table */
1410		igb_vfta_set(hw, old_vid, false);
1411	}
1412}
1413
1414/**
1415 * igb_release_hw_control - release control of the h/w to f/w
1416 * @adapter: address of board private structure
1417 *
1418 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1419 * For ASF and Pass Through versions of f/w this means that the
1420 * driver is no longer loaded.
1421 *
1422 **/
1423static void igb_release_hw_control(struct igb_adapter *adapter)
1424{
1425	struct e1000_hw *hw = &adapter->hw;
1426	u32 ctrl_ext;
1427
1428	/* Let firmware take over control of h/w */
1429	ctrl_ext = rd32(E1000_CTRL_EXT);
1430	wr32(E1000_CTRL_EXT,
1431			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1432}
1433
1434/**
1435 * igb_get_hw_control - get control of the h/w from f/w
1436 * @adapter: address of board private structure
1437 *
1438 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1439 * For ASF and Pass Through versions of f/w this means that
1440 * the driver is loaded.
1441 *
1442 **/
1443static void igb_get_hw_control(struct igb_adapter *adapter)
1444{
1445	struct e1000_hw *hw = &adapter->hw;
1446	u32 ctrl_ext;
1447
1448	/* Let firmware know the driver has taken over */
1449	ctrl_ext = rd32(E1000_CTRL_EXT);
1450	wr32(E1000_CTRL_EXT,
1451			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1452}
1453
1454/**
1455 * igb_configure - configure the hardware for RX and TX
1456 * @adapter: private board structure
1457 **/
1458static void igb_configure(struct igb_adapter *adapter)
1459{
1460	struct net_device *netdev = adapter->netdev;
1461	int i;
1462
1463	igb_get_hw_control(adapter);
1464	igb_set_rx_mode(netdev);
1465
1466	igb_restore_vlan(adapter);
1467
1468	igb_setup_tctl(adapter);
1469	igb_setup_mrqc(adapter);
1470	igb_setup_rctl(adapter);
1471
1472	igb_configure_tx(adapter);
1473	igb_configure_rx(adapter);
1474
1475	igb_rx_fifo_flush_82575(&adapter->hw);
1476
1477	/* call igb_desc_unused which always leaves
1478	 * at least 1 descriptor unused to make sure
1479	 * next_to_use != next_to_clean */
1480	for (i = 0; i < adapter->num_rx_queues; i++) {
1481		struct igb_ring *ring = adapter->rx_ring[i];
1482		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1483	}
1484}
1485
1486/**
1487 * igb_power_up_link - Power up the phy/serdes link
1488 * @adapter: address of board private structure
1489 **/
1490void igb_power_up_link(struct igb_adapter *adapter)
1491{
1492	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1493		igb_power_up_phy_copper(&adapter->hw);
1494	else
1495		igb_power_up_serdes_link_82575(&adapter->hw);
1496}
1497
1498/**
1499 * igb_power_down_link - Power down the phy/serdes link
1500 * @adapter: address of board private structure
1501 */
1502static void igb_power_down_link(struct igb_adapter *adapter)
1503{
1504	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1505		igb_power_down_phy_copper_82575(&adapter->hw);
1506	else
1507		igb_shutdown_serdes_link_82575(&adapter->hw);
1508}
1509
1510/**
1511 * igb_up - Open the interface and prepare it to handle traffic
1512 * @adapter: board private structure
1513 **/
1514int igb_up(struct igb_adapter *adapter)
1515{
1516	struct e1000_hw *hw = &adapter->hw;
1517	int i;
1518
1519	/* hardware has been reset, we need to reload some things */
1520	igb_configure(adapter);
1521
1522	clear_bit(__IGB_DOWN, &adapter->state);
1523
1524	for (i = 0; i < adapter->num_q_vectors; i++)
1525		napi_enable(&(adapter->q_vector[i]->napi));
1526
1527	if (adapter->msix_entries)
1528		igb_configure_msix(adapter);
1529	else
1530		igb_assign_vector(adapter->q_vector[0], 0);
1531
1532	/* Clear any pending interrupts. */
1533	rd32(E1000_ICR);
1534	igb_irq_enable(adapter);
1535
1536	/* notify VFs that reset has been completed */
1537	if (adapter->vfs_allocated_count) {
1538		u32 reg_data = rd32(E1000_CTRL_EXT);
1539		reg_data |= E1000_CTRL_EXT_PFRSTD;
1540		wr32(E1000_CTRL_EXT, reg_data);
1541	}
1542
1543	netif_tx_start_all_queues(adapter->netdev);
1544
1545	/* start the watchdog. */
1546	hw->mac.get_link_status = 1;
1547	schedule_work(&adapter->watchdog_task);
1548
1549	return 0;
1550}
1551
1552void igb_down(struct igb_adapter *adapter)
1553{
1554	struct net_device *netdev = adapter->netdev;
1555	struct e1000_hw *hw = &adapter->hw;
1556	u32 tctl, rctl;
1557	int i;
1558
1559	/* signal that we're down so the interrupt handler does not
1560	 * reschedule our watchdog timer */
1561	set_bit(__IGB_DOWN, &adapter->state);
1562
1563	/* disable receives in the hardware */
1564	rctl = rd32(E1000_RCTL);
1565	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1566	/* flush and sleep below */
1567
1568	netif_tx_stop_all_queues(netdev);
1569
1570	/* disable transmits in the hardware */
1571	tctl = rd32(E1000_TCTL);
1572	tctl &= ~E1000_TCTL_EN;
1573	wr32(E1000_TCTL, tctl);
1574	/* flush both disables and wait for them to finish */
1575	wrfl();
1576	msleep(10);
1577
1578	for (i = 0; i < adapter->num_q_vectors; i++)
1579		napi_disable(&(adapter->q_vector[i]->napi));
1580
1581	igb_irq_disable(adapter);
1582
1583	del_timer_sync(&adapter->watchdog_timer);
1584	del_timer_sync(&adapter->phy_info_timer);
1585
1586	netif_carrier_off(netdev);
1587
1588	/* record the stats before reset*/
1589	spin_lock(&adapter->stats64_lock);
1590	igb_update_stats(adapter, &adapter->stats64);
1591	spin_unlock(&adapter->stats64_lock);
1592
1593	adapter->link_speed = 0;
1594	adapter->link_duplex = 0;
1595
1596	if (!pci_channel_offline(adapter->pdev))
1597		igb_reset(adapter);
1598	igb_clean_all_tx_rings(adapter);
1599	igb_clean_all_rx_rings(adapter);
1600#ifdef CONFIG_IGB_DCA
1601
1602	/* since we reset the hardware DCA settings were cleared */
1603	igb_setup_dca(adapter);
1604#endif
1605}
1606
1607void igb_reinit_locked(struct igb_adapter *adapter)
1608{
1609	WARN_ON(in_interrupt());
1610	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1611		msleep(1);
1612	igb_down(adapter);
1613	igb_up(adapter);
1614	clear_bit(__IGB_RESETTING, &adapter->state);
1615}
1616
1617void igb_reset(struct igb_adapter *adapter)
1618{
1619	struct pci_dev *pdev = adapter->pdev;
1620	struct e1000_hw *hw = &adapter->hw;
1621	struct e1000_mac_info *mac = &hw->mac;
1622	struct e1000_fc_info *fc = &hw->fc;
1623	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1624	u16 hwm;
1625
1626	/* Repartition Pba for greater than 9k mtu
1627	 * To take effect CTRL.RST is required.
1628	 */
1629	switch (mac->type) {
1630	case e1000_i350:
1631	case e1000_82580:
1632		pba = rd32(E1000_RXPBS);
1633		pba = igb_rxpbs_adjust_82580(pba);
1634		break;
1635	case e1000_82576:
1636		pba = rd32(E1000_RXPBS);
1637		pba &= E1000_RXPBS_SIZE_MASK_82576;
1638		break;
1639	case e1000_82575:
1640	default:
1641		pba = E1000_PBA_34K;
1642		break;
1643	}
1644
1645	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1646	    (mac->type < e1000_82576)) {
1647		/* adjust PBA for jumbo frames */
1648		wr32(E1000_PBA, pba);
1649
1650		/* To maintain wire speed transmits, the Tx FIFO should be
1651		 * large enough to accommodate two full transmit packets,
1652		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1653		 * the Rx FIFO should be large enough to accommodate at least
1654		 * one full receive packet and is similarly rounded up and
1655		 * expressed in KB. */
1656		pba = rd32(E1000_PBA);
1657		/* upper 16 bits has Tx packet buffer allocation size in KB */
1658		tx_space = pba >> 16;
1659		/* lower 16 bits has Rx packet buffer allocation size in KB */
1660		pba &= 0xffff;
1661		/* the tx fifo also stores 16 bytes of information about the tx
1662		 * but don't include ethernet FCS because hardware appends it */
1663		min_tx_space = (adapter->max_frame_size +
1664				sizeof(union e1000_adv_tx_desc) -
1665				ETH_FCS_LEN) * 2;
1666		min_tx_space = ALIGN(min_tx_space, 1024);
1667		min_tx_space >>= 10;
1668		/* software strips receive CRC, so leave room for it */
1669		min_rx_space = adapter->max_frame_size;
1670		min_rx_space = ALIGN(min_rx_space, 1024);
1671		min_rx_space >>= 10;
1672
1673		/* If current Tx allocation is less than the min Tx FIFO size,
1674		 * and the min Tx FIFO size is less than the current Rx FIFO
1675		 * allocation, take space away from current Rx allocation */
1676		if (tx_space < min_tx_space &&
1677		    ((min_tx_space - tx_space) < pba)) {
1678			pba = pba - (min_tx_space - tx_space);
1679
1680			/* if short on rx space, rx wins and must trump tx
1681			 * adjustment */
1682			if (pba < min_rx_space)
1683				pba = min_rx_space;
1684		}
1685		wr32(E1000_PBA, pba);
1686	}
1687
1688	/* flow control settings */
1689	/* The high water mark must be low enough to fit one full frame
1690	 * (or the size used for early receive) above it in the Rx FIFO.
1691	 * Set it to the lower of:
1692	 * - 90% of the Rx FIFO size, or
1693	 * - the full Rx FIFO size minus one full frame */
1694	hwm = min(((pba << 10) * 9 / 10),
1695			((pba << 10) - 2 * adapter->max_frame_size));
1696
1697	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1698	fc->low_water = fc->high_water - 16;
1699	fc->pause_time = 0xFFFF;
1700	fc->send_xon = 1;
1701	fc->current_mode = fc->requested_mode;
1702
1703	/* disable receive for all VFs and wait one second */
1704	if (adapter->vfs_allocated_count) {
1705		int i;
1706		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1707			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1708
1709		/* ping all the active vfs to let them know we are going down */
1710		igb_ping_all_vfs(adapter);
1711
1712		/* disable transmits and receives */
1713		wr32(E1000_VFRE, 0);
1714		wr32(E1000_VFTE, 0);
1715	}
1716
1717	/* Allow time for pending master requests to run */
1718	hw->mac.ops.reset_hw(hw);
1719	wr32(E1000_WUC, 0);
1720
1721	if (hw->mac.ops.init_hw(hw))
1722		dev_err(&pdev->dev, "Hardware Error\n");
1723	if (hw->mac.type > e1000_82580) {
1724		if (adapter->flags & IGB_FLAG_DMAC) {
1725			u32 reg;
1726
1727			/*
1728			 * DMA Coalescing high water mark needs to be higher
1729			 * than * the * Rx threshold.  The Rx threshold is
1730			 * currently * pba - 6, so we * should use a high water
1731			 * mark of pba * - 4. */
1732			hwm = (pba - 4) << 10;
1733
1734			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1735			       & E1000_DMACR_DMACTHR_MASK);
1736
1737			/* transition to L0x or L1 if available..*/
1738			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1739
1740			/* watchdog timer= +-1000 usec in 32usec intervals */
1741			reg |= (1000 >> 5);
1742			wr32(E1000_DMACR, reg);
1743
1744			/* no lower threshold to disable coalescing(smart fifb)
1745			 * -UTRESH=0*/
1746			wr32(E1000_DMCRTRH, 0);
1747
1748			/* set hwm to PBA -  2 * max frame size */
1749			wr32(E1000_FCRTC, hwm);
1750
1751			/*
1752			 * This sets the time to wait before requesting tran-
1753			 * sition to * low power state to number of usecs needed
1754			 * to receive 1 512 * byte frame at gigabit line rate
1755			 */
1756			reg = rd32(E1000_DMCTLX);
1757			reg |= IGB_DMCTLX_DCFLUSH_DIS;
1758
1759			/* Delay 255 usec before entering Lx state. */
1760			reg |= 0xFF;
1761			wr32(E1000_DMCTLX, reg);
1762
1763			/* free space in Tx packet buffer to wake from DMAC */
1764			wr32(E1000_DMCTXTH,
1765			     (IGB_MIN_TXPBSIZE -
1766			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
1767			     >> 6);
1768
1769			/* make low power state decision controlled by DMAC */
1770			reg = rd32(E1000_PCIEMISC);
1771			reg |= E1000_PCIEMISC_LX_DECISION;
1772			wr32(E1000_PCIEMISC, reg);
1773		} /* end if IGB_FLAG_DMAC set */
1774	}
1775	if (hw->mac.type == e1000_82580) {
1776		u32 reg = rd32(E1000_PCIEMISC);
1777		wr32(E1000_PCIEMISC,
1778		                reg & ~E1000_PCIEMISC_LX_DECISION);
1779	}
1780	if (!netif_running(adapter->netdev))
1781		igb_power_down_link(adapter);
1782
1783	igb_update_mng_vlan(adapter);
1784
1785	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1786	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1787
1788	igb_get_phy_info(hw);
1789}
1790
1791static u32 igb_fix_features(struct net_device *netdev, u32 features)
1792{
1793	/*
1794	 * Since there is no support for separate rx/tx vlan accel
1795	 * enable/disable make sure tx flag is always in same state as rx.
1796	 */
1797	if (features & NETIF_F_HW_VLAN_RX)
1798		features |= NETIF_F_HW_VLAN_TX;
1799	else
1800		features &= ~NETIF_F_HW_VLAN_TX;
1801
1802	return features;
1803}
1804
1805static int igb_set_features(struct net_device *netdev, u32 features)
1806{
1807	u32 changed = netdev->features ^ features;
1808
1809	if (changed & NETIF_F_HW_VLAN_RX)
1810		igb_vlan_mode(netdev, features);
1811
1812	return 0;
1813}
1814
1815static const struct net_device_ops igb_netdev_ops = {
1816	.ndo_open		= igb_open,
1817	.ndo_stop		= igb_close,
1818	.ndo_start_xmit		= igb_xmit_frame,
1819	.ndo_get_stats64	= igb_get_stats64,
1820	.ndo_set_rx_mode	= igb_set_rx_mode,
1821	.ndo_set_mac_address	= igb_set_mac,
1822	.ndo_change_mtu		= igb_change_mtu,
1823	.ndo_do_ioctl		= igb_ioctl,
1824	.ndo_tx_timeout		= igb_tx_timeout,
1825	.ndo_validate_addr	= eth_validate_addr,
1826	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1827	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1828	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1829	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1830	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1831	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1832#ifdef CONFIG_NET_POLL_CONTROLLER
1833	.ndo_poll_controller	= igb_netpoll,
1834#endif
1835	.ndo_fix_features	= igb_fix_features,
1836	.ndo_set_features	= igb_set_features,
1837};
1838
1839/**
1840 * igb_probe - Device Initialization Routine
1841 * @pdev: PCI device information struct
1842 * @ent: entry in igb_pci_tbl
1843 *
1844 * Returns 0 on success, negative on failure
1845 *
1846 * igb_probe initializes an adapter identified by a pci_dev structure.
1847 * The OS initialization, configuring of the adapter private structure,
1848 * and a hardware reset occur.
1849 **/
1850static int __devinit igb_probe(struct pci_dev *pdev,
1851			       const struct pci_device_id *ent)
1852{
1853	struct net_device *netdev;
1854	struct igb_adapter *adapter;
1855	struct e1000_hw *hw;
1856	u16 eeprom_data = 0;
1857	s32 ret_val;
1858	static int global_quad_port_a; /* global quad port a indication */
1859	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1860	unsigned long mmio_start, mmio_len;
1861	int err, pci_using_dac;
1862	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1863	u8 part_str[E1000_PBANUM_LENGTH];
1864
1865	/* Catch broken hardware that put the wrong VF device ID in
1866	 * the PCIe SR-IOV capability.
1867	 */
1868	if (pdev->is_virtfn) {
1869		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1870		     pci_name(pdev), pdev->vendor, pdev->device);
1871		return -EINVAL;
1872	}
1873
1874	err = pci_enable_device_mem(pdev);
1875	if (err)
1876		return err;
1877
1878	pci_using_dac = 0;
1879	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1880	if (!err) {
1881		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1882		if (!err)
1883			pci_using_dac = 1;
1884	} else {
1885		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1886		if (err) {
1887			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1888			if (err) {
1889				dev_err(&pdev->dev, "No usable DMA "
1890					"configuration, aborting\n");
1891				goto err_dma;
1892			}
1893		}
1894	}
1895
1896	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1897	                                   IORESOURCE_MEM),
1898	                                   igb_driver_name);
1899	if (err)
1900		goto err_pci_reg;
1901
1902	pci_enable_pcie_error_reporting(pdev);
1903
1904	pci_set_master(pdev);
1905	pci_save_state(pdev);
1906
1907	err = -ENOMEM;
1908	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1909				   IGB_MAX_TX_QUEUES);
1910	if (!netdev)
1911		goto err_alloc_etherdev;
1912
1913	SET_NETDEV_DEV(netdev, &pdev->dev);
1914
1915	pci_set_drvdata(pdev, netdev);
1916	adapter = netdev_priv(netdev);
1917	adapter->netdev = netdev;
1918	adapter->pdev = pdev;
1919	hw = &adapter->hw;
1920	hw->back = adapter;
1921	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1922
1923	mmio_start = pci_resource_start(pdev, 0);
1924	mmio_len = pci_resource_len(pdev, 0);
1925
1926	err = -EIO;
1927	hw->hw_addr = ioremap(mmio_start, mmio_len);
1928	if (!hw->hw_addr)
1929		goto err_ioremap;
1930
1931	netdev->netdev_ops = &igb_netdev_ops;
1932	igb_set_ethtool_ops(netdev);
1933	netdev->watchdog_timeo = 5 * HZ;
1934
1935	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1936
1937	netdev->mem_start = mmio_start;
1938	netdev->mem_end = mmio_start + mmio_len;
1939
1940	/* PCI config space info */
1941	hw->vendor_id = pdev->vendor;
1942	hw->device_id = pdev->device;
1943	hw->revision_id = pdev->revision;
1944	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1945	hw->subsystem_device_id = pdev->subsystem_device;
1946
1947	/* Copy the default MAC, PHY and NVM function pointers */
1948	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1949	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1950	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1951	/* Initialize skew-specific constants */
1952	err = ei->get_invariants(hw);
1953	if (err)
1954		goto err_sw_init;
1955
1956	/* setup the private structure */
1957	err = igb_sw_init(adapter);
1958	if (err)
1959		goto err_sw_init;
1960
1961	igb_get_bus_info_pcie(hw);
1962
1963	hw->phy.autoneg_wait_to_complete = false;
1964
1965	/* Copper options */
1966	if (hw->phy.media_type == e1000_media_type_copper) {
1967		hw->phy.mdix = AUTO_ALL_MODES;
1968		hw->phy.disable_polarity_correction = false;
1969		hw->phy.ms_type = e1000_ms_hw_default;
1970	}
1971
1972	if (igb_check_reset_block(hw))
1973		dev_info(&pdev->dev,
1974			"PHY reset is blocked due to SOL/IDER session.\n");
1975
1976	/*
1977	 * features is initialized to 0 in allocation, it might have bits
1978	 * set by igb_sw_init so we should use an or instead of an
1979	 * assignment.
1980	 */
1981	netdev->features |= NETIF_F_SG |
1982			    NETIF_F_IP_CSUM |
1983			    NETIF_F_IPV6_CSUM |
1984			    NETIF_F_TSO |
1985			    NETIF_F_TSO6 |
1986			    NETIF_F_RXHASH |
1987			    NETIF_F_RXCSUM |
1988			    NETIF_F_HW_VLAN_RX |
1989			    NETIF_F_HW_VLAN_TX;
1990
1991	/* copy netdev features into list of user selectable features */
1992	netdev->hw_features |= netdev->features;
1993
1994	/* set this bit last since it cannot be part of hw_features */
1995	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1996
1997	netdev->vlan_features |= NETIF_F_TSO |
1998				 NETIF_F_TSO6 |
1999				 NETIF_F_IP_CSUM |
2000				 NETIF_F_IPV6_CSUM |
2001				 NETIF_F_SG;
2002
2003	if (pci_using_dac) {
2004		netdev->features |= NETIF_F_HIGHDMA;
2005		netdev->vlan_features |= NETIF_F_HIGHDMA;
2006	}
2007
2008	if (hw->mac.type >= e1000_82576) {
2009		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2010		netdev->features |= NETIF_F_SCTP_CSUM;
2011	}
2012
2013	netdev->priv_flags |= IFF_UNICAST_FLT;
2014
2015	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2016
2017	/* before reading the NVM, reset the controller to put the device in a
2018	 * known good starting state */
2019	hw->mac.ops.reset_hw(hw);
2020
2021	/* make sure the NVM is good */
2022	if (hw->nvm.ops.validate(hw) < 0) {
2023		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2024		err = -EIO;
2025		goto err_eeprom;
2026	}
2027
2028	/* copy the MAC address out of the NVM */
2029	if (hw->mac.ops.read_mac_addr(hw))
2030		dev_err(&pdev->dev, "NVM Read Error\n");
2031
2032	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2033	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2034
2035	if (!is_valid_ether_addr(netdev->perm_addr)) {
2036		dev_err(&pdev->dev, "Invalid MAC Address\n");
2037		err = -EIO;
2038		goto err_eeprom;
2039	}
2040
2041	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2042	            (unsigned long) adapter);
2043	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2044	            (unsigned long) adapter);
2045
2046	INIT_WORK(&adapter->reset_task, igb_reset_task);
2047	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2048
2049	/* Initialize link properties that are user-changeable */
2050	adapter->fc_autoneg = true;
2051	hw->mac.autoneg = true;
2052	hw->phy.autoneg_advertised = 0x2f;
2053
2054	hw->fc.requested_mode = e1000_fc_default;
2055	hw->fc.current_mode = e1000_fc_default;
2056
2057	igb_validate_mdi_setting(hw);
2058
2059	/* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2060	 * enable the ACPI Magic Packet filter
2061	 */
2062
2063	if (hw->bus.func == 0)
2064		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2065	else if (hw->mac.type >= e1000_82580)
2066		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2067		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2068		                 &eeprom_data);
2069	else if (hw->bus.func == 1)
2070		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2071
2072	if (eeprom_data & eeprom_apme_mask)
2073		adapter->eeprom_wol |= E1000_WUFC_MAG;
2074
2075	/* now that we have the eeprom settings, apply the special cases where
2076	 * the eeprom may be wrong or the board simply won't support wake on
2077	 * lan on a particular port */
2078	switch (pdev->device) {
2079	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2080		adapter->eeprom_wol = 0;
2081		break;
2082	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2083	case E1000_DEV_ID_82576_FIBER:
2084	case E1000_DEV_ID_82576_SERDES:
2085		/* Wake events only supported on port A for dual fiber
2086		 * regardless of eeprom setting */
2087		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2088			adapter->eeprom_wol = 0;
2089		break;
2090	case E1000_DEV_ID_82576_QUAD_COPPER:
2091	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2092		/* if quad port adapter, disable WoL on all but port A */
2093		if (global_quad_port_a != 0)
2094			adapter->eeprom_wol = 0;
2095		else
2096			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2097		/* Reset for multiple quad port adapters */
2098		if (++global_quad_port_a == 4)
2099			global_quad_port_a = 0;
2100		break;
2101	}
2102
2103	/* initialize the wol settings based on the eeprom settings */
2104	adapter->wol = adapter->eeprom_wol;
2105	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2106
2107	/* reset the hardware with the new settings */
2108	igb_reset(adapter);
2109
2110	/* let the f/w know that the h/w is now under the control of the
2111	 * driver. */
2112	igb_get_hw_control(adapter);
2113
2114	strcpy(netdev->name, "eth%d");
2115	err = register_netdev(netdev);
2116	if (err)
2117		goto err_register;
2118
2119	/* carrier off reporting is important to ethtool even BEFORE open */
2120	netif_carrier_off(netdev);
2121
2122#ifdef CONFIG_IGB_DCA
2123	if (dca_add_requester(&pdev->dev) == 0) {
2124		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2125		dev_info(&pdev->dev, "DCA enabled\n");
2126		igb_setup_dca(adapter);
2127	}
2128
2129#endif
2130	/* do hw tstamp init after resetting */
2131	igb_init_hw_timer(adapter);
2132
2133	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2134	/* print bus type/speed/width info */
2135	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2136		 netdev->name,
2137		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2138		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2139		                                            "unknown"),
2140		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2141		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2142		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2143		   "unknown"),
2144		 netdev->dev_addr);
2145
2146	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2147	if (ret_val)
2148		strcpy(part_str, "Unknown");
2149	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2150	dev_info(&pdev->dev,
2151		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2152		adapter->msix_entries ? "MSI-X" :
2153		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2154		adapter->num_rx_queues, adapter->num_tx_queues);
2155	switch (hw->mac.type) {
2156	case e1000_i350:
2157		igb_set_eee_i350(hw);
2158		break;
2159	default:
2160		break;
2161	}
2162	return 0;
2163
2164err_register:
2165	igb_release_hw_control(adapter);
2166err_eeprom:
2167	if (!igb_check_reset_block(hw))
2168		igb_reset_phy(hw);
2169
2170	if (hw->flash_address)
2171		iounmap(hw->flash_address);
2172err_sw_init:
2173	igb_clear_interrupt_scheme(adapter);
2174	iounmap(hw->hw_addr);
2175err_ioremap:
2176	free_netdev(netdev);
2177err_alloc_etherdev:
2178	pci_release_selected_regions(pdev,
2179	                             pci_select_bars(pdev, IORESOURCE_MEM));
2180err_pci_reg:
2181err_dma:
2182	pci_disable_device(pdev);
2183	return err;
2184}
2185
2186/**
2187 * igb_remove - Device Removal Routine
2188 * @pdev: PCI device information struct
2189 *
2190 * igb_remove is called by the PCI subsystem to alert the driver
2191 * that it should release a PCI device.  The could be caused by a
2192 * Hot-Plug event, or because the driver is going to be removed from
2193 * memory.
2194 **/
2195static void __devexit igb_remove(struct pci_dev *pdev)
2196{
2197	struct net_device *netdev = pci_get_drvdata(pdev);
2198	struct igb_adapter *adapter = netdev_priv(netdev);
2199	struct e1000_hw *hw = &adapter->hw;
2200
2201	/*
2202	 * The watchdog timer may be rescheduled, so explicitly
2203	 * disable watchdog from being rescheduled.
2204	 */
2205	set_bit(__IGB_DOWN, &adapter->state);
2206	del_timer_sync(&adapter->watchdog_timer);
2207	del_timer_sync(&adapter->phy_info_timer);
2208
2209	cancel_work_sync(&adapter->reset_task);
2210	cancel_work_sync(&adapter->watchdog_task);
2211
2212#ifdef CONFIG_IGB_DCA
2213	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2214		dev_info(&pdev->dev, "DCA disabled\n");
2215		dca_remove_requester(&pdev->dev);
2216		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2217		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2218	}
2219#endif
2220
2221	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2222	 * would have already happened in close and is redundant. */
2223	igb_release_hw_control(adapter);
2224
2225	unregister_netdev(netdev);
2226
2227	igb_clear_interrupt_scheme(adapter);
2228
2229#ifdef CONFIG_PCI_IOV
2230	/* reclaim resources allocated to VFs */
2231	if (adapter->vf_data) {
2232		/* disable iov and allow time for transactions to clear */
2233		pci_disable_sriov(pdev);
2234		msleep(500);
2235
2236		kfree(adapter->vf_data);
2237		adapter->vf_data = NULL;
2238		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2239		wrfl();
2240		msleep(100);
2241		dev_info(&pdev->dev, "IOV Disabled\n");
2242	}
2243#endif
2244
2245	iounmap(hw->hw_addr);
2246	if (hw->flash_address)
2247		iounmap(hw->flash_address);
2248	pci_release_selected_regions(pdev,
2249	                             pci_select_bars(pdev, IORESOURCE_MEM));
2250
2251	free_netdev(netdev);
2252
2253	pci_disable_pcie_error_reporting(pdev);
2254
2255	pci_disable_device(pdev);
2256}
2257
2258/**
2259 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2260 * @adapter: board private structure to initialize
2261 *
2262 * This function initializes the vf specific data storage and then attempts to
2263 * allocate the VFs.  The reason for ordering it this way is because it is much
2264 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2265 * the memory for the VFs.
2266 **/
2267static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2268{
2269#ifdef CONFIG_PCI_IOV
2270	struct pci_dev *pdev = adapter->pdev;
2271
2272	if (adapter->vfs_allocated_count) {
2273		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2274		                           sizeof(struct vf_data_storage),
2275		                           GFP_KERNEL);
2276		/* if allocation failed then we do not support SR-IOV */
2277		if (!adapter->vf_data) {
2278			adapter->vfs_allocated_count = 0;
2279			dev_err(&pdev->dev, "Unable to allocate memory for VF "
2280			        "Data Storage\n");
2281		}
2282	}
2283
2284	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2285		kfree(adapter->vf_data);
2286		adapter->vf_data = NULL;
2287#endif /* CONFIG_PCI_IOV */
2288		adapter->vfs_allocated_count = 0;
2289#ifdef CONFIG_PCI_IOV
2290	} else {
2291		unsigned char mac_addr[ETH_ALEN];
2292		int i;
2293		dev_info(&pdev->dev, "%d vfs allocated\n",
2294		         adapter->vfs_allocated_count);
2295		for (i = 0; i < adapter->vfs_allocated_count; i++) {
2296			random_ether_addr(mac_addr);
2297			igb_set_vf_mac(adapter, i, mac_addr);
2298		}
2299		/* DMA Coalescing is not supported in IOV mode. */
2300		if (adapter->flags & IGB_FLAG_DMAC)
2301			adapter->flags &= ~IGB_FLAG_DMAC;
2302	}
2303#endif /* CONFIG_PCI_IOV */
2304}
2305
2306
2307/**
2308 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2309 * @adapter: board private structure to initialize
2310 *
2311 * igb_init_hw_timer initializes the function pointer and values for the hw
2312 * timer found in hardware.
2313 **/
2314static void igb_init_hw_timer(struct igb_adapter *adapter)
2315{
2316	struct e1000_hw *hw = &adapter->hw;
2317
2318	switch (hw->mac.type) {
2319	case e1000_i350:
2320	case e1000_82580:
2321		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2322		adapter->cycles.read = igb_read_clock;
2323		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2324		adapter->cycles.mult = 1;
2325		/*
2326		 * The 82580 timesync updates the system timer every 8ns by 8ns
2327		 * and the value cannot be shifted.  Instead we need to shift
2328		 * the registers to generate a 64bit timer value.  As a result
2329		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2330		 * 24 in order to generate a larger value for synchronization.
2331		 */
2332		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2333		/* disable system timer temporarily by setting bit 31 */
2334		wr32(E1000_TSAUXC, 0x80000000);
2335		wrfl();
2336
2337		/* Set registers so that rollover occurs soon to test this. */
2338		wr32(E1000_SYSTIMR, 0x00000000);
2339		wr32(E1000_SYSTIML, 0x80000000);
2340		wr32(E1000_SYSTIMH, 0x000000FF);
2341		wrfl();
2342
2343		/* enable system timer by clearing bit 31 */
2344		wr32(E1000_TSAUXC, 0x0);
2345		wrfl();
2346
2347		timecounter_init(&adapter->clock,
2348				 &adapter->cycles,
2349				 ktime_to_ns(ktime_get_real()));
2350		/*
2351		 * Synchronize our NIC clock against system wall clock. NIC
2352		 * time stamp reading requires ~3us per sample, each sample
2353		 * was pretty stable even under load => only require 10
2354		 * samples for each offset comparison.
2355		 */
2356		memset(&adapter->compare, 0, sizeof(adapter->compare));
2357		adapter->compare.source = &adapter->clock;
2358		adapter->compare.target = ktime_get_real;
2359		adapter->compare.num_samples = 10;
2360		timecompare_update(&adapter->compare, 0);
2361		break;
2362	case e1000_82576:
2363		/*
2364		 * Initialize hardware timer: we keep it running just in case
2365		 * that some program needs it later on.
2366		 */
2367		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2368		adapter->cycles.read = igb_read_clock;
2369		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2370		adapter->cycles.mult = 1;
2371		/**
2372		 * Scale the NIC clock cycle by a large factor so that
2373		 * relatively small clock corrections can be added or
2374		 * subtracted at each clock tick. The drawbacks of a large
2375		 * factor are a) that the clock register overflows more quickly
2376		 * (not such a big deal) and b) that the increment per tick has
2377		 * to fit into 24 bits.  As a result we need to use a shift of
2378		 * 19 so we can fit a value of 16 into the TIMINCA register.
2379		 */
2380		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2381		wr32(E1000_TIMINCA,
2382		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2383		                (16 << IGB_82576_TSYNC_SHIFT));
2384
2385		/* Set registers so that rollover occurs soon to test this. */
2386		wr32(E1000_SYSTIML, 0x00000000);
2387		wr32(E1000_SYSTIMH, 0xFF800000);
2388		wrfl();
2389
2390		timecounter_init(&adapter->clock,
2391				 &adapter->cycles,
2392				 ktime_to_ns(ktime_get_real()));
2393		/*
2394		 * Synchronize our NIC clock against system wall clock. NIC
2395		 * time stamp reading requires ~3us per sample, each sample
2396		 * was pretty stable even under load => only require 10
2397		 * samples for each offset comparison.
2398		 */
2399		memset(&adapter->compare, 0, sizeof(adapter->compare));
2400		adapter->compare.source = &adapter->clock;
2401		adapter->compare.target = ktime_get_real;
2402		adapter->compare.num_samples = 10;
2403		timecompare_update(&adapter->compare, 0);
2404		break;
2405	case e1000_82575:
2406		/* 82575 does not support timesync */
2407	default:
2408		break;
2409	}
2410
2411}
2412
2413/**
2414 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2415 * @adapter: board private structure to initialize
2416 *
2417 * igb_sw_init initializes the Adapter private data structure.
2418 * Fields are initialized based on PCI device information and
2419 * OS network device settings (MTU size).
2420 **/
2421static int __devinit igb_sw_init(struct igb_adapter *adapter)
2422{
2423	struct e1000_hw *hw = &adapter->hw;
2424	struct net_device *netdev = adapter->netdev;
2425	struct pci_dev *pdev = adapter->pdev;
2426
2427	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2428
2429	/* set default ring sizes */
2430	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2431	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2432
2433	/* set default ITR values */
2434	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2435	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2436
2437	/* set default work limits */
2438	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2439
2440	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2441				  VLAN_HLEN;
2442	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2443
2444	adapter->node = -1;
2445
2446	spin_lock_init(&adapter->stats64_lock);
2447#ifdef CONFIG_PCI_IOV
2448	switch (hw->mac.type) {
2449	case e1000_82576:
2450	case e1000_i350:
2451		if (max_vfs > 7) {
2452			dev_warn(&pdev->dev,
2453				 "Maximum of 7 VFs per PF, using max\n");
2454			adapter->vfs_allocated_count = 7;
2455		} else
2456			adapter->vfs_allocated_count = max_vfs;
2457		break;
2458	default:
2459		break;
2460	}
2461#endif /* CONFIG_PCI_IOV */
2462	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2463	/* i350 cannot do RSS and SR-IOV at the same time */
2464	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2465		adapter->rss_queues = 1;
2466
2467	/*
2468	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2469	 * then we should combine the queues into a queue pair in order to
2470	 * conserve interrupts due to limited supply
2471	 */
2472	if ((adapter->rss_queues > 4) ||
2473	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2474		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2475
2476	/* This call may decrease the number of queues */
2477	if (igb_init_interrupt_scheme(adapter)) {
2478		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2479		return -ENOMEM;
2480	}
2481
2482	igb_probe_vfs(adapter);
2483
2484	/* Explicitly disable IRQ since the NIC can be in any state. */
2485	igb_irq_disable(adapter);
2486
2487	if (hw->mac.type == e1000_i350)
2488		adapter->flags &= ~IGB_FLAG_DMAC;
2489
2490	set_bit(__IGB_DOWN, &adapter->state);
2491	return 0;
2492}
2493
2494/**
2495 * igb_open - Called when a network interface is made active
2496 * @netdev: network interface device structure
2497 *
2498 * Returns 0 on success, negative value on failure
2499 *
2500 * The open entry point is called when a network interface is made
2501 * active by the system (IFF_UP).  At this point all resources needed
2502 * for transmit and receive operations are allocated, the interrupt
2503 * handler is registered with the OS, the watchdog timer is started,
2504 * and the stack is notified that the interface is ready.
2505 **/
2506static int igb_open(struct net_device *netdev)
2507{
2508	struct igb_adapter *adapter = netdev_priv(netdev);
2509	struct e1000_hw *hw = &adapter->hw;
2510	int err;
2511	int i;
2512
2513	/* disallow open during test */
2514	if (test_bit(__IGB_TESTING, &adapter->state))
2515		return -EBUSY;
2516
2517	netif_carrier_off(netdev);
2518
2519	/* allocate transmit descriptors */
2520	err = igb_setup_all_tx_resources(adapter);
2521	if (err)
2522		goto err_setup_tx;
2523
2524	/* allocate receive descriptors */
2525	err = igb_setup_all_rx_resources(adapter);
2526	if (err)
2527		goto err_setup_rx;
2528
2529	igb_power_up_link(adapter);
2530
2531	/* before we allocate an interrupt, we must be ready to handle it.
2532	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2533	 * as soon as we call pci_request_irq, so we have to setup our
2534	 * clean_rx handler before we do so.  */
2535	igb_configure(adapter);
2536
2537	err = igb_request_irq(adapter);
2538	if (err)
2539		goto err_req_irq;
2540
2541	/* From here on the code is the same as igb_up() */
2542	clear_bit(__IGB_DOWN, &adapter->state);
2543
2544	for (i = 0; i < adapter->num_q_vectors; i++)
2545		napi_enable(&(adapter->q_vector[i]->napi));
2546
2547	/* Clear any pending interrupts. */
2548	rd32(E1000_ICR);
2549
2550	igb_irq_enable(adapter);
2551
2552	/* notify VFs that reset has been completed */
2553	if (adapter->vfs_allocated_count) {
2554		u32 reg_data = rd32(E1000_CTRL_EXT);
2555		reg_data |= E1000_CTRL_EXT_PFRSTD;
2556		wr32(E1000_CTRL_EXT, reg_data);
2557	}
2558
2559	netif_tx_start_all_queues(netdev);
2560
2561	/* start the watchdog. */
2562	hw->mac.get_link_status = 1;
2563	schedule_work(&adapter->watchdog_task);
2564
2565	return 0;
2566
2567err_req_irq:
2568	igb_release_hw_control(adapter);
2569	igb_power_down_link(adapter);
2570	igb_free_all_rx_resources(adapter);
2571err_setup_rx:
2572	igb_free_all_tx_resources(adapter);
2573err_setup_tx:
2574	igb_reset(adapter);
2575
2576	return err;
2577}
2578
2579/**
2580 * igb_close - Disables a network interface
2581 * @netdev: network interface device structure
2582 *
2583 * Returns 0, this is not allowed to fail
2584 *
2585 * The close entry point is called when an interface is de-activated
2586 * by the OS.  The hardware is still under the driver's control, but
2587 * needs to be disabled.  A global MAC reset is issued to stop the
2588 * hardware, and all transmit and receive resources are freed.
2589 **/
2590static int igb_close(struct net_device *netdev)
2591{
2592	struct igb_adapter *adapter = netdev_priv(netdev);
2593
2594	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2595	igb_down(adapter);
2596
2597	igb_free_irq(adapter);
2598
2599	igb_free_all_tx_resources(adapter);
2600	igb_free_all_rx_resources(adapter);
2601
2602	return 0;
2603}
2604
2605/**
2606 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2607 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2608 *
2609 * Return 0 on success, negative on failure
2610 **/
2611int igb_setup_tx_resources(struct igb_ring *tx_ring)
2612{
2613	struct device *dev = tx_ring->dev;
2614	int orig_node = dev_to_node(dev);
2615	int size;
2616
2617	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2618	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2619	if (!tx_ring->tx_buffer_info)
2620		tx_ring->tx_buffer_info = vzalloc(size);
2621	if (!tx_ring->tx_buffer_info)
2622		goto err;
2623
2624	/* round up to nearest 4K */
2625	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2626	tx_ring->size = ALIGN(tx_ring->size, 4096);
2627
2628	set_dev_node(dev, tx_ring->numa_node);
2629	tx_ring->desc = dma_alloc_coherent(dev,
2630					   tx_ring->size,
2631					   &tx_ring->dma,
2632					   GFP_KERNEL);
2633	set_dev_node(dev, orig_node);
2634	if (!tx_ring->desc)
2635		tx_ring->desc = dma_alloc_coherent(dev,
2636						   tx_ring->size,
2637						   &tx_ring->dma,
2638						   GFP_KERNEL);
2639
2640	if (!tx_ring->desc)
2641		goto err;
2642
2643	tx_ring->next_to_use = 0;
2644	tx_ring->next_to_clean = 0;
2645
2646	return 0;
2647
2648err:
2649	vfree(tx_ring->tx_buffer_info);
2650	dev_err(dev,
2651		"Unable to allocate memory for the transmit descriptor ring\n");
2652	return -ENOMEM;
2653}
2654
2655/**
2656 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2657 *				  (Descriptors) for all queues
2658 * @adapter: board private structure
2659 *
2660 * Return 0 on success, negative on failure
2661 **/
2662static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2663{
2664	struct pci_dev *pdev = adapter->pdev;
2665	int i, err = 0;
2666
2667	for (i = 0; i < adapter->num_tx_queues; i++) {
2668		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2669		if (err) {
2670			dev_err(&pdev->dev,
2671				"Allocation for Tx Queue %u failed\n", i);
2672			for (i--; i >= 0; i--)
2673				igb_free_tx_resources(adapter->tx_ring[i]);
2674			break;
2675		}
2676	}
2677
2678	return err;
2679}
2680
2681/**
2682 * igb_setup_tctl - configure the transmit control registers
2683 * @adapter: Board private structure
2684 **/
2685void igb_setup_tctl(struct igb_adapter *adapter)
2686{
2687	struct e1000_hw *hw = &adapter->hw;
2688	u32 tctl;
2689
2690	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2691	wr32(E1000_TXDCTL(0), 0);
2692
2693	/* Program the Transmit Control Register */
2694	tctl = rd32(E1000_TCTL);
2695	tctl &= ~E1000_TCTL_CT;
2696	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2697		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2698
2699	igb_config_collision_dist(hw);
2700
2701	/* Enable transmits */
2702	tctl |= E1000_TCTL_EN;
2703
2704	wr32(E1000_TCTL, tctl);
2705}
2706
2707/**
2708 * igb_configure_tx_ring - Configure transmit ring after Reset
2709 * @adapter: board private structure
2710 * @ring: tx ring to configure
2711 *
2712 * Configure a transmit ring after a reset.
2713 **/
2714void igb_configure_tx_ring(struct igb_adapter *adapter,
2715                           struct igb_ring *ring)
2716{
2717	struct e1000_hw *hw = &adapter->hw;
2718	u32 txdctl = 0;
2719	u64 tdba = ring->dma;
2720	int reg_idx = ring->reg_idx;
2721
2722	/* disable the queue */
2723	wr32(E1000_TXDCTL(reg_idx), 0);
2724	wrfl();
2725	mdelay(10);
2726
2727	wr32(E1000_TDLEN(reg_idx),
2728	                ring->count * sizeof(union e1000_adv_tx_desc));
2729	wr32(E1000_TDBAL(reg_idx),
2730	                tdba & 0x00000000ffffffffULL);
2731	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2732
2733	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2734	wr32(E1000_TDH(reg_idx), 0);
2735	writel(0, ring->tail);
2736
2737	txdctl |= IGB_TX_PTHRESH;
2738	txdctl |= IGB_TX_HTHRESH << 8;
2739	txdctl |= IGB_TX_WTHRESH << 16;
2740
2741	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2742	wr32(E1000_TXDCTL(reg_idx), txdctl);
2743}
2744
2745/**
2746 * igb_configure_tx - Configure transmit Unit after Reset
2747 * @adapter: board private structure
2748 *
2749 * Configure the Tx unit of the MAC after a reset.
2750 **/
2751static void igb_configure_tx(struct igb_adapter *adapter)
2752{
2753	int i;
2754
2755	for (i = 0; i < adapter->num_tx_queues; i++)
2756		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2757}
2758
2759/**
2760 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2761 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2762 *
2763 * Returns 0 on success, negative on failure
2764 **/
2765int igb_setup_rx_resources(struct igb_ring *rx_ring)
2766{
2767	struct device *dev = rx_ring->dev;
2768	int orig_node = dev_to_node(dev);
2769	int size, desc_len;
2770
2771	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2772	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2773	if (!rx_ring->rx_buffer_info)
2774		rx_ring->rx_buffer_info = vzalloc(size);
2775	if (!rx_ring->rx_buffer_info)
2776		goto err;
2777
2778	desc_len = sizeof(union e1000_adv_rx_desc);
2779
2780	/* Round up to nearest 4K */
2781	rx_ring->size = rx_ring->count * desc_len;
2782	rx_ring->size = ALIGN(rx_ring->size, 4096);
2783
2784	set_dev_node(dev, rx_ring->numa_node);
2785	rx_ring->desc = dma_alloc_coherent(dev,
2786					   rx_ring->size,
2787					   &rx_ring->dma,
2788					   GFP_KERNEL);
2789	set_dev_node(dev, orig_node);
2790	if (!rx_ring->desc)
2791		rx_ring->desc = dma_alloc_coherent(dev,
2792						   rx_ring->size,
2793						   &rx_ring->dma,
2794						   GFP_KERNEL);
2795
2796	if (!rx_ring->desc)
2797		goto err;
2798
2799	rx_ring->next_to_clean = 0;
2800	rx_ring->next_to_use = 0;
2801
2802	return 0;
2803
2804err:
2805	vfree(rx_ring->rx_buffer_info);
2806	rx_ring->rx_buffer_info = NULL;
2807	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2808		" ring\n");
2809	return -ENOMEM;
2810}
2811
2812/**
2813 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2814 *				  (Descriptors) for all queues
2815 * @adapter: board private structure
2816 *
2817 * Return 0 on success, negative on failure
2818 **/
2819static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2820{
2821	struct pci_dev *pdev = adapter->pdev;
2822	int i, err = 0;
2823
2824	for (i = 0; i < adapter->num_rx_queues; i++) {
2825		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2826		if (err) {
2827			dev_err(&pdev->dev,
2828				"Allocation for Rx Queue %u failed\n", i);
2829			for (i--; i >= 0; i--)
2830				igb_free_rx_resources(adapter->rx_ring[i]);
2831			break;
2832		}
2833	}
2834
2835	return err;
2836}
2837
2838/**
2839 * igb_setup_mrqc - configure the multiple receive queue control registers
2840 * @adapter: Board private structure
2841 **/
2842static void igb_setup_mrqc(struct igb_adapter *adapter)
2843{
2844	struct e1000_hw *hw = &adapter->hw;
2845	u32 mrqc, rxcsum;
2846	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2847	union e1000_reta {
2848		u32 dword;
2849		u8  bytes[4];
2850	} reta;
2851	static const u8 rsshash[40] = {
2852		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2853		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2854		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2855		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2856
2857	/* Fill out hash function seeds */
2858	for (j = 0; j < 10; j++) {
2859		u32 rsskey = rsshash[(j * 4)];
2860		rsskey |= rsshash[(j * 4) + 1] << 8;
2861		rsskey |= rsshash[(j * 4) + 2] << 16;
2862		rsskey |= rsshash[(j * 4) + 3] << 24;
2863		array_wr32(E1000_RSSRK(0), j, rsskey);
2864	}
2865
2866	num_rx_queues = adapter->rss_queues;
2867
2868	if (adapter->vfs_allocated_count) {
2869		/* 82575 and 82576 supports 2 RSS queues for VMDq */
2870		switch (hw->mac.type) {
2871		case e1000_i350:
2872		case e1000_82580:
2873			num_rx_queues = 1;
2874			shift = 0;
2875			break;
2876		case e1000_82576:
2877			shift = 3;
2878			num_rx_queues = 2;
2879			break;
2880		case e1000_82575:
2881			shift = 2;
2882			shift2 = 6;
2883		default:
2884			break;
2885		}
2886	} else {
2887		if (hw->mac.type == e1000_82575)
2888			shift = 6;
2889	}
2890
2891	for (j = 0; j < (32 * 4); j++) {
2892		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2893		if (shift2)
2894			reta.bytes[j & 3] |= num_rx_queues << shift2;
2895		if ((j & 3) == 3)
2896			wr32(E1000_RETA(j >> 2), reta.dword);
2897	}
2898
2899	/*
2900	 * Disable raw packet checksumming so that RSS hash is placed in
2901	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2902	 * offloads as they are enabled by default
2903	 */
2904	rxcsum = rd32(E1000_RXCSUM);
2905	rxcsum |= E1000_RXCSUM_PCSD;
2906
2907	if (adapter->hw.mac.type >= e1000_82576)
2908		/* Enable Receive Checksum Offload for SCTP */
2909		rxcsum |= E1000_RXCSUM_CRCOFL;
2910
2911	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2912	wr32(E1000_RXCSUM, rxcsum);
2913
2914	/* If VMDq is enabled then we set the appropriate mode for that, else
2915	 * we default to RSS so that an RSS hash is calculated per packet even
2916	 * if we are only using one queue */
2917	if (adapter->vfs_allocated_count) {
2918		if (hw->mac.type > e1000_82575) {
2919			/* Set the default pool for the PF's first queue */
2920			u32 vtctl = rd32(E1000_VT_CTL);
2921			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2922				   E1000_VT_CTL_DISABLE_DEF_POOL);
2923			vtctl |= adapter->vfs_allocated_count <<
2924				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2925			wr32(E1000_VT_CTL, vtctl);
2926		}
2927		if (adapter->rss_queues > 1)
2928			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2929		else
2930			mrqc = E1000_MRQC_ENABLE_VMDQ;
2931	} else {
2932		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2933	}
2934	igb_vmm_control(adapter);
2935
2936	/*
2937	 * Generate RSS hash based on TCP port numbers and/or
2938	 * IPv4/v6 src and dst addresses since UDP cannot be
2939	 * hashed reliably due to IP fragmentation
2940	 */
2941	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2942		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2943		E1000_MRQC_RSS_FIELD_IPV6 |
2944		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2945		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2946
2947	wr32(E1000_MRQC, mrqc);
2948}
2949
2950/**
2951 * igb_setup_rctl - configure the receive control registers
2952 * @adapter: Board private structure
2953 **/
2954void igb_setup_rctl(struct igb_adapter *adapter)
2955{
2956	struct e1000_hw *hw = &adapter->hw;
2957	u32 rctl;
2958
2959	rctl = rd32(E1000_RCTL);
2960
2961	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2962	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2963
2964	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2965		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2966
2967	/*
2968	 * enable stripping of CRC. It's unlikely this will break BMC
2969	 * redirection as it did with e1000. Newer features require
2970	 * that the HW strips the CRC.
2971	 */
2972	rctl |= E1000_RCTL_SECRC;
2973
2974	/* disable store bad packets and clear size bits. */
2975	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2976
2977	/* enable LPE to prevent packets larger than max_frame_size */
2978	rctl |= E1000_RCTL_LPE;
2979
2980	/* disable queue 0 to prevent tail write w/o re-config */
2981	wr32(E1000_RXDCTL(0), 0);
2982
2983	/* Attention!!!  For SR-IOV PF driver operations you must enable
2984	 * queue drop for all VF and PF queues to prevent head of line blocking
2985	 * if an un-trusted VF does not provide descriptors to hardware.
2986	 */
2987	if (adapter->vfs_allocated_count) {
2988		/* set all queue drop enable bits */
2989		wr32(E1000_QDE, ALL_QUEUES);
2990	}
2991
2992	wr32(E1000_RCTL, rctl);
2993}
2994
2995static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2996                                   int vfn)
2997{
2998	struct e1000_hw *hw = &adapter->hw;
2999	u32 vmolr;
3000
3001	/* if it isn't the PF check to see if VFs are enabled and
3002	 * increase the size to support vlan tags */
3003	if (vfn < adapter->vfs_allocated_count &&
3004	    adapter->vf_data[vfn].vlans_enabled)
3005		size += VLAN_TAG_SIZE;
3006
3007	vmolr = rd32(E1000_VMOLR(vfn));
3008	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3009	vmolr |= size | E1000_VMOLR_LPE;
3010	wr32(E1000_VMOLR(vfn), vmolr);
3011
3012	return 0;
3013}
3014
3015/**
3016 * igb_rlpml_set - set maximum receive packet size
3017 * @adapter: board private structure
3018 *
3019 * Configure maximum receivable packet size.
3020 **/
3021static void igb_rlpml_set(struct igb_adapter *adapter)
3022{
3023	u32 max_frame_size = adapter->max_frame_size;
3024	struct e1000_hw *hw = &adapter->hw;
3025	u16 pf_id = adapter->vfs_allocated_count;
3026
3027	if (pf_id) {
3028		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3029		/*
3030		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3031		 * to our max jumbo frame size, in case we need to enable
3032		 * jumbo frames on one of the rings later.
3033		 * This will not pass over-length frames into the default
3034		 * queue because it's gated by the VMOLR.RLPML.
3035		 */
3036		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3037	}
3038
3039	wr32(E1000_RLPML, max_frame_size);
3040}
3041
3042static inline void igb_set_vmolr(struct igb_adapter *adapter,
3043				 int vfn, bool aupe)
3044{
3045	struct e1000_hw *hw = &adapter->hw;
3046	u32 vmolr;
3047
3048	/*
3049	 * This register exists only on 82576 and newer so if we are older then
3050	 * we should exit and do nothing
3051	 */
3052	if (hw->mac.type < e1000_82576)
3053		return;
3054
3055	vmolr = rd32(E1000_VMOLR(vfn));
3056	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3057	if (aupe)
3058		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3059	else
3060		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3061
3062	/* clear all bits that might not be set */
3063	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3064
3065	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3066		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3067	/*
3068	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3069	 * multicast packets
3070	 */
3071	if (vfn <= adapter->vfs_allocated_count)
3072		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3073
3074	wr32(E1000_VMOLR(vfn), vmolr);
3075}
3076
3077/**
3078 * igb_configure_rx_ring - Configure a receive ring after Reset
3079 * @adapter: board private structure
3080 * @ring: receive ring to be configured
3081 *
3082 * Configure the Rx unit of the MAC after a reset.
3083 **/
3084void igb_configure_rx_ring(struct igb_adapter *adapter,
3085                           struct igb_ring *ring)
3086{
3087	struct e1000_hw *hw = &adapter->hw;
3088	u64 rdba = ring->dma;
3089	int reg_idx = ring->reg_idx;
3090	u32 srrctl = 0, rxdctl = 0;
3091
3092	/* disable the queue */
3093	wr32(E1000_RXDCTL(reg_idx), 0);
3094
3095	/* Set DMA base address registers */
3096	wr32(E1000_RDBAL(reg_idx),
3097	     rdba & 0x00000000ffffffffULL);
3098	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3099	wr32(E1000_RDLEN(reg_idx),
3100	               ring->count * sizeof(union e1000_adv_rx_desc));
3101
3102	/* initialize head and tail */
3103	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3104	wr32(E1000_RDH(reg_idx), 0);
3105	writel(0, ring->tail);
3106
3107	/* set descriptor configuration */
3108	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3109#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3110	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3111#else
3112	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3113#endif
3114	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3115	if (hw->mac.type == e1000_82580)
3116		srrctl |= E1000_SRRCTL_TIMESTAMP;
3117	/* Only set Drop Enable if we are supporting multiple queues */
3118	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3119		srrctl |= E1000_SRRCTL_DROP_EN;
3120
3121	wr32(E1000_SRRCTL(reg_idx), srrctl);
3122
3123	/* set filtering for VMDQ pools */
3124	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3125
3126	rxdctl |= IGB_RX_PTHRESH;
3127	rxdctl |= IGB_RX_HTHRESH << 8;
3128	rxdctl |= IGB_RX_WTHRESH << 16;
3129
3130	/* enable receive descriptor fetching */
3131	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3132	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3133}
3134
3135/**
3136 * igb_configure_rx - Configure receive Unit after Reset
3137 * @adapter: board private structure
3138 *
3139 * Configure the Rx unit of the MAC after a reset.
3140 **/
3141static void igb_configure_rx(struct igb_adapter *adapter)
3142{
3143	int i;
3144
3145	/* set UTA to appropriate mode */
3146	igb_set_uta(adapter);
3147
3148	/* set the correct pool for the PF default MAC address in entry 0 */
3149	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3150	                 adapter->vfs_allocated_count);
3151
3152	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3153	 * the Base and Length of the Rx Descriptor Ring */
3154	for (i = 0; i < adapter->num_rx_queues; i++)
3155		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3156}
3157
3158/**
3159 * igb_free_tx_resources - Free Tx Resources per Queue
3160 * @tx_ring: Tx descriptor ring for a specific queue
3161 *
3162 * Free all transmit software resources
3163 **/
3164void igb_free_tx_resources(struct igb_ring *tx_ring)
3165{
3166	igb_clean_tx_ring(tx_ring);
3167
3168	vfree(tx_ring->tx_buffer_info);
3169	tx_ring->tx_buffer_info = NULL;
3170
3171	/* if not set, then don't free */
3172	if (!tx_ring->desc)
3173		return;
3174
3175	dma_free_coherent(tx_ring->dev, tx_ring->size,
3176			  tx_ring->desc, tx_ring->dma);
3177
3178	tx_ring->desc = NULL;
3179}
3180
3181/**
3182 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3183 * @adapter: board private structure
3184 *
3185 * Free all transmit software resources
3186 **/
3187static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3188{
3189	int i;
3190
3191	for (i = 0; i < adapter->num_tx_queues; i++)
3192		igb_free_tx_resources(adapter->tx_ring[i]);
3193}
3194
3195void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3196				    struct igb_tx_buffer *tx_buffer)
3197{
3198	if (tx_buffer->skb) {
3199		dev_kfree_skb_any(tx_buffer->skb);
3200		if (tx_buffer->dma)
3201			dma_unmap_single(ring->dev,
3202					 tx_buffer->dma,
3203					 tx_buffer->length,
3204					 DMA_TO_DEVICE);
3205	} else if (tx_buffer->dma) {
3206		dma_unmap_page(ring->dev,
3207			       tx_buffer->dma,
3208			       tx_buffer->length,
3209			       DMA_TO_DEVICE);
3210	}
3211	tx_buffer->next_to_watch = NULL;
3212	tx_buffer->skb = NULL;
3213	tx_buffer->dma = 0;
3214	/* buffer_info must be completely set up in the transmit path */
3215}
3216
3217/**
3218 * igb_clean_tx_ring - Free Tx Buffers
3219 * @tx_ring: ring to be cleaned
3220 **/
3221static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3222{
3223	struct igb_tx_buffer *buffer_info;
3224	unsigned long size;
3225	u16 i;
3226
3227	if (!tx_ring->tx_buffer_info)
3228		return;
3229	/* Free all the Tx ring sk_buffs */
3230
3231	for (i = 0; i < tx_ring->count; i++) {
3232		buffer_info = &tx_ring->tx_buffer_info[i];
3233		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3234	}
3235
3236	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3237	memset(tx_ring->tx_buffer_info, 0, size);
3238
3239	/* Zero out the descriptor ring */
3240	memset(tx_ring->desc, 0, tx_ring->size);
3241
3242	tx_ring->next_to_use = 0;
3243	tx_ring->next_to_clean = 0;
3244}
3245
3246/**
3247 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3248 * @adapter: board private structure
3249 **/
3250static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3251{
3252	int i;
3253
3254	for (i = 0; i < adapter->num_tx_queues; i++)
3255		igb_clean_tx_ring(adapter->tx_ring[i]);
3256}
3257
3258/**
3259 * igb_free_rx_resources - Free Rx Resources
3260 * @rx_ring: ring to clean the resources from
3261 *
3262 * Free all receive software resources
3263 **/
3264void igb_free_rx_resources(struct igb_ring *rx_ring)
3265{
3266	igb_clean_rx_ring(rx_ring);
3267
3268	vfree(rx_ring->rx_buffer_info);
3269	rx_ring->rx_buffer_info = NULL;
3270
3271	/* if not set, then don't free */
3272	if (!rx_ring->desc)
3273		return;
3274
3275	dma_free_coherent(rx_ring->dev, rx_ring->size,
3276			  rx_ring->desc, rx_ring->dma);
3277
3278	rx_ring->desc = NULL;
3279}
3280
3281/**
3282 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3283 * @adapter: board private structure
3284 *
3285 * Free all receive software resources
3286 **/
3287static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3288{
3289	int i;
3290
3291	for (i = 0; i < adapter->num_rx_queues; i++)
3292		igb_free_rx_resources(adapter->rx_ring[i]);
3293}
3294
3295/**
3296 * igb_clean_rx_ring - Free Rx Buffers per Queue
3297 * @rx_ring: ring to free buffers from
3298 **/
3299static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3300{
3301	unsigned long size;
3302	u16 i;
3303
3304	if (!rx_ring->rx_buffer_info)
3305		return;
3306
3307	/* Free all the Rx ring sk_buffs */
3308	for (i = 0; i < rx_ring->count; i++) {
3309		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3310		if (buffer_info->dma) {
3311			dma_unmap_single(rx_ring->dev,
3312			                 buffer_info->dma,
3313					 IGB_RX_HDR_LEN,
3314					 DMA_FROM_DEVICE);
3315			buffer_info->dma = 0;
3316		}
3317
3318		if (buffer_info->skb) {
3319			dev_kfree_skb(buffer_info->skb);
3320			buffer_info->skb = NULL;
3321		}
3322		if (buffer_info->page_dma) {
3323			dma_unmap_page(rx_ring->dev,
3324			               buffer_info->page_dma,
3325				       PAGE_SIZE / 2,
3326				       DMA_FROM_DEVICE);
3327			buffer_info->page_dma = 0;
3328		}
3329		if (buffer_info->page) {
3330			put_page(buffer_info->page);
3331			buffer_info->page = NULL;
3332			buffer_info->page_offset = 0;
3333		}
3334	}
3335
3336	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3337	memset(rx_ring->rx_buffer_info, 0, size);
3338
3339	/* Zero out the descriptor ring */
3340	memset(rx_ring->desc, 0, rx_ring->size);
3341
3342	rx_ring->next_to_clean = 0;
3343	rx_ring->next_to_use = 0;
3344}
3345
3346/**
3347 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3348 * @adapter: board private structure
3349 **/
3350static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3351{
3352	int i;
3353
3354	for (i = 0; i < adapter->num_rx_queues; i++)
3355		igb_clean_rx_ring(adapter->rx_ring[i]);
3356}
3357
3358/**
3359 * igb_set_mac - Change the Ethernet Address of the NIC
3360 * @netdev: network interface device structure
3361 * @p: pointer to an address structure
3362 *
3363 * Returns 0 on success, negative on failure
3364 **/
3365static int igb_set_mac(struct net_device *netdev, void *p)
3366{
3367	struct igb_adapter *adapter = netdev_priv(netdev);
3368	struct e1000_hw *hw = &adapter->hw;
3369	struct sockaddr *addr = p;
3370
3371	if (!is_valid_ether_addr(addr->sa_data))
3372		return -EADDRNOTAVAIL;
3373
3374	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3375	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3376
3377	/* set the correct pool for the new PF MAC address in entry 0 */
3378	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3379	                 adapter->vfs_allocated_count);
3380
3381	return 0;
3382}
3383
3384/**
3385 * igb_write_mc_addr_list - write multicast addresses to MTA
3386 * @netdev: network interface device structure
3387 *
3388 * Writes multicast address list to the MTA hash table.
3389 * Returns: -ENOMEM on failure
3390 *                0 on no addresses written
3391 *                X on writing X addresses to MTA
3392 **/
3393static int igb_write_mc_addr_list(struct net_device *netdev)
3394{
3395	struct igb_adapter *adapter = netdev_priv(netdev);
3396	struct e1000_hw *hw = &adapter->hw;
3397	struct netdev_hw_addr *ha;
3398	u8  *mta_list;
3399	int i;
3400
3401	if (netdev_mc_empty(netdev)) {
3402		/* nothing to program, so clear mc list */
3403		igb_update_mc_addr_list(hw, NULL, 0);
3404		igb_restore_vf_multicasts(adapter);
3405		return 0;
3406	}
3407
3408	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3409	if (!mta_list)
3410		return -ENOMEM;
3411
3412	/* The shared function expects a packed array of only addresses. */
3413	i = 0;
3414	netdev_for_each_mc_addr(ha, netdev)
3415		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3416
3417	igb_update_mc_addr_list(hw, mta_list, i);
3418	kfree(mta_list);
3419
3420	return netdev_mc_count(netdev);
3421}
3422
3423/**
3424 * igb_write_uc_addr_list - write unicast addresses to RAR table
3425 * @netdev: network interface device structure
3426 *
3427 * Writes unicast address list to the RAR table.
3428 * Returns: -ENOMEM on failure/insufficient address space
3429 *                0 on no addresses written
3430 *                X on writing X addresses to the RAR table
3431 **/
3432static int igb_write_uc_addr_list(struct net_device *netdev)
3433{
3434	struct igb_adapter *adapter = netdev_priv(netdev);
3435	struct e1000_hw *hw = &adapter->hw;
3436	unsigned int vfn = adapter->vfs_allocated_count;
3437	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3438	int count = 0;
3439
3440	/* return ENOMEM indicating insufficient memory for addresses */
3441	if (netdev_uc_count(netdev) > rar_entries)
3442		return -ENOMEM;
3443
3444	if (!netdev_uc_empty(netdev) && rar_entries) {
3445		struct netdev_hw_addr *ha;
3446
3447		netdev_for_each_uc_addr(ha, netdev) {
3448			if (!rar_entries)
3449				break;
3450			igb_rar_set_qsel(adapter, ha->addr,
3451			                 rar_entries--,
3452			                 vfn);
3453			count++;
3454		}
3455	}
3456	/* write the addresses in reverse order to avoid write combining */
3457	for (; rar_entries > 0 ; rar_entries--) {
3458		wr32(E1000_RAH(rar_entries), 0);
3459		wr32(E1000_RAL(rar_entries), 0);
3460	}
3461	wrfl();
3462
3463	return count;
3464}
3465
3466/**
3467 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3468 * @netdev: network interface device structure
3469 *
3470 * The set_rx_mode entry point is called whenever the unicast or multicast
3471 * address lists or the network interface flags are updated.  This routine is
3472 * responsible for configuring the hardware for proper unicast, multicast,
3473 * promiscuous mode, and all-multi behavior.
3474 **/
3475static void igb_set_rx_mode(struct net_device *netdev)
3476{
3477	struct igb_adapter *adapter = netdev_priv(netdev);
3478	struct e1000_hw *hw = &adapter->hw;
3479	unsigned int vfn = adapter->vfs_allocated_count;
3480	u32 rctl, vmolr = 0;
3481	int count;
3482
3483	/* Check for Promiscuous and All Multicast modes */
3484	rctl = rd32(E1000_RCTL);
3485
3486	/* clear the effected bits */
3487	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3488
3489	if (netdev->flags & IFF_PROMISC) {
3490		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3491		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3492	} else {
3493		if (netdev->flags & IFF_ALLMULTI) {
3494			rctl |= E1000_RCTL_MPE;
3495			vmolr |= E1000_VMOLR_MPME;
3496		} else {
3497			/*
3498			 * Write addresses to the MTA, if the attempt fails
3499			 * then we should just turn on promiscuous mode so
3500			 * that we can at least receive multicast traffic
3501			 */
3502			count = igb_write_mc_addr_list(netdev);
3503			if (count < 0) {
3504				rctl |= E1000_RCTL_MPE;
3505				vmolr |= E1000_VMOLR_MPME;
3506			} else if (count) {
3507				vmolr |= E1000_VMOLR_ROMPE;
3508			}
3509		}
3510		/*
3511		 * Write addresses to available RAR registers, if there is not
3512		 * sufficient space to store all the addresses then enable
3513		 * unicast promiscuous mode
3514		 */
3515		count = igb_write_uc_addr_list(netdev);
3516		if (count < 0) {
3517			rctl |= E1000_RCTL_UPE;
3518			vmolr |= E1000_VMOLR_ROPE;
3519		}
3520		rctl |= E1000_RCTL_VFE;
3521	}
3522	wr32(E1000_RCTL, rctl);
3523
3524	/*
3525	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3526	 * the VMOLR to enable the appropriate modes.  Without this workaround
3527	 * we will have issues with VLAN tag stripping not being done for frames
3528	 * that are only arriving because we are the default pool
3529	 */
3530	if (hw->mac.type < e1000_82576)
3531		return;
3532
3533	vmolr |= rd32(E1000_VMOLR(vfn)) &
3534	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3535	wr32(E1000_VMOLR(vfn), vmolr);
3536	igb_restore_vf_multicasts(adapter);
3537}
3538
3539static void igb_check_wvbr(struct igb_adapter *adapter)
3540{
3541	struct e1000_hw *hw = &adapter->hw;
3542	u32 wvbr = 0;
3543
3544	switch (hw->mac.type) {
3545	case e1000_82576:
3546	case e1000_i350:
3547		if (!(wvbr = rd32(E1000_WVBR)))
3548			return;
3549		break;
3550	default:
3551		break;
3552	}
3553
3554	adapter->wvbr |= wvbr;
3555}
3556
3557#define IGB_STAGGERED_QUEUE_OFFSET 8
3558
3559static void igb_spoof_check(struct igb_adapter *adapter)
3560{
3561	int j;
3562
3563	if (!adapter->wvbr)
3564		return;
3565
3566	for(j = 0; j < adapter->vfs_allocated_count; j++) {
3567		if (adapter->wvbr & (1 << j) ||
3568		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3569			dev_warn(&adapter->pdev->dev,
3570				"Spoof event(s) detected on VF %d\n", j);
3571			adapter->wvbr &=
3572				~((1 << j) |
3573				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3574		}
3575	}
3576}
3577
3578/* Need to wait a few seconds after link up to get diagnostic information from
3579 * the phy */
3580static void igb_update_phy_info(unsigned long data)
3581{
3582	struct igb_adapter *adapter = (struct igb_adapter *) data;
3583	igb_get_phy_info(&adapter->hw);
3584}
3585
3586/**
3587 * igb_has_link - check shared code for link and determine up/down
3588 * @adapter: pointer to driver private info
3589 **/
3590bool igb_has_link(struct igb_adapter *adapter)
3591{
3592	struct e1000_hw *hw = &adapter->hw;
3593	bool link_active = false;
3594	s32 ret_val = 0;
3595
3596	/* get_link_status is set on LSC (link status) interrupt or
3597	 * rx sequence error interrupt.  get_link_status will stay
3598	 * false until the e1000_check_for_link establishes link
3599	 * for copper adapters ONLY
3600	 */
3601	switch (hw->phy.media_type) {
3602	case e1000_media_type_copper:
3603		if (hw->mac.get_link_status) {
3604			ret_val = hw->mac.ops.check_for_link(hw);
3605			link_active = !hw->mac.get_link_status;
3606		} else {
3607			link_active = true;
3608		}
3609		break;
3610	case e1000_media_type_internal_serdes:
3611		ret_val = hw->mac.ops.check_for_link(hw);
3612		link_active = hw->mac.serdes_has_link;
3613		break;
3614	default:
3615	case e1000_media_type_unknown:
3616		break;
3617	}
3618
3619	return link_active;
3620}
3621
3622static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3623{
3624	bool ret = false;
3625	u32 ctrl_ext, thstat;
3626
3627	/* check for thermal sensor event on i350, copper only */
3628	if (hw->mac.type == e1000_i350) {
3629		thstat = rd32(E1000_THSTAT);
3630		ctrl_ext = rd32(E1000_CTRL_EXT);
3631
3632		if ((hw->phy.media_type == e1000_media_type_copper) &&
3633		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3634			ret = !!(thstat & event);
3635		}
3636	}
3637
3638	return ret;
3639}
3640
3641/**
3642 * igb_watchdog - Timer Call-back
3643 * @data: pointer to adapter cast into an unsigned long
3644 **/
3645static void igb_watchdog(unsigned long data)
3646{
3647	struct igb_adapter *adapter = (struct igb_adapter *)data;
3648	/* Do the rest outside of interrupt context */
3649	schedule_work(&adapter->watchdog_task);
3650}
3651
3652static void igb_watchdog_task(struct work_struct *work)
3653{
3654	struct igb_adapter *adapter = container_of(work,
3655	                                           struct igb_adapter,
3656                                                   watchdog_task);
3657	struct e1000_hw *hw = &adapter->hw;
3658	struct net_device *netdev = adapter->netdev;
3659	u32 link;
3660	int i;
3661
3662	link = igb_has_link(adapter);
3663	if (link) {
3664		if (!netif_carrier_ok(netdev)) {
3665			u32 ctrl;
3666			hw->mac.ops.get_speed_and_duplex(hw,
3667			                                 &adapter->link_speed,
3668			                                 &adapter->link_duplex);
3669
3670			ctrl = rd32(E1000_CTRL);
3671			/* Links status message must follow this format */
3672			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3673				 "Flow Control: %s\n",
3674			       netdev->name,
3675			       adapter->link_speed,
3676			       adapter->link_duplex == FULL_DUPLEX ?
3677				 "Full Duplex" : "Half Duplex",
3678			       ((ctrl & E1000_CTRL_TFCE) &&
3679			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3680			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3681			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3682
3683			/* check for thermal sensor event */
3684			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3685				printk(KERN_INFO "igb: %s The network adapter "
3686						 "link speed was downshifted "
3687						 "because it overheated.\n",
3688						 netdev->name);
3689			}
3690
3691			/* adjust timeout factor according to speed/duplex */
3692			adapter->tx_timeout_factor = 1;
3693			switch (adapter->link_speed) {
3694			case SPEED_10:
3695				adapter->tx_timeout_factor = 14;
3696				break;
3697			case SPEED_100:
3698				/* maybe add some timeout factor ? */
3699				break;
3700			}
3701
3702			netif_carrier_on(netdev);
3703
3704			igb_ping_all_vfs(adapter);
3705			igb_check_vf_rate_limit(adapter);
3706
3707			/* link state has changed, schedule phy info update */
3708			if (!test_bit(__IGB_DOWN, &adapter->state))
3709				mod_timer(&adapter->phy_info_timer,
3710					  round_jiffies(jiffies + 2 * HZ));
3711		}
3712	} else {
3713		if (netif_carrier_ok(netdev)) {
3714			adapter->link_speed = 0;
3715			adapter->link_duplex = 0;
3716
3717			/* check for thermal sensor event */
3718			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3719				printk(KERN_ERR "igb: %s The network adapter "
3720						"was stopped because it "
3721						"overheated.\n",
3722						netdev->name);
3723			}
3724
3725			/* Links status message must follow this format */
3726			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3727			       netdev->name);
3728			netif_carrier_off(netdev);
3729
3730			igb_ping_all_vfs(adapter);
3731
3732			/* link state has changed, schedule phy info update */
3733			if (!test_bit(__IGB_DOWN, &adapter->state))
3734				mod_timer(&adapter->phy_info_timer,
3735					  round_jiffies(jiffies + 2 * HZ));
3736		}
3737	}
3738
3739	spin_lock(&adapter->stats64_lock);
3740	igb_update_stats(adapter, &adapter->stats64);
3741	spin_unlock(&adapter->stats64_lock);
3742
3743	for (i = 0; i < adapter->num_tx_queues; i++) {
3744		struct igb_ring *tx_ring = adapter->tx_ring[i];
3745		if (!netif_carrier_ok(netdev)) {
3746			/* We've lost link, so the controller stops DMA,
3747			 * but we've got queued Tx work that's never going
3748			 * to get done, so reset controller to flush Tx.
3749			 * (Do the reset outside of interrupt context). */
3750			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3751				adapter->tx_timeout_count++;
3752				schedule_work(&adapter->reset_task);
3753				/* return immediately since reset is imminent */
3754				return;
3755			}
3756		}
3757
3758		/* Force detection of hung controller every watchdog period */
3759		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3760	}
3761
3762	/* Cause software interrupt to ensure rx ring is cleaned */
3763	if (adapter->msix_entries) {
3764		u32 eics = 0;
3765		for (i = 0; i < adapter->num_q_vectors; i++)
3766			eics |= adapter->q_vector[i]->eims_value;
3767		wr32(E1000_EICS, eics);
3768	} else {
3769		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3770	}
3771
3772	igb_spoof_check(adapter);
3773
3774	/* Reset the timer */
3775	if (!test_bit(__IGB_DOWN, &adapter->state))
3776		mod_timer(&adapter->watchdog_timer,
3777			  round_jiffies(jiffies + 2 * HZ));
3778}
3779
/* Interrupt throttling classes used by the dynamic ITR code */
enum latency_range {
	lowest_latency = 0,	/* mapped to IGB_70K_ITR by igb_set_itr() */
	low_latency,		/* 1: mapped to IGB_20K_ITR */
	bulk_latency,		/* 2: mapped to IGB_4K_ITR */
	latency_invalid = 255
};
3786
3787/**
3788 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3789 *
3790 *      Stores a new ITR value based on strictly on packet size.  This
3791 *      algorithm is less sophisticated than that used in igb_update_itr,
3792 *      due to the difficulty of synchronizing statistics across multiple
3793 *      receive rings.  The divisors and thresholds used by this function
3794 *      were determined based on theoretical maximum wire speed and testing
3795 *      data, in order to minimize response time while increasing bulk
3796 *      throughput.
3797 *      This functionality is controlled by the InterruptThrottleRate module
3798 *      parameter (see igb_param.c)
3799 *      NOTE:  This function is called only when operating in a multiqueue
3800 *             receive environment.
3801 * @q_vector: pointer to q_vector
3802 **/
3803static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3804{
3805	int new_val = q_vector->itr_val;
3806	int avg_wire_size = 0;
3807	struct igb_adapter *adapter = q_vector->adapter;
3808	unsigned int packets;
3809
3810	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3811	 * ints/sec - ITR timer value of 120 ticks.
3812	 */
3813	if (adapter->link_speed != SPEED_1000) {
3814		new_val = IGB_4K_ITR;
3815		goto set_itr_val;
3816	}
3817
3818	packets = q_vector->rx.total_packets;
3819	if (packets)
3820		avg_wire_size = q_vector->rx.total_bytes / packets;
3821
3822	packets = q_vector->tx.total_packets;
3823	if (packets)
3824		avg_wire_size = max_t(u32, avg_wire_size,
3825				      q_vector->tx.total_bytes / packets);
3826
3827	/* if avg_wire_size isn't set no work was done */
3828	if (!avg_wire_size)
3829		goto clear_counts;
3830
3831	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3832	avg_wire_size += 24;
3833
3834	/* Don't starve jumbo frames */
3835	avg_wire_size = min(avg_wire_size, 3000);
3836
3837	/* Give a little boost to mid-size frames */
3838	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3839		new_val = avg_wire_size / 3;
3840	else
3841		new_val = avg_wire_size / 2;
3842
3843	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3844	if (new_val < IGB_20K_ITR &&
3845	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3846	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3847		new_val = IGB_20K_ITR;
3848
3849set_itr_val:
3850	if (new_val != q_vector->itr_val) {
3851		q_vector->itr_val = new_val;
3852		q_vector->set_itr = 1;
3853	}
3854clear_counts:
3855	q_vector->rx.total_bytes = 0;
3856	q_vector->rx.total_packets = 0;
3857	q_vector->tx.total_bytes = 0;
3858	q_vector->tx.total_packets = 0;
3859}
3860
3861/**
3862 * igb_update_itr - update the dynamic ITR value based on statistics
3863 *      Stores a new ITR value based on packets and byte
3864 *      counts during the last interrupt.  The advantage of per interrupt
3865 *      computation is faster updates and more accurate ITR for the current
3866 *      traffic pattern.  Constants in this function were computed
3867 *      based on theoretical maximum wire speed and thresholds were set based
3868 *      on testing data as well as attempting to minimize response time
3869 *      while increasing bulk throughput.
3870 *      this functionality is controlled by the InterruptThrottleRate module
3871 *      parameter (see igb_param.c)
3872 *      NOTE:  These calculations are only valid when operating in a single-
3873 *             queue environment.
3874 * @q_vector: pointer to q_vector
3875 * @ring_container: ring info to update the itr for
3876 **/
3877static void igb_update_itr(struct igb_q_vector *q_vector,
3878			   struct igb_ring_container *ring_container)
3879{
3880	unsigned int packets = ring_container->total_packets;
3881	unsigned int bytes = ring_container->total_bytes;
3882	u8 itrval = ring_container->itr;
3883
3884	/* no packets, exit with status unchanged */
3885	if (packets == 0)
3886		return;
3887
3888	switch (itrval) {
3889	case lowest_latency:
3890		/* handle TSO and jumbo frames */
3891		if (bytes/packets > 8000)
3892			itrval = bulk_latency;
3893		else if ((packets < 5) && (bytes > 512))
3894			itrval = low_latency;
3895		break;
3896	case low_latency:  /* 50 usec aka 20000 ints/s */
3897		if (bytes > 10000) {
3898			/* this if handles the TSO accounting */
3899			if (bytes/packets > 8000) {
3900				itrval = bulk_latency;
3901			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3902				itrval = bulk_latency;
3903			} else if ((packets > 35)) {
3904				itrval = lowest_latency;
3905			}
3906		} else if (bytes/packets > 2000) {
3907			itrval = bulk_latency;
3908		} else if (packets <= 2 && bytes < 512) {
3909			itrval = lowest_latency;
3910		}
3911		break;
3912	case bulk_latency: /* 250 usec aka 4000 ints/s */
3913		if (bytes > 25000) {
3914			if (packets > 35)
3915				itrval = low_latency;
3916		} else if (bytes < 1500) {
3917			itrval = low_latency;
3918		}
3919		break;
3920	}
3921
3922	/* clear work counters since we have the values we need */
3923	ring_container->total_bytes = 0;
3924	ring_container->total_packets = 0;
3925
3926	/* write updated itr to ring container */
3927	ring_container->itr = itrval;
3928}
3929
3930static void igb_set_itr(struct igb_q_vector *q_vector)
3931{
3932	struct igb_adapter *adapter = q_vector->adapter;
3933	u32 new_itr = q_vector->itr_val;
3934	u8 current_itr = 0;
3935
3936	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3937	if (adapter->link_speed != SPEED_1000) {
3938		current_itr = 0;
3939		new_itr = IGB_4K_ITR;
3940		goto set_itr_now;
3941	}
3942
3943	igb_update_itr(q_vector, &q_vector->tx);
3944	igb_update_itr(q_vector, &q_vector->rx);
3945
3946	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3947
3948	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3949	if (current_itr == lowest_latency &&
3950	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3951	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3952		current_itr = low_latency;
3953
3954	switch (current_itr) {
3955	/* counts and packets in update_itr are dependent on these numbers */
3956	case lowest_latency:
3957		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3958		break;
3959	case low_latency:
3960		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3961		break;
3962	case bulk_latency:
3963		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3964		break;
3965	default:
3966		break;
3967	}
3968
3969set_itr_now:
3970	if (new_itr != q_vector->itr_val) {
3971		/* this attempts to bias the interrupt rate towards Bulk
3972		 * by adding intermediate steps when interrupt rate is
3973		 * increasing */
3974		new_itr = new_itr > q_vector->itr_val ?
3975		             max((new_itr * q_vector->itr_val) /
3976		                 (new_itr + (q_vector->itr_val >> 2)),
3977				 new_itr) :
3978			     new_itr;
3979		/* Don't write the value here; it resets the adapter's
3980		 * internal timer, and causes us to delay far longer than
3981		 * we should between interrupts.  Instead, we write the ITR
3982		 * value at the beginning of the next interrupt so the timing
3983		 * ends up being correct.
3984		 */
3985		q_vector->itr_val = new_itr;
3986		q_vector->set_itr = 1;
3987	}
3988}
3989
3990void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3991		     u32 type_tucmd, u32 mss_l4len_idx)
3992{
3993	struct e1000_adv_tx_context_desc *context_desc;
3994	u16 i = tx_ring->next_to_use;
3995
3996	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3997
3998	i++;
3999	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4000
4001	/* set bits to identify this as an advanced context descriptor */
4002	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4003
4004	/* For 82575, context index must be unique per ring. */
4005	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4006		mss_l4len_idx |= tx_ring->reg_idx << 4;
4007
4008	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4009	context_desc->seqnum_seed	= 0;
4010	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4011	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4012}
4013
4014static int igb_tso(struct igb_ring *tx_ring,
4015		   struct igb_tx_buffer *first,
4016		   u8 *hdr_len)
4017{
4018	struct sk_buff *skb = first->skb;
4019	u32 vlan_macip_lens, type_tucmd;
4020	u32 mss_l4len_idx, l4len;
4021
4022	if (!skb_is_gso(skb))
4023		return 0;
4024
4025	if (skb_header_cloned(skb)) {
4026		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4027		if (err)
4028			return err;
4029	}
4030
4031	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4032	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4033
4034	if (first->protocol == __constant_htons(ETH_P_IP)) {
4035		struct iphdr *iph = ip_hdr(skb);
4036		iph->tot_len = 0;
4037		iph->check = 0;
4038		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4039							 iph->daddr, 0,
4040							 IPPROTO_TCP,
4041							 0);
4042		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4043		first->tx_flags |= IGB_TX_FLAGS_TSO |
4044				   IGB_TX_FLAGS_CSUM |
4045				   IGB_TX_FLAGS_IPV4;
4046	} else if (skb_is_gso_v6(skb)) {
4047		ipv6_hdr(skb)->payload_len = 0;
4048		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4049						       &ipv6_hdr(skb)->daddr,
4050						       0, IPPROTO_TCP, 0);
4051		first->tx_flags |= IGB_TX_FLAGS_TSO |
4052				   IGB_TX_FLAGS_CSUM;
4053	}
4054
4055	/* compute header lengths */
4056	l4len = tcp_hdrlen(skb);
4057	*hdr_len = skb_transport_offset(skb) + l4len;
4058
4059	/* update gso size and bytecount with header size */
4060	first->gso_segs = skb_shinfo(skb)->gso_segs;
4061	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4062
4063	/* MSS L4LEN IDX */
4064	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4065	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4066
4067	/* VLAN MACLEN IPLEN */
4068	vlan_macip_lens = skb_network_header_len(skb);
4069	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4070	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4071
4072	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4073
4074	return 1;
4075}
4076
4077static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4078{
4079	struct sk_buff *skb = first->skb;
4080	u32 vlan_macip_lens = 0;
4081	u32 mss_l4len_idx = 0;
4082	u32 type_tucmd = 0;
4083
4084	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4085		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4086			return;
4087	} else {
4088		u8 l4_hdr = 0;
4089		switch (first->protocol) {
4090		case __constant_htons(ETH_P_IP):
4091			vlan_macip_lens |= skb_network_header_len(skb);
4092			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4093			l4_hdr = ip_hdr(skb)->protocol;
4094			break;
4095		case __constant_htons(ETH_P_IPV6):
4096			vlan_macip_lens |= skb_network_header_len(skb);
4097			l4_hdr = ipv6_hdr(skb)->nexthdr;
4098			break;
4099		default:
4100			if (unlikely(net_ratelimit())) {
4101				dev_warn(tx_ring->dev,
4102				 "partial checksum but proto=%x!\n",
4103				 first->protocol);
4104			}
4105			break;
4106		}
4107
4108		switch (l4_hdr) {
4109		case IPPROTO_TCP:
4110			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4111			mss_l4len_idx = tcp_hdrlen(skb) <<
4112					E1000_ADVTXD_L4LEN_SHIFT;
4113			break;
4114		case IPPROTO_SCTP:
4115			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4116			mss_l4len_idx = sizeof(struct sctphdr) <<
4117					E1000_ADVTXD_L4LEN_SHIFT;
4118			break;
4119		case IPPROTO_UDP:
4120			mss_l4len_idx = sizeof(struct udphdr) <<
4121					E1000_ADVTXD_L4LEN_SHIFT;
4122			break;
4123		default:
4124			if (unlikely(net_ratelimit())) {
4125				dev_warn(tx_ring->dev,
4126				 "partial checksum but l4 proto=%x!\n",
4127				 l4_hdr);
4128			}
4129			break;
4130		}
4131
4132		/* update TX checksum flag */
4133		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4134	}
4135
4136	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4137	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4138
4139	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4140}
4141
4142static __le32 igb_tx_cmd_type(u32 tx_flags)
4143{
4144	/* set type for advanced descriptor with frame checksum insertion */
4145	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4146				      E1000_ADVTXD_DCMD_IFCS |
4147				      E1000_ADVTXD_DCMD_DEXT);
4148
4149	/* set HW vlan bit if vlan is present */
4150	if (tx_flags & IGB_TX_FLAGS_VLAN)
4151		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4152
4153	/* set timestamp bit if present */
4154	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4155		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4156
4157	/* set segmentation bits for TSO */
4158	if (tx_flags & IGB_TX_FLAGS_TSO)
4159		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4160
4161	return cmd_type;
4162}
4163
4164static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4165				 union e1000_adv_tx_desc *tx_desc,
4166				 u32 tx_flags, unsigned int paylen)
4167{
4168	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4169
4170	/* 82575 requires a unique index per ring if any offload is enabled */
4171	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4172	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4173		olinfo_status |= tx_ring->reg_idx << 4;
4174
4175	/* insert L4 checksum */
4176	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4177		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4178
4179		/* insert IPv4 checksum */
4180		if (tx_flags & IGB_TX_FLAGS_IPV4)
4181			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4182	}
4183
4184	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4185}
4186
/*
 * A descriptor can be given a length of at most 65535 bytes.  To keep a
 * power-of-two alignment, each descriptor is limited to 32K (1 << 15).
 */
#define IGB_MAX_TXD_PWR		15
#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
4193
/*
 * igb_tx_map - DMA-map an skb and write its data descriptors
 * @tx_ring: ring the frame is queued on
 * @first: tx_buffer_info entry recorded for this frame by the caller
 * @hdr_len: header length subtracted from skb->len to form the payload length
 *
 * Maps the linear part of the skb, then each page fragment, writing one data
 * descriptor per mapping (more when a mapping exceeds IGB_MAX_DATA_PER_TXD).
 * On success next_to_use is advanced and the new tail index is written to
 * the ring's tail register.  On a mapping failure every tx_buffer_info from
 * the current index back to @first is passed to
 * igb_unmap_and_free_tx_resource() and next_to_use is set to @first's index.
 */
static void igb_tx_map(struct igb_ring *tx_ring,
		       struct igb_tx_buffer *first,
		       const u8 hdr_len)
{
	struct sk_buff *skb = first->skb;
	struct igb_tx_buffer *tx_buffer_info;
	union e1000_adv_tx_desc *tx_desc;
	dma_addr_t dma;
	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	unsigned int paylen = skb->len - hdr_len;
	__le32 cmd_type;
	u32 tx_flags = first->tx_flags;
	u16 i = tx_ring->next_to_use;

	tx_desc = IGB_TX_DESC(tx_ring, i);

	/* olinfo_status is filled in for the first data descriptor only;
	 * later descriptors of the frame get olinfo_status = 0 below */
	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
	cmd_type = igb_tx_cmd_type(tx_flags);

	/* map the linear (head) part of the skb */
	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
	if (dma_mapping_error(tx_ring->dev, dma))
		goto dma_error;

	/* record length, and DMA address */
	first->length = size;
	first->dma = dma;
	tx_desc->read.buffer_addr = cpu_to_le64(dma);

	/* each pass finishes the current mapping and, while paged data
	 * remains, maps the next fragment */
	for (;;) {
		/* split a mapping larger than one descriptor can carry */
		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
			tx_desc->read.cmd_type_len =
				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);

			i++;
			tx_desc++;
			if (i == tx_ring->count) {
				tx_desc = IGB_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += IGB_MAX_DATA_PER_TXD;
			size -= IGB_MAX_DATA_PER_TXD;

			tx_desc->read.olinfo_status = 0;
			tx_desc->read.buffer_addr = cpu_to_le64(dma);
		}

		/* no paged data left: the current descriptor is the last one */
		if (likely(!data_len))
			break;

		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);

		/* advance to the next descriptor, wrapping at the ring end */
		i++;
		tx_desc++;
		if (i == tx_ring->count) {
			tx_desc = IGB_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = frag->size;
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
				   size, DMA_TO_DEVICE);
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* remember the fragment mapping in the slot matching its
		 * descriptor index */
		tx_buffer_info = &tx_ring->tx_buffer_info[i];
		tx_buffer_info->length = size;
		tx_buffer_info->dma = dma;

		tx_desc->read.olinfo_status = 0;
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		frag++;
	}

	/* write last descriptor with RS and EOP bits */
	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
	tx_desc->read.cmd_type_len = cmd_type;

	/* set the timestamp */
	first->time_stamp = jiffies;

	/*
	 * Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.  (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	writel(i, tx_ring->tail);

	/* we need this if more than one processor can write to our tail
	 * at a time, it synchronizes IO on IA64/Altix systems */
	mmiowb();

	return;

dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer_info map */
	/* walk backwards from the failing index, wrapping at 0, until the
	 * entry for @first has been handled */
	for (;;) {
		tx_buffer_info = &tx_ring->tx_buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
		if (tx_buffer_info == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	/* i is now the index of @first */
	tx_ring->next_to_use = i;
}
4323
4324static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4325{
4326	struct net_device *netdev = tx_ring->netdev;
4327
4328	netif_stop_subqueue(netdev, tx_ring->queue_index);
4329
4330	/* Herbert's original patch had:
4331	 *  smp_mb__after_netif_stop_queue();
4332	 * but since that doesn't exist yet, just open code it. */
4333	smp_mb();
4334
4335	/* We need to check again in a case another CPU has just
4336	 * made room available. */
4337	if (igb_desc_unused(tx_ring) < size)
4338		return -EBUSY;
4339
4340	/* A reprieve! */
4341	netif_wake_subqueue(netdev, tx_ring->queue_index);
4342
4343	u64_stats_update_begin(&tx_ring->tx_syncp2);
4344	tx_ring->tx_stats.restart_queue2++;
4345	u64_stats_update_end(&tx_ring->tx_syncp2);
4346
4347	return 0;
4348}
4349
4350static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4351{
4352	if (igb_desc_unused(tx_ring) >= size)
4353		return 0;
4354	return __igb_maybe_stop_tx(tx_ring, size);
4355}
4356
4357netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4358				struct igb_ring *tx_ring)
4359{
4360	struct igb_tx_buffer *first;
4361	int tso;
4362	u32 tx_flags = 0;
4363	__be16 protocol = vlan_get_protocol(skb);
4364	u8 hdr_len = 0;
4365
4366	/* need: 1 descriptor per page,
4367	 *       + 2 desc gap to keep tail from touching head,
4368	 *       + 1 desc for skb->data,
4369	 *       + 1 desc for context descriptor,
4370	 * otherwise try next time */
4371	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4372		/* this is a hard error */
4373		return NETDEV_TX_BUSY;
4374	}
4375
4376	/* record the location of the first descriptor for this packet */
4377	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4378	first->skb = skb;
4379	first->bytecount = skb->len;
4380	first->gso_segs = 1;
4381
4382	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4383		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4384		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4385	}
4386
4387	if (vlan_tx_tag_present(skb)) {
4388		tx_flags |= IGB_TX_FLAGS_VLAN;
4389		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4390	}
4391
4392	/* record initial flags and protocol */
4393	first->tx_flags = tx_flags;
4394	first->protocol = protocol;
4395
4396	tso = igb_tso(tx_ring, first, &hdr_len);
4397	if (tso < 0)
4398		goto out_drop;
4399	else if (!tso)
4400		igb_tx_csum(tx_ring, first);
4401
4402	igb_tx_map(tx_ring, first, hdr_len);
4403
4404	/* Make sure there is space in the ring for the next send. */
4405	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4406
4407	return NETDEV_TX_OK;
4408
4409out_drop:
4410	igb_unmap_and_free_tx_resource(tx_ring, first);
4411
4412	return NETDEV_TX_OK;
4413}
4414
4415static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4416						    struct sk_buff *skb)
4417{
4418	unsigned int r_idx = skb->queue_mapping;
4419
4420	if (r_idx >= adapter->num_tx_queues)
4421		r_idx = r_idx % adapter->num_tx_queues;
4422
4423	return adapter->tx_ring[r_idx];
4424}
4425
4426static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4427				  struct net_device *netdev)
4428{
4429	struct igb_adapter *adapter = netdev_priv(netdev);
4430
4431	if (test_bit(__IGB_DOWN, &adapter->state)) {
4432		dev_kfree_skb_any(skb);
4433		return NETDEV_TX_OK;
4434	}
4435
4436	if (skb->len <= 0) {
4437		dev_kfree_skb_any(skb);
4438		return NETDEV_TX_OK;
4439	}
4440
4441	/*
4442	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4443	 * in order to meet this minimum size requirement.
4444	 */
4445	if (skb->len < 17) {
4446		if (skb_padto(skb, 17))
4447			return NETDEV_TX_OK;
4448		skb->len = 17;
4449	}
4450
4451	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4452}
4453
4454/**
4455 * igb_tx_timeout - Respond to a Tx Hang
4456 * @netdev: network interface device structure
4457 **/
4458static void igb_tx_timeout(struct net_device *netdev)
4459{
4460	struct igb_adapter *adapter = netdev_priv(netdev);
4461	struct e1000_hw *hw = &adapter->hw;
4462
4463	/* Do the reset outside of interrupt context */
4464	adapter->tx_timeout_count++;
4465
4466	if (hw->mac.type == e1000_82580)
4467		hw->dev_spec._82575.global_device_reset = true;
4468
4469	schedule_work(&adapter->reset_task);
4470	wr32(E1000_EICS,
4471	     (adapter->eims_enable_mask & ~adapter->eims_other));
4472}
4473
4474static void igb_reset_task(struct work_struct *work)
4475{
4476	struct igb_adapter *adapter;
4477	adapter = container_of(work, struct igb_adapter, reset_task);
4478
4479	igb_dump(adapter);
4480	netdev_err(adapter->netdev, "Reset adapter\n");
4481	igb_reinit_locked(adapter);
4482}
4483
4484/**
4485 * igb_get_stats64 - Get System Network Statistics
4486 * @netdev: network interface device structure
4487 * @stats: rtnl_link_stats64 pointer
4488 *
4489 **/
4490static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4491						 struct rtnl_link_stats64 *stats)
4492{
4493	struct igb_adapter *adapter = netdev_priv(netdev);
4494
4495	spin_lock(&adapter->stats64_lock);
4496	igb_update_stats(adapter, &adapter->stats64);
4497	memcpy(stats, &adapter->stats64, sizeof(*stats));
4498	spin_unlock(&adapter->stats64_lock);
4499
4500	return stats;
4501}
4502
4503/**
4504 * igb_change_mtu - Change the Maximum Transfer Unit
4505 * @netdev: network interface device structure
4506 * @new_mtu: new value for maximum frame size
4507 *
4508 * Returns 0 on success, negative on failure
4509 **/
4510static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4511{
4512	struct igb_adapter *adapter = netdev_priv(netdev);
4513	struct pci_dev *pdev = adapter->pdev;
4514	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4515
4516	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4517		dev_err(&pdev->dev, "Invalid MTU setting\n");
4518		return -EINVAL;
4519	}
4520
4521#define MAX_STD_JUMBO_FRAME_SIZE 9238
4522	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4523		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4524		return -EINVAL;
4525	}
4526
4527	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4528		msleep(1);
4529
4530	/* igb_down has a dependency on max_frame_size */
4531	adapter->max_frame_size = max_frame;
4532
4533	if (netif_running(netdev))
4534		igb_down(adapter);
4535
4536	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4537		 netdev->mtu, new_mtu);
4538	netdev->mtu = new_mtu;
4539
4540	if (netif_running(netdev))
4541		igb_up(adapter);
4542	else
4543		igb_reset(adapter);
4544
4545	clear_bit(__IGB_RESETTING, &adapter->state);
4546
4547	return 0;
4548}
4549
/**
 * igb_update_stats - Update the board statistics counters
 * @adapter: board private structure
 * @net_stats: rtnl_link_stats64 structure that receives the OS-level counters
 *
 * Sums the per-ring software byte/packet counters, accumulates the hardware
 * statistics registers into adapter->stats, and derives the error totals
 * stored in @net_stats.  Returns early, changing nothing, when link_speed
 * is 0 or the PCI channel is offline.
 **/

void igb_update_stats(struct igb_adapter *adapter,
		      struct rtnl_link_stats64 *net_stats)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u32 reg, mpc;
	u16 phy_tmp;
	int i;
	u64 bytes, packets;
	unsigned int start;
	u64 _bytes, _packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	/* Rx: fold per-queue drop counts and software counters together */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_rx_queues; i++) {
		/* only the low 12 bits of RQDPC are used */
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		struct igb_ring *ring = adapter->rx_ring[i];

		ring->rx_stats.drops += rqdpc_tmp;
		net_stats->rx_fifo_errors += rqdpc_tmp;

		/* retry until a consistent bytes/packets pair is read */
		do {
			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
			_bytes = ring->rx_stats.bytes;
			_packets = ring->rx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}

	net_stats->rx_bytes = bytes;
	net_stats->rx_packets = packets;

	/* Tx: same consistent-read loop over the tx rings */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = adapter->tx_ring[i];
		do {
			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
			_bytes = ring->tx_stats.bytes;
			_packets = ring->tx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}
	net_stats->tx_bytes = bytes;
	net_stats->tx_packets = packets;

	/* read stats registers */
	/* each value read is added to the running total in adapter->stats */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	/* MPC is also counted in rx_fifo_errors */
	mpc = rd32(E1000_MPC);
	adapter->stats.mpc += mpc;
	net_stats->rx_fifo_errors += mpc;
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	adapter->stats.rnbc += rd32(E1000_RNBC);
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	adapter->stats.tpt += rd32(E1000_TPT);
	adapter->stats.colc += rd32(E1000_COLC);

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	/* read internal phy specific stats */
	/* only done when no CTRL_EXT link-mode bits are set */
	reg = rd32(E1000_CTRL_EXT);
	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
		adapter->stats.rxerrc += rd32(E1000_RXERRC);
		adapter->stats.tncrs += rd32(E1000_TNCRS);
	}

	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	net_stats->multicast = adapter->stats.mprc;
	net_stats->collisions = adapter->stats.colc;

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	net_stats->rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	net_stats->rx_length_errors = adapter->stats.ruc +
				      adapter->stats.roc;
	net_stats->rx_crc_errors = adapter->stats.crcerrs;
	net_stats->rx_frame_errors = adapter->stats.algnerrc;
	net_stats->rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	net_stats->tx_errors = adapter->stats.ecol +
			       adapter->stats.latecol;
	net_stats->tx_aborted_errors = adapter->stats.ecol;
	net_stats->tx_window_errors = adapter->stats.latecol;
	net_stats->tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats */
	/* idle errors are sampled only on copper at 1000 Mb/s, and only
	 * when the PHY register read returns 0 */
	if (hw->phy.media_type == e1000_media_type_copper) {
		if ((adapter->link_speed == SPEED_1000) &&
		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);

	/* OS2BMC Stats */
	/* read only when MANC has EN_BMC2OS set */
	reg = rd32(E1000_MANC);
	if (reg & E1000_MANC_EN_BMC2OS) {
		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
	}
}
4741
4742static irqreturn_t igb_msix_other(int irq, void *data)
4743{
4744	struct igb_adapter *adapter = data;
4745	struct e1000_hw *hw = &adapter->hw;
4746	u32 icr = rd32(E1000_ICR);
4747	/* reading ICR causes bit 31 of EICR to be cleared */
4748
4749	if (icr & E1000_ICR_DRSTA)
4750		schedule_work(&adapter->reset_task);
4751
4752	if (icr & E1000_ICR_DOUTSYNC) {
4753		/* HW is reporting DMA is out of sync */
4754		adapter->stats.doosync++;
4755		/* The DMA Out of Sync is also indication of a spoof event
4756		 * in IOV mode. Check the Wrong VM Behavior register to
4757		 * see if it is really a spoof event. */
4758		igb_check_wvbr(adapter);
4759	}
4760
4761	/* Check for a mailbox event */
4762	if (icr & E1000_ICR_VMMB)
4763		igb_msg_task(adapter);
4764
4765	if (icr & E1000_ICR_LSC) {
4766		hw->mac.get_link_status = 1;
4767		/* guard against interrupt when we're going down */
4768		if (!test_bit(__IGB_DOWN, &adapter->state))
4769			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4770	}
4771
4772	if (adapter->vfs_allocated_count)
4773		wr32(E1000_IMS, E1000_IMS_LSC |
4774				E1000_IMS_VMMB |
4775				E1000_IMS_DOUTSYNC);
4776	else
4777		wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4778	wr32(E1000_EIMS, adapter->eims_other);
4779
4780	return IRQ_HANDLED;
4781}
4782
4783static void igb_write_itr(struct igb_q_vector *q_vector)
4784{
4785	struct igb_adapter *adapter = q_vector->adapter;
4786	u32 itr_val = q_vector->itr_val & 0x7FFC;
4787
4788	if (!q_vector->set_itr)
4789		return;
4790
4791	if (!itr_val)
4792		itr_val = 0x4;
4793
4794	if (adapter->hw.mac.type == e1000_82575)
4795		itr_val |= itr_val << 16;
4796	else
4797		itr_val |= E1000_EITR_CNT_IGNR;
4798
4799	writel(itr_val, q_vector->itr_register);
4800	q_vector->set_itr = 0;
4801}
4802
4803static irqreturn_t igb_msix_ring(int irq, void *data)
4804{
4805	struct igb_q_vector *q_vector = data;
4806
4807	/* Write the ITR value calculated from the previous interrupt. */
4808	igb_write_itr(q_vector);
4809
4810	napi_schedule(&q_vector->napi);
4811
4812	return IRQ_HANDLED;
4813}
4814
4815#ifdef CONFIG_IGB_DCA
4816static void igb_update_dca(struct igb_q_vector *q_vector)
4817{
4818	struct igb_adapter *adapter = q_vector->adapter;
4819	struct e1000_hw *hw = &adapter->hw;
4820	int cpu = get_cpu();
4821
4822	if (q_vector->cpu == cpu)
4823		goto out_no_update;
4824
4825	if (q_vector->tx.ring) {
4826		int q = q_vector->tx.ring->reg_idx;
4827		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4828		if (hw->mac.type == e1000_82575) {
4829			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4830			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4831		} else {
4832			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4833			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4834			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4835		}
4836		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4837		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4838	}
4839	if (q_vector->rx.ring) {
4840		int q = q_vector->rx.ring->reg_idx;
4841		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4842		if (hw->mac.type == e1000_82575) {
4843			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4844			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4845		} else {
4846			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4847			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4848			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4849		}
4850		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4851		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4852		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4853		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4854	}
4855	q_vector->cpu = cpu;
4856out_no_update:
4857	put_cpu();
4858}
4859
4860static void igb_setup_dca(struct igb_adapter *adapter)
4861{
4862	struct e1000_hw *hw = &adapter->hw;
4863	int i;
4864
4865	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4866		return;
4867
4868	/* Always use CB2 mode, difference is masked in the CB driver. */
4869	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4870
4871	for (i = 0; i < adapter->num_q_vectors; i++) {
4872		adapter->q_vector[i]->cpu = -1;
4873		igb_update_dca(adapter->q_vector[i]);
4874	}
4875}
4876
4877static int __igb_notify_dca(struct device *dev, void *data)
4878{
4879	struct net_device *netdev = dev_get_drvdata(dev);
4880	struct igb_adapter *adapter = netdev_priv(netdev);
4881	struct pci_dev *pdev = adapter->pdev;
4882	struct e1000_hw *hw = &adapter->hw;
4883	unsigned long event = *(unsigned long *)data;
4884
4885	switch (event) {
4886	case DCA_PROVIDER_ADD:
4887		/* if already enabled, don't do it again */
4888		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4889			break;
4890		if (dca_add_requester(dev) == 0) {
4891			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4892			dev_info(&pdev->dev, "DCA enabled\n");
4893			igb_setup_dca(adapter);
4894			break;
4895		}
4896		/* Fall Through since DCA is disabled. */
4897	case DCA_PROVIDER_REMOVE:
4898		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4899			/* without this a class_device is left
4900			 * hanging around in the sysfs model */
4901			dca_remove_requester(dev);
4902			dev_info(&pdev->dev, "DCA disabled\n");
4903			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4904			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4905		}
4906		break;
4907	}
4908
4909	return 0;
4910}
4911
4912static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4913                          void *p)
4914{
4915	int ret_val;
4916
4917	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4918	                                 __igb_notify_dca);
4919
4920	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4921}
4922#endif /* CONFIG_IGB_DCA */
4923
4924static void igb_ping_all_vfs(struct igb_adapter *adapter)
4925{
4926	struct e1000_hw *hw = &adapter->hw;
4927	u32 ping;
4928	int i;
4929
4930	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4931		ping = E1000_PF_CONTROL_MSG;
4932		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4933			ping |= E1000_VT_MSGTYPE_CTS;
4934		igb_write_mbx(hw, &ping, 1, i);
4935	}
4936}
4937
4938static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4939{
4940	struct e1000_hw *hw = &adapter->hw;
4941	u32 vmolr = rd32(E1000_VMOLR(vf));
4942	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4943
4944	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4945	                    IGB_VF_FLAG_MULTI_PROMISC);
4946	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4947
4948	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4949		vmolr |= E1000_VMOLR_MPME;
4950		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4951		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4952	} else {
4953		/*
4954		 * if we have hashes and we are clearing a multicast promisc
4955		 * flag we need to write the hashes to the MTA as this step
4956		 * was previously skipped
4957		 */
4958		if (vf_data->num_vf_mc_hashes > 30) {
4959			vmolr |= E1000_VMOLR_MPME;
4960		} else if (vf_data->num_vf_mc_hashes) {
4961			int j;
4962			vmolr |= E1000_VMOLR_ROMPE;
4963			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4964				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4965		}
4966	}
4967
4968	wr32(E1000_VMOLR(vf), vmolr);
4969
4970	/* there are flags left unprocessed, likely not supported */
4971	if (*msgbuf & E1000_VT_MSGINFO_MASK)
4972		return -EINVAL;
4973
4974	return 0;
4975
4976}
4977
4978static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4979				  u32 *msgbuf, u32 vf)
4980{
4981	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4982	u16 *hash_list = (u16 *)&msgbuf[1];
4983	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4984	int i;
4985
4986	/* salt away the number of multicast addresses assigned
4987	 * to this VF for later use to restore when the PF multi cast
4988	 * list changes
4989	 */
4990	vf_data->num_vf_mc_hashes = n;
4991
4992	/* only up to 30 hash values supported */
4993	if (n > 30)
4994		n = 30;
4995
4996	/* store the hashes for later use */
4997	for (i = 0; i < n; i++)
4998		vf_data->vf_mc_hashes[i] = hash_list[i];
4999
5000	/* Flush and reset the mta with the new values */
5001	igb_set_rx_mode(adapter->netdev);
5002
5003	return 0;
5004}
5005
5006static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5007{
5008	struct e1000_hw *hw = &adapter->hw;
5009	struct vf_data_storage *vf_data;
5010	int i, j;
5011
5012	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5013		u32 vmolr = rd32(E1000_VMOLR(i));
5014		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5015
5016		vf_data = &adapter->vf_data[i];
5017
5018		if ((vf_data->num_vf_mc_hashes > 30) ||
5019		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5020			vmolr |= E1000_VMOLR_MPME;
5021		} else if (vf_data->num_vf_mc_hashes) {
5022			vmolr |= E1000_VMOLR_ROMPE;
5023			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5024				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5025		}
5026		wr32(E1000_VMOLR(i), vmolr);
5027	}
5028}
5029
5030static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5031{
5032	struct e1000_hw *hw = &adapter->hw;
5033	u32 pool_mask, reg, vid;
5034	int i;
5035
5036	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5037
5038	/* Find the vlan filter for this id */
5039	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5040		reg = rd32(E1000_VLVF(i));
5041
5042		/* remove the vf from the pool */
5043		reg &= ~pool_mask;
5044
5045		/* if pool is empty then remove entry from vfta */
5046		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5047		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5048			reg = 0;
5049			vid = reg & E1000_VLVF_VLANID_MASK;
5050			igb_vfta_set(hw, vid, false);
5051		}
5052
5053		wr32(E1000_VLVF(i), reg);
5054	}
5055
5056	adapter->vf_data[vf].vlans_enabled = 0;
5057}
5058
/**
 * igb_vlvf_set - add a pool to, or remove it from, a VLVF VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id to look up
 * @add: true to add the pool to the filter, false to remove it
 * @vf: pool index; values >= vfs_allocated_count are treated as PF pools
 *      and do not touch VMOLR or the per-VF VLAN count
 *
 * Returns -1 when the MAC is older than 82576 or no VFs are allocated.
 * Returns 0 otherwise, including when an add finds no matching or free
 * entry and when a remove finds no matching entry.
 **/
static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg, i;

	/* The vlvf table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return -1;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return -1;

	/*
	 * Find the vlan filter for this id.  On a match the loop breaks with
	 * i < E1000_VLVF_ARRAY_SIZE and reg holding that entry's contents.
	 */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));
		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (reg & E1000_VLVF_VLANID_MASK))
			break;
	}

	if (add) {
		if (i == E1000_VLVF_ARRAY_SIZE) {
			/* Did not find a matching VLAN ID entry that was
			 * enabled.  Search for a free filter entry, i.e.
			 * one without the enable bit set
			 */
			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
				reg = rd32(E1000_VLVF(i));
				if (!(reg & E1000_VLVF_VLANID_ENABLE))
					break;
			}
		}
		/* if no entry is usable, nothing is changed and 0 is returned */
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* Found an enabled/available entry */
			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

			/* if !enabled we need to set this up in vfta */
			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
				/* add VID to filter table */
				igb_vfta_set(hw, vid, true);
				reg |= E1000_VLVF_VLANID_ENABLE;
			}
			/* replace the VLAN id field of the entry with vid */
			reg &= ~E1000_VLVF_VLANID_MASK;
			reg |= vid;
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/*
			 * first VLAN for this VF: grow the RLPML field of
			 * VMOLR by 4
			 */
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size += 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}

			adapter->vf_data[vf].vlans_enabled++;
		}
	} else {
		/* removal only acts when the first search found the entry */
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* remove vf from the pool */
			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
			/* if pool is empty then remove entry from vfta */
			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
				reg = 0;
				igb_vfta_set(hw, vid, false);
			}
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/*
			 * last VLAN for this VF removed: shrink the RLPML
			 * field of VMOLR by 4 again
			 */
			adapter->vf_data[vf].vlans_enabled--;
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size -= 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}
		}
	}
	return 0;
}
5151
5152static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5153{
5154	struct e1000_hw *hw = &adapter->hw;
5155
5156	if (vid)
5157		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5158	else
5159		wr32(E1000_VMVIR(vf), 0);
5160}
5161
5162static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5163			       int vf, u16 vlan, u8 qos)
5164{
5165	int err = 0;
5166	struct igb_adapter *adapter = netdev_priv(netdev);
5167
5168	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5169		return -EINVAL;
5170	if (vlan || qos) {
5171		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5172		if (err)
5173			goto out;
5174		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5175		igb_set_vmolr(adapter, vf, !vlan);
5176		adapter->vf_data[vf].pf_vlan = vlan;
5177		adapter->vf_data[vf].pf_qos = qos;
5178		dev_info(&adapter->pdev->dev,
5179			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5180		if (test_bit(__IGB_DOWN, &adapter->state)) {
5181			dev_warn(&adapter->pdev->dev,
5182				 "The VF VLAN has been set,"
5183				 " but the PF device is not up.\n");
5184			dev_warn(&adapter->pdev->dev,
5185				 "Bring the PF device up before"
5186				 " attempting to use the VF device.\n");
5187		}
5188	} else {
5189		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5190				   false, vf);
5191		igb_set_vmvir(adapter, vlan, vf);
5192		igb_set_vmolr(adapter, vf, true);
5193		adapter->vf_data[vf].pf_vlan = 0;
5194		adapter->vf_data[vf].pf_qos = 0;
5195       }
5196out:
5197       return err;
5198}
5199
5200static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5201{
5202	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5203	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5204
5205	return igb_vlvf_set(adapter, vid, add, vf);
5206}
5207
5208static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5209{
5210	/* clear flags - except flag that indicates PF has set the MAC */
5211	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5212	adapter->vf_data[vf].last_nack = jiffies;
5213
5214	/* reset offloads to defaults */
5215	igb_set_vmolr(adapter, vf, true);
5216
5217	/* reset vlans for device */
5218	igb_clear_vf_vfta(adapter, vf);
5219	if (adapter->vf_data[vf].pf_vlan)
5220		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5221				    adapter->vf_data[vf].pf_vlan,
5222				    adapter->vf_data[vf].pf_qos);
5223	else
5224		igb_clear_vf_vfta(adapter, vf);
5225
5226	/* reset multicast table array for vf */
5227	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5228
5229	/* Flush and reset the mta with the new values */
5230	igb_set_rx_mode(adapter->netdev);
5231}
5232
5233static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5234{
5235	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5236
5237	/* generate a new mac address as we were hotplug removed/added */
5238	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5239		random_ether_addr(vf_mac);
5240
5241	/* process remaining reset events */
5242	igb_vf_reset(adapter, vf);
5243}
5244
5245static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5246{
5247	struct e1000_hw *hw = &adapter->hw;
5248	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5249	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5250	u32 reg, msgbuf[3];
5251	u8 *addr = (u8 *)(&msgbuf[1]);
5252
5253	/* process all the same items cleared in a function level reset */
5254	igb_vf_reset(adapter, vf);
5255
5256	/* set vf mac address */
5257	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5258
5259	/* enable transmit and receive for vf */
5260	reg = rd32(E1000_VFTE);
5261	wr32(E1000_VFTE, reg | (1 << vf));
5262	reg = rd32(E1000_VFRE);
5263	wr32(E1000_VFRE, reg | (1 << vf));
5264
5265	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5266
5267	/* reply to reset with ack and vf mac address */
5268	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5269	memcpy(addr, vf_mac, 6);
5270	igb_write_mbx(hw, msgbuf, 3, vf);
5271}
5272
5273static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5274{
5275	/*
5276	 * The VF MAC Address is stored in a packed array of bytes
5277	 * starting at the second 32 bit word of the msg array
5278	 */
5279	unsigned char *addr = (char *)&msg[1];
5280	int err = -1;
5281
5282	if (is_valid_ether_addr(addr))
5283		err = igb_set_vf_mac(adapter, vf, addr);
5284
5285	return err;
5286}
5287
5288static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5289{
5290	struct e1000_hw *hw = &adapter->hw;
5291	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5292	u32 msg = E1000_VT_MSGTYPE_NACK;
5293
5294	/* if device isn't clear to send it shouldn't be reading either */
5295	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5296	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5297		igb_write_mbx(hw, &msg, 1, vf);
5298		vf_data->last_nack = jiffies;
5299	}
5300}
5301
5302static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5303{
5304	struct pci_dev *pdev = adapter->pdev;
5305	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5306	struct e1000_hw *hw = &adapter->hw;
5307	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5308	s32 retval;
5309
5310	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5311
5312	if (retval) {
5313		/* if receive failed revoke VF CTS stats and restart init */
5314		dev_err(&pdev->dev, "Error receiving message from VF\n");
5315		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5316		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5317			return;
5318		goto out;
5319	}
5320
5321	/* this is a message we already processed, do nothing */
5322	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5323		return;
5324
5325	/*
5326	 * until the vf completes a reset it should not be
5327	 * allowed to start any configuration.
5328	 */
5329
5330	if (msgbuf[0] == E1000_VF_RESET) {
5331		igb_vf_reset_msg(adapter, vf);
5332		return;
5333	}
5334
5335	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5336		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5337			return;
5338		retval = -1;
5339		goto out;
5340	}
5341
5342	switch ((msgbuf[0] & 0xFFFF)) {
5343	case E1000_VF_SET_MAC_ADDR:
5344		retval = -EINVAL;
5345		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5346			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5347		else
5348			dev_warn(&pdev->dev,
5349				 "VF %d attempted to override administratively "
5350				 "set MAC address\nReload the VF driver to "
5351				 "resume operations\n", vf);
5352		break;
5353	case E1000_VF_SET_PROMISC:
5354		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5355		break;
5356	case E1000_VF_SET_MULTICAST:
5357		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5358		break;
5359	case E1000_VF_SET_LPE:
5360		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5361		break;
5362	case E1000_VF_SET_VLAN:
5363		retval = -1;
5364		if (vf_data->pf_vlan)
5365			dev_warn(&pdev->dev,
5366				 "VF %d attempted to override administratively "
5367				 "set VLAN tag\nReload the VF driver to "
5368				 "resume operations\n", vf);
5369		else
5370			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5371		break;
5372	default:
5373		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5374		retval = -1;
5375		break;
5376	}
5377
5378	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5379out:
5380	/* notify the VF of the results of what it sent us */
5381	if (retval)
5382		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5383	else
5384		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5385
5386	igb_write_mbx(hw, msgbuf, 1, vf);
5387}
5388
5389static void igb_msg_task(struct igb_adapter *adapter)
5390{
5391	struct e1000_hw *hw = &adapter->hw;
5392	u32 vf;
5393
5394	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5395		/* process any reset requests */
5396		if (!igb_check_for_rst(hw, vf))
5397			igb_vf_reset_event(adapter, vf);
5398
5399		/* process any messages pending */
5400		if (!igb_check_for_msg(hw, vf))
5401			igb_rcv_msg_from_vf(adapter, vf);
5402
5403		/* process any acks */
5404		if (!igb_check_for_ack(hw, vf))
5405			igb_rcv_ack_from_vf(adapter, vf);
5406	}
5407}
5408
5409/**
5410 *  igb_set_uta - Set unicast filter table address
5411 *  @adapter: board private structure
5412 *
5413 *  The unicast table address is a register array of 32-bit registers.
5414 *  The table is meant to be used in a way similar to how the MTA is used
5415 *  however due to certain limitations in the hardware it is necessary to
5416 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5417 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5418 **/
5419static void igb_set_uta(struct igb_adapter *adapter)
5420{
5421	struct e1000_hw *hw = &adapter->hw;
5422	int i;
5423
5424	/* The UTA table only exists on 82576 hardware and newer */
5425	if (hw->mac.type < e1000_82576)
5426		return;
5427
5428	/* we only need to do this if VMDq is enabled */
5429	if (!adapter->vfs_allocated_count)
5430		return;
5431
5432	for (i = 0; i < hw->mac.uta_reg_count; i++)
5433		array_wr32(E1000_UTA, i, ~0);
5434}
5435
5436/**
5437 * igb_intr_msi - Interrupt Handler
5438 * @irq: interrupt number
5439 * @data: pointer to a network interface device structure
5440 **/
5441static irqreturn_t igb_intr_msi(int irq, void *data)
5442{
5443	struct igb_adapter *adapter = data;
5444	struct igb_q_vector *q_vector = adapter->q_vector[0];
5445	struct e1000_hw *hw = &adapter->hw;
5446	/* read ICR disables interrupts using IAM */
5447	u32 icr = rd32(E1000_ICR);
5448
5449	igb_write_itr(q_vector);
5450
5451	if (icr & E1000_ICR_DRSTA)
5452		schedule_work(&adapter->reset_task);
5453
5454	if (icr & E1000_ICR_DOUTSYNC) {
5455		/* HW is reporting DMA is out of sync */
5456		adapter->stats.doosync++;
5457	}
5458
5459	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5460		hw->mac.get_link_status = 1;
5461		if (!test_bit(__IGB_DOWN, &adapter->state))
5462			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5463	}
5464
5465	napi_schedule(&q_vector->napi);
5466
5467	return IRQ_HANDLED;
5468}
5469
5470/**
5471 * igb_intr - Legacy Interrupt Handler
5472 * @irq: interrupt number
5473 * @data: pointer to a network interface device structure
5474 **/
5475static irqreturn_t igb_intr(int irq, void *data)
5476{
5477	struct igb_adapter *adapter = data;
5478	struct igb_q_vector *q_vector = adapter->q_vector[0];
5479	struct e1000_hw *hw = &adapter->hw;
5480	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5481	 * need for the IMC write */
5482	u32 icr = rd32(E1000_ICR);
5483
5484	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5485	 * not set, then the adapter didn't send an interrupt */
5486	if (!(icr & E1000_ICR_INT_ASSERTED))
5487		return IRQ_NONE;
5488
5489	igb_write_itr(q_vector);
5490
5491	if (icr & E1000_ICR_DRSTA)
5492		schedule_work(&adapter->reset_task);
5493
5494	if (icr & E1000_ICR_DOUTSYNC) {
5495		/* HW is reporting DMA is out of sync */
5496		adapter->stats.doosync++;
5497	}
5498
5499	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5500		hw->mac.get_link_status = 1;
5501		/* guard against interrupt when we're going down */
5502		if (!test_bit(__IGB_DOWN, &adapter->state))
5503			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5504	}
5505
5506	napi_schedule(&q_vector->napi);
5507
5508	return IRQ_HANDLED;
5509}
5510
5511void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5512{
5513	struct igb_adapter *adapter = q_vector->adapter;
5514	struct e1000_hw *hw = &adapter->hw;
5515
5516	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5517	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5518		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5519			igb_set_itr(q_vector);
5520		else
5521			igb_update_ring_itr(q_vector);
5522	}
5523
5524	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5525		if (adapter->msix_entries)
5526			wr32(E1000_EIMS, q_vector->eims_value);
5527		else
5528			igb_irq_enable(adapter);
5529	}
5530}
5531
5532/**
5533 * igb_poll - NAPI Rx polling callback
5534 * @napi: napi polling structure
5535 * @budget: count of how many packets we should handle
5536 **/
5537static int igb_poll(struct napi_struct *napi, int budget)
5538{
5539	struct igb_q_vector *q_vector = container_of(napi,
5540	                                             struct igb_q_vector,
5541	                                             napi);
5542	bool clean_complete = true;
5543
5544#ifdef CONFIG_IGB_DCA
5545	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5546		igb_update_dca(q_vector);
5547#endif
5548	if (q_vector->tx.ring)
5549		clean_complete = igb_clean_tx_irq(q_vector);
5550
5551	if (q_vector->rx.ring)
5552		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5553
5554	/* If all work not completed, return budget and keep polling */
5555	if (!clean_complete)
5556		return budget;
5557
5558	/* If not enough Rx work done, exit the polling mode */
5559	napi_complete(napi);
5560	igb_ring_irq_enable(q_vector);
5561
5562	return 0;
5563}
5564
5565/**
5566 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5567 * @adapter: board private structure
5568 * @shhwtstamps: timestamp structure to update
5569 * @regval: unsigned 64bit system time value.
5570 *
5571 * We need to convert the system time value stored in the RX/TXSTMP registers
5572 * into a hwtstamp which can be used by the upper level timestamping functions
5573 */
5574static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5575                                   struct skb_shared_hwtstamps *shhwtstamps,
5576                                   u64 regval)
5577{
5578	u64 ns;
5579
5580	/*
5581	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5582	 * 24 to match clock shift we setup earlier.
5583	 */
5584	if (adapter->hw.mac.type == e1000_82580)
5585		regval <<= IGB_82580_TSYNC_SHIFT;
5586
5587	ns = timecounter_cyc2time(&adapter->clock, regval);
5588	timecompare_update(&adapter->compare, ns);
5589	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5590	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5591	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5592}
5593
5594/**
5595 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5596 * @q_vector: pointer to q_vector containing needed info
5597 * @buffer: pointer to igb_tx_buffer structure
5598 *
5599 * If we were asked to do hardware stamping and such a time stamp is
5600 * available, then it must have been for this skb here because we only
5601 * allow only one such packet into the queue.
5602 */
5603static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5604			    struct igb_tx_buffer *buffer_info)
5605{
5606	struct igb_adapter *adapter = q_vector->adapter;
5607	struct e1000_hw *hw = &adapter->hw;
5608	struct skb_shared_hwtstamps shhwtstamps;
5609	u64 regval;
5610
5611	/* if skb does not support hw timestamp or TX stamp not valid exit */
5612	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5613	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5614		return;
5615
5616	regval = rd32(E1000_TXSTMPL);
5617	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5618
5619	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5620	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5621}
5622
/**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: pointer to q_vector containing needed info
 *
 * Walks the Tx ring from next_to_clean, one packet per iteration, for at
 * most q_vector->tx.work_limit packets.  For every completed packet the
 * skb is freed and all of its buffers are DMA-unmapped.  Afterwards the
 * ring and vector statistics are updated, an optional Tx hang check is
 * run, and a stopped subqueue is woken once enough descriptors are free.
 *
 * Returns true if the adapter is down, if a Tx hang was detected, or if
 * the work limit was not exhausted; false if the work limit ran out.
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct igb_ring *tx_ring = q_vector->tx.ring;
	struct igb_tx_buffer *tx_buffer;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__IGB_DOWN, &adapter->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer_info[i];
	tx_desc = IGB_TX_DESC(tx_ring, i);
	/*
	 * bias the index by -count so that it reaches zero exactly when the
	 * walk wraps past the end of the ring (see the !i tests below)
	 */
	i -= tx_ring->count;

	for (; budget; budget--) {
		eop_desc = tx_buffer->next_to_watch;

		/* prevent any other reads prior to eop_desc */
		rmb();

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* retrieve hardware timestamp */
		igb_tx_hwtstamp(q_vector, tx_buffer);

		/* free the skb */
		dev_kfree_skb_any(tx_buffer->skb);
		tx_buffer->skb = NULL;

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 tx_buffer->dma,
				 tx_buffer->length,
				 DMA_TO_DEVICE);

		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer->dma = 0;

			tx_buffer++;
			tx_desc++;
			i++;
			/* wrapped: restart at the first ring entry */
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (tx_buffer->dma) {
				dma_unmap_page(tx_ring->dev,
					       tx_buffer->dma,
					       tx_buffer->length,
					       DMA_TO_DEVICE);
			}
		}

		/* clear last DMA location */
		tx_buffer->dma = 0;

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}
	}

	/* undo the -count bias to get back a real ring index */
	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
		struct e1000_hw *hw = &adapter->hw;

		/* tx_buffer now refers to the entry at the new next_to_clean */
		eop_desc = tx_buffer->next_to_watch;

		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
		if (eop_desc &&
		    time_after(jiffies, tx_buffer->time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				"  Tx Queue             <%d>\n"
				"  TDH                  <%x>\n"
				"  TDT                  <%x>\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
				"  next_to_watch        <%p>\n"
				"  jiffies              <%lx>\n"
				"  desc.status          <%x>\n",
				tx_ring->queue_index,
				rd32(E1000_TDH(tx_ring->reg_idx)),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_buffer->time_stamp,
				eop_desc,
				jiffies,
				eop_desc->wb.status);
			netif_stop_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			/* we are about to reset, no point in enabling stuff */
			return true;
		}
	}

	/*
	 * wake the subqueue only if packets were reclaimed, the carrier is
	 * up and at least IGB_TX_QUEUE_WAKE descriptors are unused
	 */
	if (unlikely(total_packets &&
		     netif_carrier_ok(tx_ring->netdev) &&
		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);
		}
	}

	/* true when the loop stopped before the work limit was used up */
	return !!budget;
}
5788
5789static inline void igb_rx_checksum(struct igb_ring *ring,
5790				   union e1000_adv_rx_desc *rx_desc,
5791				   struct sk_buff *skb)
5792{
5793	skb_checksum_none_assert(skb);
5794
5795	/* Ignore Checksum bit is set */
5796	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5797		return;
5798
5799	/* Rx checksum disabled via ethtool */
5800	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5801		return;
5802
5803	/* TCP/UDP checksum error bit is set */
5804	if (igb_test_staterr(rx_desc,
5805			     E1000_RXDEXT_STATERR_TCPE |
5806			     E1000_RXDEXT_STATERR_IPE)) {
5807		/*
5808		 * work around errata with sctp packets where the TCPE aka
5809		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5810		 * packets, (aka let the stack check the crc32c)
5811		 */
5812		if (!((skb->len == 60) &&
5813		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5814			u64_stats_update_begin(&ring->rx_syncp);
5815			ring->rx_stats.csum_err++;
5816			u64_stats_update_end(&ring->rx_syncp);
5817		}
5818		/* let the stack verify checksum errors */
5819		return;
5820	}
5821	/* It must be a TCP or UDP packet with a valid checksum */
5822	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5823				      E1000_RXD_STAT_UDPCS))
5824		skb->ip_summed = CHECKSUM_UNNECESSARY;
5825
5826	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5827		le32_to_cpu(rx_desc->wb.upper.status_error));
5828}
5829
5830static inline void igb_rx_hash(struct igb_ring *ring,
5831			       union e1000_adv_rx_desc *rx_desc,
5832			       struct sk_buff *skb)
5833{
5834	if (ring->netdev->features & NETIF_F_RXHASH)
5835		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5836}
5837
5838static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5839			    union e1000_adv_rx_desc *rx_desc,
5840			    struct sk_buff *skb)
5841{
5842	struct igb_adapter *adapter = q_vector->adapter;
5843	struct e1000_hw *hw = &adapter->hw;
5844	u64 regval;
5845
5846	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5847				       E1000_RXDADV_STAT_TS))
5848		return;
5849
5850	/*
5851	 * If this bit is set, then the RX registers contain the time stamp. No
5852	 * other packet will be time stamped until we read these registers, so
5853	 * read the registers to make them available again. Because only one
5854	 * packet can be time stamped at a time, we know that the register
5855	 * values must belong to this one here and therefore we don't need to
5856	 * compare any of the additional attributes stored for it.
5857	 *
5858	 * If nothing went wrong, then it should have a shared tx_flags that we
5859	 * can turn into a skb_shared_hwtstamps.
5860	 */
5861	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5862		u32 *stamp = (u32 *)skb->data;
5863		regval = le32_to_cpu(*(stamp + 2));
5864		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5865		skb_pull(skb, IGB_TS_HDR_LEN);
5866	} else {
5867		if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5868			return;
5869
5870		regval = rd32(E1000_RXSTMPL);
5871		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5872	}
5873
5874	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5875}
5876static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5877{
5878	/* HW will not DMA in data larger than the given buffer, even if it
5879	 * parses the (NFS, of course) header to be larger.  In that case, it
5880	 * fills the header buffer and spills the rest into the page.
5881	 */
5882	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5883	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5884	if (hlen > IGB_RX_HDR_LEN)
5885		hlen = IGB_RX_HDR_LEN;
5886	return hlen;
5887}
5888
/**
 * igb_clean_rx_irq - process completed Rx descriptors and pass packets up
 * @q_vector: pointer to q_vector containing needed info
 * @budget: maximum number of packets to hand to the stack
 *
 * Walks the Rx ring from next_to_clean while descriptors have the DD bit
 * set.  Header and page buffers are attached to the skb and unmapped;
 * packets spanning several descriptors are carried forward until EOP.
 * Completed packets get timestamp, hash, checksum and VLAN information and
 * are passed to napi_gro_receive().  Buffers are given back to the
 * hardware in batches of IGB_RX_BUFFER_WRITE and once more at the end.
 *
 * Returns true if the budget was not exhausted, false otherwise.
 **/
static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
{
	struct igb_ring *rx_ring = q_vector->rx.ring;
	union e1000_adv_rx_desc *rx_desc;
	const int current_node = numa_node_id();
	unsigned int total_bytes = 0, total_packets = 0;
	/* start the refill count at the number of already unused descriptors */
	u16 cleaned_count = igb_desc_unused(rx_ring);
	u16 i = rx_ring->next_to_clean;

	rx_desc = IGB_RX_DESC(rx_ring, i);

	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
		struct sk_buff *skb = buffer_info->skb;
		union e1000_adv_rx_desc *next_rxd;

		buffer_info->skb = NULL;
		prefetch(skb->data);

		/* advance the index, wrapping at the end of the ring */
		i++;
		if (i == rx_ring->count)
			i = 0;

		next_rxd = IGB_RX_DESC(rx_ring, i);
		prefetch(next_rxd);

		/*
		 * This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * RXD_STAT_DD bit is set
		 */
		rmb();

		/*
		 * an skb without page fragments has not had its header
		 * buffer accounted yet: set its length and unmap the buffer
		 */
		if (!skb_is_nonlinear(skb)) {
			__skb_put(skb, igb_get_hlen(rx_desc));
			dma_unmap_single(rx_ring->dev, buffer_info->dma,
					 IGB_RX_HDR_LEN,
					 DMA_FROM_DEVICE);
			buffer_info->dma = 0;
		}

		/* a non-zero length means data was placed in the page buffer */
		if (rx_desc->wb.upper.length) {
			u16 length = le16_to_cpu(rx_desc->wb.upper.length);

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
						buffer_info->page,
						buffer_info->page_offset,
						length);

			skb->len += length;
			skb->data_len += length;
			skb->truesize += length;

			/*
			 * keep the page for this buffer only if no one else
			 * holds a reference and it is on the current node;
			 * otherwise give it up entirely to the skb
			 */
			if ((page_count(buffer_info->page) != 1) ||
			    (page_to_nid(buffer_info->page) != current_node))
				buffer_info->page = NULL;
			else
				get_page(buffer_info->page);

			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;
		}

		/*
		 * not the last descriptor of the packet: park the partial skb
		 * in the next buffer slot and move that slot's own skb and
		 * header mapping into the current slot
		 */
		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
			struct igb_rx_buffer *next_buffer;
			next_buffer = &rx_ring->rx_buffer_info[i];
			buffer_info->skb = next_buffer->skb;
			buffer_info->dma = next_buffer->dma;
			next_buffer->skb = skb;
			next_buffer->dma = 0;
			goto next_desc;
		}

		/* drop frames the hardware flagged with a frame error */
		if (igb_test_staterr(rx_desc,
				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
			dev_kfree_skb_any(skb);
			goto next_desc;
		}

		igb_rx_hwtstamp(q_vector, rx_desc, skb);
		igb_rx_hash(rx_ring, rx_desc, skb);
		igb_rx_checksum(rx_ring, rx_desc, skb);

		if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);

			__vlan_hwaccel_put_tag(skb, vid);
		}

		total_bytes += skb->len;
		total_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		napi_gro_receive(&q_vector->napi, skb);

		/* only packets handed to the stack consume budget */
		budget--;
next_desc:
		if (!budget)
			break;

		cleaned_count++;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* use prefetched values */
		rx_desc = next_rxd;
	}

	rx_ring->next_to_clean = i;
	u64_stats_update_begin(&rx_ring->rx_syncp);
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->rx_syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

	/* hand any remaining cleaned descriptors back to the hardware */
	if (cleaned_count)
		igb_alloc_rx_buffers(rx_ring, cleaned_count);

	/* true when the loop ended before the budget was used up */
	return !!budget;
}
6015
6016static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6017				 struct igb_rx_buffer *bi)
6018{
6019	struct sk_buff *skb = bi->skb;
6020	dma_addr_t dma = bi->dma;
6021
6022	if (dma)
6023		return true;
6024
6025	if (likely(!skb)) {
6026		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6027						IGB_RX_HDR_LEN);
6028		bi->skb = skb;
6029		if (!skb) {
6030			rx_ring->rx_stats.alloc_failed++;
6031			return false;
6032		}
6033
6034		/* initialize skb for ring */
6035		skb_record_rx_queue(skb, rx_ring->queue_index);
6036	}
6037
6038	dma = dma_map_single(rx_ring->dev, skb->data,
6039			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6040
6041	if (dma_mapping_error(rx_ring->dev, dma)) {
6042		rx_ring->rx_stats.alloc_failed++;
6043		return false;
6044	}
6045
6046	bi->dma = dma;
6047	return true;
6048}
6049
6050static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6051				  struct igb_rx_buffer *bi)
6052{
6053	struct page *page = bi->page;
6054	dma_addr_t page_dma = bi->page_dma;
6055	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6056
6057	if (page_dma)
6058		return true;
6059
6060	if (!page) {
6061		page = netdev_alloc_page(rx_ring->netdev);
6062		bi->page = page;
6063		if (unlikely(!page)) {
6064			rx_ring->rx_stats.alloc_failed++;
6065			return false;
6066		}
6067	}
6068
6069	page_dma = dma_map_page(rx_ring->dev, page,
6070				page_offset, PAGE_SIZE / 2,
6071				DMA_FROM_DEVICE);
6072
6073	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6074		rx_ring->rx_stats.alloc_failed++;
6075		return false;
6076	}
6077
6078	bi->page_dma = page_dma;
6079	bi->page_offset = page_offset;
6080	return true;
6081}
6082
6083/**
6084 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6085 * @adapter: address of board private structure
6086 **/
6087void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6088{
6089	union e1000_adv_rx_desc *rx_desc;
6090	struct igb_rx_buffer *bi;
6091	u16 i = rx_ring->next_to_use;
6092
6093	rx_desc = IGB_RX_DESC(rx_ring, i);
6094	bi = &rx_ring->rx_buffer_info[i];
6095	i -= rx_ring->count;
6096
6097	while (cleaned_count--) {
6098		if (!igb_alloc_mapped_skb(rx_ring, bi))
6099			break;
6100
6101		/* Refresh the desc even if buffer_addrs didn't change
6102		 * because each write-back erases this info. */
6103		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6104
6105		if (!igb_alloc_mapped_page(rx_ring, bi))
6106			break;
6107
6108		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6109
6110		rx_desc++;
6111		bi++;
6112		i++;
6113		if (unlikely(!i)) {
6114			rx_desc = IGB_RX_DESC(rx_ring, 0);
6115			bi = rx_ring->rx_buffer_info;
6116			i -= rx_ring->count;
6117		}
6118
6119		/* clear the hdr_addr for the next_to_use descriptor */
6120		rx_desc->read.hdr_addr = 0;
6121	}
6122
6123	i += rx_ring->count;
6124
6125	if (rx_ring->next_to_use != i) {
6126		rx_ring->next_to_use = i;
6127
6128		/* Force memory writes to complete before letting h/w
6129		 * know there are new descriptors to fetch.  (Only
6130		 * applicable for weak-ordered memory model archs,
6131		 * such as IA-64). */
6132		wmb();
6133		writel(i, rx_ring->tail);
6134	}
6135}
6136
6137/**
6138 * igb_mii_ioctl -
6139 * @netdev:
6140 * @ifreq:
6141 * @cmd:
6142 **/
6143static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6144{
6145	struct igb_adapter *adapter = netdev_priv(netdev);
6146	struct mii_ioctl_data *data = if_mii(ifr);
6147
6148	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6149		return -EOPNOTSUPP;
6150
6151	switch (cmd) {
6152	case SIOCGMIIPHY:
6153		data->phy_id = adapter->hw.phy.addr;
6154		break;
6155	case SIOCGMIIREG:
6156		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6157		                     &data->val_out))
6158			return -EIO;
6159		break;
6160	case SIOCSMIIREG:
6161	default:
6162		return -EOPNOTSUPP;
6163	}
6164	return 0;
6165}
6166
6167/**
6168 * igb_hwtstamp_ioctl - control hardware time stamping
6169 * @netdev:
6170 * @ifreq:
6171 * @cmd:
6172 *
6173 * Outgoing time stamping can be enabled and disabled. Play nice and
6174 * disable it when requested, although it shouldn't case any overhead
6175 * when no packet needs it. At most one packet in the queue may be
6176 * marked for time stamping, otherwise it would be impossible to tell
6177 * for sure to which packet the hardware time stamp belongs.
6178 *
6179 * Incoming time stamping has to be configured via the hardware
6180 * filters. Not all combinations are supported, in particular event
6181 * type has to be specified. Matching the kind of event packet is
6182 * not supported, with the exception of "all V2 events regardless of
6183 * level 2 or 4".
6184 *
6185 **/
6186static int igb_hwtstamp_ioctl(struct net_device *netdev,
6187			      struct ifreq *ifr, int cmd)
6188{
6189	struct igb_adapter *adapter = netdev_priv(netdev);
6190	struct e1000_hw *hw = &adapter->hw;
6191	struct hwtstamp_config config;
6192	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6193	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6194	u32 tsync_rx_cfg = 0;
6195	bool is_l4 = false;
6196	bool is_l2 = false;
6197	u32 regval;
6198
6199	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6200		return -EFAULT;
6201
6202	/* reserved for future extensions */
6203	if (config.flags)
6204		return -EINVAL;
6205
6206	switch (config.tx_type) {
6207	case HWTSTAMP_TX_OFF:
6208		tsync_tx_ctl = 0;
6209	case HWTSTAMP_TX_ON:
6210		break;
6211	default:
6212		return -ERANGE;
6213	}
6214
6215	switch (config.rx_filter) {
6216	case HWTSTAMP_FILTER_NONE:
6217		tsync_rx_ctl = 0;
6218		break;
6219	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6220	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6221	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6222	case HWTSTAMP_FILTER_ALL:
6223		/*
6224		 * register TSYNCRXCFG must be set, therefore it is not
6225		 * possible to time stamp both Sync and Delay_Req messages
6226		 * => fall back to time stamping all packets
6227		 */
6228		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6229		config.rx_filter = HWTSTAMP_FILTER_ALL;
6230		break;
6231	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6232		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6233		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6234		is_l4 = true;
6235		break;
6236	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6237		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6238		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6239		is_l4 = true;
6240		break;
6241	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6242	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6243		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6244		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6245		is_l2 = true;
6246		is_l4 = true;
6247		config.rx_filter = HWTSTAMP_FILTER_SOME;
6248		break;
6249	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6250	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6251		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6252		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6253		is_l2 = true;
6254		is_l4 = true;
6255		config.rx_filter = HWTSTAMP_FILTER_SOME;
6256		break;
6257	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6258	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6259	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6260		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6261		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6262		is_l2 = true;
6263		break;
6264	default:
6265		return -ERANGE;
6266	}
6267
6268	if (hw->mac.type == e1000_82575) {
6269		if (tsync_rx_ctl | tsync_tx_ctl)
6270			return -EINVAL;
6271		return 0;
6272	}
6273
6274	/*
6275	 * Per-packet timestamping only works if all packets are
6276	 * timestamped, so enable timestamping in all packets as
6277	 * long as one rx filter was configured.
6278	 */
6279	if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6280		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6281		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6282	}
6283
6284	/* enable/disable TX */
6285	regval = rd32(E1000_TSYNCTXCTL);
6286	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6287	regval |= tsync_tx_ctl;
6288	wr32(E1000_TSYNCTXCTL, regval);
6289
6290	/* enable/disable RX */
6291	regval = rd32(E1000_TSYNCRXCTL);
6292	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6293	regval |= tsync_rx_ctl;
6294	wr32(E1000_TSYNCRXCTL, regval);
6295
6296	/* define which PTP packets are time stamped */
6297	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6298
6299	/* define ethertype filter for timestamped packets */
6300	if (is_l2)
6301		wr32(E1000_ETQF(3),
6302		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6303		                 E1000_ETQF_1588 | /* enable timestamping */
6304		                 ETH_P_1588));     /* 1588 eth protocol type */
6305	else
6306		wr32(E1000_ETQF(3), 0);
6307
6308#define PTP_PORT 319
6309	/* L4 Queue Filter[3]: filter by destination port and protocol */
6310	if (is_l4) {
6311		u32 ftqf = (IPPROTO_UDP /* UDP */
6312			| E1000_FTQF_VF_BP /* VF not compared */
6313			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6314			| E1000_FTQF_MASK); /* mask all inputs */
6315		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6316
6317		wr32(E1000_IMIR(3), htons(PTP_PORT));
6318		wr32(E1000_IMIREXT(3),
6319		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6320		if (hw->mac.type == e1000_82576) {
6321			/* enable source port check */
6322			wr32(E1000_SPQF(3), htons(PTP_PORT));
6323			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6324		}
6325		wr32(E1000_FTQF(3), ftqf);
6326	} else {
6327		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6328	}
6329	wrfl();
6330
6331	adapter->hwtstamp_config = config;
6332
6333	/* clear TX/RX time stamp registers, just to be sure */
6334	regval = rd32(E1000_TXSTMPH);
6335	regval = rd32(E1000_RXSTMPH);
6336
6337	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6338		-EFAULT : 0;
6339}
6340
6341/**
6342 * igb_ioctl -
6343 * @netdev:
6344 * @ifreq:
6345 * @cmd:
6346 **/
6347static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6348{
6349	switch (cmd) {
6350	case SIOCGMIIPHY:
6351	case SIOCGMIIREG:
6352	case SIOCSMIIREG:
6353		return igb_mii_ioctl(netdev, ifr, cmd);
6354	case SIOCSHWTSTAMP:
6355		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6356	default:
6357		return -EOPNOTSUPP;
6358	}
6359}
6360
6361s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6362{
6363	struct igb_adapter *adapter = hw->back;
6364	u16 cap_offset;
6365
6366	cap_offset = adapter->pdev->pcie_cap;
6367	if (!cap_offset)
6368		return -E1000_ERR_CONFIG;
6369
6370	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6371
6372	return 0;
6373}
6374
6375s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6376{
6377	struct igb_adapter *adapter = hw->back;
6378	u16 cap_offset;
6379
6380	cap_offset = adapter->pdev->pcie_cap;
6381	if (!cap_offset)
6382		return -E1000_ERR_CONFIG;
6383
6384	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6385
6386	return 0;
6387}
6388
6389static void igb_vlan_mode(struct net_device *netdev, u32 features)
6390{
6391	struct igb_adapter *adapter = netdev_priv(netdev);
6392	struct e1000_hw *hw = &adapter->hw;
6393	u32 ctrl, rctl;
6394	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6395
6396	if (enable) {
6397		/* enable VLAN tag insert/strip */
6398		ctrl = rd32(E1000_CTRL);
6399		ctrl |= E1000_CTRL_VME;
6400		wr32(E1000_CTRL, ctrl);
6401
6402		/* Disable CFI check */
6403		rctl = rd32(E1000_RCTL);
6404		rctl &= ~E1000_RCTL_CFIEN;
6405		wr32(E1000_RCTL, rctl);
6406	} else {
6407		/* disable VLAN tag insert/strip */
6408		ctrl = rd32(E1000_CTRL);
6409		ctrl &= ~E1000_CTRL_VME;
6410		wr32(E1000_CTRL, ctrl);
6411	}
6412
6413	igb_rlpml_set(adapter);
6414}
6415
6416static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6417{
6418	struct igb_adapter *adapter = netdev_priv(netdev);
6419	struct e1000_hw *hw = &adapter->hw;
6420	int pf_id = adapter->vfs_allocated_count;
6421
6422	/* attempt to add filter to vlvf array */
6423	igb_vlvf_set(adapter, vid, true, pf_id);
6424
6425	/* add the filter since PF can receive vlans w/o entry in vlvf */
6426	igb_vfta_set(hw, vid, true);
6427
6428	set_bit(vid, adapter->active_vlans);
6429}
6430
6431static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6432{
6433	struct igb_adapter *adapter = netdev_priv(netdev);
6434	struct e1000_hw *hw = &adapter->hw;
6435	int pf_id = adapter->vfs_allocated_count;
6436	s32 err;
6437
6438	/* remove vlan from VLVF table array */
6439	err = igb_vlvf_set(adapter, vid, false, pf_id);
6440
6441	/* if vid was not present in VLVF just remove it from table */
6442	if (err)
6443		igb_vfta_set(hw, vid, false);
6444
6445	clear_bit(vid, adapter->active_vlans);
6446}
6447
6448static void igb_restore_vlan(struct igb_adapter *adapter)
6449{
6450	u16 vid;
6451
6452	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6453
6454	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6455		igb_vlan_rx_add_vid(adapter->netdev, vid);
6456}
6457
6458int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6459{
6460	struct pci_dev *pdev = adapter->pdev;
6461	struct e1000_mac_info *mac = &adapter->hw.mac;
6462
6463	mac->autoneg = 0;
6464
6465	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6466	 * for the switch() below to work */
6467	if ((spd & 1) || (dplx & ~1))
6468		goto err_inval;
6469
6470	/* Fiber NIC's only allow 1000 Gbps Full duplex */
6471	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6472	    spd != SPEED_1000 &&
6473	    dplx != DUPLEX_FULL)
6474		goto err_inval;
6475
6476	switch (spd + dplx) {
6477	case SPEED_10 + DUPLEX_HALF:
6478		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6479		break;
6480	case SPEED_10 + DUPLEX_FULL:
6481		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6482		break;
6483	case SPEED_100 + DUPLEX_HALF:
6484		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6485		break;
6486	case SPEED_100 + DUPLEX_FULL:
6487		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6488		break;
6489	case SPEED_1000 + DUPLEX_FULL:
6490		mac->autoneg = 1;
6491		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6492		break;
6493	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6494	default:
6495		goto err_inval;
6496	}
6497	return 0;
6498
6499err_inval:
6500	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6501	return -EINVAL;
6502}
6503
6504static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6505{
6506	struct net_device *netdev = pci_get_drvdata(pdev);
6507	struct igb_adapter *adapter = netdev_priv(netdev);
6508	struct e1000_hw *hw = &adapter->hw;
6509	u32 ctrl, rctl, status;
6510	u32 wufc = adapter->wol;
6511#ifdef CONFIG_PM
6512	int retval = 0;
6513#endif
6514
6515	netif_device_detach(netdev);
6516
6517	if (netif_running(netdev))
6518		igb_close(netdev);
6519
6520	igb_clear_interrupt_scheme(adapter);
6521
6522#ifdef CONFIG_PM
6523	retval = pci_save_state(pdev);
6524	if (retval)
6525		return retval;
6526#endif
6527
6528	status = rd32(E1000_STATUS);
6529	if (status & E1000_STATUS_LU)
6530		wufc &= ~E1000_WUFC_LNKC;
6531
6532	if (wufc) {
6533		igb_setup_rctl(adapter);
6534		igb_set_rx_mode(netdev);
6535
6536		/* turn on all-multi mode if wake on multicast is enabled */
6537		if (wufc & E1000_WUFC_MC) {
6538			rctl = rd32(E1000_RCTL);
6539			rctl |= E1000_RCTL_MPE;
6540			wr32(E1000_RCTL, rctl);
6541		}
6542
6543		ctrl = rd32(E1000_CTRL);
6544		/* advertise wake from D3Cold */
6545		#define E1000_CTRL_ADVD3WUC 0x00100000
6546		/* phy power management enable */
6547		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6548		ctrl |= E1000_CTRL_ADVD3WUC;
6549		wr32(E1000_CTRL, ctrl);
6550
6551		/* Allow time for pending master requests to run */
6552		igb_disable_pcie_master(hw);
6553
6554		wr32(E1000_WUC, E1000_WUC_PME_EN);
6555		wr32(E1000_WUFC, wufc);
6556	} else {
6557		wr32(E1000_WUC, 0);
6558		wr32(E1000_WUFC, 0);
6559	}
6560
6561	*enable_wake = wufc || adapter->en_mng_pt;
6562	if (!*enable_wake)
6563		igb_power_down_link(adapter);
6564	else
6565		igb_power_up_link(adapter);
6566
6567	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6568	 * would have already happened in close and is redundant. */
6569	igb_release_hw_control(adapter);
6570
6571	pci_disable_device(pdev);
6572
6573	return 0;
6574}
6575
6576#ifdef CONFIG_PM
6577static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6578{
6579	int retval;
6580	bool wake;
6581
6582	retval = __igb_shutdown(pdev, &wake);
6583	if (retval)
6584		return retval;
6585
6586	if (wake) {
6587		pci_prepare_to_sleep(pdev);
6588	} else {
6589		pci_wake_from_d3(pdev, false);
6590		pci_set_power_state(pdev, PCI_D3hot);
6591	}
6592
6593	return 0;
6594}
6595
6596static int igb_resume(struct pci_dev *pdev)
6597{
6598	struct net_device *netdev = pci_get_drvdata(pdev);
6599	struct igb_adapter *adapter = netdev_priv(netdev);
6600	struct e1000_hw *hw = &adapter->hw;
6601	u32 err;
6602
6603	pci_set_power_state(pdev, PCI_D0);
6604	pci_restore_state(pdev);
6605	pci_save_state(pdev);
6606
6607	err = pci_enable_device_mem(pdev);
6608	if (err) {
6609		dev_err(&pdev->dev,
6610			"igb: Cannot enable PCI device from suspend\n");
6611		return err;
6612	}
6613	pci_set_master(pdev);
6614
6615	pci_enable_wake(pdev, PCI_D3hot, 0);
6616	pci_enable_wake(pdev, PCI_D3cold, 0);
6617
6618	if (igb_init_interrupt_scheme(adapter)) {
6619		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6620		return -ENOMEM;
6621	}
6622
6623	igb_reset(adapter);
6624
6625	/* let the f/w know that the h/w is now under the control of the
6626	 * driver. */
6627	igb_get_hw_control(adapter);
6628
6629	wr32(E1000_WUS, ~0);
6630
6631	if (netif_running(netdev)) {
6632		err = igb_open(netdev);
6633		if (err)
6634			return err;
6635	}
6636
6637	netif_device_attach(netdev);
6638
6639	return 0;
6640}
6641#endif
6642
6643static void igb_shutdown(struct pci_dev *pdev)
6644{
6645	bool wake;
6646
6647	__igb_shutdown(pdev, &wake);
6648
6649	if (system_state == SYSTEM_POWER_OFF) {
6650		pci_wake_from_d3(pdev, wake);
6651		pci_set_power_state(pdev, PCI_D3hot);
6652	}
6653}
6654
6655#ifdef CONFIG_NET_POLL_CONTROLLER
6656/*
6657 * Polling 'interrupt' - used by things like netconsole to send skbs
6658 * without having to re-enable interrupts. It's not called while
6659 * the interrupt routine is executing.
6660 */
6661static void igb_netpoll(struct net_device *netdev)
6662{
6663	struct igb_adapter *adapter = netdev_priv(netdev);
6664	struct e1000_hw *hw = &adapter->hw;
6665	struct igb_q_vector *q_vector;
6666	int i;
6667
6668	for (i = 0; i < adapter->num_q_vectors; i++) {
6669		q_vector = adapter->q_vector[i];
6670		if (adapter->msix_entries)
6671			wr32(E1000_EIMC, q_vector->eims_value);
6672		else
6673			igb_irq_disable(adapter);
6674		napi_schedule(&q_vector->napi);
6675	}
6676}
6677#endif /* CONFIG_NET_POLL_CONTROLLER */
6678
6679/**
6680 * igb_io_error_detected - called when PCI error is detected
6681 * @pdev: Pointer to PCI device
6682 * @state: The current pci connection state
6683 *
6684 * This function is called after a PCI bus error affecting
6685 * this device has been detected.
6686 */
6687static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6688					      pci_channel_state_t state)
6689{
6690	struct net_device *netdev = pci_get_drvdata(pdev);
6691	struct igb_adapter *adapter = netdev_priv(netdev);
6692
6693	netif_device_detach(netdev);
6694
6695	if (state == pci_channel_io_perm_failure)
6696		return PCI_ERS_RESULT_DISCONNECT;
6697
6698	if (netif_running(netdev))
6699		igb_down(adapter);
6700	pci_disable_device(pdev);
6701
6702	/* Request a slot slot reset. */
6703	return PCI_ERS_RESULT_NEED_RESET;
6704}
6705
6706/**
6707 * igb_io_slot_reset - called after the pci bus has been reset.
6708 * @pdev: Pointer to PCI device
6709 *
6710 * Restart the card from scratch, as if from a cold-boot. Implementation
6711 * resembles the first-half of the igb_resume routine.
6712 */
6713static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6714{
6715	struct net_device *netdev = pci_get_drvdata(pdev);
6716	struct igb_adapter *adapter = netdev_priv(netdev);
6717	struct e1000_hw *hw = &adapter->hw;
6718	pci_ers_result_t result;
6719	int err;
6720
6721	if (pci_enable_device_mem(pdev)) {
6722		dev_err(&pdev->dev,
6723			"Cannot re-enable PCI device after reset.\n");
6724		result = PCI_ERS_RESULT_DISCONNECT;
6725	} else {
6726		pci_set_master(pdev);
6727		pci_restore_state(pdev);
6728		pci_save_state(pdev);
6729
6730		pci_enable_wake(pdev, PCI_D3hot, 0);
6731		pci_enable_wake(pdev, PCI_D3cold, 0);
6732
6733		igb_reset(adapter);
6734		wr32(E1000_WUS, ~0);
6735		result = PCI_ERS_RESULT_RECOVERED;
6736	}
6737
6738	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6739	if (err) {
6740		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6741		        "failed 0x%0x\n", err);
6742		/* non-fatal, continue */
6743	}
6744
6745	return result;
6746}
6747
6748/**
6749 * igb_io_resume - called when traffic can start flowing again.
6750 * @pdev: Pointer to PCI device
6751 *
6752 * This callback is called when the error recovery driver tells us that
6753 * its OK to resume normal operation. Implementation resembles the
6754 * second-half of the igb_resume routine.
6755 */
6756static void igb_io_resume(struct pci_dev *pdev)
6757{
6758	struct net_device *netdev = pci_get_drvdata(pdev);
6759	struct igb_adapter *adapter = netdev_priv(netdev);
6760
6761	if (netif_running(netdev)) {
6762		if (igb_up(adapter)) {
6763			dev_err(&pdev->dev, "igb_up failed after reset\n");
6764			return;
6765		}
6766	}
6767
6768	netif_device_attach(netdev);
6769
6770	/* let the f/w know that the h/w is now under the control of the
6771	 * driver. */
6772	igb_get_hw_control(adapter);
6773}
6774
6775static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6776                             u8 qsel)
6777{
6778	u32 rar_low, rar_high;
6779	struct e1000_hw *hw = &adapter->hw;
6780
6781	/* HW expects these in little endian so we reverse the byte order
6782	 * from network order (big endian) to little endian
6783	 */
6784	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6785	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6786	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6787
6788	/* Indicate to hardware the Address is Valid. */
6789	rar_high |= E1000_RAH_AV;
6790
6791	if (hw->mac.type == e1000_82575)
6792		rar_high |= E1000_RAH_POOL_1 * qsel;
6793	else
6794		rar_high |= E1000_RAH_POOL_1 << qsel;
6795
6796	wr32(E1000_RAL(index), rar_low);
6797	wrfl();
6798	wr32(E1000_RAH(index), rar_high);
6799	wrfl();
6800}
6801
6802static int igb_set_vf_mac(struct igb_adapter *adapter,
6803                          int vf, unsigned char *mac_addr)
6804{
6805	struct e1000_hw *hw = &adapter->hw;
6806	/* VF MAC addresses start at end of receive addresses and moves
6807	 * torwards the first, as a result a collision should not be possible */
6808	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6809
6810	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6811
6812	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6813
6814	return 0;
6815}
6816
6817static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6818{
6819	struct igb_adapter *adapter = netdev_priv(netdev);
6820	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6821		return -EINVAL;
6822	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6823	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6824	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6825				      " change effective.");
6826	if (test_bit(__IGB_DOWN, &adapter->state)) {
6827		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6828			 " but the PF device is not up.\n");
6829		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6830			 " attempting to use the VF device.\n");
6831	}
6832	return igb_set_vf_mac(adapter, vf, mac);
6833}
6834
6835static int igb_link_mbps(int internal_link_speed)
6836{
6837	switch (internal_link_speed) {
6838	case SPEED_100:
6839		return 100;
6840	case SPEED_1000:
6841		return 1000;
6842	default:
6843		return 0;
6844	}
6845}
6846
6847static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6848				  int link_speed)
6849{
6850	int rf_dec, rf_int;
6851	u32 bcnrc_val;
6852
6853	if (tx_rate != 0) {
6854		/* Calculate the rate factor values to set */
6855		rf_int = link_speed / tx_rate;
6856		rf_dec = (link_speed - (rf_int * tx_rate));
6857		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6858
6859		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6860		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6861		               E1000_RTTBCNRC_RF_INT_MASK);
6862		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6863	} else {
6864		bcnrc_val = 0;
6865	}
6866
6867	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6868	wr32(E1000_RTTBCNRC, bcnrc_val);
6869}
6870
6871static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6872{
6873	int actual_link_speed, i;
6874	bool reset_rate = false;
6875
6876	/* VF TX rate limit was not set or not supported */
6877	if ((adapter->vf_rate_link_speed == 0) ||
6878	    (adapter->hw.mac.type != e1000_82576))
6879		return;
6880
6881	actual_link_speed = igb_link_mbps(adapter->link_speed);
6882	if (actual_link_speed != adapter->vf_rate_link_speed) {
6883		reset_rate = true;
6884		adapter->vf_rate_link_speed = 0;
6885		dev_info(&adapter->pdev->dev,
6886		         "Link speed has been changed. VF Transmit "
6887		         "rate is disabled\n");
6888	}
6889
6890	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6891		if (reset_rate)
6892			adapter->vf_data[i].tx_rate = 0;
6893
6894		igb_set_vf_rate_limit(&adapter->hw, i,
6895		                      adapter->vf_data[i].tx_rate,
6896		                      actual_link_speed);
6897	}
6898}
6899
6900static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6901{
6902	struct igb_adapter *adapter = netdev_priv(netdev);
6903	struct e1000_hw *hw = &adapter->hw;
6904	int actual_link_speed;
6905
6906	if (hw->mac.type != e1000_82576)
6907		return -EOPNOTSUPP;
6908
6909	actual_link_speed = igb_link_mbps(adapter->link_speed);
6910	if ((vf >= adapter->vfs_allocated_count) ||
6911	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6912	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6913		return -EINVAL;
6914
6915	adapter->vf_rate_link_speed = actual_link_speed;
6916	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6917	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6918
6919	return 0;
6920}
6921
6922static int igb_ndo_get_vf_config(struct net_device *netdev,
6923				 int vf, struct ifla_vf_info *ivi)
6924{
6925	struct igb_adapter *adapter = netdev_priv(netdev);
6926	if (vf >= adapter->vfs_allocated_count)
6927		return -EINVAL;
6928	ivi->vf = vf;
6929	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6930	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6931	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6932	ivi->qos = adapter->vf_data[vf].pf_qos;
6933	return 0;
6934}
6935
6936static void igb_vmm_control(struct igb_adapter *adapter)
6937{
6938	struct e1000_hw *hw = &adapter->hw;
6939	u32 reg;
6940
6941	switch (hw->mac.type) {
6942	case e1000_82575:
6943	default:
6944		/* replication is not supported for 82575 */
6945		return;
6946	case e1000_82576:
6947		/* notify HW that the MAC is adding vlan tags */
6948		reg = rd32(E1000_DTXCTL);
6949		reg |= E1000_DTXCTL_VLAN_ADDED;
6950		wr32(E1000_DTXCTL, reg);
6951	case e1000_82580:
6952		/* enable replication vlan tag stripping */
6953		reg = rd32(E1000_RPLOLR);
6954		reg |= E1000_RPLOLR_STRVLAN;
6955		wr32(E1000_RPLOLR, reg);
6956	case e1000_i350:
6957		/* none of the above registers are supported by i350 */
6958		break;
6959	}
6960
6961	if (adapter->vfs_allocated_count) {
6962		igb_vmdq_set_loopback_pf(hw, true);
6963		igb_vmdq_set_replication_pf(hw, true);
6964		igb_vmdq_set_anti_spoofing_pf(hw, true,
6965						adapter->vfs_allocated_count);
6966	} else {
6967		igb_vmdq_set_loopback_pf(hw, false);
6968		igb_vmdq_set_replication_pf(hw, false);
6969	}
6970}
6971
6972/* igb_main.c */
6973