igb_main.c revision 3ceb90fd4898853cdac43084f0c6ee7270cb15f3
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2011 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/bitops.h>
32#include <linux/vmalloc.h>
33#include <linux/pagemap.h>
34#include <linux/netdevice.h>
35#include <linux/ipv6.h>
36#include <linux/slab.h>
37#include <net/checksum.h>
38#include <net/ip6_checksum.h>
39#include <linux/net_tstamp.h>
40#include <linux/mii.h>
41#include <linux/ethtool.h>
42#include <linux/if.h>
43#include <linux/if_vlan.h>
44#include <linux/pci.h>
45#include <linux/pci-aspm.h>
46#include <linux/delay.h>
47#include <linux/interrupt.h>
48#include <linux/ip.h>
49#include <linux/tcp.h>
50#include <linux/sctp.h>
51#include <linux/if_ether.h>
52#include <linux/aer.h>
53#include <linux/prefetch.h>
54#ifdef CONFIG_IGB_DCA
55#include <linux/dca.h>
56#endif
57#include "igb.h"
58
59#define MAJ 3
60#define MIN 0
61#define BUILD 6
62#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63__stringify(BUILD) "-k"
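/*
 * With the MAJ/MIN/BUILD values above, DRV_VERSION expands to the string
 * "3.0.6-k": __stringify() turns each numeric macro into a string literal
 * so the pieces concatenate at compile time.
 */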
64char igb_driver_name[] = "igb";
65char igb_driver_version[] = DRV_VERSION;
66static const char igb_driver_string[] =
67				"Intel(R) Gigabit Ethernet Network Driver";
68static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70static const struct e1000_info *igb_info_tbl[] = {
71	[board_82575] = &e1000_82575_info,
72};
73
74static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100	/* required last entry */
101	{0, }
102};
103
104MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106void igb_reset(struct igb_adapter *);
107static int igb_setup_all_tx_resources(struct igb_adapter *);
108static int igb_setup_all_rx_resources(struct igb_adapter *);
109static void igb_free_all_tx_resources(struct igb_adapter *);
110static void igb_free_all_rx_resources(struct igb_adapter *);
111static void igb_setup_mrqc(struct igb_adapter *);
112static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113static void __devexit igb_remove(struct pci_dev *pdev);
114static void igb_init_hw_timer(struct igb_adapter *adapter);
115static int igb_sw_init(struct igb_adapter *);
116static int igb_open(struct net_device *);
117static int igb_close(struct net_device *);
118static void igb_configure_tx(struct igb_adapter *);
119static void igb_configure_rx(struct igb_adapter *);
120static void igb_clean_all_tx_rings(struct igb_adapter *);
121static void igb_clean_all_rx_rings(struct igb_adapter *);
122static void igb_clean_tx_ring(struct igb_ring *);
123static void igb_clean_rx_ring(struct igb_ring *);
124static void igb_set_rx_mode(struct net_device *);
125static void igb_update_phy_info(unsigned long);
126static void igb_watchdog(unsigned long);
127static void igb_watchdog_task(struct work_struct *);
128static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130						 struct rtnl_link_stats64 *stats);
131static int igb_change_mtu(struct net_device *, int);
132static int igb_set_mac(struct net_device *, void *);
133static void igb_set_uta(struct igb_adapter *adapter);
134static irqreturn_t igb_intr(int irq, void *);
135static irqreturn_t igb_intr_msi(int irq, void *);
136static irqreturn_t igb_msix_other(int irq, void *);
137static irqreturn_t igb_msix_ring(int irq, void *);
138#ifdef CONFIG_IGB_DCA
139static void igb_update_dca(struct igb_q_vector *);
140static void igb_setup_dca(struct igb_adapter *);
141#endif /* CONFIG_IGB_DCA */
142static int igb_poll(struct napi_struct *, int);
143static bool igb_clean_tx_irq(struct igb_q_vector *);
144static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146static void igb_tx_timeout(struct net_device *);
147static void igb_reset_task(struct work_struct *);
148static void igb_vlan_mode(struct net_device *netdev, u32 features);
149static void igb_vlan_rx_add_vid(struct net_device *, u16);
150static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151static void igb_restore_vlan(struct igb_adapter *);
152static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
153static void igb_ping_all_vfs(struct igb_adapter *);
154static void igb_msg_task(struct igb_adapter *);
155static void igb_vmm_control(struct igb_adapter *);
156static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160			       int vf, u16 vlan, u8 qos);
161static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163				 struct ifla_vf_info *ivi);
164static void igb_check_vf_rate_limit(struct igb_adapter *);
165
166#ifdef CONFIG_PM
167static int igb_suspend(struct pci_dev *, pm_message_t);
168static int igb_resume(struct pci_dev *);
169#endif
170static void igb_shutdown(struct pci_dev *);
171#ifdef CONFIG_IGB_DCA
172static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
173static struct notifier_block dca_notifier = {
174	.notifier_call	= igb_notify_dca,
175	.next		= NULL,
176	.priority	= 0
177};
178#endif
179#ifdef CONFIG_NET_POLL_CONTROLLER
180/* for netdump / net console */
181static void igb_netpoll(struct net_device *);
182#endif
183#ifdef CONFIG_PCI_IOV
184static unsigned int max_vfs = 0;
185module_param(max_vfs, uint, 0);
186MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
187                 "per physical function");
188#endif /* CONFIG_PCI_IOV */
189
190static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191		     pci_channel_state_t);
192static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193static void igb_io_resume(struct pci_dev *);
194
195static struct pci_error_handlers igb_err_handler = {
196	.error_detected = igb_io_error_detected,
197	.slot_reset = igb_io_slot_reset,
198	.resume = igb_io_resume,
199};
200
201
202static struct pci_driver igb_driver = {
203	.name     = igb_driver_name,
204	.id_table = igb_pci_tbl,
205	.probe    = igb_probe,
206	.remove   = __devexit_p(igb_remove),
207#ifdef CONFIG_PM
208	/* Power Management Hooks */
209	.suspend  = igb_suspend,
210	.resume   = igb_resume,
211#endif
212	.shutdown = igb_shutdown,
213	.err_handler = &igb_err_handler
214};
215
216MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218MODULE_LICENSE("GPL");
219MODULE_VERSION(DRV_VERSION);
220
221struct igb_reg_info {
222	u32 ofs;
223	char *name;
224};
225
226static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228	/* General Registers */
229	{E1000_CTRL, "CTRL"},
230	{E1000_STATUS, "STATUS"},
231	{E1000_CTRL_EXT, "CTRL_EXT"},
232
233	/* Interrupt Registers */
234	{E1000_ICR, "ICR"},
235
236	/* RX Registers */
237	{E1000_RCTL, "RCTL"},
238	{E1000_RDLEN(0), "RDLEN"},
239	{E1000_RDH(0), "RDH"},
240	{E1000_RDT(0), "RDT"},
241	{E1000_RXDCTL(0), "RXDCTL"},
242	{E1000_RDBAL(0), "RDBAL"},
243	{E1000_RDBAH(0), "RDBAH"},
244
245	/* TX Registers */
246	{E1000_TCTL, "TCTL"},
247	{E1000_TDBAL(0), "TDBAL"},
248	{E1000_TDBAH(0), "TDBAH"},
249	{E1000_TDLEN(0), "TDLEN"},
250	{E1000_TDH(0), "TDH"},
251	{E1000_TDT(0), "TDT"},
252	{E1000_TXDCTL(0), "TXDCTL"},
253	{E1000_TDFH, "TDFH"},
254	{E1000_TDFT, "TDFT"},
255	{E1000_TDFHS, "TDFHS"},
256	{E1000_TDFPC, "TDFPC"},
257
258	/* List Terminator */
259	{}
260};
261
262/*
263 * igb_regdump - register printout routine
264 */
265static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266{
267	int n = 0;
268	char rname[16];
269	u32 regs[8];
270
271	switch (reginfo->ofs) {
272	case E1000_RDLEN(0):
273		for (n = 0; n < 4; n++)
274			regs[n] = rd32(E1000_RDLEN(n));
275		break;
276	case E1000_RDH(0):
277		for (n = 0; n < 4; n++)
278			regs[n] = rd32(E1000_RDH(n));
279		break;
280	case E1000_RDT(0):
281		for (n = 0; n < 4; n++)
282			regs[n] = rd32(E1000_RDT(n));
283		break;
284	case E1000_RXDCTL(0):
285		for (n = 0; n < 4; n++)
286			regs[n] = rd32(E1000_RXDCTL(n));
287		break;
288	case E1000_RDBAL(0):
289		for (n = 0; n < 4; n++)
290			regs[n] = rd32(E1000_RDBAL(n));
291		break;
292	case E1000_RDBAH(0):
293		for (n = 0; n < 4; n++)
294			regs[n] = rd32(E1000_RDBAH(n));
295		break;
296	case E1000_TDBAL(0):
297		for (n = 0; n < 4; n++)
298			regs[n] = rd32(E1000_TDBAL(n));
299		break;
300	case E1000_TDBAH(0):
301		for (n = 0; n < 4; n++)
302			regs[n] = rd32(E1000_TDBAH(n));
303		break;
304	case E1000_TDLEN(0):
305		for (n = 0; n < 4; n++)
306			regs[n] = rd32(E1000_TDLEN(n));
307		break;
308	case E1000_TDH(0):
309		for (n = 0; n < 4; n++)
310			regs[n] = rd32(E1000_TDH(n));
311		break;
312	case E1000_TDT(0):
313		for (n = 0; n < 4; n++)
314			regs[n] = rd32(E1000_TDT(n));
315		break;
316	case E1000_TXDCTL(0):
317		for (n = 0; n < 4; n++)
318			regs[n] = rd32(E1000_TXDCTL(n));
319		break;
320	default:
321		printk(KERN_INFO "%-15s %08x\n",
322			reginfo->name, rd32(reginfo->ofs));
323		return;
324	}
325
326	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327	printk(KERN_INFO "%-15s ", rname);
328	for (n = 0; n < 4; n++)
329		printk(KERN_CONT "%08x ", regs[n]);
330	printk(KERN_CONT "\n");
331}
332
333/*
334 * igb_dump - Print registers, tx-rings and rx-rings
335 */
336static void igb_dump(struct igb_adapter *adapter)
337{
338	struct net_device *netdev = adapter->netdev;
339	struct e1000_hw *hw = &adapter->hw;
340	struct igb_reg_info *reginfo;
341	struct igb_ring *tx_ring;
342	union e1000_adv_tx_desc *tx_desc;
343	struct my_u0 { u64 a; u64 b; } *u0;
344	struct igb_ring *rx_ring;
345	union e1000_adv_rx_desc *rx_desc;
346	u32 staterr;
347	u16 i, n;
348
349	if (!netif_msg_hw(adapter))
350		return;
351
352	/* Print netdevice Info */
353	if (netdev) {
354		dev_info(&adapter->pdev->dev, "Net device Info\n");
355		printk(KERN_INFO "Device Name     state            "
356			"trans_start      last_rx\n");
357		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
358		netdev->name,
359		netdev->state,
360		netdev->trans_start,
361		netdev->last_rx);
362	}
363
364	/* Print Registers */
365	dev_info(&adapter->pdev->dev, "Register Dump\n");
366	printk(KERN_INFO " Register Name   Value\n");
367	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
368	     reginfo->name; reginfo++) {
369		igb_regdump(hw, reginfo);
370	}
371
372	/* Print TX Ring Summary */
373	if (!netdev || !netif_running(netdev))
374		goto exit;
375
376	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
377	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
378		" leng ntw timestamp\n");
379	for (n = 0; n < adapter->num_tx_queues; n++) {
380		struct igb_tx_buffer *buffer_info;
381		tx_ring = adapter->tx_ring[n];
382		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
383		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
384			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
385			   (u64)buffer_info->dma,
386			   buffer_info->length,
387			   buffer_info->next_to_watch,
388			   (u64)buffer_info->time_stamp);
389	}
390
391	/* Print TX Rings */
392	if (!netif_msg_tx_done(adapter))
393		goto rx_ring_summary;
394
395	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
396
397	/* Transmit Descriptor Formats
398	 *
399	 * Advanced Transmit Descriptor
400	 *   +--------------------------------------------------------------+
401	 * 0 |         Buffer Address [63:0]                                |
402	 *   +--------------------------------------------------------------+
403	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
404	 *   +--------------------------------------------------------------+
405	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
406	 */
407
408	for (n = 0; n < adapter->num_tx_queues; n++) {
409		tx_ring = adapter->tx_ring[n];
410		printk(KERN_INFO "------------------------------------\n");
411		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
412		printk(KERN_INFO "------------------------------------\n");
413		printk(KERN_INFO "T [desc]     [address 63:0  ] "
414			"[PlPOCIStDDM Ln] [bi->dma       ] "
415			"leng  ntw timestamp        bi->skb\n");
416
417		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
418			struct igb_tx_buffer *buffer_info;
419			tx_desc = IGB_TX_DESC(tx_ring, i);
420			buffer_info = &tx_ring->tx_buffer_info[i];
421			u0 = (struct my_u0 *)tx_desc;
422			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
423				" %04X  %p %016llX %p", i,
424				le64_to_cpu(u0->a),
425				le64_to_cpu(u0->b),
426				(u64)buffer_info->dma,
427				buffer_info->length,
428				buffer_info->next_to_watch,
429				(u64)buffer_info->time_stamp,
430				buffer_info->skb);
431			if (i == tx_ring->next_to_use &&
432				i == tx_ring->next_to_clean)
433				printk(KERN_CONT " NTC/U\n");
434			else if (i == tx_ring->next_to_use)
435				printk(KERN_CONT " NTU\n");
436			else if (i == tx_ring->next_to_clean)
437				printk(KERN_CONT " NTC\n");
438			else
439				printk(KERN_CONT "\n");
440
441			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
442				print_hex_dump(KERN_INFO, "",
443					DUMP_PREFIX_ADDRESS,
444					16, 1, phys_to_virt(buffer_info->dma),
445					buffer_info->length, true);
446		}
447	}
448
449	/* Print RX Rings Summary */
450rx_ring_summary:
451	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
452	printk(KERN_INFO "Queue [NTU] [NTC]\n");
453	for (n = 0; n < adapter->num_rx_queues; n++) {
454		rx_ring = adapter->rx_ring[n];
455		printk(KERN_INFO " %5d %5X %5X\n", n,
456			   rx_ring->next_to_use, rx_ring->next_to_clean);
457	}
458
459	/* Print RX Rings */
460	if (!netif_msg_rx_status(adapter))
461		goto exit;
462
463	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
464
465	/* Advanced Receive Descriptor (Read) Format
466	 *    63                                           1        0
467	 *    +-----------------------------------------------------+
468	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
469	 *    +----------------------------------------------+------+
470	 *  8 |       Header Buffer Address [63:1]           |  DD  |
471	 *    +-----------------------------------------------------+
472	 *
473	 *
474	 * Advanced Receive Descriptor (Write-Back) Format
475	 *
476	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
477	 *   +------------------------------------------------------+
478	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
479	 *   | Checksum   Ident  |   |           |    | Type | Type |
480	 *   +------------------------------------------------------+
481	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
482	 *   +------------------------------------------------------+
483	 *   63       48 47    32 31            20 19               0
484	 */
485
486	for (n = 0; n < adapter->num_rx_queues; n++) {
487		rx_ring = adapter->rx_ring[n];
488		printk(KERN_INFO "------------------------------------\n");
489		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
490		printk(KERN_INFO "------------------------------------\n");
491		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
492			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
493			"<-- Adv Rx Read format\n");
494		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
495			"[vl er S cks ln] ---------------- [bi->skb] "
496			"<-- Adv Rx Write-Back format\n");
497
498		for (i = 0; i < rx_ring->count; i++) {
499			struct igb_rx_buffer *buffer_info;
500			buffer_info = &rx_ring->rx_buffer_info[i];
501			rx_desc = IGB_RX_DESC(rx_ring, i);
502			u0 = (struct my_u0 *)rx_desc;
503			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
504			if (staterr & E1000_RXD_STAT_DD) {
505				/* Descriptor Done */
506				printk(KERN_INFO "RWB[0x%03X]     %016llX "
507					"%016llX ---------------- %p", i,
508					le64_to_cpu(u0->a),
509					le64_to_cpu(u0->b),
510					buffer_info->skb);
511			} else {
512				printk(KERN_INFO "R  [0x%03X]     %016llX "
513					"%016llX %016llX %p", i,
514					le64_to_cpu(u0->a),
515					le64_to_cpu(u0->b),
516					(u64)buffer_info->dma,
517					buffer_info->skb);
518
519				if (netif_msg_pktdata(adapter)) {
520					print_hex_dump(KERN_INFO, "",
521						DUMP_PREFIX_ADDRESS,
522						16, 1,
523						phys_to_virt(buffer_info->dma),
524						IGB_RX_HDR_LEN, true);
525					print_hex_dump(KERN_INFO, "",
526					  DUMP_PREFIX_ADDRESS,
527					  16, 1,
528					  phys_to_virt(
529					    buffer_info->page_dma +
530					    buffer_info->page_offset),
531					  PAGE_SIZE/2, true);
532				}
533			}
534
535			if (i == rx_ring->next_to_use)
536				printk(KERN_CONT " NTU\n");
537			else if (i == rx_ring->next_to_clean)
538				printk(KERN_CONT " NTC\n");
539			else
540				printk(KERN_CONT "\n");
541
542		}
543	}
544
545exit:
546	return;
547}
548
549
550/**
551 * igb_read_clock - read raw cycle counter (to be used by time counter)
552 */
553static cycle_t igb_read_clock(const struct cyclecounter *tc)
554{
555	struct igb_adapter *adapter =
556		container_of(tc, struct igb_adapter, cycles);
557	struct e1000_hw *hw = &adapter->hw;
558	u64 stamp = 0;
559	int shift = 0;
560
561	/*
562	 * The timestamp latches on lowest register read. For the 82580
563	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
564	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
565	 */
566	if (hw->mac.type == e1000_82580) {
567		stamp = rd32(E1000_SYSTIMR) >> 8;
568		shift = IGB_82580_TSYNC_SHIFT;
569	}
570
571	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
572	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
573	return stamp;
574}
575
576/**
577 * igb_get_hw_dev - return device
578 * used by hardware layer to print debugging information
579 **/
580struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
581{
582	struct igb_adapter *adapter = hw->back;
583	return adapter->netdev;
584}
585
586/**
587 * igb_init_module - Driver Registration Routine
588 *
589 * igb_init_module is the first routine called when the driver is
590 * loaded. All it does is register with the PCI subsystem.
591 **/
592static int __init igb_init_module(void)
593{
594	int ret;
595	printk(KERN_INFO "%s - version %s\n",
596	       igb_driver_string, igb_driver_version);
597
598	printk(KERN_INFO "%s\n", igb_copyright);
599
600#ifdef CONFIG_IGB_DCA
601	dca_register_notify(&dca_notifier);
602#endif
603	ret = pci_register_driver(&igb_driver);
604	return ret;
605}
606
607module_init(igb_init_module);
608
609/**
610 * igb_exit_module - Driver Exit Cleanup Routine
611 *
612 * igb_exit_module is called just before the driver is removed
613 * from memory.
614 **/
615static void __exit igb_exit_module(void)
616{
617#ifdef CONFIG_IGB_DCA
618	dca_unregister_notify(&dca_notifier);
619#endif
620	pci_unregister_driver(&igb_driver);
621}
622
623module_exit(igb_exit_module);
624
625#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
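/*
 * Worked example of the mapping this macro produces: Q_IDX_82576(0) = 0,
 * Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1, Q_IDX_82576(3) = 9, ... so even
 * logical queues land on registers 0, 1, 2, ... and odd logical queues on
 * 8, 9, 10, ..., matching the VF queue pairing described in
 * igb_cache_ring_register() below.
 */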
626/**
627 * igb_cache_ring_register - Descriptor ring to register mapping
628 * @adapter: board private structure to initialize
629 *
630 * Once we know the feature-set enabled for the device, we'll cache
631 * the register offset the descriptor ring is assigned to.
632 **/
633static void igb_cache_ring_register(struct igb_adapter *adapter)
634{
635	int i = 0, j = 0;
636	u32 rbase_offset = adapter->vfs_allocated_count;
637
638	switch (adapter->hw.mac.type) {
639	case e1000_82576:
640		/* The queues are allocated for virtualization such that VF 0
641		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
642		 * In order to avoid collision we start at the first free queue
643		 * and continue consuming queues in the same sequence
644		 */
645		if (adapter->vfs_allocated_count) {
646			for (; i < adapter->rss_queues; i++)
647				adapter->rx_ring[i]->reg_idx = rbase_offset +
648				                               Q_IDX_82576(i);
649		}
650	case e1000_82575:
651	case e1000_82580:
652	case e1000_i350:
653	default:
654		for (; i < adapter->num_rx_queues; i++)
655			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
656		for (; j < adapter->num_tx_queues; j++)
657			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
658		break;
659	}
660}
661
662static void igb_free_queues(struct igb_adapter *adapter)
663{
664	int i;
665
666	for (i = 0; i < adapter->num_tx_queues; i++) {
667		kfree(adapter->tx_ring[i]);
668		adapter->tx_ring[i] = NULL;
669	}
670	for (i = 0; i < adapter->num_rx_queues; i++) {
671		kfree(adapter->rx_ring[i]);
672		adapter->rx_ring[i] = NULL;
673	}
674	adapter->num_rx_queues = 0;
675	adapter->num_tx_queues = 0;
676}
677
678/**
679 * igb_alloc_queues - Allocate memory for all rings
680 * @adapter: board private structure to initialize
681 *
682 * We allocate one ring per queue at run-time since we don't know the
683 * number of queues at compile-time.
684 **/
685static int igb_alloc_queues(struct igb_adapter *adapter)
686{
687	struct igb_ring *ring;
688	int i;
689	int orig_node = adapter->node;
690
691	for (i = 0; i < adapter->num_tx_queues; i++) {
692		if (orig_node == -1) {
693			int cur_node = next_online_node(adapter->node);
694			if (cur_node == MAX_NUMNODES)
695				cur_node = first_online_node;
696			adapter->node = cur_node;
697		}
698		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699				    adapter->node);
700		if (!ring)
701			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702		if (!ring)
703			goto err;
704		ring->count = adapter->tx_ring_count;
705		ring->queue_index = i;
706		ring->dev = &adapter->pdev->dev;
707		ring->netdev = adapter->netdev;
708		ring->numa_node = adapter->node;
709		/* For 82575, context index must be unique per ring. */
710		if (adapter->hw.mac.type == e1000_82575)
711			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
712		adapter->tx_ring[i] = ring;
713	}
714	/* Restore the adapter's original node */
715	adapter->node = orig_node;
716
717	for (i = 0; i < adapter->num_rx_queues; i++) {
718		if (orig_node == -1) {
719			int cur_node = next_online_node(adapter->node);
720			if (cur_node == MAX_NUMNODES)
721				cur_node = first_online_node;
722			adapter->node = cur_node;
723		}
724		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725				    adapter->node);
726		if (!ring)
727			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728		if (!ring)
729			goto err;
730		ring->count = adapter->rx_ring_count;
731		ring->queue_index = i;
732		ring->dev = &adapter->pdev->dev;
733		ring->netdev = adapter->netdev;
734		ring->numa_node = adapter->node;
735		/* set flag indicating ring supports SCTP checksum offload */
736		if (adapter->hw.mac.type >= e1000_82576)
737			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
738		adapter->rx_ring[i] = ring;
739	}
740	/* Restore the adapter's original node */
741	adapter->node = orig_node;
742
743	igb_cache_ring_register(adapter);
744
745	return 0;
746
747err:
748	/* Restore the adapter's original node */
749	adapter->node = orig_node;
750	igb_free_queues(adapter);
751
752	return -ENOMEM;
753}
754
755/**
756 *  igb_write_ivar - configure ivar for given MSI-X vector
757 *  @hw: pointer to the HW structure
758 *  @msix_vector: vector number we are allocating to a given ring
759 *  @index: row index of IVAR register to write within IVAR table
760 *  @offset: column offset within IVAR, should be a multiple of 8
761 *
762 *  This function is intended to handle the writing of the IVAR register
763 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
764 *  each containing a cause allocation for an Rx and Tx ring, and a
765 *  variable number of rows depending on the number of queues supported.
766 **/
767static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
768			   int index, int offset)
769{
770	u32 ivar = array_rd32(E1000_IVAR0, index);
771
772	/* clear any bits that are currently set */
773	ivar &= ~((u32)0xFF << offset);
774
775	/* write vector and valid bit */
776	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
777
778	array_wr32(E1000_IVAR0, index, ivar);
779}
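/*
 * Each 32-bit IVAR register holds four 8-bit cause-allocation fields at bit
 * offsets 0, 8, 16 and 24.  As a sketch, assuming E1000_IVAR_VALID is the
 * per-field enable bit, the call
 *
 *	igb_write_ivar(hw, 3, 2, 16);
 *
 * clears bits 23:16 of IVAR register 2 and writes (3 | E1000_IVAR_VALID)
 * there, leaving the other three fields untouched.
 */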
780
781#define IGB_N0_QUEUE -1
782static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
783{
784	struct igb_adapter *adapter = q_vector->adapter;
785	struct e1000_hw *hw = &adapter->hw;
786	int rx_queue = IGB_N0_QUEUE;
787	int tx_queue = IGB_N0_QUEUE;
788	u32 msixbm = 0;
789
790	if (q_vector->rx.ring)
791		rx_queue = q_vector->rx.ring->reg_idx;
792	if (q_vector->tx.ring)
793		tx_queue = q_vector->tx.ring->reg_idx;
794
795	switch (hw->mac.type) {
796	case e1000_82575:
797		/* The 82575 assigns vectors using a bitmask, which matches the
798		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
799		   or more queues to a vector, we write the appropriate bits
800		   into the MSIXBM register for that vector. */
801		if (rx_queue > IGB_N0_QUEUE)
802			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
803		if (tx_queue > IGB_N0_QUEUE)
804			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
805		if (!adapter->msix_entries && msix_vector == 0)
806			msixbm |= E1000_EIMS_OTHER;
807		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
808		q_vector->eims_value = msixbm;
809		break;
810	case e1000_82576:
811		/*
812		 * 82576 uses a table that essentially consists of 2 columns
813		 * with 8 rows.  The ordering is column-major so we use the
814		 * lower 3 bits as the row index, and the 4th bit as the
815		 * column offset.
816		 */
817		if (rx_queue > IGB_N0_QUEUE)
818			igb_write_ivar(hw, msix_vector,
819				       rx_queue & 0x7,
820				       (rx_queue & 0x8) << 1);
821		if (tx_queue > IGB_N0_QUEUE)
822			igb_write_ivar(hw, msix_vector,
823				       tx_queue & 0x7,
824				       ((tx_queue & 0x8) << 1) + 8);
825		q_vector->eims_value = 1 << msix_vector;
826		break;
827	case e1000_82580:
828	case e1000_i350:
829		/*
830		 * On 82580 and newer adapters the scheme is similar to 82576
831		 * however instead of ordering column-major we have things
832		 * ordered row-major.  So we traverse the table by using
833		 * bit 0 as the column offset, and the remaining bits as the
834		 * row index.
835		 */
836		if (rx_queue > IGB_N0_QUEUE)
837			igb_write_ivar(hw, msix_vector,
838				       rx_queue >> 1,
839				       (rx_queue & 0x1) << 4);
840		if (tx_queue > IGB_N0_QUEUE)
841			igb_write_ivar(hw, msix_vector,
842				       tx_queue >> 1,
843				       ((tx_queue & 0x1) << 4) + 8);
844		q_vector->eims_value = 1 << msix_vector;
845		break;
846	default:
847		BUG();
848		break;
849	}
850
851	/* add q_vector eims value to global eims_enable_mask */
852	adapter->eims_enable_mask |= q_vector->eims_value;
853
854	/* configure q_vector to set itr on first interrupt */
855	q_vector->set_itr = 1;
856}
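/*
 * Example of the index/offset arithmetic above for rx_queue = tx_queue = 10:
 * on 82576 (column-major) index = 10 & 0x7 = 2, Rx offset = (10 & 0x8) << 1
 * = 16 and Tx offset = 16 + 8 = 24; on 82580/i350 (row-major) index =
 * 10 >> 1 = 5, Rx offset = (10 & 0x1) << 4 = 0 and Tx offset = 0 + 8 = 8.
 */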
857
858/**
859 * igb_configure_msix - Configure MSI-X hardware
860 *
861 * igb_configure_msix sets up the hardware to properly
862 * generate MSI-X interrupts.
863 **/
864static void igb_configure_msix(struct igb_adapter *adapter)
865{
866	u32 tmp;
867	int i, vector = 0;
868	struct e1000_hw *hw = &adapter->hw;
869
870	adapter->eims_enable_mask = 0;
871
872	/* set vector for other causes, i.e. link changes */
873	switch (hw->mac.type) {
874	case e1000_82575:
875		tmp = rd32(E1000_CTRL_EXT);
876		/* enable MSI-X PBA support*/
877		tmp |= E1000_CTRL_EXT_PBA_CLR;
878
879		/* Auto-Mask interrupts upon ICR read. */
880		tmp |= E1000_CTRL_EXT_EIAME;
881		tmp |= E1000_CTRL_EXT_IRCA;
882
883		wr32(E1000_CTRL_EXT, tmp);
884
885		/* enable msix_other interrupt */
886		array_wr32(E1000_MSIXBM(0), vector++,
887		                      E1000_EIMS_OTHER);
888		adapter->eims_other = E1000_EIMS_OTHER;
889
890		break;
891
892	case e1000_82576:
893	case e1000_82580:
894	case e1000_i350:
895		/* Turn on MSI-X capability first, or our settings
896		 * won't stick.  And it will take days to debug. */
897		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
898		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
899		                E1000_GPIE_NSICR);
900
901		/* enable msix_other interrupt */
902		adapter->eims_other = 1 << vector;
903		tmp = (vector++ | E1000_IVAR_VALID) << 8;
904
905		wr32(E1000_IVAR_MISC, tmp);
906		break;
907	default:
908		/* do nothing, since nothing else supports MSI-X */
909		break;
910	} /* switch (hw->mac.type) */
911
912	adapter->eims_enable_mask |= adapter->eims_other;
913
914	for (i = 0; i < adapter->num_q_vectors; i++)
915		igb_assign_vector(adapter->q_vector[i], vector++);
916
917	wrfl();
918}
919
920/**
921 * igb_request_msix - Initialize MSI-X interrupts
922 *
923 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
924 * kernel.
925 **/
926static int igb_request_msix(struct igb_adapter *adapter)
927{
928	struct net_device *netdev = adapter->netdev;
929	struct e1000_hw *hw = &adapter->hw;
930	int i, err = 0, vector = 0;
931
932	err = request_irq(adapter->msix_entries[vector].vector,
933	                  igb_msix_other, 0, netdev->name, adapter);
934	if (err)
935		goto out;
936	vector++;
937
938	for (i = 0; i < adapter->num_q_vectors; i++) {
939		struct igb_q_vector *q_vector = adapter->q_vector[i];
940
941		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
942
943		if (q_vector->rx.ring && q_vector->tx.ring)
944			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
945				q_vector->rx.ring->queue_index);
946		else if (q_vector->tx.ring)
947			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
948				q_vector->tx.ring->queue_index);
949		else if (q_vector->rx.ring)
950			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
951				q_vector->rx.ring->queue_index);
952		else
953			sprintf(q_vector->name, "%s-unused", netdev->name);
954
955		err = request_irq(adapter->msix_entries[vector].vector,
956		                  igb_msix_ring, 0, q_vector->name,
957		                  q_vector);
958		if (err)
959			goto out;
960		vector++;
961	}
962
963	igb_configure_msix(adapter);
964	return 0;
965out:
966	return err;
967}
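/*
 * For example, with a netdev named "eth0" and paired Tx/Rx rings, this
 * registers IRQs named eth0-TxRx-0, eth0-TxRx-1, ... for the queue vectors,
 * plus one interrupt under the plain netdev name for the "other" (link
 * status) vector requested first above.
 */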
968
969static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
970{
971	if (adapter->msix_entries) {
972		pci_disable_msix(adapter->pdev);
973		kfree(adapter->msix_entries);
974		adapter->msix_entries = NULL;
975	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
976		pci_disable_msi(adapter->pdev);
977	}
978}
979
980/**
981 * igb_free_q_vectors - Free memory allocated for interrupt vectors
982 * @adapter: board private structure to initialize
983 *
984 * This function frees the memory allocated to the q_vectors.  In addition if
985 * NAPI is enabled it will delete any references to the NAPI struct prior
986 * to freeing the q_vector.
987 **/
988static void igb_free_q_vectors(struct igb_adapter *adapter)
989{
990	int v_idx;
991
992	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
993		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
994		adapter->q_vector[v_idx] = NULL;
995		if (!q_vector)
996			continue;
997		netif_napi_del(&q_vector->napi);
998		kfree(q_vector);
999	}
1000	adapter->num_q_vectors = 0;
1001}
1002
1003/**
1004 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1005 *
1006 * This function resets the device so that it has 0 rx queues, tx queues, and
1007 * MSI-X interrupts allocated.
1008 */
1009static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1010{
1011	igb_free_queues(adapter);
1012	igb_free_q_vectors(adapter);
1013	igb_reset_interrupt_capability(adapter);
1014}
1015
1016/**
1017 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1018 *
1019 * Attempt to configure interrupts using the best available
1020 * capabilities of the hardware and kernel.
1021 **/
1022static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1023{
1024	int err;
1025	int numvecs, i;
1026
1027	/* Number of supported queues. */
1028	adapter->num_rx_queues = adapter->rss_queues;
1029	if (adapter->vfs_allocated_count)
1030		adapter->num_tx_queues = 1;
1031	else
1032		adapter->num_tx_queues = adapter->rss_queues;
1033
1034	/* start with one vector for every rx queue */
1035	numvecs = adapter->num_rx_queues;
1036
1037	/* if tx handler is separate add 1 for every tx queue */
1038	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1039		numvecs += adapter->num_tx_queues;
1040
1041	/* store the number of vectors reserved for queues */
1042	adapter->num_q_vectors = numvecs;
1043
1044	/* add 1 vector for link status interrupts */
1045	numvecs++;
1046	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1047					GFP_KERNEL);
1048	if (!adapter->msix_entries)
1049		goto msi_only;
1050
1051	for (i = 0; i < numvecs; i++)
1052		adapter->msix_entries[i].entry = i;
1053
1054	err = pci_enable_msix(adapter->pdev,
1055			      adapter->msix_entries,
1056			      numvecs);
1057	if (err == 0)
1058		goto out;
1059
1060	igb_reset_interrupt_capability(adapter);
1061
1062	/* If we can't do MSI-X, try MSI */
1063msi_only:
1064#ifdef CONFIG_PCI_IOV
1065	/* disable SR-IOV for non MSI-X configurations */
1066	if (adapter->vf_data) {
1067		struct e1000_hw *hw = &adapter->hw;
1068		/* disable iov and allow time for transactions to clear */
1069		pci_disable_sriov(adapter->pdev);
1070		msleep(500);
1071
1072		kfree(adapter->vf_data);
1073		adapter->vf_data = NULL;
1074		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1075		wrfl();
1076		msleep(100);
1077		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1078	}
1079#endif
1080	adapter->vfs_allocated_count = 0;
1081	adapter->rss_queues = 1;
1082	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1083	adapter->num_rx_queues = 1;
1084	adapter->num_tx_queues = 1;
1085	adapter->num_q_vectors = 1;
1086	if (!pci_enable_msi(adapter->pdev))
1087		adapter->flags |= IGB_FLAG_HAS_MSI;
1088out:
1089	/* Notify the stack of the (possibly) reduced queue counts. */
1090	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1091	return netif_set_real_num_rx_queues(adapter->netdev,
1092					    adapter->num_rx_queues);
1093}
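/*
 * Example vector budget, assuming rss_queues = 4, no VFs and queue pairing
 * disabled: num_rx_queues = num_tx_queues = 4 gives numvecs = 4 + 4 = 8
 * queue vectors, plus 1 for link status, so 9 MSI-X entries are requested.
 * If pci_enable_msix() fails, the code above falls back to a single
 * interrupt (MSI if available) with one Rx and one Tx queue.
 */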
1094
1095/**
1096 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1097 * @adapter: board private structure to initialize
1098 *
1099 * We allocate one q_vector per queue interrupt.  If allocation fails we
1100 * return -ENOMEM.
1101 **/
1102static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1103{
1104	struct igb_q_vector *q_vector;
1105	struct e1000_hw *hw = &adapter->hw;
1106	int v_idx;
1107	int orig_node = adapter->node;
1108
1109	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1110		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1111						adapter->num_tx_queues)) &&
1112		    (adapter->num_rx_queues == v_idx))
1113			adapter->node = orig_node;
1114		if (orig_node == -1) {
1115			int cur_node = next_online_node(adapter->node);
1116			if (cur_node == MAX_NUMNODES)
1117				cur_node = first_online_node;
1118			adapter->node = cur_node;
1119		}
1120		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1121					adapter->node);
1122		if (!q_vector)
1123			q_vector = kzalloc(sizeof(struct igb_q_vector),
1124					   GFP_KERNEL);
1125		if (!q_vector)
1126			goto err_out;
1127		q_vector->adapter = adapter;
1128		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1129		q_vector->itr_val = IGB_START_ITR;
1130		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1131		adapter->q_vector[v_idx] = q_vector;
1132	}
1133	/* Restore the adapter's original node */
1134	adapter->node = orig_node;
1135
1136	return 0;
1137
1138err_out:
1139	/* Restore the adapter's original node */
1140	adapter->node = orig_node;
1141	igb_free_q_vectors(adapter);
1142	return -ENOMEM;
1143}
1144
1145static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1146                                      int ring_idx, int v_idx)
1147{
1148	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1149
1150	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1151	q_vector->rx.ring->q_vector = q_vector;
1152	q_vector->rx.count++;
1153	q_vector->itr_val = adapter->rx_itr_setting;
1154	if (q_vector->itr_val && q_vector->itr_val <= 3)
1155		q_vector->itr_val = IGB_START_ITR;
1156}
1157
1158static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1159                                      int ring_idx, int v_idx)
1160{
1161	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1162
1163	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1164	q_vector->tx.ring->q_vector = q_vector;
1165	q_vector->tx.count++;
1166	q_vector->itr_val = adapter->tx_itr_setting;
1167	q_vector->tx.work_limit = adapter->tx_work_limit;
1168	if (q_vector->itr_val && q_vector->itr_val <= 3)
1169		q_vector->itr_val = IGB_START_ITR;
1170}
1171
1172/**
1173 * igb_map_ring_to_vector - maps allocated queues to vectors
1174 *
1175 * This function maps the recently allocated queues to vectors.
1176 **/
1177static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1178{
1179	int i;
1180	int v_idx = 0;
1181
1182	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1183	    (adapter->num_q_vectors < adapter->num_tx_queues))
1184		return -ENOMEM;
1185
1186	if (adapter->num_q_vectors >=
1187	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1188		for (i = 0; i < adapter->num_rx_queues; i++)
1189			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1190		for (i = 0; i < adapter->num_tx_queues; i++)
1191			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1192	} else {
1193		for (i = 0; i < adapter->num_rx_queues; i++) {
1194			if (i < adapter->num_tx_queues)
1195				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1196			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1197		}
1198		for (; i < adapter->num_tx_queues; i++)
1199			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1200	}
1201	return 0;
1202}
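/*
 * Mapping example: with 4 Rx and 4 Tx queues but only 4 q_vectors (queue
 * pairs), the else branch above gives each vector one Tx and one Rx ring;
 * with 8 q_vectors available, Rx rings take vectors 0-3 and Tx rings take
 * vectors 4-7.
 */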
1203
1204/**
1205 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1206 *
1207 * This function initializes the interrupts and allocates all of the queues.
1208 **/
1209static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1210{
1211	struct pci_dev *pdev = adapter->pdev;
1212	int err;
1213
1214	err = igb_set_interrupt_capability(adapter);
1215	if (err)
1216		return err;
1217
1218	err = igb_alloc_q_vectors(adapter);
1219	if (err) {
1220		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1221		goto err_alloc_q_vectors;
1222	}
1223
1224	err = igb_alloc_queues(adapter);
1225	if (err) {
1226		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1227		goto err_alloc_queues;
1228	}
1229
1230	err = igb_map_ring_to_vector(adapter);
1231	if (err) {
1232		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1233		goto err_map_queues;
1234	}
1235
1236
1237	return 0;
1238err_map_queues:
1239	igb_free_queues(adapter);
1240err_alloc_queues:
1241	igb_free_q_vectors(adapter);
1242err_alloc_q_vectors:
1243	igb_reset_interrupt_capability(adapter);
1244	return err;
1245}
1246
1247/**
1248 * igb_request_irq - initialize interrupts
1249 *
1250 * Attempts to configure interrupts using the best available
1251 * capabilities of the hardware and kernel.
1252 **/
1253static int igb_request_irq(struct igb_adapter *adapter)
1254{
1255	struct net_device *netdev = adapter->netdev;
1256	struct pci_dev *pdev = adapter->pdev;
1257	int err = 0;
1258
1259	if (adapter->msix_entries) {
1260		err = igb_request_msix(adapter);
1261		if (!err)
1262			goto request_done;
1263		/* fall back to MSI */
1264		igb_clear_interrupt_scheme(adapter);
1265		if (!pci_enable_msi(adapter->pdev))
1266			adapter->flags |= IGB_FLAG_HAS_MSI;
1267		igb_free_all_tx_resources(adapter);
1268		igb_free_all_rx_resources(adapter);
1269		adapter->num_tx_queues = 1;
1270		adapter->num_rx_queues = 1;
1271		adapter->num_q_vectors = 1;
1272		err = igb_alloc_q_vectors(adapter);
1273		if (err) {
1274			dev_err(&pdev->dev,
1275			        "Unable to allocate memory for vectors\n");
1276			goto request_done;
1277		}
1278		err = igb_alloc_queues(adapter);
1279		if (err) {
1280			dev_err(&pdev->dev,
1281			        "Unable to allocate memory for queues\n");
1282			igb_free_q_vectors(adapter);
1283			goto request_done;
1284		}
1285		igb_setup_all_tx_resources(adapter);
1286		igb_setup_all_rx_resources(adapter);
1287	} else {
1288		igb_assign_vector(adapter->q_vector[0], 0);
1289	}
1290
1291	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1292		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1293				  netdev->name, adapter);
1294		if (!err)
1295			goto request_done;
1296
1297		/* fall back to legacy interrupts */
1298		igb_reset_interrupt_capability(adapter);
1299		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1300	}
1301
1302	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1303			  netdev->name, adapter);
1304
1305	if (err)
1306		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1307			err);
1308
1309request_done:
1310	return err;
1311}
1312
1313static void igb_free_irq(struct igb_adapter *adapter)
1314{
1315	if (adapter->msix_entries) {
1316		int vector = 0, i;
1317
1318		free_irq(adapter->msix_entries[vector++].vector, adapter);
1319
1320		for (i = 0; i < adapter->num_q_vectors; i++) {
1321			struct igb_q_vector *q_vector = adapter->q_vector[i];
1322			free_irq(adapter->msix_entries[vector++].vector,
1323			         q_vector);
1324		}
1325	} else {
1326		free_irq(adapter->pdev->irq, adapter);
1327	}
1328}
1329
1330/**
1331 * igb_irq_disable - Mask off interrupt generation on the NIC
1332 * @adapter: board private structure
1333 **/
1334static void igb_irq_disable(struct igb_adapter *adapter)
1335{
1336	struct e1000_hw *hw = &adapter->hw;
1337
1338	/*
1339	 * we need to be careful when disabling interrupts.  The VFs are also
1340	 * mapped into these registers and so clearing the bits can cause
1341	 * issues on the VF drivers so we only need to clear what we set
1342	 */
1343	if (adapter->msix_entries) {
1344		u32 regval = rd32(E1000_EIAM);
1345		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1346		wr32(E1000_EIMC, adapter->eims_enable_mask);
1347		regval = rd32(E1000_EIAC);
1348		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1349	}
1350
1351	wr32(E1000_IAM, 0);
1352	wr32(E1000_IMC, ~0);
1353	wrfl();
1354	if (adapter->msix_entries) {
1355		int i;
1356		for (i = 0; i < adapter->num_q_vectors; i++)
1357			synchronize_irq(adapter->msix_entries[i].vector);
1358	} else {
1359		synchronize_irq(adapter->pdev->irq);
1360	}
1361}
1362
1363/**
1364 * igb_irq_enable - Enable default interrupt generation settings
1365 * @adapter: board private structure
1366 **/
1367static void igb_irq_enable(struct igb_adapter *adapter)
1368{
1369	struct e1000_hw *hw = &adapter->hw;
1370
1371	if (adapter->msix_entries) {
1372		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1373		u32 regval = rd32(E1000_EIAC);
1374		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1375		regval = rd32(E1000_EIAM);
1376		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1377		wr32(E1000_EIMS, adapter->eims_enable_mask);
1378		if (adapter->vfs_allocated_count) {
1379			wr32(E1000_MBVFIMR, 0xFF);
1380			ims |= E1000_IMS_VMMB;
1381		}
1382		if (adapter->hw.mac.type == e1000_82580)
1383			ims |= E1000_IMS_DRSTA;
1384
1385		wr32(E1000_IMS, ims);
1386	} else {
1387		wr32(E1000_IMS, IMS_ENABLE_MASK |
1388				E1000_IMS_DRSTA);
1389		wr32(E1000_IAM, IMS_ENABLE_MASK |
1390				E1000_IMS_DRSTA);
1391	}
1392}
1393
1394static void igb_update_mng_vlan(struct igb_adapter *adapter)
1395{
1396	struct e1000_hw *hw = &adapter->hw;
1397	u16 vid = adapter->hw.mng_cookie.vlan_id;
1398	u16 old_vid = adapter->mng_vlan_id;
1399
1400	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1401		/* add VID to filter table */
1402		igb_vfta_set(hw, vid, true);
1403		adapter->mng_vlan_id = vid;
1404	} else {
1405		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1406	}
1407
1408	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1409	    (vid != old_vid) &&
1410	    !test_bit(old_vid, adapter->active_vlans)) {
1411		/* remove VID from filter table */
1412		igb_vfta_set(hw, old_vid, false);
1413	}
1414}
1415
1416/**
1417 * igb_release_hw_control - release control of the h/w to f/w
1418 * @adapter: address of board private structure
1419 *
1420 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1421 * For ASF and Pass Through versions of f/w this means that the
1422 * driver is no longer loaded.
1423 *
1424 **/
1425static void igb_release_hw_control(struct igb_adapter *adapter)
1426{
1427	struct e1000_hw *hw = &adapter->hw;
1428	u32 ctrl_ext;
1429
1430	/* Let firmware take over control of h/w */
1431	ctrl_ext = rd32(E1000_CTRL_EXT);
1432	wr32(E1000_CTRL_EXT,
1433			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1434}
1435
1436/**
1437 * igb_get_hw_control - get control of the h/w from f/w
1438 * @adapter: address of board private structure
1439 *
1440 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1441 * For ASF and Pass Through versions of f/w this means that
1442 * the driver is loaded.
1443 *
1444 **/
1445static void igb_get_hw_control(struct igb_adapter *adapter)
1446{
1447	struct e1000_hw *hw = &adapter->hw;
1448	u32 ctrl_ext;
1449
1450	/* Let firmware know the driver has taken over */
1451	ctrl_ext = rd32(E1000_CTRL_EXT);
1452	wr32(E1000_CTRL_EXT,
1453			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1454}
1455
1456/**
1457 * igb_configure - configure the hardware for RX and TX
1458 * @adapter: private board structure
1459 **/
1460static void igb_configure(struct igb_adapter *adapter)
1461{
1462	struct net_device *netdev = adapter->netdev;
1463	int i;
1464
1465	igb_get_hw_control(adapter);
1466	igb_set_rx_mode(netdev);
1467
1468	igb_restore_vlan(adapter);
1469
1470	igb_setup_tctl(adapter);
1471	igb_setup_mrqc(adapter);
1472	igb_setup_rctl(adapter);
1473
1474	igb_configure_tx(adapter);
1475	igb_configure_rx(adapter);
1476
1477	igb_rx_fifo_flush_82575(&adapter->hw);
1478
1479	/* call igb_desc_unused which always leaves
1480	 * at least 1 descriptor unused to make sure
1481	 * next_to_use != next_to_clean */
1482	for (i = 0; i < adapter->num_rx_queues; i++) {
1483		struct igb_ring *ring = adapter->rx_ring[i];
1484		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1485	}
1486}
1487
1488/**
1489 * igb_power_up_link - Power up the phy/serdes link
1490 * @adapter: address of board private structure
1491 **/
1492void igb_power_up_link(struct igb_adapter *adapter)
1493{
1494	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1495		igb_power_up_phy_copper(&adapter->hw);
1496	else
1497		igb_power_up_serdes_link_82575(&adapter->hw);
1498}
1499
1500/**
1501 * igb_power_down_link - Power down the phy/serdes link
1502 * @adapter: address of board private structure
1503 */
1504static void igb_power_down_link(struct igb_adapter *adapter)
1505{
1506	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1507		igb_power_down_phy_copper_82575(&adapter->hw);
1508	else
1509		igb_shutdown_serdes_link_82575(&adapter->hw);
1510}
1511
1512/**
1513 * igb_up - Open the interface and prepare it to handle traffic
1514 * @adapter: board private structure
1515 **/
1516int igb_up(struct igb_adapter *adapter)
1517{
1518	struct e1000_hw *hw = &adapter->hw;
1519	int i;
1520
1521	/* hardware has been reset, we need to reload some things */
1522	igb_configure(adapter);
1523
1524	clear_bit(__IGB_DOWN, &adapter->state);
1525
1526	for (i = 0; i < adapter->num_q_vectors; i++) {
1527		struct igb_q_vector *q_vector = adapter->q_vector[i];
1528		napi_enable(&q_vector->napi);
1529	}
1530	if (adapter->msix_entries)
1531		igb_configure_msix(adapter);
1532	else
1533		igb_assign_vector(adapter->q_vector[0], 0);
1534
1535	/* Clear any pending interrupts. */
1536	rd32(E1000_ICR);
1537	igb_irq_enable(adapter);
1538
1539	/* notify VFs that reset has been completed */
1540	if (adapter->vfs_allocated_count) {
1541		u32 reg_data = rd32(E1000_CTRL_EXT);
1542		reg_data |= E1000_CTRL_EXT_PFRSTD;
1543		wr32(E1000_CTRL_EXT, reg_data);
1544	}
1545
1546	netif_tx_start_all_queues(adapter->netdev);
1547
1548	/* start the watchdog. */
1549	hw->mac.get_link_status = 1;
1550	schedule_work(&adapter->watchdog_task);
1551
1552	return 0;
1553}
1554
1555void igb_down(struct igb_adapter *adapter)
1556{
1557	struct net_device *netdev = adapter->netdev;
1558	struct e1000_hw *hw = &adapter->hw;
1559	u32 tctl, rctl;
1560	int i;
1561
1562	/* signal that we're down so the interrupt handler does not
1563	 * reschedule our watchdog timer */
1564	set_bit(__IGB_DOWN, &adapter->state);
1565
1566	/* disable receives in the hardware */
1567	rctl = rd32(E1000_RCTL);
1568	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1569	/* flush and sleep below */
1570
1571	netif_tx_stop_all_queues(netdev);
1572
1573	/* disable transmits in the hardware */
1574	tctl = rd32(E1000_TCTL);
1575	tctl &= ~E1000_TCTL_EN;
1576	wr32(E1000_TCTL, tctl);
1577	/* flush both disables and wait for them to finish */
1578	wrfl();
1579	msleep(10);
1580
1581	for (i = 0; i < adapter->num_q_vectors; i++) {
1582		struct igb_q_vector *q_vector = adapter->q_vector[i];
1583		napi_disable(&q_vector->napi);
1584	}
1585
1586	igb_irq_disable(adapter);
1587
1588	del_timer_sync(&adapter->watchdog_timer);
1589	del_timer_sync(&adapter->phy_info_timer);
1590
1591	netif_carrier_off(netdev);
1592
1593	/* record the stats before reset*/
1594	spin_lock(&adapter->stats64_lock);
1595	igb_update_stats(adapter, &adapter->stats64);
1596	spin_unlock(&adapter->stats64_lock);
1597
1598	adapter->link_speed = 0;
1599	adapter->link_duplex = 0;
1600
1601	if (!pci_channel_offline(adapter->pdev))
1602		igb_reset(adapter);
1603	igb_clean_all_tx_rings(adapter);
1604	igb_clean_all_rx_rings(adapter);
1605#ifdef CONFIG_IGB_DCA
1606
1607	/* since we reset the hardware DCA settings were cleared */
1608	igb_setup_dca(adapter);
1609#endif
1610}
1611
1612void igb_reinit_locked(struct igb_adapter *adapter)
1613{
1614	WARN_ON(in_interrupt());
1615	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1616		msleep(1);
1617	igb_down(adapter);
1618	igb_up(adapter);
1619	clear_bit(__IGB_RESETTING, &adapter->state);
1620}
1621
1622void igb_reset(struct igb_adapter *adapter)
1623{
1624	struct pci_dev *pdev = adapter->pdev;
1625	struct e1000_hw *hw = &adapter->hw;
1626	struct e1000_mac_info *mac = &hw->mac;
1627	struct e1000_fc_info *fc = &hw->fc;
1628	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1629	u16 hwm;
1630
1631	/* Repartition the PBA for MTUs greater than 9k.
1632	 * To take effect, CTRL.RST is required.
1633	 */
1634	switch (mac->type) {
1635	case e1000_i350:
1636	case e1000_82580:
1637		pba = rd32(E1000_RXPBS);
1638		pba = igb_rxpbs_adjust_82580(pba);
1639		break;
1640	case e1000_82576:
1641		pba = rd32(E1000_RXPBS);
1642		pba &= E1000_RXPBS_SIZE_MASK_82576;
1643		break;
1644	case e1000_82575:
1645	default:
1646		pba = E1000_PBA_34K;
1647		break;
1648	}
1649
1650	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1651	    (mac->type < e1000_82576)) {
1652		/* adjust PBA for jumbo frames */
1653		wr32(E1000_PBA, pba);
1654
1655		/* To maintain wire speed transmits, the Tx FIFO should be
1656		 * large enough to accommodate two full transmit packets,
1657		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1658		 * the Rx FIFO should be large enough to accommodate at least
1659		 * one full receive packet and is similarly rounded up and
1660		 * expressed in KB. */
1661		pba = rd32(E1000_PBA);
1662		/* upper 16 bits has Tx packet buffer allocation size in KB */
1663		tx_space = pba >> 16;
1664		/* lower 16 bits has Rx packet buffer allocation size in KB */
1665		pba &= 0xffff;
1666		/* the tx fifo also stores 16 bytes of information about each tx,
1667		 * but don't include the ethernet FCS because hardware appends it */
1668		min_tx_space = (adapter->max_frame_size +
1669				sizeof(union e1000_adv_tx_desc) -
1670				ETH_FCS_LEN) * 2;
1671		min_tx_space = ALIGN(min_tx_space, 1024);
1672		min_tx_space >>= 10;
1673		/* software strips receive CRC, so leave room for it */
1674		min_rx_space = adapter->max_frame_size;
1675		min_rx_space = ALIGN(min_rx_space, 1024);
1676		min_rx_space >>= 10;
1677
1678		/* If current Tx allocation is less than the min Tx FIFO size,
1679		 * and the min Tx FIFO size is less than the current Rx FIFO
1680		 * allocation, take space away from current Rx allocation */
1681		if (tx_space < min_tx_space &&
1682		    ((min_tx_space - tx_space) < pba)) {
1683			pba = pba - (min_tx_space - tx_space);
1684
1685			/* if short on rx space, rx wins and must trump tx
1686			 * adjustment */
1687			if (pba < min_rx_space)
1688				pba = min_rx_space;
1689		}
1690		wr32(E1000_PBA, pba);
1691	}
1692
1693	/* flow control settings */
1694	/* The high water mark must be low enough to fit one full frame
1695	 * (or the size used for early receive) above it in the Rx FIFO.
1696	 * Set it to the lower of:
1697	 * - 90% of the Rx FIFO size, or
1698	 * - the full Rx FIFO size minus one full frame */
1699	hwm = min(((pba << 10) * 9 / 10),
1700			((pba << 10) - 2 * adapter->max_frame_size));
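	/*
	 * For example, assuming the 34K default PBA (pba = 34) and a 1518 byte
	 * max frame, this is min(31334, 31780) = 31334 bytes, which the
	 * 16-byte mask below rounds down to a high water mark of 31328.
	 */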
1701
1702	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1703	fc->low_water = fc->high_water - 16;
1704	fc->pause_time = 0xFFFF;
1705	fc->send_xon = 1;
1706	fc->current_mode = fc->requested_mode;
1707
1708	/* disable receive for all VFs and wait one second */
1709	if (adapter->vfs_allocated_count) {
1710		int i;
1711		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1712			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1713
1714		/* ping all the active vfs to let them know we are going down */
1715		igb_ping_all_vfs(adapter);
1716
1717		/* disable transmits and receives */
1718		wr32(E1000_VFRE, 0);
1719		wr32(E1000_VFTE, 0);
1720	}
1721
1722	/* Allow time for pending master requests to run */
1723	hw->mac.ops.reset_hw(hw);
1724	wr32(E1000_WUC, 0);
1725
1726	if (hw->mac.ops.init_hw(hw))
1727		dev_err(&pdev->dev, "Hardware Error\n");
1728	if (hw->mac.type > e1000_82580) {
1729		if (adapter->flags & IGB_FLAG_DMAC) {
1730			u32 reg;
1731
1732			/*
1733			 * DMA Coalescing high water mark needs to be higher
1734			 * than the Rx threshold.  The Rx threshold is
1735			 * currently pba - 6, so we should use a high water
1736			 * mark of pba - 4. */
1737			hwm = (pba - 4) << 10;
1738
1739			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1740			       & E1000_DMACR_DMACTHR_MASK);
1741
1742			/* transition to L0s or L1 if available */
1743			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1744
1745			/* watchdog timer of ~1000 usec, expressed in 32 usec intervals */
1746			reg |= (1000 >> 5);
1747			wr32(E1000_DMACR, reg);
1748
1749			/* no lower threshold to disable coalescing (smart fifo)
1750			 * - UTRESH = 0 */
1751			wr32(E1000_DMCRTRH, 0);
1752
1753			/* write the DMA Coalescing high water mark to FCRTC */
1754			wr32(E1000_FCRTC, hwm);
1755
1756			/*
1757			 * This sets the time to wait before requesting transition
1758			 * to a low power state to the number of usecs needed to
1759			 * receive one 512 byte frame at gigabit line rate
1760			 */
1761			reg = rd32(E1000_DMCTLX);
1762			reg |= IGB_DMCTLX_DCFLUSH_DIS;
1763
1764			/* Delay 255 usec before entering Lx state. */
1765			reg |= 0xFF;
1766			wr32(E1000_DMCTLX, reg);
1767
1768			/* free space in Tx packet buffer to wake from DMAC */
1769			wr32(E1000_DMCTXTH,
1770			     (IGB_MIN_TXPBSIZE -
1771			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
1772			     >> 6);
1773
1774			/* make low power state decision controlled by DMAC */
1775			reg = rd32(E1000_PCIEMISC);
1776			reg |= E1000_PCIEMISC_LX_DECISION;
1777			wr32(E1000_PCIEMISC, reg);
1778		} /* end if IGB_FLAG_DMAC set */
1779	}
1780	if (hw->mac.type == e1000_82580) {
1781		u32 reg = rd32(E1000_PCIEMISC);
1782		wr32(E1000_PCIEMISC,
1783		                reg & ~E1000_PCIEMISC_LX_DECISION);
1784	}
1785	if (!netif_running(adapter->netdev))
1786		igb_power_down_link(adapter);
1787
1788	igb_update_mng_vlan(adapter);
1789
1790	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1791	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1792
1793	igb_get_phy_info(hw);
1794}
1795
1796static u32 igb_fix_features(struct net_device *netdev, u32 features)
1797{
1798	/*
1799	 * Since there is no support for separate rx/tx vlan accel
1800	 * enable/disable make sure tx flag is always in same state as rx.
1801	 */
1802	if (features & NETIF_F_HW_VLAN_RX)
1803		features |= NETIF_F_HW_VLAN_TX;
1804	else
1805		features &= ~NETIF_F_HW_VLAN_TX;
1806
1807	return features;
1808}
1809
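/**
 * igb_set_features - apply changed netdev feature flags
 * @netdev: network interface device structure
 * @features: new feature set requested by the stack
 *
 * Only a change to Rx VLAN stripping requires reprogramming the hardware.
 **/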
1810static int igb_set_features(struct net_device *netdev, u32 features)
1811{
1812	u32 changed = netdev->features ^ features;
1813
1814	if (changed & NETIF_F_HW_VLAN_RX)
1815		igb_vlan_mode(netdev, features);
1816
1817	return 0;
1818}
1819
1820static const struct net_device_ops igb_netdev_ops = {
1821	.ndo_open		= igb_open,
1822	.ndo_stop		= igb_close,
1823	.ndo_start_xmit		= igb_xmit_frame,
1824	.ndo_get_stats64	= igb_get_stats64,
1825	.ndo_set_rx_mode	= igb_set_rx_mode,
1826	.ndo_set_mac_address	= igb_set_mac,
1827	.ndo_change_mtu		= igb_change_mtu,
1828	.ndo_do_ioctl		= igb_ioctl,
1829	.ndo_tx_timeout		= igb_tx_timeout,
1830	.ndo_validate_addr	= eth_validate_addr,
1831	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1832	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1833	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1834	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1835	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1836	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1837#ifdef CONFIG_NET_POLL_CONTROLLER
1838	.ndo_poll_controller	= igb_netpoll,
1839#endif
1840	.ndo_fix_features	= igb_fix_features,
1841	.ndo_set_features	= igb_set_features,
1842};
1843
1844/**
1845 * igb_probe - Device Initialization Routine
1846 * @pdev: PCI device information struct
1847 * @ent: entry in igb_pci_tbl
1848 *
1849 * Returns 0 on success, negative on failure
1850 *
1851 * igb_probe initializes an adapter identified by a pci_dev structure.
1852 * The OS initialization, configuring of the adapter private structure,
1853 * and a hardware reset occur.
1854 **/
1855static int __devinit igb_probe(struct pci_dev *pdev,
1856			       const struct pci_device_id *ent)
1857{
1858	struct net_device *netdev;
1859	struct igb_adapter *adapter;
1860	struct e1000_hw *hw;
1861	u16 eeprom_data = 0;
1862	s32 ret_val;
1863	static int global_quad_port_a; /* global quad port a indication */
1864	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1865	unsigned long mmio_start, mmio_len;
1866	int err, pci_using_dac;
1867	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1868	u8 part_str[E1000_PBANUM_LENGTH];
1869
1870	/* Catch broken hardware that put the wrong VF device ID in
1871	 * the PCIe SR-IOV capability.
1872	 */
1873	if (pdev->is_virtfn) {
1874		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1875		     pci_name(pdev), pdev->vendor, pdev->device);
1876		return -EINVAL;
1877	}
1878
1879	err = pci_enable_device_mem(pdev);
1880	if (err)
1881		return err;
1882
1883	pci_using_dac = 0;
1884	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1885	if (!err) {
1886		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1887		if (!err)
1888			pci_using_dac = 1;
1889	} else {
1890		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1891		if (err) {
1892			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1893			if (err) {
1894				dev_err(&pdev->dev, "No usable DMA "
1895					"configuration, aborting\n");
1896				goto err_dma;
1897			}
1898		}
1899	}
1900
1901	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1902	                                   IORESOURCE_MEM),
1903	                                   igb_driver_name);
1904	if (err)
1905		goto err_pci_reg;
1906
1907	pci_enable_pcie_error_reporting(pdev);
1908
1909	pci_set_master(pdev);
1910	pci_save_state(pdev);
1911
1912	err = -ENOMEM;
1913	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1914				   IGB_MAX_TX_QUEUES);
1915	if (!netdev)
1916		goto err_alloc_etherdev;
1917
1918	SET_NETDEV_DEV(netdev, &pdev->dev);
1919
1920	pci_set_drvdata(pdev, netdev);
1921	adapter = netdev_priv(netdev);
1922	adapter->netdev = netdev;
1923	adapter->pdev = pdev;
1924	hw = &adapter->hw;
1925	hw->back = adapter;
1926	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1927
1928	mmio_start = pci_resource_start(pdev, 0);
1929	mmio_len = pci_resource_len(pdev, 0);
1930
1931	err = -EIO;
1932	hw->hw_addr = ioremap(mmio_start, mmio_len);
1933	if (!hw->hw_addr)
1934		goto err_ioremap;
1935
1936	netdev->netdev_ops = &igb_netdev_ops;
1937	igb_set_ethtool_ops(netdev);
1938	netdev->watchdog_timeo = 5 * HZ;
1939
1940	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1941
1942	netdev->mem_start = mmio_start;
1943	netdev->mem_end = mmio_start + mmio_len;
1944
1945	/* PCI config space info */
1946	hw->vendor_id = pdev->vendor;
1947	hw->device_id = pdev->device;
1948	hw->revision_id = pdev->revision;
1949	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1950	hw->subsystem_device_id = pdev->subsystem_device;
1951
1952	/* Copy the default MAC, PHY and NVM function pointers */
1953	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1954	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1955	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1956	/* Initialize skew-specific constants */
1957	err = ei->get_invariants(hw);
1958	if (err)
1959		goto err_sw_init;
1960
1961	/* setup the private structure */
1962	err = igb_sw_init(adapter);
1963	if (err)
1964		goto err_sw_init;
1965
1966	igb_get_bus_info_pcie(hw);
1967
1968	hw->phy.autoneg_wait_to_complete = false;
1969
1970	/* Copper options */
1971	if (hw->phy.media_type == e1000_media_type_copper) {
1972		hw->phy.mdix = AUTO_ALL_MODES;
1973		hw->phy.disable_polarity_correction = false;
1974		hw->phy.ms_type = e1000_ms_hw_default;
1975	}
1976
1977	if (igb_check_reset_block(hw))
1978		dev_info(&pdev->dev,
1979			"PHY reset is blocked due to SOL/IDER session.\n");
1980
1981	netdev->hw_features = NETIF_F_SG |
1982			   NETIF_F_IP_CSUM |
1983			   NETIF_F_IPV6_CSUM |
1984			   NETIF_F_TSO |
1985			   NETIF_F_TSO6 |
1986			   NETIF_F_RXCSUM |
1987			   NETIF_F_HW_VLAN_RX;
1988
1989	netdev->features = netdev->hw_features |
1990			   NETIF_F_HW_VLAN_TX |
1991			   NETIF_F_HW_VLAN_FILTER;
1992
1993	netdev->vlan_features |= NETIF_F_TSO;
1994	netdev->vlan_features |= NETIF_F_TSO6;
1995	netdev->vlan_features |= NETIF_F_IP_CSUM;
1996	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1997	netdev->vlan_features |= NETIF_F_SG;
1998
1999	if (pci_using_dac) {
2000		netdev->features |= NETIF_F_HIGHDMA;
2001		netdev->vlan_features |= NETIF_F_HIGHDMA;
2002	}
2003
2004	if (hw->mac.type >= e1000_82576) {
2005		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2006		netdev->features |= NETIF_F_SCTP_CSUM;
2007	}
2008
2009	netdev->priv_flags |= IFF_UNICAST_FLT;
2010
2011	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2012
2013	/* before reading the NVM, reset the controller to put the device in a
2014	 * known good starting state */
2015	hw->mac.ops.reset_hw(hw);
2016
2017	/* make sure the NVM is good */
2018	if (hw->nvm.ops.validate(hw) < 0) {
2019		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2020		err = -EIO;
2021		goto err_eeprom;
2022	}
2023
2024	/* copy the MAC address out of the NVM */
2025	if (hw->mac.ops.read_mac_addr(hw))
2026		dev_err(&pdev->dev, "NVM Read Error\n");
2027
2028	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2029	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2030
2031	if (!is_valid_ether_addr(netdev->perm_addr)) {
2032		dev_err(&pdev->dev, "Invalid MAC Address\n");
2033		err = -EIO;
2034		goto err_eeprom;
2035	}
2036
2037	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2038	            (unsigned long) adapter);
2039	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2040	            (unsigned long) adapter);
2041
2042	INIT_WORK(&adapter->reset_task, igb_reset_task);
2043	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2044
2045	/* Initialize link properties that are user-changeable */
2046	adapter->fc_autoneg = true;
2047	hw->mac.autoneg = true;
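	/* 0x2f advertises 10/100 half and full duplex plus 1000 full duplex */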
2048	hw->phy.autoneg_advertised = 0x2f;
2049
2050	hw->fc.requested_mode = e1000_fc_default;
2051	hw->fc.current_mode = e1000_fc_default;
2052
2053	igb_validate_mdi_setting(hw);
2054
2055	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2056	 * enable the ACPI Magic Packet filter
2057	 */
2058
2059	if (hw->bus.func == 0)
2060		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2061	else if (hw->mac.type >= e1000_82580)
2062		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2063		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2064		                 &eeprom_data);
2065	else if (hw->bus.func == 1)
2066		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2067
2068	if (eeprom_data & eeprom_apme_mask)
2069		adapter->eeprom_wol |= E1000_WUFC_MAG;
2070
2071	/* now that we have the eeprom settings, apply the special cases where
2072	 * the eeprom may be wrong or the board simply won't support wake on
2073	 * lan on a particular port */
2074	switch (pdev->device) {
2075	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2076		adapter->eeprom_wol = 0;
2077		break;
2078	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2079	case E1000_DEV_ID_82576_FIBER:
2080	case E1000_DEV_ID_82576_SERDES:
2081		/* Wake events only supported on port A for dual fiber
2082		 * regardless of eeprom setting */
2083		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2084			adapter->eeprom_wol = 0;
2085		break;
2086	case E1000_DEV_ID_82576_QUAD_COPPER:
2087	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2088		/* if quad port adapter, disable WoL on all but port A */
2089		if (global_quad_port_a != 0)
2090			adapter->eeprom_wol = 0;
2091		else
2092			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2093		/* Reset for multiple quad port adapters */
2094		if (++global_quad_port_a == 4)
2095			global_quad_port_a = 0;
2096		break;
2097	}
2098
2099	/* initialize the wol settings based on the eeprom settings */
2100	adapter->wol = adapter->eeprom_wol;
2101	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2102
2103	/* reset the hardware with the new settings */
2104	igb_reset(adapter);
2105
2106	/* let the f/w know that the h/w is now under the control of the
2107	 * driver. */
2108	igb_get_hw_control(adapter);
2109
2110	strcpy(netdev->name, "eth%d");
2111	err = register_netdev(netdev);
2112	if (err)
2113		goto err_register;
2114
2115	igb_vlan_mode(netdev, netdev->features);
2116
2117	/* carrier off reporting is important to ethtool even BEFORE open */
2118	netif_carrier_off(netdev);
2119
2120#ifdef CONFIG_IGB_DCA
2121	if (dca_add_requester(&pdev->dev) == 0) {
2122		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2123		dev_info(&pdev->dev, "DCA enabled\n");
2124		igb_setup_dca(adapter);
2125	}
2126
2127#endif
2128	/* do hw tstamp init after resetting */
2129	igb_init_hw_timer(adapter);
2130
2131	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2132	/* print bus type/speed/width info */
2133	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2134		 netdev->name,
2135		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2136		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2137		                                            "unknown"),
2138		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2139		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2140		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2141		   "unknown"),
2142		 netdev->dev_addr);
2143
2144	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2145	if (ret_val)
2146		strcpy(part_str, "Unknown");
2147	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2148	dev_info(&pdev->dev,
2149		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2150		adapter->msix_entries ? "MSI-X" :
2151		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2152		adapter->num_rx_queues, adapter->num_tx_queues);
2153	switch (hw->mac.type) {
2154	case e1000_i350:
2155		igb_set_eee_i350(hw);
2156		break;
2157	default:
2158		break;
2159	}
2160	return 0;
2161
2162err_register:
2163	igb_release_hw_control(adapter);
2164err_eeprom:
2165	if (!igb_check_reset_block(hw))
2166		igb_reset_phy(hw);
2167
2168	if (hw->flash_address)
2169		iounmap(hw->flash_address);
2170err_sw_init:
2171	igb_clear_interrupt_scheme(adapter);
2172	iounmap(hw->hw_addr);
2173err_ioremap:
2174	free_netdev(netdev);
2175err_alloc_etherdev:
2176	pci_release_selected_regions(pdev,
2177	                             pci_select_bars(pdev, IORESOURCE_MEM));
2178err_pci_reg:
2179err_dma:
2180	pci_disable_device(pdev);
2181	return err;
2182}
2183
2184/**
2185 * igb_remove - Device Removal Routine
2186 * @pdev: PCI device information struct
2187 *
2188 * igb_remove is called by the PCI subsystem to alert the driver
2189 * that it should release a PCI device.  This could be caused by a
2190 * Hot-Plug event, or because the driver is going to be removed from
2191 * memory.
2192 **/
2193static void __devexit igb_remove(struct pci_dev *pdev)
2194{
2195	struct net_device *netdev = pci_get_drvdata(pdev);
2196	struct igb_adapter *adapter = netdev_priv(netdev);
2197	struct e1000_hw *hw = &adapter->hw;
2198
2199	/*
2200	 * The watchdog timer may be rescheduled, so explicitly
2201	 * disable watchdog from being rescheduled.
2202	 */
2203	set_bit(__IGB_DOWN, &adapter->state);
2204	del_timer_sync(&adapter->watchdog_timer);
2205	del_timer_sync(&adapter->phy_info_timer);
2206
2207	cancel_work_sync(&adapter->reset_task);
2208	cancel_work_sync(&adapter->watchdog_task);
2209
2210#ifdef CONFIG_IGB_DCA
2211	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2212		dev_info(&pdev->dev, "DCA disabled\n");
2213		dca_remove_requester(&pdev->dev);
2214		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2215		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2216	}
2217#endif
2218
2219	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2220	 * would have already happened in close and is redundant. */
2221	igb_release_hw_control(adapter);
2222
2223	unregister_netdev(netdev);
2224
2225	igb_clear_interrupt_scheme(adapter);
2226
2227#ifdef CONFIG_PCI_IOV
2228	/* reclaim resources allocated to VFs */
2229	if (adapter->vf_data) {
2230		/* disable iov and allow time for transactions to clear */
2231		pci_disable_sriov(pdev);
2232		msleep(500);
2233
2234		kfree(adapter->vf_data);
2235		adapter->vf_data = NULL;
2236		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2237		wrfl();
2238		msleep(100);
2239		dev_info(&pdev->dev, "IOV Disabled\n");
2240	}
2241#endif
2242
2243	iounmap(hw->hw_addr);
2244	if (hw->flash_address)
2245		iounmap(hw->flash_address);
2246	pci_release_selected_regions(pdev,
2247	                             pci_select_bars(pdev, IORESOURCE_MEM));
2248
2249	free_netdev(netdev);
2250
2251	pci_disable_pcie_error_reporting(pdev);
2252
2253	pci_disable_device(pdev);
2254}
2255
2256/**
2257 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2258 * @adapter: board private structure to initialize
2259 *
2260 * This function initializes the vf specific data storage and then attempts to
2261 * allocate the VFs.  The reason for ordering it this way is because it is much
2262 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2263 * the memory for the VFs.
2264 **/
2265static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2266{
2267#ifdef CONFIG_PCI_IOV
2268	struct pci_dev *pdev = adapter->pdev;
2269
2270	if (adapter->vfs_allocated_count) {
2271		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2272		                           sizeof(struct vf_data_storage),
2273		                           GFP_KERNEL);
2274		/* if allocation failed then we do not support SR-IOV */
2275		if (!adapter->vf_data) {
2276			adapter->vfs_allocated_count = 0;
2277			dev_err(&pdev->dev, "Unable to allocate memory for VF "
2278			        "Data Storage\n");
2279		}
2280	}
2281
2282	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2283		kfree(adapter->vf_data);
2284		adapter->vf_data = NULL;
2285#endif /* CONFIG_PCI_IOV */
2286		adapter->vfs_allocated_count = 0;
2287#ifdef CONFIG_PCI_IOV
2288	} else {
2289		unsigned char mac_addr[ETH_ALEN];
2290		int i;
2291		dev_info(&pdev->dev, "%d vfs allocated\n",
2292		         adapter->vfs_allocated_count);
2293		for (i = 0; i < adapter->vfs_allocated_count; i++) {
2294			random_ether_addr(mac_addr);
2295			igb_set_vf_mac(adapter, i, mac_addr);
2296		}
2297		/* DMA Coalescing is not supported in IOV mode. */
2298		if (adapter->flags & IGB_FLAG_DMAC)
2299			adapter->flags &= ~IGB_FLAG_DMAC;
2300	}
2301#endif /* CONFIG_PCI_IOV */
2302}
2303
2304
2305/**
2306 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2307 * @adapter: board private structure to initialize
2308 *
2309 * igb_init_hw_timer initializes the function pointer and values for the hw
2310 * timer found in hardware.
2311 **/
2312static void igb_init_hw_timer(struct igb_adapter *adapter)
2313{
2314	struct e1000_hw *hw = &adapter->hw;
2315
2316	switch (hw->mac.type) {
2317	case e1000_i350:
2318	case e1000_82580:
2319		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2320		adapter->cycles.read = igb_read_clock;
2321		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2322		adapter->cycles.mult = 1;
2323		/*
2324		 * The 82580 timesync advances the system timer by 8 ns every 8 ns
2325		 * and the value cannot be shifted.  Instead we need to shift
2326		 * the registers to generate a 64bit timer value.  As a result
2327		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2328		 * 24 in order to generate a larger value for synchronization.
2329		 */
2330		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2331		/* disable system timer temporarily by setting bit 31 */
2332		wr32(E1000_TSAUXC, 0x80000000);
2333		wrfl();
2334
2335		/* Set registers so that rollover occurs soon to test this. */
2336		wr32(E1000_SYSTIMR, 0x00000000);
2337		wr32(E1000_SYSTIML, 0x80000000);
2338		wr32(E1000_SYSTIMH, 0x000000FF);
2339		wrfl();
2340
2341		/* enable system timer by clearing bit 31 */
2342		wr32(E1000_TSAUXC, 0x0);
2343		wrfl();
2344
2345		timecounter_init(&adapter->clock,
2346				 &adapter->cycles,
2347				 ktime_to_ns(ktime_get_real()));
2348		/*
2349		 * Synchronize our NIC clock against system wall clock. NIC
2350		 * time stamp reading requires ~3us per sample, each sample
2351		 * was pretty stable even under load => only require 10
2352		 * samples for each offset comparison.
2353		 */
2354		memset(&adapter->compare, 0, sizeof(adapter->compare));
2355		adapter->compare.source = &adapter->clock;
2356		adapter->compare.target = ktime_get_real;
2357		adapter->compare.num_samples = 10;
2358		timecompare_update(&adapter->compare, 0);
2359		break;
2360	case e1000_82576:
2361		/*
2362		 * Initialize hardware timer: we keep it running just in case
2363		 * that some program needs it later on.
2364		 */
2365		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2366		adapter->cycles.read = igb_read_clock;
2367		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2368		adapter->cycles.mult = 1;
2369		/*
2370		 * Scale the NIC clock cycle by a large factor so that
2371		 * relatively small clock corrections can be added or
2372		 * subtracted at each clock tick. The drawbacks of a large
2373		 * factor are a) that the clock register overflows more quickly
2374		 * (not such a big deal) and b) that the increment per tick has
2375		 * to fit into 24 bits.  As a result we need to use a shift of
2376		 * 19 so we can fit a value of 16 into the TIMINCA register.
2377		 */
2378		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2379		wr32(E1000_TIMINCA,
2380		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2381		                (16 << IGB_82576_TSYNC_SHIFT));
2382
2383		/* Set registers so that rollover occurs soon to test this. */
2384		wr32(E1000_SYSTIML, 0x00000000);
2385		wr32(E1000_SYSTIMH, 0xFF800000);
2386		wrfl();
2387
2388		timecounter_init(&adapter->clock,
2389				 &adapter->cycles,
2390				 ktime_to_ns(ktime_get_real()));
2391		/*
2392		 * Synchronize our NIC clock against system wall clock. NIC
2393		 * time stamp reading requires ~3us per sample, each sample
2394		 * was pretty stable even under load => only require 10
2395		 * samples for each offset comparison.
2396		 */
2397		memset(&adapter->compare, 0, sizeof(adapter->compare));
2398		adapter->compare.source = &adapter->clock;
2399		adapter->compare.target = ktime_get_real;
2400		adapter->compare.num_samples = 10;
2401		timecompare_update(&adapter->compare, 0);
2402		break;
2403	case e1000_82575:
2404		/* 82575 does not support timesync */
2405	default:
2406		break;
2407	}
2408
2409}
2410
2411/**
2412 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2413 * @adapter: board private structure to initialize
2414 *
2415 * igb_sw_init initializes the Adapter private data structure.
2416 * Fields are initialized based on PCI device information and
2417 * OS network device settings (MTU size).
2418 **/
2419static int __devinit igb_sw_init(struct igb_adapter *adapter)
2420{
2421	struct e1000_hw *hw = &adapter->hw;
2422	struct net_device *netdev = adapter->netdev;
2423	struct pci_dev *pdev = adapter->pdev;
2424
2425	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2426
2427	/* set default ring sizes */
2428	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2429	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2430
2431	/* set default ITR values */
2432	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2433	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2434
2435	/* set default work limits */
2436	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2437
2438	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2439				  VLAN_HLEN;
2440	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2441
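	/* -1 means no preferred NUMA node yet; ring allocations may come from any node */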
2442	adapter->node = -1;
2443
2444	spin_lock_init(&adapter->stats64_lock);
2445#ifdef CONFIG_PCI_IOV
2446	switch (hw->mac.type) {
2447	case e1000_82576:
2448	case e1000_i350:
2449		if (max_vfs > 7) {
2450			dev_warn(&pdev->dev,
2451				 "Maximum of 7 VFs per PF, using max\n");
2452			adapter->vfs_allocated_count = 7;
2453		} else
2454			adapter->vfs_allocated_count = max_vfs;
2455		break;
2456	default:
2457		break;
2458	}
2459#endif /* CONFIG_PCI_IOV */
2460	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2461	/* i350 cannot do RSS and SR-IOV at the same time */
2462	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2463		adapter->rss_queues = 1;
2464
2465	/*
2466	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2467	 * then we should combine the queues into a queue pair in order to
2468	 * conserve interrupts due to limited supply
2469	 */
2470	if ((adapter->rss_queues > 4) ||
2471	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2472		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2473
2474	/* This call may decrease the number of queues */
2475	if (igb_init_interrupt_scheme(adapter)) {
2476		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2477		return -ENOMEM;
2478	}
2479
2480	igb_probe_vfs(adapter);
2481
2482	/* Explicitly disable IRQ since the NIC can be in any state. */
2483	igb_irq_disable(adapter);
2484
2485	if (hw->mac.type == e1000_i350)
2486		adapter->flags &= ~IGB_FLAG_DMAC;
2487
2488	set_bit(__IGB_DOWN, &adapter->state);
2489	return 0;
2490}
2491
2492/**
2493 * igb_open - Called when a network interface is made active
2494 * @netdev: network interface device structure
2495 *
2496 * Returns 0 on success, negative value on failure
2497 *
2498 * The open entry point is called when a network interface is made
2499 * active by the system (IFF_UP).  At this point all resources needed
2500 * for transmit and receive operations are allocated, the interrupt
2501 * handler is registered with the OS, the watchdog timer is started,
2502 * and the stack is notified that the interface is ready.
2503 **/
2504static int igb_open(struct net_device *netdev)
2505{
2506	struct igb_adapter *adapter = netdev_priv(netdev);
2507	struct e1000_hw *hw = &adapter->hw;
2508	int err;
2509	int i;
2510
2511	/* disallow open during test */
2512	if (test_bit(__IGB_TESTING, &adapter->state))
2513		return -EBUSY;
2514
2515	netif_carrier_off(netdev);
2516
2517	/* allocate transmit descriptors */
2518	err = igb_setup_all_tx_resources(adapter);
2519	if (err)
2520		goto err_setup_tx;
2521
2522	/* allocate receive descriptors */
2523	err = igb_setup_all_rx_resources(adapter);
2524	if (err)
2525		goto err_setup_rx;
2526
2527	igb_power_up_link(adapter);
2528
2529	/* before we allocate an interrupt, we must be ready to handle it.
2530	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2531	 * as soon as we call pci_request_irq, so we have to setup our
2532	 * clean_rx handler before we do so.  */
2533	igb_configure(adapter);
2534
2535	err = igb_request_irq(adapter);
2536	if (err)
2537		goto err_req_irq;
2538
2539	/* From here on the code is the same as igb_up() */
2540	clear_bit(__IGB_DOWN, &adapter->state);
2541
2542	for (i = 0; i < adapter->num_q_vectors; i++) {
2543		struct igb_q_vector *q_vector = adapter->q_vector[i];
2544		napi_enable(&q_vector->napi);
2545	}
2546
2547	/* Clear any pending interrupts. */
2548	rd32(E1000_ICR);
2549
2550	igb_irq_enable(adapter);
2551
2552	/* notify VFs that reset has been completed */
2553	if (adapter->vfs_allocated_count) {
2554		u32 reg_data = rd32(E1000_CTRL_EXT);
2555		reg_data |= E1000_CTRL_EXT_PFRSTD;
2556		wr32(E1000_CTRL_EXT, reg_data);
2557	}
2558
2559	netif_tx_start_all_queues(netdev);
2560
2561	/* start the watchdog. */
2562	hw->mac.get_link_status = 1;
2563	schedule_work(&adapter->watchdog_task);
2564
2565	return 0;
2566
2567err_req_irq:
2568	igb_release_hw_control(adapter);
2569	igb_power_down_link(adapter);
2570	igb_free_all_rx_resources(adapter);
2571err_setup_rx:
2572	igb_free_all_tx_resources(adapter);
2573err_setup_tx:
2574	igb_reset(adapter);
2575
2576	return err;
2577}
2578
2579/**
2580 * igb_close - Disables a network interface
2581 * @netdev: network interface device structure
2582 *
2583 * Returns 0, this is not allowed to fail
2584 *
2585 * The close entry point is called when an interface is de-activated
2586 * by the OS.  The hardware is still under the driver's control, but
2587 * needs to be disabled.  A global MAC reset is issued to stop the
2588 * hardware, and all transmit and receive resources are freed.
2589 **/
2590static int igb_close(struct net_device *netdev)
2591{
2592	struct igb_adapter *adapter = netdev_priv(netdev);
2593
2594	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2595	igb_down(adapter);
2596
2597	igb_free_irq(adapter);
2598
2599	igb_free_all_tx_resources(adapter);
2600	igb_free_all_rx_resources(adapter);
2601
2602	return 0;
2603}
2604
2605/**
2606 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2607 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2608 *
2609 * Return 0 on success, negative on failure
2610 **/
2611int igb_setup_tx_resources(struct igb_ring *tx_ring)
2612{
2613	struct device *dev = tx_ring->dev;
2614	int orig_node = dev_to_node(dev);
2615	int size;
2616
2617	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2618	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2619	if (!tx_ring->tx_buffer_info)
2620		tx_ring->tx_buffer_info = vzalloc(size);
2621	if (!tx_ring->tx_buffer_info)
2622		goto err;
2623
2624	/* round up to nearest 4K */
2625	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2626	tx_ring->size = ALIGN(tx_ring->size, 4096);
2627
2628	set_dev_node(dev, tx_ring->numa_node);
2629	tx_ring->desc = dma_alloc_coherent(dev,
2630					   tx_ring->size,
2631					   &tx_ring->dma,
2632					   GFP_KERNEL);
2633	set_dev_node(dev, orig_node);
2634	if (!tx_ring->desc)
2635		tx_ring->desc = dma_alloc_coherent(dev,
2636						   tx_ring->size,
2637						   &tx_ring->dma,
2638						   GFP_KERNEL);
2639
2640	if (!tx_ring->desc)
2641		goto err;
2642
2643	tx_ring->next_to_use = 0;
2644	tx_ring->next_to_clean = 0;
2645
2646	return 0;
2647
2648err:
2649	vfree(tx_ring->tx_buffer_info);
2650	dev_err(dev,
2651		"Unable to allocate memory for the transmit descriptor ring\n");
2652	return -ENOMEM;
2653}
2654
2655/**
2656 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2657 *				  (Descriptors) for all queues
2658 * @adapter: board private structure
2659 *
2660 * Return 0 on success, negative on failure
2661 **/
2662static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2663{
2664	struct pci_dev *pdev = adapter->pdev;
2665	int i, err = 0;
2666
2667	for (i = 0; i < adapter->num_tx_queues; i++) {
2668		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2669		if (err) {
2670			dev_err(&pdev->dev,
2671				"Allocation for Tx Queue %u failed\n", i);
2672			for (i--; i >= 0; i--)
2673				igb_free_tx_resources(adapter->tx_ring[i]);
2674			break;
2675		}
2676	}
2677
2678	return err;
2679}
2680
2681/**
2682 * igb_setup_tctl - configure the transmit control registers
2683 * @adapter: Board private structure
2684 **/
2685void igb_setup_tctl(struct igb_adapter *adapter)
2686{
2687	struct e1000_hw *hw = &adapter->hw;
2688	u32 tctl;
2689
2690	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2691	wr32(E1000_TXDCTL(0), 0);
2692
2693	/* Program the Transmit Control Register */
2694	tctl = rd32(E1000_TCTL);
2695	tctl &= ~E1000_TCTL_CT;
2696	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2697		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2698
2699	igb_config_collision_dist(hw);
2700
2701	/* Enable transmits */
2702	tctl |= E1000_TCTL_EN;
2703
2704	wr32(E1000_TCTL, tctl);
2705}
2706
2707/**
2708 * igb_configure_tx_ring - Configure transmit ring after Reset
2709 * @adapter: board private structure
2710 * @ring: tx ring to configure
2711 *
2712 * Configure a transmit ring after a reset.
2713 **/
2714void igb_configure_tx_ring(struct igb_adapter *adapter,
2715                           struct igb_ring *ring)
2716{
2717	struct e1000_hw *hw = &adapter->hw;
2718	u32 txdctl = 0;
2719	u64 tdba = ring->dma;
2720	int reg_idx = ring->reg_idx;
2721
2722	/* disable the queue */
2723	wr32(E1000_TXDCTL(reg_idx), 0);
2724	wrfl();
2725	mdelay(10);
2726
2727	wr32(E1000_TDLEN(reg_idx),
2728	                ring->count * sizeof(union e1000_adv_tx_desc));
2729	wr32(E1000_TDBAL(reg_idx),
2730	                tdba & 0x00000000ffffffffULL);
2731	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2732
2733	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2734	wr32(E1000_TDH(reg_idx), 0);
2735	writel(0, ring->tail);
2736
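	/* program prefetch, host, and write-back thresholds into TXDCTL */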
2737	txdctl |= IGB_TX_PTHRESH;
2738	txdctl |= IGB_TX_HTHRESH << 8;
2739	txdctl |= IGB_TX_WTHRESH << 16;
2740
2741	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2742	wr32(E1000_TXDCTL(reg_idx), txdctl);
2743}
2744
2745/**
2746 * igb_configure_tx - Configure transmit Unit after Reset
2747 * @adapter: board private structure
2748 *
2749 * Configure the Tx unit of the MAC after a reset.
2750 **/
2751static void igb_configure_tx(struct igb_adapter *adapter)
2752{
2753	int i;
2754
2755	for (i = 0; i < adapter->num_tx_queues; i++)
2756		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2757}
2758
2759/**
2760 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2761 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2762 *
2763 * Returns 0 on success, negative on failure
2764 **/
2765int igb_setup_rx_resources(struct igb_ring *rx_ring)
2766{
2767	struct device *dev = rx_ring->dev;
2768	int orig_node = dev_to_node(dev);
2769	int size, desc_len;
2770
2771	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2772	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2773	if (!rx_ring->rx_buffer_info)
2774		rx_ring->rx_buffer_info = vzalloc(size);
2775	if (!rx_ring->rx_buffer_info)
2776		goto err;
2777
2778	desc_len = sizeof(union e1000_adv_rx_desc);
2779
2780	/* Round up to nearest 4K */
2781	rx_ring->size = rx_ring->count * desc_len;
2782	rx_ring->size = ALIGN(rx_ring->size, 4096);
2783
2784	set_dev_node(dev, rx_ring->numa_node);
2785	rx_ring->desc = dma_alloc_coherent(dev,
2786					   rx_ring->size,
2787					   &rx_ring->dma,
2788					   GFP_KERNEL);
2789	set_dev_node(dev, orig_node);
2790	if (!rx_ring->desc)
2791		rx_ring->desc = dma_alloc_coherent(dev,
2792						   rx_ring->size,
2793						   &rx_ring->dma,
2794						   GFP_KERNEL);
2795
2796	if (!rx_ring->desc)
2797		goto err;
2798
2799	rx_ring->next_to_clean = 0;
2800	rx_ring->next_to_use = 0;
2801
2802	return 0;
2803
2804err:
2805	vfree(rx_ring->rx_buffer_info);
2806	rx_ring->rx_buffer_info = NULL;
2807	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2808		" ring\n");
2809	return -ENOMEM;
2810}
2811
2812/**
2813 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2814 *				  (Descriptors) for all queues
2815 * @adapter: board private structure
2816 *
2817 * Return 0 on success, negative on failure
2818 **/
2819static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2820{
2821	struct pci_dev *pdev = adapter->pdev;
2822	int i, err = 0;
2823
2824	for (i = 0; i < adapter->num_rx_queues; i++) {
2825		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2826		if (err) {
2827			dev_err(&pdev->dev,
2828				"Allocation for Rx Queue %u failed\n", i);
2829			for (i--; i >= 0; i--)
2830				igb_free_rx_resources(adapter->rx_ring[i]);
2831			break;
2832		}
2833	}
2834
2835	return err;
2836}
2837
2838/**
2839 * igb_setup_mrqc - configure the multiple receive queue control registers
2840 * @adapter: Board private structure
2841 **/
2842static void igb_setup_mrqc(struct igb_adapter *adapter)
2843{
2844	struct e1000_hw *hw = &adapter->hw;
2845	u32 mrqc, rxcsum;
2846	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2847	union e1000_reta {
2848		u32 dword;
2849		u8  bytes[4];
2850	} reta;
2851	static const u8 rsshash[40] = {
2852		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2853		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2854		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2855		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2856
2857	/* Fill out hash function seeds */
2858	for (j = 0; j < 10; j++) {
2859		u32 rsskey = rsshash[(j * 4)];
2860		rsskey |= rsshash[(j * 4) + 1] << 8;
2861		rsskey |= rsshash[(j * 4) + 2] << 16;
2862		rsskey |= rsshash[(j * 4) + 3] << 24;
2863		array_wr32(E1000_RSSRK(0), j, rsskey);
2864	}
2865
2866	num_rx_queues = adapter->rss_queues;
2867
2868	if (adapter->vfs_allocated_count) {
2869		/* 82575 and 82576 support 2 RSS queues for VMDq */
2870		switch (hw->mac.type) {
2871		case e1000_i350:
2872		case e1000_82580:
2873			num_rx_queues = 1;
2874			shift = 0;
2875			break;
2876		case e1000_82576:
2877			shift = 3;
2878			num_rx_queues = 2;
2879			break;
2880		case e1000_82575:
2881			shift = 2;
2882			shift2 = 6;
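			/* fall through */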
2883		default:
2884			break;
2885		}
2886	} else {
2887		if (hw->mac.type == e1000_82575)
2888			shift = 6;
2889	}
2890
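	/* program the 128-entry redirection table, packing four entries per 32-bit register write */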
2891	for (j = 0; j < (32 * 4); j++) {
2892		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2893		if (shift2)
2894			reta.bytes[j & 3] |= num_rx_queues << shift2;
2895		if ((j & 3) == 3)
2896			wr32(E1000_RETA(j >> 2), reta.dword);
2897	}
2898
2899	/*
2900	 * Disable raw packet checksumming so that RSS hash is placed in
2901	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2902	 * offloads as they are enabled by default
2903	 */
2904	rxcsum = rd32(E1000_RXCSUM);
2905	rxcsum |= E1000_RXCSUM_PCSD;
2906
2907	if (adapter->hw.mac.type >= e1000_82576)
2908		/* Enable Receive Checksum Offload for SCTP */
2909		rxcsum |= E1000_RXCSUM_CRCOFL;
2910
2911	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2912	wr32(E1000_RXCSUM, rxcsum);
2913
2914	/* If VMDq is enabled then we set the appropriate mode for that, else
2915	 * we default to RSS so that an RSS hash is calculated per packet even
2916	 * if we are only using one queue */
2917	if (adapter->vfs_allocated_count) {
2918		if (hw->mac.type > e1000_82575) {
2919			/* Set the default pool for the PF's first queue */
2920			u32 vtctl = rd32(E1000_VT_CTL);
2921			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2922				   E1000_VT_CTL_DISABLE_DEF_POOL);
2923			vtctl |= adapter->vfs_allocated_count <<
2924				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2925			wr32(E1000_VT_CTL, vtctl);
2926		}
2927		if (adapter->rss_queues > 1)
2928			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2929		else
2930			mrqc = E1000_MRQC_ENABLE_VMDQ;
2931	} else {
2932		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2933	}
2934	igb_vmm_control(adapter);
2935
2936	/*
2937	 * Generate RSS hash based on TCP port numbers and/or
2938	 * IPv4/v6 src and dst addresses since UDP cannot be
2939	 * hashed reliably due to IP fragmentation
2940	 */
2941	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2942		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2943		E1000_MRQC_RSS_FIELD_IPV6 |
2944		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2945		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2946
2947	wr32(E1000_MRQC, mrqc);
2948}
2949
2950/**
2951 * igb_setup_rctl - configure the receive control registers
2952 * @adapter: Board private structure
2953 **/
2954void igb_setup_rctl(struct igb_adapter *adapter)
2955{
2956	struct e1000_hw *hw = &adapter->hw;
2957	u32 rctl;
2958
2959	rctl = rd32(E1000_RCTL);
2960
2961	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2962	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2963
2964	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2965		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2966
2967	/*
2968	 * enable stripping of CRC. It's unlikely this will break BMC
2969	 * redirection as it did with e1000. Newer features require
2970	 * that the HW strips the CRC.
2971	 */
2972	rctl |= E1000_RCTL_SECRC;
2973
2974	/* disable store bad packets and clear size bits. */
2975	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2976
2977	/* enable LPE to prevent packets larger than max_frame_size */
2978	rctl |= E1000_RCTL_LPE;
2979
2980	/* disable queue 0 to prevent tail write w/o re-config */
2981	wr32(E1000_RXDCTL(0), 0);
2982
2983	/* Attention!!!  For SR-IOV PF driver operations you must enable
2984	 * queue drop for all VF and PF queues to prevent head of line blocking
2985	 * if an un-trusted VF does not provide descriptors to hardware.
2986	 */
2987	if (adapter->vfs_allocated_count) {
2988		/* set all queue drop enable bits */
2989		wr32(E1000_QDE, ALL_QUEUES);
2990	}
2991
2992	wr32(E1000_RCTL, rctl);
2993}
2994
2995static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2996                                   int vfn)
2997{
2998	struct e1000_hw *hw = &adapter->hw;
2999	u32 vmolr;
3000
3001	/* if it isn't the PF check to see if VFs are enabled and
3002	 * increase the size to support vlan tags */
3003	if (vfn < adapter->vfs_allocated_count &&
3004	    adapter->vf_data[vfn].vlans_enabled)
3005		size += VLAN_TAG_SIZE;
3006
3007	vmolr = rd32(E1000_VMOLR(vfn));
3008	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3009	vmolr |= size | E1000_VMOLR_LPE;
3010	wr32(E1000_VMOLR(vfn), vmolr);
3011
3012	return 0;
3013}
3014
3015/**
3016 * igb_rlpml_set - set maximum receive packet size
3017 * @adapter: board private structure
3018 *
3019 * Configure maximum receivable packet size.
3020 **/
3021static void igb_rlpml_set(struct igb_adapter *adapter)
3022{
3023	u32 max_frame_size = adapter->max_frame_size;
3024	struct e1000_hw *hw = &adapter->hw;
3025	u16 pf_id = adapter->vfs_allocated_count;
3026
3027	if (pf_id) {
3028		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3029		/*
3030		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3031		 * to our max jumbo frame size, in case we need to enable
3032		 * jumbo frames on one of the rings later.
3033		 * This will not pass over-length frames into the default
3034		 * queue because it's gated by the VMOLR.RLPML.
3035		 */
3036		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3037	}
3038
3039	wr32(E1000_RLPML, max_frame_size);
3040}
3041
3042static inline void igb_set_vmolr(struct igb_adapter *adapter,
3043				 int vfn, bool aupe)
3044{
3045	struct e1000_hw *hw = &adapter->hw;
3046	u32 vmolr;
3047
3048	/*
3049	 * This register exists only on 82576 and newer so if we are older then
3050	 * we should exit and do nothing
3051	 */
3052	if (hw->mac.type < e1000_82576)
3053		return;
3054
3055	vmolr = rd32(E1000_VMOLR(vfn));
3056	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3057	if (aupe)
3058		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3059	else
3060		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3061
3062	/* clear all bits that might not be set */
3063	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3064
3065	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3066		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3067	/*
3068	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3069	 * multicast packets
3070	 */
3071	if (vfn <= adapter->vfs_allocated_count)
3072		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3073
3074	wr32(E1000_VMOLR(vfn), vmolr);
3075}
3076
3077/**
3078 * igb_configure_rx_ring - Configure a receive ring after Reset
3079 * @adapter: board private structure
3080 * @ring: receive ring to be configured
3081 *
3082 * Configure the Rx unit of the MAC after a reset.
3083 **/
3084void igb_configure_rx_ring(struct igb_adapter *adapter,
3085                           struct igb_ring *ring)
3086{
3087	struct e1000_hw *hw = &adapter->hw;
3088	u64 rdba = ring->dma;
3089	int reg_idx = ring->reg_idx;
3090	u32 srrctl = 0, rxdctl = 0;
3091
3092	/* disable the queue */
3093	wr32(E1000_RXDCTL(reg_idx), 0);
3094
3095	/* Set DMA base address registers */
3096	wr32(E1000_RDBAL(reg_idx),
3097	     rdba & 0x00000000ffffffffULL);
3098	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3099	wr32(E1000_RDLEN(reg_idx),
3100	               ring->count * sizeof(union e1000_adv_rx_desc));
3101
3102	/* initialize head and tail */
3103	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3104	wr32(E1000_RDH(reg_idx), 0);
3105	writel(0, ring->tail);
3106
3107	/* set descriptor configuration */
3108	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3109#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3110	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3111#else
3112	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3113#endif
3114	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3115	if (hw->mac.type == e1000_82580)
3116		srrctl |= E1000_SRRCTL_TIMESTAMP;
3117	/* Only set Drop Enable if we are supporting multiple queues */
3118	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3119		srrctl |= E1000_SRRCTL_DROP_EN;
3120
3121	wr32(E1000_SRRCTL(reg_idx), srrctl);
3122
3123	/* set filtering for VMDQ pools */
3124	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3125
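	/* program prefetch, host, and write-back thresholds into RXDCTL */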
3126	rxdctl |= IGB_RX_PTHRESH;
3127	rxdctl |= IGB_RX_HTHRESH << 8;
3128	rxdctl |= IGB_RX_WTHRESH << 16;
3129
3130	/* enable receive descriptor fetching */
3131	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3132	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3133}
3134
3135/**
3136 * igb_configure_rx - Configure receive Unit after Reset
3137 * @adapter: board private structure
3138 *
3139 * Configure the Rx unit of the MAC after a reset.
3140 **/
3141static void igb_configure_rx(struct igb_adapter *adapter)
3142{
3143	int i;
3144
3145	/* set UTA to appropriate mode */
3146	igb_set_uta(adapter);
3147
3148	/* set the correct pool for the PF default MAC address in entry 0 */
3149	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3150	                 adapter->vfs_allocated_count);
3151
3152	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3153	 * the Base and Length of the Rx Descriptor Ring */
3154	for (i = 0; i < adapter->num_rx_queues; i++)
3155		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3156}
3157
3158/**
3159 * igb_free_tx_resources - Free Tx Resources per Queue
3160 * @tx_ring: Tx descriptor ring for a specific queue
3161 *
3162 * Free all transmit software resources
3163 **/
3164void igb_free_tx_resources(struct igb_ring *tx_ring)
3165{
3166	igb_clean_tx_ring(tx_ring);
3167
3168	vfree(tx_ring->tx_buffer_info);
3169	tx_ring->tx_buffer_info = NULL;
3170
3171	/* if not set, then don't free */
3172	if (!tx_ring->desc)
3173		return;
3174
3175	dma_free_coherent(tx_ring->dev, tx_ring->size,
3176			  tx_ring->desc, tx_ring->dma);
3177
3178	tx_ring->desc = NULL;
3179}
3180
3181/**
3182 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3183 * @adapter: board private structure
3184 *
3185 * Free all transmit software resources
3186 **/
3187static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3188{
3189	int i;
3190
3191	for (i = 0; i < adapter->num_tx_queues; i++)
3192		igb_free_tx_resources(adapter->tx_ring[i]);
3193}
3194
3195void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3196				    struct igb_tx_buffer *tx_buffer)
3197{
3198	if (tx_buffer->skb) {
3199		dev_kfree_skb_any(tx_buffer->skb);
3200		if (tx_buffer->dma)
3201			dma_unmap_single(ring->dev,
3202					 tx_buffer->dma,
3203					 tx_buffer->length,
3204					 DMA_TO_DEVICE);
3205	} else if (tx_buffer->dma) {
3206		dma_unmap_page(ring->dev,
3207			       tx_buffer->dma,
3208			       tx_buffer->length,
3209			       DMA_TO_DEVICE);
3210	}
3211	tx_buffer->next_to_watch = NULL;
3212	tx_buffer->skb = NULL;
3213	tx_buffer->dma = 0;
3214	/* buffer_info must be completely set up in the transmit path */
3215}
3216
3217/**
3218 * igb_clean_tx_ring - Free Tx Buffers
3219 * @tx_ring: ring to be cleaned
3220 **/
3221static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3222{
3223	struct igb_tx_buffer *buffer_info;
3224	unsigned long size;
3225	u16 i;
3226
3227	if (!tx_ring->tx_buffer_info)
3228		return;
3229	/* Free all the Tx ring sk_buffs */
3230
3231	for (i = 0; i < tx_ring->count; i++) {
3232		buffer_info = &tx_ring->tx_buffer_info[i];
3233		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3234	}
3235
3236	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3237	memset(tx_ring->tx_buffer_info, 0, size);
3238
3239	/* Zero out the descriptor ring */
3240	memset(tx_ring->desc, 0, tx_ring->size);
3241
3242	tx_ring->next_to_use = 0;
3243	tx_ring->next_to_clean = 0;
3244}
3245
3246/**
3247 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3248 * @adapter: board private structure
3249 **/
3250static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3251{
3252	int i;
3253
3254	for (i = 0; i < adapter->num_tx_queues; i++)
3255		igb_clean_tx_ring(adapter->tx_ring[i]);
3256}
3257
3258/**
3259 * igb_free_rx_resources - Free Rx Resources
3260 * @rx_ring: ring to clean the resources from
3261 *
3262 * Free all receive software resources
3263 **/
3264void igb_free_rx_resources(struct igb_ring *rx_ring)
3265{
3266	igb_clean_rx_ring(rx_ring);
3267
3268	vfree(rx_ring->rx_buffer_info);
3269	rx_ring->rx_buffer_info = NULL;
3270
3271	/* if not set, then don't free */
3272	if (!rx_ring->desc)
3273		return;
3274
3275	dma_free_coherent(rx_ring->dev, rx_ring->size,
3276			  rx_ring->desc, rx_ring->dma);
3277
3278	rx_ring->desc = NULL;
3279}
3280
3281/**
3282 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3283 * @adapter: board private structure
3284 *
3285 * Free all receive software resources
3286 **/
3287static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3288{
3289	int i;
3290
3291	for (i = 0; i < adapter->num_rx_queues; i++)
3292		igb_free_rx_resources(adapter->rx_ring[i]);
3293}
3294
3295/**
3296 * igb_clean_rx_ring - Free Rx Buffers per Queue
3297 * @rx_ring: ring to free buffers from
3298 **/
3299static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3300{
3301	unsigned long size;
3302	u16 i;
3303
3304	if (!rx_ring->rx_buffer_info)
3305		return;
3306
3307	/* Free all the Rx ring sk_buffs */
3308	for (i = 0; i < rx_ring->count; i++) {
3309		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3310		if (buffer_info->dma) {
3311			dma_unmap_single(rx_ring->dev,
3312			                 buffer_info->dma,
3313					 IGB_RX_HDR_LEN,
3314					 DMA_FROM_DEVICE);
3315			buffer_info->dma = 0;
3316		}
3317
3318		if (buffer_info->skb) {
3319			dev_kfree_skb(buffer_info->skb);
3320			buffer_info->skb = NULL;
3321		}
3322		if (buffer_info->page_dma) {
3323			dma_unmap_page(rx_ring->dev,
3324			               buffer_info->page_dma,
3325				       PAGE_SIZE / 2,
3326				       DMA_FROM_DEVICE);
3327			buffer_info->page_dma = 0;
3328		}
3329		if (buffer_info->page) {
3330			put_page(buffer_info->page);
3331			buffer_info->page = NULL;
3332			buffer_info->page_offset = 0;
3333		}
3334	}
3335
3336	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3337	memset(rx_ring->rx_buffer_info, 0, size);
3338
3339	/* Zero out the descriptor ring */
3340	memset(rx_ring->desc, 0, rx_ring->size);
3341
3342	rx_ring->next_to_clean = 0;
3343	rx_ring->next_to_use = 0;
3344}
3345
3346/**
3347 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3348 * @adapter: board private structure
3349 **/
3350static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3351{
3352	int i;
3353
3354	for (i = 0; i < adapter->num_rx_queues; i++)
3355		igb_clean_rx_ring(adapter->rx_ring[i]);
3356}
3357
3358/**
3359 * igb_set_mac - Change the Ethernet Address of the NIC
3360 * @netdev: network interface device structure
3361 * @p: pointer to an address structure
3362 *
3363 * Returns 0 on success, negative on failure
3364 **/
3365static int igb_set_mac(struct net_device *netdev, void *p)
3366{
3367	struct igb_adapter *adapter = netdev_priv(netdev);
3368	struct e1000_hw *hw = &adapter->hw;
3369	struct sockaddr *addr = p;
3370
3371	if (!is_valid_ether_addr(addr->sa_data))
3372		return -EADDRNOTAVAIL;
3373
3374	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3375	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3376
3377	/* set the correct pool for the new PF MAC address in entry 0 */
3378	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3379	                 adapter->vfs_allocated_count);
3380
3381	return 0;
3382}
3383
3384/**
3385 * igb_write_mc_addr_list - write multicast addresses to MTA
3386 * @netdev: network interface device structure
3387 *
3388 * Writes multicast address list to the MTA hash table.
3389 * Returns: -ENOMEM on failure
3390 *                0 on no addresses written
3391 *                X on writing X addresses to MTA
3392 **/
3393static int igb_write_mc_addr_list(struct net_device *netdev)
3394{
3395	struct igb_adapter *adapter = netdev_priv(netdev);
3396	struct e1000_hw *hw = &adapter->hw;
3397	struct netdev_hw_addr *ha;
3398	u8  *mta_list;
3399	int i;
3400
3401	if (netdev_mc_empty(netdev)) {
3402		/* nothing to program, so clear mc list */
3403		igb_update_mc_addr_list(hw, NULL, 0);
3404		igb_restore_vf_multicasts(adapter);
3405		return 0;
3406	}
3407
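	/* 6 bytes (ETH_ALEN) per multicast address in the packed list */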
3408	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3409	if (!mta_list)
3410		return -ENOMEM;
3411
3412	/* The shared function expects a packed array of only addresses. */
3413	i = 0;
3414	netdev_for_each_mc_addr(ha, netdev)
3415		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3416
3417	igb_update_mc_addr_list(hw, mta_list, i);
3418	kfree(mta_list);
3419
3420	return netdev_mc_count(netdev);
3421}
3422
3423/**
3424 * igb_write_uc_addr_list - write unicast addresses to RAR table
3425 * @netdev: network interface device structure
3426 *
3427 * Writes unicast address list to the RAR table.
3428 * Returns: -ENOMEM on failure/insufficient address space
3429 *                0 on no addresses written
3430 *                X on writing X addresses to the RAR table
3431 **/
3432static int igb_write_uc_addr_list(struct net_device *netdev)
3433{
3434	struct igb_adapter *adapter = netdev_priv(netdev);
3435	struct e1000_hw *hw = &adapter->hw;
3436	unsigned int vfn = adapter->vfs_allocated_count;
3437	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3438	int count = 0;
3439
3440	/* return ENOMEM indicating insufficient memory for addresses */
3441	if (netdev_uc_count(netdev) > rar_entries)
3442		return -ENOMEM;
3443
3444	if (!netdev_uc_empty(netdev) && rar_entries) {
3445		struct netdev_hw_addr *ha;
3446
3447		netdev_for_each_uc_addr(ha, netdev) {
3448			if (!rar_entries)
3449				break;
3450			igb_rar_set_qsel(adapter, ha->addr,
3451			                 rar_entries--,
3452			                 vfn);
3453			count++;
3454		}
3455	}
3456	/* write the addresses in reverse order to avoid write combining */
3457	for (; rar_entries > 0 ; rar_entries--) {
3458		wr32(E1000_RAH(rar_entries), 0);
3459		wr32(E1000_RAL(rar_entries), 0);
3460	}
3461	wrfl();
3462
3463	return count;
3464}
3465
3466/**
3467 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3468 * @netdev: network interface device structure
3469 *
3470 * The set_rx_mode entry point is called whenever the unicast or multicast
3471 * address lists or the network interface flags are updated.  This routine is
3472 * responsible for configuring the hardware for proper unicast, multicast,
3473 * promiscuous mode, and all-multi behavior.
3474 **/
3475static void igb_set_rx_mode(struct net_device *netdev)
3476{
3477	struct igb_adapter *adapter = netdev_priv(netdev);
3478	struct e1000_hw *hw = &adapter->hw;
3479	unsigned int vfn = adapter->vfs_allocated_count;
3480	u32 rctl, vmolr = 0;
3481	int count;
3482
3483	/* Check for Promiscuous and All Multicast modes */
3484	rctl = rd32(E1000_RCTL);
3485
3486	/* clear the effected bits */
3487	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3488
3489	if (netdev->flags & IFF_PROMISC) {
3490		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3491		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3492	} else {
3493		if (netdev->flags & IFF_ALLMULTI) {
3494			rctl |= E1000_RCTL_MPE;
3495			vmolr |= E1000_VMOLR_MPME;
3496		} else {
3497			/*
3498			 * Write addresses to the MTA, if the attempt fails
3499			 * then we should just turn on promiscuous mode so
3500			 * that we can at least receive multicast traffic
3501			 */
3502			count = igb_write_mc_addr_list(netdev);
3503			if (count < 0) {
3504				rctl |= E1000_RCTL_MPE;
3505				vmolr |= E1000_VMOLR_MPME;
3506			} else if (count) {
3507				vmolr |= E1000_VMOLR_ROMPE;
3508			}
3509		}
3510		/*
3511		 * Write addresses to available RAR registers, if there is not
3512		 * sufficient space to store all the addresses then enable
3513		 * unicast promiscuous mode
3514		 */
3515		count = igb_write_uc_addr_list(netdev);
3516		if (count < 0) {
3517			rctl |= E1000_RCTL_UPE;
3518			vmolr |= E1000_VMOLR_ROPE;
3519		}
3520		rctl |= E1000_RCTL_VFE;
3521	}
3522	wr32(E1000_RCTL, rctl);
3523
3524	/*
3525	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3526	 * the VMOLR to enable the appropriate modes.  Without this workaround
3527	 * we will have issues with VLAN tag stripping not being done for frames
3528	 * that are only arriving because we are the default pool
3529	 */
3530	if (hw->mac.type < e1000_82576)
3531		return;
3532
3533	vmolr |= rd32(E1000_VMOLR(vfn)) &
3534	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3535	wr32(E1000_VMOLR(vfn), vmolr);
3536	igb_restore_vf_multicasts(adapter);
3537}
3538
3539static void igb_check_wvbr(struct igb_adapter *adapter)
3540{
3541	struct e1000_hw *hw = &adapter->hw;
3542	u32 wvbr = 0;
3543
3544	switch (hw->mac.type) {
3545	case e1000_82576:
3546	case e1000_i350:
3547		if (!(wvbr = rd32(E1000_WVBR)))
3548			return;
3549		break;
3550	default:
3551		break;
3552	}
3553
3554	adapter->wvbr |= wvbr;
3555}
3556
3557#define IGB_STAGGERED_QUEUE_OFFSET 8
3558
3559static void igb_spoof_check(struct igb_adapter *adapter)
3560{
3561	int j;
3562
3563	if (!adapter->wvbr)
3564		return;
3565
3566	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3567		if (adapter->wvbr & (1 << j) ||
3568		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3569			dev_warn(&adapter->pdev->dev,
3570				"Spoof event(s) detected on VF %d\n", j);
3571			adapter->wvbr &=
3572				~((1 << j) |
3573				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3574		}
3575	}
3576}
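
/*
 * Illustrative sketch, not part of the driver: the WVBR decode performed by
 * igb_spoof_check() above.  A spoof event for VF j may be reported in bit j
 * or in bit (j + IGB_STAGGERED_QUEUE_OFFSET), so both bits are tested (and
 * cleared) as a pair.  The helper name is hypothetical.
 */
static inline bool igb_example_vf_spoofed(u32 wvbr, int vf)
{
	u32 mask = (1 << vf) | (1 << (vf + IGB_STAGGERED_QUEUE_OFFSET));

	return (wvbr & mask) != 0;
}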
3577
3578/* Need to wait a few seconds after link up to get diagnostic information from
3579 * the phy */
3580static void igb_update_phy_info(unsigned long data)
3581{
3582	struct igb_adapter *adapter = (struct igb_adapter *) data;
3583	igb_get_phy_info(&adapter->hw);
3584}
3585
3586/**
3587 * igb_has_link - check shared code for link and determine up/down
3588 * @adapter: pointer to driver private info
3589 **/
3590bool igb_has_link(struct igb_adapter *adapter)
3591{
3592	struct e1000_hw *hw = &adapter->hw;
3593	bool link_active = false;
3594	s32 ret_val = 0;
3595
3596	/* get_link_status is set on LSC (link status) interrupt or
3597	 * rx sequence error interrupt.  get_link_status will stay
3598	 * true until e1000_check_for_link establishes link for
3599	 * copper adapters ONLY
3600	 */
3601	switch (hw->phy.media_type) {
3602	case e1000_media_type_copper:
3603		if (hw->mac.get_link_status) {
3604			ret_val = hw->mac.ops.check_for_link(hw);
3605			link_active = !hw->mac.get_link_status;
3606		} else {
3607			link_active = true;
3608		}
3609		break;
3610	case e1000_media_type_internal_serdes:
3611		ret_val = hw->mac.ops.check_for_link(hw);
3612		link_active = hw->mac.serdes_has_link;
3613		break;
3614	default:
3615	case e1000_media_type_unknown:
3616		break;
3617	}
3618
3619	return link_active;
3620}
3621
3622static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3623{
3624	bool ret = false;
3625	u32 ctrl_ext, thstat;
3626
3627	/* check for thermal sensor event on i350, copper only */
3628	if (hw->mac.type == e1000_i350) {
3629		thstat = rd32(E1000_THSTAT);
3630		ctrl_ext = rd32(E1000_CTRL_EXT);
3631
3632		if ((hw->phy.media_type == e1000_media_type_copper) &&
3633		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3634			ret = !!(thstat & event);
3635		}
3636	}
3637
3638	return ret;
3639}
3640
3641/**
3642 * igb_watchdog - Timer Call-back
3643 * @data: pointer to adapter cast into an unsigned long
3644 **/
3645static void igb_watchdog(unsigned long data)
3646{
3647	struct igb_adapter *adapter = (struct igb_adapter *)data;
3648	/* Do the rest outside of interrupt context */
3649	schedule_work(&adapter->watchdog_task);
3650}
3651
3652static void igb_watchdog_task(struct work_struct *work)
3653{
3654	struct igb_adapter *adapter = container_of(work,
3655	                                           struct igb_adapter,
3656                                                   watchdog_task);
3657	struct e1000_hw *hw = &adapter->hw;
3658	struct net_device *netdev = adapter->netdev;
3659	u32 link;
3660	int i;
3661
3662	link = igb_has_link(adapter);
3663	if (link) {
3664		if (!netif_carrier_ok(netdev)) {
3665			u32 ctrl;
3666			hw->mac.ops.get_speed_and_duplex(hw,
3667			                                 &adapter->link_speed,
3668			                                 &adapter->link_duplex);
3669
3670			ctrl = rd32(E1000_CTRL);
3671			/* Link status message must follow this format */
3672			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3673				 "Flow Control: %s\n",
3674			       netdev->name,
3675			       adapter->link_speed,
3676			       adapter->link_duplex == FULL_DUPLEX ?
3677				 "Full Duplex" : "Half Duplex",
3678			       ((ctrl & E1000_CTRL_TFCE) &&
3679			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3680			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3681			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3682
3683			/* check for thermal sensor event */
3684			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3685				printk(KERN_INFO "igb: %s The network adapter "
3686						 "link speed was downshifted "
3687						 "because it overheated.\n",
3688						 netdev->name);
3689			}
3690
3691			/* adjust timeout factor according to speed/duplex */
3692			adapter->tx_timeout_factor = 1;
3693			switch (adapter->link_speed) {
3694			case SPEED_10:
3695				adapter->tx_timeout_factor = 14;
3696				break;
3697			case SPEED_100:
3698				/* maybe add some timeout factor ? */
3699				break;
3700			}
3701
3702			netif_carrier_on(netdev);
3703
3704			igb_ping_all_vfs(adapter);
3705			igb_check_vf_rate_limit(adapter);
3706
3707			/* link state has changed, schedule phy info update */
3708			if (!test_bit(__IGB_DOWN, &adapter->state))
3709				mod_timer(&adapter->phy_info_timer,
3710					  round_jiffies(jiffies + 2 * HZ));
3711		}
3712	} else {
3713		if (netif_carrier_ok(netdev)) {
3714			adapter->link_speed = 0;
3715			adapter->link_duplex = 0;
3716
3717			/* check for thermal sensor event */
3718			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3719				printk(KERN_ERR "igb: %s The network adapter "
3720						"was stopped because it "
3721						"overheated.\n",
3722						netdev->name);
3723			}
3724
3725			/* Link status message must follow this format */
3726			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3727			       netdev->name);
3728			netif_carrier_off(netdev);
3729
3730			igb_ping_all_vfs(adapter);
3731
3732			/* link state has changed, schedule phy info update */
3733			if (!test_bit(__IGB_DOWN, &adapter->state))
3734				mod_timer(&adapter->phy_info_timer,
3735					  round_jiffies(jiffies + 2 * HZ));
3736		}
3737	}
3738
3739	spin_lock(&adapter->stats64_lock);
3740	igb_update_stats(adapter, &adapter->stats64);
3741	spin_unlock(&adapter->stats64_lock);
3742
3743	for (i = 0; i < adapter->num_tx_queues; i++) {
3744		struct igb_ring *tx_ring = adapter->tx_ring[i];
3745		if (!netif_carrier_ok(netdev)) {
3746			/* We've lost link, so the controller stops DMA,
3747			 * but we've got queued Tx work that's never going
3748			 * to get done, so reset controller to flush Tx.
3749			 * (Do the reset outside of interrupt context). */
3750			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3751				adapter->tx_timeout_count++;
3752				schedule_work(&adapter->reset_task);
3753				/* return immediately since reset is imminent */
3754				return;
3755			}
3756		}
3757
3758		/* Force detection of hung controller every watchdog period */
3759		tx_ring->detect_tx_hung = true;
3760	}
3761
3762	/* Cause software interrupt to ensure rx ring is cleaned */
3763	if (adapter->msix_entries) {
3764		u32 eics = 0;
3765		for (i = 0; i < adapter->num_q_vectors; i++) {
3766			struct igb_q_vector *q_vector = adapter->q_vector[i];
3767			eics |= q_vector->eims_value;
3768		}
3769		wr32(E1000_EICS, eics);
3770	} else {
3771		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3772	}
3773
3774	igb_spoof_check(adapter);
3775
3776	/* Reset the timer */
3777	if (!test_bit(__IGB_DOWN, &adapter->state))
3778		mod_timer(&adapter->watchdog_timer,
3779			  round_jiffies(jiffies + 2 * HZ));
3780}
3781
3782enum latency_range {
3783	lowest_latency = 0,
3784	low_latency = 1,
3785	bulk_latency = 2,
3786	latency_invalid = 255
3787};
3788
3789/**
3790 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3791 *
3792 *      Stores a new ITR value based strictly on packet size.  This
3793 *      algorithm is less sophisticated than that used in igb_update_itr,
3794 *      due to the difficulty of synchronizing statistics across multiple
3795 *      receive rings.  The divisors and thresholds used by this function
3796 *      were determined based on theoretical maximum wire speed and testing
3797 *      data, in order to minimize response time while increasing bulk
3798 *      throughput.
3799 *      This functionality is controlled by the InterruptThrottleRate module
3800 *      parameter (see igb_param.c)
3801 *      NOTE:  This function is called only when operating in a multiqueue
3802 *             receive environment.
3803 * @q_vector: pointer to q_vector
3804 **/
3805static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3806{
3807	int new_val = q_vector->itr_val;
3808	int avg_wire_size = 0;
3809	struct igb_adapter *adapter = q_vector->adapter;
3810	unsigned int packets;
3811
3812	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3813	 * ints/sec - ITR timer value of 120 ticks.
3814	 */
3815	if (adapter->link_speed != SPEED_1000) {
3816		new_val = IGB_4K_ITR;
3817		goto set_itr_val;
3818	}
3819
3820	packets = q_vector->rx.total_packets;
3821	if (packets)
3822		avg_wire_size = q_vector->rx.total_bytes / packets;
3823
3824	packets = q_vector->tx.total_packets;
3825	if (packets)
3826		avg_wire_size = max_t(u32, avg_wire_size,
3827				      q_vector->tx.total_bytes / packets);
3828
3829	/* if avg_wire_size isn't set no work was done */
3830	if (!avg_wire_size)
3831		goto clear_counts;
3832
3833	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3834	avg_wire_size += 24;
3835
3836	/* Don't starve jumbo frames */
3837	avg_wire_size = min(avg_wire_size, 3000);
3838
3839	/* Give a little boost to mid-size frames */
3840	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3841		new_val = avg_wire_size / 3;
3842	else
3843		new_val = avg_wire_size / 2;
3844
3845	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3846	if (new_val < IGB_20K_ITR &&
3847	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3848	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3849		new_val = IGB_20K_ITR;
3850
3851set_itr_val:
3852	if (new_val != q_vector->itr_val) {
3853		q_vector->itr_val = new_val;
3854		q_vector->set_itr = 1;
3855	}
3856clear_counts:
3857	q_vector->rx.total_bytes = 0;
3858	q_vector->rx.total_packets = 0;
3859	q_vector->tx.total_bytes = 0;
3860	q_vector->tx.total_packets = 0;
3861}
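
/*
 * Illustrative sketch, not part of the driver: the avg_wire_size heuristic
 * from igb_update_ring_itr() applied to example numbers.  For 1000 packets
 * totalling 64000 bytes, avg_wire_size = 64 + 24 = 88; that is outside the
 * 300-1200 byte mid-size window, so the new ITR value is 88 / 2 = 44.  The
 * helper name is hypothetical and mirrors only the core sizing logic above
 * (the link-speed and conservative-mode special cases are omitted).
 */
static inline int igb_example_ring_itr(u32 total_bytes, u32 total_packets)
{
	int avg_wire_size;

	if (!total_packets)
		return 0;

	/* account for CRC, preamble and inter-frame gap, cap jumbo frames */
	avg_wire_size = total_bytes / total_packets + 24;
	avg_wire_size = min(avg_wire_size, 3000);

	if (avg_wire_size > 300 && avg_wire_size < 1200)
		return avg_wire_size / 3;

	return avg_wire_size / 2;
}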
3862
3863/**
3864 * igb_update_itr - update the dynamic ITR value based on statistics
3865 *      Stores a new ITR value based on packets and byte
3866 *      counts during the last interrupt.  The advantage of per interrupt
3867 *      computation is faster updates and more accurate ITR for the current
3868 *      traffic pattern.  Constants in this function were computed
3869 *      based on theoretical maximum wire speed and thresholds were set based
3870 *      on testing data as well as attempting to minimize response time
3871 *      while increasing bulk throughput.
3872 *      This functionality is controlled by the InterruptThrottleRate module
3873 *      parameter (see igb_param.c)
3874 *      NOTE:  These calculations are only valid when operating in a single-
3875 *             queue environment.
3876 * @q_vector: pointer to q_vector
3877 * @ring_container: ring info to update the itr for
3878 **/
3879static void igb_update_itr(struct igb_q_vector *q_vector,
3880			   struct igb_ring_container *ring_container)
3881{
3882	unsigned int packets = ring_container->total_packets;
3883	unsigned int bytes = ring_container->total_bytes;
3884	u8 itrval = ring_container->itr;
3885
3886	/* no packets, exit with status unchanged */
3887	if (packets == 0)
3888		return;
3889
3890	switch (itrval) {
3891	case lowest_latency:
3892		/* handle TSO and jumbo frames */
3893		if (bytes/packets > 8000)
3894			itrval = bulk_latency;
3895		else if ((packets < 5) && (bytes > 512))
3896			itrval = low_latency;
3897		break;
3898	case low_latency:  /* 50 usec aka 20000 ints/s */
3899		if (bytes > 10000) {
3900			/* this if handles the TSO accounting */
3901			if (bytes/packets > 8000) {
3902				itrval = bulk_latency;
3903			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3904				itrval = bulk_latency;
3905			} else if ((packets > 35)) {
3906				itrval = lowest_latency;
3907			}
3908		} else if (bytes/packets > 2000) {
3909			itrval = bulk_latency;
3910		} else if (packets <= 2 && bytes < 512) {
3911			itrval = lowest_latency;
3912		}
3913		break;
3914	case bulk_latency: /* 250 usec aka 4000 ints/s */
3915		if (bytes > 25000) {
3916			if (packets > 35)
3917				itrval = low_latency;
3918		} else if (bytes < 1500) {
3919			itrval = low_latency;
3920		}
3921		break;
3922	}
3923
3924	/* clear work counters since we have the values we need */
3925	ring_container->total_bytes = 0;
3926	ring_container->total_packets = 0;
3927
3928	/* write updated itr to ring container */
3929	ring_container->itr = itrval;
3930}
3931
3932static void igb_set_itr(struct igb_q_vector *q_vector)
3933{
3934	struct igb_adapter *adapter = q_vector->adapter;
3935	u32 new_itr = q_vector->itr_val;
3936	u8 current_itr = 0;
3937
3938	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3939	if (adapter->link_speed != SPEED_1000) {
3940		current_itr = 0;
3941		new_itr = IGB_4K_ITR;
3942		goto set_itr_now;
3943	}
3944
3945	igb_update_itr(q_vector, &q_vector->tx);
3946	igb_update_itr(q_vector, &q_vector->rx);
3947
3948	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3949
3950	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3951	if (current_itr == lowest_latency &&
3952	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3953	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3954		current_itr = low_latency;
3955
3956	switch (current_itr) {
3957	/* counts and packets in update_itr are dependent on these numbers */
3958	case lowest_latency:
3959		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3960		break;
3961	case low_latency:
3962		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3963		break;
3964	case bulk_latency:
3965		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3966		break;
3967	default:
3968		break;
3969	}
3970
3971set_itr_now:
3972	if (new_itr != q_vector->itr_val) {
3973		/* this attempts to bias the interrupt rate towards Bulk
3974		 * by adding intermediate steps when interrupt rate is
3975		 * increasing */
3976		new_itr = new_itr > q_vector->itr_val ?
3977		             max((new_itr * q_vector->itr_val) /
3978		                 (new_itr + (q_vector->itr_val >> 2)),
3979				 new_itr) :
3980			     new_itr;
3981		/* Don't write the value here; it resets the adapter's
3982		 * internal timer, and causes us to delay far longer than
3983		 * we should between interrupts.  Instead, we write the ITR
3984		 * value at the beginning of the next interrupt so the timing
3985		 * ends up being correct.
3986		 */
3987		q_vector->itr_val = new_itr;
3988		q_vector->set_itr = 1;
3989	}
3990}
3991
3992void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3993		     u32 type_tucmd, u32 mss_l4len_idx)
3994{
3995	struct e1000_adv_tx_context_desc *context_desc;
3996	u16 i = tx_ring->next_to_use;
3997
3998	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3999
4000	i++;
4001	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4002
4003	/* set bits to identify this as an advanced context descriptor */
4004	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4005
4006	/* For 82575, context index must be unique per ring. */
4007	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4008		mss_l4len_idx |= tx_ring->reg_idx << 4;
4009
4010	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4011	context_desc->seqnum_seed	= 0;
4012	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4013	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4014}
4015
4016static int igb_tso(struct igb_ring *tx_ring,
4017		   struct igb_tx_buffer *first,
4018		   u8 *hdr_len)
4019{
4020	struct sk_buff *skb = first->skb;
4021	u32 vlan_macip_lens, type_tucmd;
4022	u32 mss_l4len_idx, l4len;
4023
4024	if (!skb_is_gso(skb))
4025		return 0;
4026
4027	if (skb_header_cloned(skb)) {
4028		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4029		if (err)
4030			return err;
4031	}
4032
4033	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4034	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4035
4036	if (first->protocol == __constant_htons(ETH_P_IP)) {
4037		struct iphdr *iph = ip_hdr(skb);
4038		iph->tot_len = 0;
4039		iph->check = 0;
4040		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4041							 iph->daddr, 0,
4042							 IPPROTO_TCP,
4043							 0);
4044		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4045		first->tx_flags |= IGB_TX_FLAGS_TSO |
4046				   IGB_TX_FLAGS_CSUM |
4047				   IGB_TX_FLAGS_IPV4;
4048	} else if (skb_is_gso_v6(skb)) {
4049		ipv6_hdr(skb)->payload_len = 0;
4050		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4051						       &ipv6_hdr(skb)->daddr,
4052						       0, IPPROTO_TCP, 0);
4053		first->tx_flags |= IGB_TX_FLAGS_TSO |
4054				   IGB_TX_FLAGS_CSUM;
4055	}
4056
4057	/* compute header lengths */
4058	l4len = tcp_hdrlen(skb);
4059	*hdr_len = skb_transport_offset(skb) + l4len;
4060
4061	/* update gso size and bytecount with header size */
4062	first->gso_segs = skb_shinfo(skb)->gso_segs;
4063	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4064
4065	/* MSS L4LEN IDX */
4066	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4067	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4068
4069	/* VLAN MACLEN IPLEN */
4070	vlan_macip_lens = skb_network_header_len(skb);
4071	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4072	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4073
4074	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4075
4076	return 1;
4077}
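
/*
 * Illustrative sketch, not part of the driver: how the MSS/L4LEN word built
 * at the end of igb_tso() is packed.  For a TSO frame with a 20 byte TCP
 * header and a 1448 byte MSS, the L4 header length goes in the bits selected
 * by E1000_ADVTXD_L4LEN_SHIFT and the MSS in the bits selected by
 * E1000_ADVTXD_MSS_SHIFT.  The helper name is hypothetical.
 */
static inline u32 igb_example_mss_l4len(u32 l4len, u32 mss)
{
	return (l4len << E1000_ADVTXD_L4LEN_SHIFT) |
	       (mss << E1000_ADVTXD_MSS_SHIFT);
}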
4078
4079static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4080{
4081	struct sk_buff *skb = first->skb;
4082	u32 vlan_macip_lens = 0;
4083	u32 mss_l4len_idx = 0;
4084	u32 type_tucmd = 0;
4085
4086	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4087		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4088			return;
4089	} else {
4090		u8 l4_hdr = 0;
4091		switch (first->protocol) {
4092		case __constant_htons(ETH_P_IP):
4093			vlan_macip_lens |= skb_network_header_len(skb);
4094			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4095			l4_hdr = ip_hdr(skb)->protocol;
4096			break;
4097		case __constant_htons(ETH_P_IPV6):
4098			vlan_macip_lens |= skb_network_header_len(skb);
4099			l4_hdr = ipv6_hdr(skb)->nexthdr;
4100			break;
4101		default:
4102			if (unlikely(net_ratelimit())) {
4103				dev_warn(tx_ring->dev,
4104				 "partial checksum but proto=%x!\n",
4105				 first->protocol);
4106			}
4107			break;
4108		}
4109
4110		switch (l4_hdr) {
4111		case IPPROTO_TCP:
4112			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4113			mss_l4len_idx = tcp_hdrlen(skb) <<
4114					E1000_ADVTXD_L4LEN_SHIFT;
4115			break;
4116		case IPPROTO_SCTP:
4117			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4118			mss_l4len_idx = sizeof(struct sctphdr) <<
4119					E1000_ADVTXD_L4LEN_SHIFT;
4120			break;
4121		case IPPROTO_UDP:
4122			mss_l4len_idx = sizeof(struct udphdr) <<
4123					E1000_ADVTXD_L4LEN_SHIFT;
4124			break;
4125		default:
4126			if (unlikely(net_ratelimit())) {
4127				dev_warn(tx_ring->dev,
4128				 "partial checksum but l4 proto=%x!\n",
4129				 l4_hdr);
4130			}
4131			break;
4132		}
4133
4134		/* update TX checksum flag */
4135		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4136	}
4137
4138	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4139	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4140
4141	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4142}
4143
4144static __le32 igb_tx_cmd_type(u32 tx_flags)
4145{
4146	/* set type for advanced descriptor with frame checksum insertion */
4147	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4148				      E1000_ADVTXD_DCMD_IFCS |
4149				      E1000_ADVTXD_DCMD_DEXT);
4150
4151	/* set HW vlan bit if vlan is present */
4152	if (tx_flags & IGB_TX_FLAGS_VLAN)
4153		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4154
4155	/* set timestamp bit if present */
4156	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4157		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4158
4159	/* set segmentation bits for TSO */
4160	if (tx_flags & IGB_TX_FLAGS_TSO)
4161		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4162
4163	return cmd_type;
4164}
4165
4166static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4167				 union e1000_adv_tx_desc *tx_desc,
4168				 u32 tx_flags, unsigned int paylen)
4169{
4170	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4171
4172	/* 82575 requires a unique index per ring if any offload is enabled */
4173	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4174	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4175		olinfo_status |= tx_ring->reg_idx << 4;
4176
4177	/* insert L4 checksum */
4178	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4179		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4180
4181		/* insert IPv4 checksum */
4182		if (tx_flags & IGB_TX_FLAGS_IPV4)
4183			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4184	}
4185
4186	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4187}
4188
4189/*
4190 * The largest size we can write to the descriptor is 65535.  In order to
4191 * maintain a power of two alignment we have to limit ourselves to 32K.
4192 */
4193#define IGB_MAX_TXD_PWR	15
4194#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
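
/*
 * Illustrative sketch, not part of the driver: how many data descriptors a
 * single buffer consumes given the 32K per-descriptor limit above.  A 9000
 * byte buffer, for example, needs DIV_ROUND_UP(9000, 32768) = 1 descriptor,
 * while a 100000 byte buffer needs 4.  The helper name is hypothetical.
 */
static inline unsigned int igb_example_txd_count(unsigned int size)
{
	return DIV_ROUND_UP(size, IGB_MAX_DATA_PER_TXD);
}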
4195
4196static void igb_tx_map(struct igb_ring *tx_ring,
4197		       struct igb_tx_buffer *first,
4198		       const u8 hdr_len)
4199{
4200	struct sk_buff *skb = first->skb;
4201	struct igb_tx_buffer *tx_buffer_info;
4202	union e1000_adv_tx_desc *tx_desc;
4203	dma_addr_t dma;
4204	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4205	unsigned int data_len = skb->data_len;
4206	unsigned int size = skb_headlen(skb);
4207	unsigned int paylen = skb->len - hdr_len;
4208	__le32 cmd_type;
4209	u32 tx_flags = first->tx_flags;
4210	u16 i = tx_ring->next_to_use;
4211
4212	tx_desc = IGB_TX_DESC(tx_ring, i);
4213
4214	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4215	cmd_type = igb_tx_cmd_type(tx_flags);
4216
4217	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4218	if (dma_mapping_error(tx_ring->dev, dma))
4219		goto dma_error;
4220
4221	/* record length, and DMA address */
4222	first->length = size;
4223	first->dma = dma;
4224	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4225
4226	for (;;) {
4227		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4228			tx_desc->read.cmd_type_len =
4229				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4230
4231			i++;
4232			tx_desc++;
4233			if (i == tx_ring->count) {
4234				tx_desc = IGB_TX_DESC(tx_ring, 0);
4235				i = 0;
4236			}
4237
4238			dma += IGB_MAX_DATA_PER_TXD;
4239			size -= IGB_MAX_DATA_PER_TXD;
4240
4241			tx_desc->read.olinfo_status = 0;
4242			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4243		}
4244
4245		if (likely(!data_len))
4246			break;
4247
4248		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4249
4250		i++;
4251		tx_desc++;
4252		if (i == tx_ring->count) {
4253			tx_desc = IGB_TX_DESC(tx_ring, 0);
4254			i = 0;
4255		}
4256
4257		size = frag->size;
4258		data_len -= size;
4259
4260		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4261				   size, DMA_TO_DEVICE);
4262		if (dma_mapping_error(tx_ring->dev, dma))
4263			goto dma_error;
4264
4265		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4266		tx_buffer_info->length = size;
4267		tx_buffer_info->dma = dma;
4268
4269		tx_desc->read.olinfo_status = 0;
4270		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4271
4272		frag++;
4273	}
4274
4275	/* write last descriptor with RS and EOP bits */
4276	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4277	tx_desc->read.cmd_type_len = cmd_type;
4278
4279	/* set the timestamp */
4280	first->time_stamp = jiffies;
4281
4282	/*
4283	 * Force memory writes to complete before letting h/w know there
4284	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4285	 * memory model archs, such as IA-64).
4286	 *
4287	 * We also need this memory barrier to make certain all of the
4288	 * status bits have been updated before next_to_watch is written.
4289	 */
4290	wmb();
4291
4292	/* set next_to_watch value indicating a packet is present */
4293	first->next_to_watch = tx_desc;
4294
4295	i++;
4296	if (i == tx_ring->count)
4297		i = 0;
4298
4299	tx_ring->next_to_use = i;
4300
4301	writel(i, tx_ring->tail);
4302
4303	/* we need this if more than one processor can write to our tail
4304	 * at a time; it synchronizes IO on IA64/Altix systems */
4305	mmiowb();
4306
4307	return;
4308
4309dma_error:
4310	dev_err(tx_ring->dev, "TX DMA map failed\n");
4311
4312	/* clear dma mappings for failed tx_buffer_info map */
4313	for (;;) {
4314		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4315		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4316		if (tx_buffer_info == first)
4317			break;
4318		if (i == 0)
4319			i = tx_ring->count;
4320		i--;
4321	}
4322
4323	tx_ring->next_to_use = i;
4324}
4325
4326static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4327{
4328	struct net_device *netdev = tx_ring->netdev;
4329
4330	netif_stop_subqueue(netdev, tx_ring->queue_index);
4331
4332	/* Herbert's original patch had:
4333	 *  smp_mb__after_netif_stop_queue();
4334	 * but since that doesn't exist yet, just open code it. */
4335	smp_mb();
4336
4337	/* We need to check again in case another CPU has just
4338	 * made room available. */
4339	if (igb_desc_unused(tx_ring) < size)
4340		return -EBUSY;
4341
4342	/* A reprieve! */
4343	netif_wake_subqueue(netdev, tx_ring->queue_index);
4344
4345	u64_stats_update_begin(&tx_ring->tx_syncp2);
4346	tx_ring->tx_stats.restart_queue2++;
4347	u64_stats_update_end(&tx_ring->tx_syncp2);
4348
4349	return 0;
4350}
4351
4352static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4353{
4354	if (igb_desc_unused(tx_ring) >= size)
4355		return 0;
4356	return __igb_maybe_stop_tx(tx_ring, size);
4357}
4358
4359netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4360				struct igb_ring *tx_ring)
4361{
4362	struct igb_tx_buffer *first;
4363	int tso;
4364	u32 tx_flags = 0;
4365	__be16 protocol = vlan_get_protocol(skb);
4366	u8 hdr_len = 0;
4367
4368	/* need: 1 descriptor per page,
4369	 *       + 2 desc gap to keep tail from touching head,
4370	 *       + 1 desc for skb->data,
4371	 *       + 1 desc for context descriptor,
4372	 * otherwise try next time */
4373	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4374		/* this is a hard error */
4375		return NETDEV_TX_BUSY;
4376	}
4377
4378	/* record the location of the first descriptor for this packet */
4379	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4380	first->skb = skb;
4381	first->bytecount = skb->len;
4382	first->gso_segs = 1;
4383
4384	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4385		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4386		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4387	}
4388
4389	if (vlan_tx_tag_present(skb)) {
4390		tx_flags |= IGB_TX_FLAGS_VLAN;
4391		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4392	}
4393
4394	/* record initial flags and protocol */
4395	first->tx_flags = tx_flags;
4396	first->protocol = protocol;
4397
4398	tso = igb_tso(tx_ring, first, &hdr_len);
4399	if (tso < 0)
4400		goto out_drop;
4401	else if (!tso)
4402		igb_tx_csum(tx_ring, first);
4403
4404	igb_tx_map(tx_ring, first, hdr_len);
4405
4406	/* Make sure there is space in the ring for the next send. */
4407	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4408
4409	return NETDEV_TX_OK;
4410
4411out_drop:
4412	igb_unmap_and_free_tx_resource(tx_ring, first);
4413
4414	return NETDEV_TX_OK;
4415}
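
/*
 * Illustrative sketch, not part of the driver: the descriptor budget checked
 * at the top of igb_xmit_frame_ring().  Each page fragment needs one
 * descriptor, plus one for skb->data, one for a possible context descriptor
 * and a two descriptor gap to keep tail from touching head; hence the
 * "nr_frags + 4" test above.  The helper name is hypothetical.
 */
static inline u16 igb_example_desc_needed(unsigned int nr_frags)
{
	/* e.g. an skb with 3 fragments needs 3 + 4 = 7 free descriptors */
	return nr_frags + 4;
}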
4416
4417static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4418						    struct sk_buff *skb)
4419{
4420	unsigned int r_idx = skb->queue_mapping;
4421
4422	if (r_idx >= adapter->num_tx_queues)
4423		r_idx = r_idx % adapter->num_tx_queues;
4424
4425	return adapter->tx_ring[r_idx];
4426}
4427
4428static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4429				  struct net_device *netdev)
4430{
4431	struct igb_adapter *adapter = netdev_priv(netdev);
4432
4433	if (test_bit(__IGB_DOWN, &adapter->state)) {
4434		dev_kfree_skb_any(skb);
4435		return NETDEV_TX_OK;
4436	}
4437
4438	if (skb->len <= 0) {
4439		dev_kfree_skb_any(skb);
4440		return NETDEV_TX_OK;
4441	}
4442
4443	/*
4444	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4445	 * in order to meet this minimum size requirement.
4446	 */
4447	if (skb->len < 17) {
4448		if (skb_padto(skb, 17))
4449			return NETDEV_TX_OK;
4450		skb->len = 17;
4451	}
4452
4453	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4454}
4455
4456/**
4457 * igb_tx_timeout - Respond to a Tx Hang
4458 * @netdev: network interface device structure
4459 **/
4460static void igb_tx_timeout(struct net_device *netdev)
4461{
4462	struct igb_adapter *adapter = netdev_priv(netdev);
4463	struct e1000_hw *hw = &adapter->hw;
4464
4465	/* Do the reset outside of interrupt context */
4466	adapter->tx_timeout_count++;
4467
4468	if (hw->mac.type == e1000_82580)
4469		hw->dev_spec._82575.global_device_reset = true;
4470
4471	schedule_work(&adapter->reset_task);
4472	wr32(E1000_EICS,
4473	     (adapter->eims_enable_mask & ~adapter->eims_other));
4474}
4475
4476static void igb_reset_task(struct work_struct *work)
4477{
4478	struct igb_adapter *adapter;
4479	adapter = container_of(work, struct igb_adapter, reset_task);
4480
4481	igb_dump(adapter);
4482	netdev_err(adapter->netdev, "Reset adapter\n");
4483	igb_reinit_locked(adapter);
4484}
4485
4486/**
4487 * igb_get_stats64 - Get System Network Statistics
4488 * @netdev: network interface device structure
4489 * @stats: rtnl_link_stats64 pointer
4490 *
4491 **/
4492static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4493						 struct rtnl_link_stats64 *stats)
4494{
4495	struct igb_adapter *adapter = netdev_priv(netdev);
4496
4497	spin_lock(&adapter->stats64_lock);
4498	igb_update_stats(adapter, &adapter->stats64);
4499	memcpy(stats, &adapter->stats64, sizeof(*stats));
4500	spin_unlock(&adapter->stats64_lock);
4501
4502	return stats;
4503}
4504
4505/**
4506 * igb_change_mtu - Change the Maximum Transfer Unit
4507 * @netdev: network interface device structure
4508 * @new_mtu: new value for maximum frame size
4509 *
4510 * Returns 0 on success, negative on failure
4511 **/
4512static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4513{
4514	struct igb_adapter *adapter = netdev_priv(netdev);
4515	struct pci_dev *pdev = adapter->pdev;
4516	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4517
4518	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4519		dev_err(&pdev->dev, "Invalid MTU setting\n");
4520		return -EINVAL;
4521	}
4522
4523#define MAX_STD_JUMBO_FRAME_SIZE 9238
4524	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4525		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4526		return -EINVAL;
4527	}
4528
4529	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4530		msleep(1);
4531
4532	/* igb_down has a dependency on max_frame_size */
4533	adapter->max_frame_size = max_frame;
4534
4535	if (netif_running(netdev))
4536		igb_down(adapter);
4537
4538	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4539		 netdev->mtu, new_mtu);
4540	netdev->mtu = new_mtu;
4541
4542	if (netif_running(netdev))
4543		igb_up(adapter);
4544	else
4545		igb_reset(adapter);
4546
4547	clear_bit(__IGB_RESETTING, &adapter->state);
4548
4549	return 0;
4550}
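
/*
 * Illustrative sketch, not part of the driver: the MTU to max_frame_size
 * conversion used in igb_change_mtu().  A standard 1500 byte MTU becomes
 * 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) = 1522 bytes on
 * the wire.  The helper name is hypothetical.
 */
static inline int igb_example_max_frame(int mtu)
{
	return mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
}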
4551
4552/**
4553 * igb_update_stats - Update the board statistics counters
4554 * @adapter: board private structure
4555 **/
4556
4557void igb_update_stats(struct igb_adapter *adapter,
4558		      struct rtnl_link_stats64 *net_stats)
4559{
4560	struct e1000_hw *hw = &adapter->hw;
4561	struct pci_dev *pdev = adapter->pdev;
4562	u32 reg, mpc;
4563	u16 phy_tmp;
4564	int i;
4565	u64 bytes, packets;
4566	unsigned int start;
4567	u64 _bytes, _packets;
4568
4569#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4570
4571	/*
4572	 * Prevent stats update while adapter is being reset, or if the pci
4573	 * connection is down.
4574	 */
4575	if (adapter->link_speed == 0)
4576		return;
4577	if (pci_channel_offline(pdev))
4578		return;
4579
4580	bytes = 0;
4581	packets = 0;
4582	for (i = 0; i < adapter->num_rx_queues; i++) {
4583		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4584		struct igb_ring *ring = adapter->rx_ring[i];
4585
4586		ring->rx_stats.drops += rqdpc_tmp;
4587		net_stats->rx_fifo_errors += rqdpc_tmp;
4588
4589		do {
4590			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4591			_bytes = ring->rx_stats.bytes;
4592			_packets = ring->rx_stats.packets;
4593		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4594		bytes += _bytes;
4595		packets += _packets;
4596	}
4597
4598	net_stats->rx_bytes = bytes;
4599	net_stats->rx_packets = packets;
4600
4601	bytes = 0;
4602	packets = 0;
4603	for (i = 0; i < adapter->num_tx_queues; i++) {
4604		struct igb_ring *ring = adapter->tx_ring[i];
4605		do {
4606			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4607			_bytes = ring->tx_stats.bytes;
4608			_packets = ring->tx_stats.packets;
4609		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4610		bytes += _bytes;
4611		packets += _packets;
4612	}
4613	net_stats->tx_bytes = bytes;
4614	net_stats->tx_packets = packets;
4615
4616	/* read stats registers */
4617	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4618	adapter->stats.gprc += rd32(E1000_GPRC);
4619	adapter->stats.gorc += rd32(E1000_GORCL);
4620	rd32(E1000_GORCH); /* clear GORCL */
4621	adapter->stats.bprc += rd32(E1000_BPRC);
4622	adapter->stats.mprc += rd32(E1000_MPRC);
4623	adapter->stats.roc += rd32(E1000_ROC);
4624
4625	adapter->stats.prc64 += rd32(E1000_PRC64);
4626	adapter->stats.prc127 += rd32(E1000_PRC127);
4627	adapter->stats.prc255 += rd32(E1000_PRC255);
4628	adapter->stats.prc511 += rd32(E1000_PRC511);
4629	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4630	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4631	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4632	adapter->stats.sec += rd32(E1000_SEC);
4633
4634	mpc = rd32(E1000_MPC);
4635	adapter->stats.mpc += mpc;
4636	net_stats->rx_fifo_errors += mpc;
4637	adapter->stats.scc += rd32(E1000_SCC);
4638	adapter->stats.ecol += rd32(E1000_ECOL);
4639	adapter->stats.mcc += rd32(E1000_MCC);
4640	adapter->stats.latecol += rd32(E1000_LATECOL);
4641	adapter->stats.dc += rd32(E1000_DC);
4642	adapter->stats.rlec += rd32(E1000_RLEC);
4643	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4644	adapter->stats.xontxc += rd32(E1000_XONTXC);
4645	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4646	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4647	adapter->stats.fcruc += rd32(E1000_FCRUC);
4648	adapter->stats.gptc += rd32(E1000_GPTC);
4649	adapter->stats.gotc += rd32(E1000_GOTCL);
4650	rd32(E1000_GOTCH); /* clear GOTCL */
4651	adapter->stats.rnbc += rd32(E1000_RNBC);
4652	adapter->stats.ruc += rd32(E1000_RUC);
4653	adapter->stats.rfc += rd32(E1000_RFC);
4654	adapter->stats.rjc += rd32(E1000_RJC);
4655	adapter->stats.tor += rd32(E1000_TORH);
4656	adapter->stats.tot += rd32(E1000_TOTH);
4657	adapter->stats.tpr += rd32(E1000_TPR);
4658
4659	adapter->stats.ptc64 += rd32(E1000_PTC64);
4660	adapter->stats.ptc127 += rd32(E1000_PTC127);
4661	adapter->stats.ptc255 += rd32(E1000_PTC255);
4662	adapter->stats.ptc511 += rd32(E1000_PTC511);
4663	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4664	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4665
4666	adapter->stats.mptc += rd32(E1000_MPTC);
4667	adapter->stats.bptc += rd32(E1000_BPTC);
4668
4669	adapter->stats.tpt += rd32(E1000_TPT);
4670	adapter->stats.colc += rd32(E1000_COLC);
4671
4672	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4673	/* read internal phy specific stats */
4674	reg = rd32(E1000_CTRL_EXT);
4675	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4676		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4677		adapter->stats.tncrs += rd32(E1000_TNCRS);
4678	}
4679
4680	adapter->stats.tsctc += rd32(E1000_TSCTC);
4681	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4682
4683	adapter->stats.iac += rd32(E1000_IAC);
4684	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4685	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4686	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4687	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4688	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4689	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4690	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4691	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4692
4693	/* Fill out the OS statistics structure */
4694	net_stats->multicast = adapter->stats.mprc;
4695	net_stats->collisions = adapter->stats.colc;
4696
4697	/* Rx Errors */
4698
4699	/* RLEC on some newer hardware can be incorrect so build
4700	 * our own version based on RUC and ROC */
4701	net_stats->rx_errors = adapter->stats.rxerrc +
4702		adapter->stats.crcerrs + adapter->stats.algnerrc +
4703		adapter->stats.ruc + adapter->stats.roc +
4704		adapter->stats.cexterr;
4705	net_stats->rx_length_errors = adapter->stats.ruc +
4706				      adapter->stats.roc;
4707	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4708	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4709	net_stats->rx_missed_errors = adapter->stats.mpc;
4710
4711	/* Tx Errors */
4712	net_stats->tx_errors = adapter->stats.ecol +
4713			       adapter->stats.latecol;
4714	net_stats->tx_aborted_errors = adapter->stats.ecol;
4715	net_stats->tx_window_errors = adapter->stats.latecol;
4716	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4717
4718	/* Tx Dropped needs to be maintained elsewhere */
4719
4720	/* Phy Stats */
4721	if (hw->phy.media_type == e1000_media_type_copper) {
4722		if ((adapter->link_speed == SPEED_1000) &&
4723		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4724			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4725			adapter->phy_stats.idle_errors += phy_tmp;
4726		}
4727	}
4728
4729	/* Management Stats */
4730	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4731	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4732	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4733
4734	/* OS2BMC Stats */
4735	reg = rd32(E1000_MANC);
4736	if (reg & E1000_MANC_EN_BMC2OS) {
4737		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4738		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4739		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4740		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4741	}
4742}
4743
4744static irqreturn_t igb_msix_other(int irq, void *data)
4745{
4746	struct igb_adapter *adapter = data;
4747	struct e1000_hw *hw = &adapter->hw;
4748	u32 icr = rd32(E1000_ICR);
4749	/* reading ICR causes bit 31 of EICR to be cleared */
4750
4751	if (icr & E1000_ICR_DRSTA)
4752		schedule_work(&adapter->reset_task);
4753
4754	if (icr & E1000_ICR_DOUTSYNC) {
4755		/* HW is reporting DMA is out of sync */
4756		adapter->stats.doosync++;
4757		/* The DMA Out of Sync is also an indication of a spoof event
4758		 * in IOV mode. Check the Wrong VM Behavior register to
4759		 * see if it is really a spoof event. */
4760		igb_check_wvbr(adapter);
4761	}
4762
4763	/* Check for a mailbox event */
4764	if (icr & E1000_ICR_VMMB)
4765		igb_msg_task(adapter);
4766
4767	if (icr & E1000_ICR_LSC) {
4768		hw->mac.get_link_status = 1;
4769		/* guard against interrupt when we're going down */
4770		if (!test_bit(__IGB_DOWN, &adapter->state))
4771			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4772	}
4773
4774	if (adapter->vfs_allocated_count)
4775		wr32(E1000_IMS, E1000_IMS_LSC |
4776				E1000_IMS_VMMB |
4777				E1000_IMS_DOUTSYNC);
4778	else
4779		wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4780	wr32(E1000_EIMS, adapter->eims_other);
4781
4782	return IRQ_HANDLED;
4783}
4784
4785static void igb_write_itr(struct igb_q_vector *q_vector)
4786{
4787	struct igb_adapter *adapter = q_vector->adapter;
4788	u32 itr_val = q_vector->itr_val & 0x7FFC;
4789
4790	if (!q_vector->set_itr)
4791		return;
4792
4793	if (!itr_val)
4794		itr_val = 0x4;
4795
4796	if (adapter->hw.mac.type == e1000_82575)
4797		itr_val |= itr_val << 16;
4798	else
4799		itr_val |= E1000_EITR_CNT_IGNR;
4800
4801	writel(itr_val, q_vector->itr_register);
4802	q_vector->set_itr = 0;
4803}
4804
4805static irqreturn_t igb_msix_ring(int irq, void *data)
4806{
4807	struct igb_q_vector *q_vector = data;
4808
4809	/* Write the ITR value calculated from the previous interrupt. */
4810	igb_write_itr(q_vector);
4811
4812	napi_schedule(&q_vector->napi);
4813
4814	return IRQ_HANDLED;
4815}
4816
4817#ifdef CONFIG_IGB_DCA
4818static void igb_update_dca(struct igb_q_vector *q_vector)
4819{
4820	struct igb_adapter *adapter = q_vector->adapter;
4821	struct e1000_hw *hw = &adapter->hw;
4822	int cpu = get_cpu();
4823
4824	if (q_vector->cpu == cpu)
4825		goto out_no_update;
4826
4827	if (q_vector->tx.ring) {
4828		int q = q_vector->tx.ring->reg_idx;
4829		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4830		if (hw->mac.type == e1000_82575) {
4831			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4832			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4833		} else {
4834			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4835			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4836			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4837		}
4838		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4839		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4840	}
4841	if (q_vector->rx.ring) {
4842		int q = q_vector->rx.ring->reg_idx;
4843		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4844		if (hw->mac.type == e1000_82575) {
4845			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4846			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4847		} else {
4848			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4849			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4850			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4851		}
4852		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4853		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4854		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4855		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4856	}
4857	q_vector->cpu = cpu;
4858out_no_update:
4859	put_cpu();
4860}
4861
4862static void igb_setup_dca(struct igb_adapter *adapter)
4863{
4864	struct e1000_hw *hw = &adapter->hw;
4865	int i;
4866
4867	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4868		return;
4869
4870	/* Always use CB2 mode, difference is masked in the CB driver. */
4871	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4872
4873	for (i = 0; i < adapter->num_q_vectors; i++) {
4874		adapter->q_vector[i]->cpu = -1;
4875		igb_update_dca(adapter->q_vector[i]);
4876	}
4877}
4878
4879static int __igb_notify_dca(struct device *dev, void *data)
4880{
4881	struct net_device *netdev = dev_get_drvdata(dev);
4882	struct igb_adapter *adapter = netdev_priv(netdev);
4883	struct pci_dev *pdev = adapter->pdev;
4884	struct e1000_hw *hw = &adapter->hw;
4885	unsigned long event = *(unsigned long *)data;
4886
4887	switch (event) {
4888	case DCA_PROVIDER_ADD:
4889		/* if already enabled, don't do it again */
4890		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4891			break;
4892		if (dca_add_requester(dev) == 0) {
4893			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4894			dev_info(&pdev->dev, "DCA enabled\n");
4895			igb_setup_dca(adapter);
4896			break;
4897		}
4898		/* Fall Through since DCA is disabled. */
4899	case DCA_PROVIDER_REMOVE:
4900		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4901			/* without this a class_device is left
4902			 * hanging around in the sysfs model */
4903			dca_remove_requester(dev);
4904			dev_info(&pdev->dev, "DCA disabled\n");
4905			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4906			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4907		}
4908		break;
4909	}
4910
4911	return 0;
4912}
4913
4914static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4915                          void *p)
4916{
4917	int ret_val;
4918
4919	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4920	                                 __igb_notify_dca);
4921
4922	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4923}
4924#endif /* CONFIG_IGB_DCA */
4925
4926static void igb_ping_all_vfs(struct igb_adapter *adapter)
4927{
4928	struct e1000_hw *hw = &adapter->hw;
4929	u32 ping;
4930	int i;
4931
4932	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4933		ping = E1000_PF_CONTROL_MSG;
4934		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4935			ping |= E1000_VT_MSGTYPE_CTS;
4936		igb_write_mbx(hw, &ping, 1, i);
4937	}
4938}
4939
4940static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4941{
4942	struct e1000_hw *hw = &adapter->hw;
4943	u32 vmolr = rd32(E1000_VMOLR(vf));
4944	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4945
4946	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4947	                    IGB_VF_FLAG_MULTI_PROMISC);
4948	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4949
4950	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4951		vmolr |= E1000_VMOLR_MPME;
4952		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4953		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4954	} else {
4955		/*
4956		 * if we have hashes and we are clearing a multicast promisc
4957		 * flag we need to write the hashes to the MTA as this step
4958		 * was previously skipped
4959		 */
4960		if (vf_data->num_vf_mc_hashes > 30) {
4961			vmolr |= E1000_VMOLR_MPME;
4962		} else if (vf_data->num_vf_mc_hashes) {
4963			int j;
4964			vmolr |= E1000_VMOLR_ROMPE;
4965			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4966				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4967		}
4968	}
4969
4970	wr32(E1000_VMOLR(vf), vmolr);
4971
4972	/* there are flags left unprocessed, likely not supported */
4973	if (*msgbuf & E1000_VT_MSGINFO_MASK)
4974		return -EINVAL;
4975
4976	return 0;
4977
4978}
4979
4980static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4981				  u32 *msgbuf, u32 vf)
4982{
4983	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4984	u16 *hash_list = (u16 *)&msgbuf[1];
4985	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4986	int i;
4987
4988	/* salt away the number of multicast addresses assigned
4989	 * to this VF for later use to restore when the PF multicast
4990	 * list changes
4991	 */
4992	vf_data->num_vf_mc_hashes = n;
4993
4994	/* only up to 30 hash values supported */
4995	if (n > 30)
4996		n = 30;
4997
4998	/* store the hashes for later use */
4999	for (i = 0; i < n; i++)
5000		vf_data->vf_mc_hashes[i] = hash_list[i];
5001
5002	/* Flush and reset the mta with the new values */
5003	igb_set_rx_mode(adapter->netdev);
5004
5005	return 0;
5006}
5007
5008static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5009{
5010	struct e1000_hw *hw = &adapter->hw;
5011	struct vf_data_storage *vf_data;
5012	int i, j;
5013
5014	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5015		u32 vmolr = rd32(E1000_VMOLR(i));
5016		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5017
5018		vf_data = &adapter->vf_data[i];
5019
5020		if ((vf_data->num_vf_mc_hashes > 30) ||
5021		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5022			vmolr |= E1000_VMOLR_MPME;
5023		} else if (vf_data->num_vf_mc_hashes) {
5024			vmolr |= E1000_VMOLR_ROMPE;
5025			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5026				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5027		}
5028		wr32(E1000_VMOLR(i), vmolr);
5029	}
5030}
5031
5032static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5033{
5034	struct e1000_hw *hw = &adapter->hw;
5035	u32 pool_mask, reg, vid;
5036	int i;
5037
5038	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5039
5040	/* Find the vlan filter for this id */
5041	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5042		reg = rd32(E1000_VLVF(i));
5043
5044		/* remove the vf from the pool */
5045		reg &= ~pool_mask;
5046
5047		/* if pool is empty then remove entry from vfta */
5048		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5049		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5050			vid = reg & E1000_VLVF_VLANID_MASK;
5051			igb_vfta_set(hw, vid, false);
5052			reg = 0;
5053		}
5054
5055		wr32(E1000_VLVF(i), reg);
5056	}
5057
5058	adapter->vf_data[vf].vlans_enabled = 0;
5059}
5060
5061static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5062{
5063	struct e1000_hw *hw = &adapter->hw;
5064	u32 reg, i;
5065
5066	/* The vlvf table only exists on 82576 hardware and newer */
5067	if (hw->mac.type < e1000_82576)
5068		return -1;
5069
5070	/* we only need to do this if VMDq is enabled */
5071	if (!adapter->vfs_allocated_count)
5072		return -1;
5073
5074	/* Find the vlan filter for this id */
5075	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5076		reg = rd32(E1000_VLVF(i));
5077		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5078		    vid == (reg & E1000_VLVF_VLANID_MASK))
5079			break;
5080	}
5081
5082	if (add) {
5083		if (i == E1000_VLVF_ARRAY_SIZE) {
5084			/* Did not find a matching VLAN ID entry that was
5085			 * enabled.  Search for a free filter entry, i.e.
5086			 * one without the enable bit set
5087			 */
5088			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5089				reg = rd32(E1000_VLVF(i));
5090				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5091					break;
5092			}
5093		}
5094		if (i < E1000_VLVF_ARRAY_SIZE) {
5095			/* Found an enabled/available entry */
5096			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5097
5098			/* if !enabled we need to set this up in vfta */
5099			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5100				/* add VID to filter table */
5101				igb_vfta_set(hw, vid, true);
5102				reg |= E1000_VLVF_VLANID_ENABLE;
5103			}
5104			reg &= ~E1000_VLVF_VLANID_MASK;
5105			reg |= vid;
5106			wr32(E1000_VLVF(i), reg);
5107
5108			/* do not modify RLPML for PF devices */
5109			if (vf >= adapter->vfs_allocated_count)
5110				return 0;
5111
5112			if (!adapter->vf_data[vf].vlans_enabled) {
5113				u32 size;
5114				reg = rd32(E1000_VMOLR(vf));
5115				size = reg & E1000_VMOLR_RLPML_MASK;
5116				size += 4;
5117				reg &= ~E1000_VMOLR_RLPML_MASK;
5118				reg |= size;
5119				wr32(E1000_VMOLR(vf), reg);
5120			}
5121
5122			adapter->vf_data[vf].vlans_enabled++;
5123			return 0;
5124		}
5125	} else {
5126		if (i < E1000_VLVF_ARRAY_SIZE) {
5127			/* remove vf from the pool */
5128			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5129			/* if pool is empty then remove entry from vfta */
5130			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5131				reg = 0;
5132				igb_vfta_set(hw, vid, false);
5133			}
5134			wr32(E1000_VLVF(i), reg);
5135
5136			/* do not modify RLPML for PF devices */
5137			if (vf >= adapter->vfs_allocated_count)
5138				return 0;
5139
5140			adapter->vf_data[vf].vlans_enabled--;
5141			if (!adapter->vf_data[vf].vlans_enabled) {
5142				u32 size;
5143				reg = rd32(E1000_VMOLR(vf));
5144				size = reg & E1000_VMOLR_RLPML_MASK;
5145				size -= 4;
5146				reg &= ~E1000_VMOLR_RLPML_MASK;
5147				reg |= size;
5148				wr32(E1000_VMOLR(vf), reg);
5149			}
5150		}
5151	}
5152	return 0;
5153}
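
/*
 * Illustrative sketch, not part of the driver: the RLPML adjustment done in
 * igb_vlvf_set() when a VF gains its first VLAN.  The receive length limit
 * stored in the low bits of VMOLR is grown by 4 bytes to make room for the
 * VLAN tag, and shrunk again once the VF has no VLANs left.  The helper
 * name is hypothetical.
 */
static inline u32 igb_example_vmolr_grow_rlpml(u32 vmolr)
{
	u32 size = (vmolr & E1000_VMOLR_RLPML_MASK) + 4;

	return (vmolr & ~E1000_VMOLR_RLPML_MASK) | size;
}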
5154
5155static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5156{
5157	struct e1000_hw *hw = &adapter->hw;
5158
5159	if (vid)
5160		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5161	else
5162		wr32(E1000_VMVIR(vf), 0);
5163}
5164
5165static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5166			       int vf, u16 vlan, u8 qos)
5167{
5168	int err = 0;
5169	struct igb_adapter *adapter = netdev_priv(netdev);
5170
5171	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5172		return -EINVAL;
5173	if (vlan || qos) {
5174		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5175		if (err)
5176			goto out;
5177		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5178		igb_set_vmolr(adapter, vf, !vlan);
5179		adapter->vf_data[vf].pf_vlan = vlan;
5180		adapter->vf_data[vf].pf_qos = qos;
5181		dev_info(&adapter->pdev->dev,
5182			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5183		if (test_bit(__IGB_DOWN, &adapter->state)) {
5184			dev_warn(&adapter->pdev->dev,
5185				 "The VF VLAN has been set,"
5186				 " but the PF device is not up.\n");
5187			dev_warn(&adapter->pdev->dev,
5188				 "Bring the PF device up before"
5189				 " attempting to use the VF device.\n");
5190		}
5191	} else {
5192		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5193				   false, vf);
5194		igb_set_vmvir(adapter, vlan, vf);
5195		igb_set_vmolr(adapter, vf, true);
5196		adapter->vf_data[vf].pf_vlan = 0;
5197		adapter->vf_data[vf].pf_qos = 0;
5198	}
5199out:
5200	return err;
5201}
5202
5203static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5204{
5205	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5206	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5207
5208	return igb_vlvf_set(adapter, vid, add, vf);
5209}
5210
5211static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5212{
5213	/* clear flags - except flag that indicates PF has set the MAC */
5214	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5215	adapter->vf_data[vf].last_nack = jiffies;
5216
5217	/* reset offloads to defaults */
5218	igb_set_vmolr(adapter, vf, true);
5219
5220	/* reset vlans for device */
5221	igb_clear_vf_vfta(adapter, vf);
5222	if (adapter->vf_data[vf].pf_vlan)
5223		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5224				    adapter->vf_data[vf].pf_vlan,
5225				    adapter->vf_data[vf].pf_qos);
5226	else
5227		igb_clear_vf_vfta(adapter, vf);
5228
5229	/* reset multicast table array for vf */
5230	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5231
5232	/* Flush and reset the mta with the new values */
5233	igb_set_rx_mode(adapter->netdev);
5234}
5235
5236static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5237{
5238	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5239
5240	/* generate a new mac address as we were hotplug removed/added */
5241	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5242		random_ether_addr(vf_mac);
5243
5244	/* process remaining reset events */
5245	igb_vf_reset(adapter, vf);
5246}
5247
5248static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5249{
5250	struct e1000_hw *hw = &adapter->hw;
5251	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5252	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5253	u32 reg, msgbuf[3];
5254	u8 *addr = (u8 *)(&msgbuf[1]);
5255
5256	/* process all the same items cleared in a function level reset */
5257	igb_vf_reset(adapter, vf);
5258
5259	/* set vf mac address */
5260	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5261
5262	/* enable transmit and receive for vf */
5263	reg = rd32(E1000_VFTE);
5264	wr32(E1000_VFTE, reg | (1 << vf));
5265	reg = rd32(E1000_VFRE);
5266	wr32(E1000_VFRE, reg | (1 << vf));
5267
5268	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5269
5270	/* reply to reset with ack and vf mac address */
5271	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5272	memcpy(addr, vf_mac, 6);
5273	igb_write_mbx(hw, msgbuf, 3, vf);
5274}
5275
5276static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5277{
5278	/*
5279	 * The VF MAC Address is stored in a packed array of bytes
5280	 * starting at the second 32 bit word of the msg array
5281	 */
5282	unsigned char *addr = (char *)&msg[1];
5283	int err = -1;
5284
5285	if (is_valid_ether_addr(addr))
5286		err = igb_set_vf_mac(adapter, vf, addr);
5287
5288	return err;
5289}
5290
5291static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5292{
5293	struct e1000_hw *hw = &adapter->hw;
5294	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5295	u32 msg = E1000_VT_MSGTYPE_NACK;
5296
5297	/* if device isn't clear to send it shouldn't be reading either */
5298	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5299	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5300		igb_write_mbx(hw, &msg, 1, vf);
5301		vf_data->last_nack = jiffies;
5302	}
5303}
5304
5305static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5306{
5307	struct pci_dev *pdev = adapter->pdev;
5308	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5309	struct e1000_hw *hw = &adapter->hw;
5310	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5311	s32 retval;
5312
5313	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5314
5315	if (retval) {
5316		/* if receive failed revoke VF CTS stats and restart init */
5317		dev_err(&pdev->dev, "Error receiving message from VF\n");
5318		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5319		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5320			return;
5321		goto out;
5322	}
5323
5324	/* this is a message we already processed, do nothing */
5325	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5326		return;
5327
5328	/*
5329	 * until the vf completes a reset it should not be
5330	 * allowed to start any configuration.
5331	 */
5332
5333	if (msgbuf[0] == E1000_VF_RESET) {
5334		igb_vf_reset_msg(adapter, vf);
5335		return;
5336	}
5337
5338	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5339		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5340			return;
5341		retval = -1;
5342		goto out;
5343	}
5344
5345	switch ((msgbuf[0] & 0xFFFF)) {
5346	case E1000_VF_SET_MAC_ADDR:
5347		retval = -EINVAL;
5348		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5349			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5350		else
5351			dev_warn(&pdev->dev,
5352				 "VF %d attempted to override administratively "
5353				 "set MAC address\nReload the VF driver to "
5354				 "resume operations\n", vf);
5355		break;
5356	case E1000_VF_SET_PROMISC:
5357		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5358		break;
5359	case E1000_VF_SET_MULTICAST:
5360		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5361		break;
5362	case E1000_VF_SET_LPE:
5363		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5364		break;
5365	case E1000_VF_SET_VLAN:
5366		retval = -1;
5367		if (vf_data->pf_vlan)
5368			dev_warn(&pdev->dev,
5369				 "VF %d attempted to override administratively "
5370				 "set VLAN tag\nReload the VF driver to "
5371				 "resume operations\n", vf);
5372		else
5373			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5374		break;
5375	default:
5376		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5377		retval = -1;
5378		break;
5379	}
5380
5381	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5382out:
5383	/* notify the VF of the results of what it sent us */
5384	if (retval)
5385		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5386	else
5387		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5388
5389	igb_write_mbx(hw, msgbuf, 1, vf);
5390}
5391
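/**
 * igb_msg_task - service pending VF mailbox events
 * @adapter: board private structure
 *
 * Scans all allocated VFs and handles any outstanding reset requests,
 * mailbox messages and acknowledgements.
 **/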
5392static void igb_msg_task(struct igb_adapter *adapter)
5393{
5394	struct e1000_hw *hw = &adapter->hw;
5395	u32 vf;
5396
5397	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5398		/* process any reset requests */
5399		if (!igb_check_for_rst(hw, vf))
5400			igb_vf_reset_event(adapter, vf);
5401
5402		/* process any messages pending */
5403		if (!igb_check_for_msg(hw, vf))
5404			igb_rcv_msg_from_vf(adapter, vf);
5405
5406		/* process any acks */
5407		if (!igb_check_for_ack(hw, vf))
5408			igb_rcv_ack_from_vf(adapter, vf);
5409	}
5410}
5411
5412/**
5413 *  igb_set_uta - Set unicast filter table address
5414 *  @adapter: board private structure
5415 *
5416 *  The unicast table address is a register array of 32-bit registers.
5417 *  The table is meant to be used in a way similar to how the MTA is used
5418 *  however due to certain limitations in the hardware it is necessary to
5419 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5420 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5421 **/
5422static void igb_set_uta(struct igb_adapter *adapter)
5423{
5424	struct e1000_hw *hw = &adapter->hw;
5425	int i;
5426
5427	/* The UTA table only exists on 82576 hardware and newer */
5428	if (hw->mac.type < e1000_82576)
5429		return;
5430
5431	/* we only need to do this if VMDq is enabled */
5432	if (!adapter->vfs_allocated_count)
5433		return;
5434
5435	for (i = 0; i < hw->mac.uta_reg_count; i++)
5436		array_wr32(E1000_UTA, i, ~0);
5437}
5438
5439/**
5440 * igb_intr_msi - Interrupt Handler
5441 * @irq: interrupt number
5442 * @data: pointer to our private adapter structure
5443 **/
5444static irqreturn_t igb_intr_msi(int irq, void *data)
5445{
5446	struct igb_adapter *adapter = data;
5447	struct igb_q_vector *q_vector = adapter->q_vector[0];
5448	struct e1000_hw *hw = &adapter->hw;
5449	/* read ICR disables interrupts using IAM */
5450	u32 icr = rd32(E1000_ICR);
5451
5452	igb_write_itr(q_vector);
5453
5454	if (icr & E1000_ICR_DRSTA)
5455		schedule_work(&adapter->reset_task);
5456
5457	if (icr & E1000_ICR_DOUTSYNC) {
5458		/* HW is reporting DMA is out of sync */
5459		adapter->stats.doosync++;
5460	}
5461
5462	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5463		hw->mac.get_link_status = 1;
5464		if (!test_bit(__IGB_DOWN, &adapter->state))
5465			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5466	}
5467
5468	napi_schedule(&q_vector->napi);
5469
5470	return IRQ_HANDLED;
5471}
5472
5473/**
5474 * igb_intr - Legacy Interrupt Handler
5475 * @irq: interrupt number
5476 * @data: pointer to our private adapter structure
5477 **/
5478static irqreturn_t igb_intr(int irq, void *data)
5479{
5480	struct igb_adapter *adapter = data;
5481	struct igb_q_vector *q_vector = adapter->q_vector[0];
5482	struct e1000_hw *hw = &adapter->hw;
5483	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5484	 * need for the IMC write */
5485	u32 icr = rd32(E1000_ICR);
5486
5487	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5488	 * not set, then the adapter didn't send an interrupt */
5489	if (!(icr & E1000_ICR_INT_ASSERTED))
5490		return IRQ_NONE;
5491
5492	igb_write_itr(q_vector);
5493
5494	if (icr & E1000_ICR_DRSTA)
5495		schedule_work(&adapter->reset_task);
5496
5497	if (icr & E1000_ICR_DOUTSYNC) {
5498		/* HW is reporting DMA is out of sync */
5499		adapter->stats.doosync++;
5500	}
5501
5502	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5503		hw->mac.get_link_status = 1;
5504		/* guard against interrupt when we're going down */
5505		if (!test_bit(__IGB_DOWN, &adapter->state))
5506			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5507	}
5508
5509	napi_schedule(&q_vector->napi);
5510
5511	return IRQ_HANDLED;
5512}
5513
5514void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5515{
5516	struct igb_adapter *adapter = q_vector->adapter;
5517	struct e1000_hw *hw = &adapter->hw;
5518
5519	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5520	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5521		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5522			igb_set_itr(q_vector);
5523		else
5524			igb_update_ring_itr(q_vector);
5525	}
5526
5527	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5528		if (adapter->msix_entries)
5529			wr32(E1000_EIMS, q_vector->eims_value);
5530		else
5531			igb_irq_enable(adapter);
5532	}
5533}
5534
5535/**
5536 * igb_poll - NAPI Rx polling callback
5537 * @napi: napi polling structure
5538 * @budget: count of how many packets we should handle
5539 **/
5540static int igb_poll(struct napi_struct *napi, int budget)
5541{
5542	struct igb_q_vector *q_vector = container_of(napi,
5543	                                             struct igb_q_vector,
5544	                                             napi);
5545	bool clean_complete = true;
5546
5547#ifdef CONFIG_IGB_DCA
5548	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5549		igb_update_dca(q_vector);
5550#endif
5551	if (q_vector->tx.ring)
5552		clean_complete = igb_clean_tx_irq(q_vector);
5553
5554	if (q_vector->rx.ring)
5555		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5556
5557	/* If all work not completed, return budget and keep polling */
5558	if (!clean_complete)
5559		return budget;
5560
5561	/* If not enough Rx work done, exit the polling mode */
5562	napi_complete(napi);
5563	igb_ring_irq_enable(q_vector);
5564
5565	return 0;
5566}
5567
5568/**
5569 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5570 * @adapter: board private structure
5571 * @shhwtstamps: timestamp structure to update
5572 * @regval: unsigned 64bit system time value.
5573 *
5574 * We need to convert the system time value stored in the RX/TXSTMP registers
5575 * into a hwtstamp which can be used by the upper level timestamping functions
5576 */
5577static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5578                                   struct skb_shared_hwtstamps *shhwtstamps,
5579                                   u64 regval)
5580{
5581	u64 ns;
5582
5583	/*
5584	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5585	 * 24 to match clock shift we setup earlier.
5586	 */
5587	if (adapter->hw.mac.type == e1000_82580)
5588		regval <<= IGB_82580_TSYNC_SHIFT;
5589
5590	ns = timecounter_cyc2time(&adapter->clock, regval);
5591	timecompare_update(&adapter->compare, ns);
5592	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5593	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5594	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5595}
5596
5597/**
5598 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5599 * @q_vector: pointer to q_vector containing needed info
5600 * @buffer_info: pointer to the igb_tx_buffer structure for the packet
5601 *
5602 * If we were asked to do hardware stamping and such a time stamp is
5603 * available, then it must have been for this skb here because we
5604 * allow only one such packet into the queue.
5605 */
5606static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5607			    struct igb_tx_buffer *buffer_info)
5608{
5609	struct igb_adapter *adapter = q_vector->adapter;
5610	struct e1000_hw *hw = &adapter->hw;
5611	struct skb_shared_hwtstamps shhwtstamps;
5612	u64 regval;
5613
5614	/* if skb does not support hw timestamp or TX stamp not valid exit */
5615	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5616	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5617		return;
5618
5619	regval = rd32(E1000_TXSTMPL);
5620	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5621
5622	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5623	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5624}
5625
5626/**
5627 * igb_clean_tx_irq - Reclaim resources after transmit completes
5628 * @q_vector: pointer to q_vector containing needed info
5629 * returns true if ring is completely cleaned
5630 **/
5631static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5632{
5633	struct igb_adapter *adapter = q_vector->adapter;
5634	struct igb_ring *tx_ring = q_vector->tx.ring;
5635	struct igb_tx_buffer *tx_buffer;
5636	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5637	unsigned int total_bytes = 0, total_packets = 0;
5638	unsigned int budget = q_vector->tx.work_limit;
5639	unsigned int i = tx_ring->next_to_clean;
5640
5641	if (test_bit(__IGB_DOWN, &adapter->state))
5642		return true;
5643
5644	tx_buffer = &tx_ring->tx_buffer_info[i];
5645	tx_desc = IGB_TX_DESC(tx_ring, i);
5646	i -= tx_ring->count;
5647
5648	for (; budget; budget--) {
5649		eop_desc = tx_buffer->next_to_watch;
5650
5651		/* prevent any other reads prior to eop_desc */
5652		rmb();
5653
5654		/* if next_to_watch is not set then there is no work pending */
5655		if (!eop_desc)
5656			break;
5657
5658		/* if DD is not set pending work has not been completed */
5659		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5660			break;
5661
5662		/* clear next_to_watch to prevent false hangs */
5663		tx_buffer->next_to_watch = NULL;
5664
5665		/* update the statistics for this packet */
5666		total_bytes += tx_buffer->bytecount;
5667		total_packets += tx_buffer->gso_segs;
5668
5669		/* retrieve hardware timestamp */
5670		igb_tx_hwtstamp(q_vector, tx_buffer);
5671
5672		/* free the skb */
5673		dev_kfree_skb_any(tx_buffer->skb);
5674		tx_buffer->skb = NULL;
5675
5676		/* unmap skb header data */
5677		dma_unmap_single(tx_ring->dev,
5678				 tx_buffer->dma,
5679				 tx_buffer->length,
5680				 DMA_TO_DEVICE);
5681
5682		/* clear last DMA location and unmap remaining buffers */
5683		while (tx_desc != eop_desc) {
5684			tx_buffer->dma = 0;
5685
5686			tx_buffer++;
5687			tx_desc++;
5688			i++;
5689			if (unlikely(!i)) {
5690				i -= tx_ring->count;
5691				tx_buffer = tx_ring->tx_buffer_info;
5692				tx_desc = IGB_TX_DESC(tx_ring, 0);
5693			}
5694
5695			/* unmap any remaining paged data */
5696			if (tx_buffer->dma) {
5697				dma_unmap_page(tx_ring->dev,
5698					       tx_buffer->dma,
5699					       tx_buffer->length,
5700					       DMA_TO_DEVICE);
5701			}
5702		}
5703
5704		/* clear last DMA location */
5705		tx_buffer->dma = 0;
5706
5707		/* move us one more past the eop_desc for start of next pkt */
5708		tx_buffer++;
5709		tx_desc++;
5710		i++;
5711		if (unlikely(!i)) {
5712			i -= tx_ring->count;
5713			tx_buffer = tx_ring->tx_buffer_info;
5714			tx_desc = IGB_TX_DESC(tx_ring, 0);
5715		}
5716	}
5717
5718	i += tx_ring->count;
5719	tx_ring->next_to_clean = i;
5720	u64_stats_update_begin(&tx_ring->tx_syncp);
5721	tx_ring->tx_stats.bytes += total_bytes;
5722	tx_ring->tx_stats.packets += total_packets;
5723	u64_stats_update_end(&tx_ring->tx_syncp);
5724	q_vector->tx.total_bytes += total_bytes;
5725	q_vector->tx.total_packets += total_packets;
5726
5727	if (tx_ring->detect_tx_hung) {
5728		struct e1000_hw *hw = &adapter->hw;
5729
5730		eop_desc = tx_buffer->next_to_watch;
5731
5732		/* Detect a transmit hang in hardware, this serializes the
5733		 * check with the clearing of time_stamp and movement of i */
5734		tx_ring->detect_tx_hung = false;
5735		if (eop_desc &&
5736		    time_after(jiffies, tx_buffer->time_stamp +
5737			       (adapter->tx_timeout_factor * HZ)) &&
5738		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5739
5740			/* detected Tx unit hang */
5741			dev_err(tx_ring->dev,
5742				"Detected Tx Unit Hang\n"
5743				"  Tx Queue             <%d>\n"
5744				"  TDH                  <%x>\n"
5745				"  TDT                  <%x>\n"
5746				"  next_to_use          <%x>\n"
5747				"  next_to_clean        <%x>\n"
5748				"buffer_info[next_to_clean]\n"
5749				"  time_stamp           <%lx>\n"
5750				"  next_to_watch        <%p>\n"
5751				"  jiffies              <%lx>\n"
5752				"  desc.status          <%x>\n",
5753				tx_ring->queue_index,
5754				rd32(E1000_TDH(tx_ring->reg_idx)),
5755				readl(tx_ring->tail),
5756				tx_ring->next_to_use,
5757				tx_ring->next_to_clean,
5758				tx_buffer->time_stamp,
5759				eop_desc,
5760				jiffies,
5761				eop_desc->wb.status);
5762			netif_stop_subqueue(tx_ring->netdev,
5763					    tx_ring->queue_index);
5764
5765			/* we are about to reset, no point in enabling stuff */
5766			return true;
5767		}
5768	}
5769
5770	if (unlikely(total_packets &&
5771		     netif_carrier_ok(tx_ring->netdev) &&
5772		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5773		/* Make sure that anybody stopping the queue after this
5774		 * sees the new next_to_clean.
5775		 */
5776		smp_mb();
5777		if (__netif_subqueue_stopped(tx_ring->netdev,
5778					     tx_ring->queue_index) &&
5779		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5780			netif_wake_subqueue(tx_ring->netdev,
5781					    tx_ring->queue_index);
5782
5783			u64_stats_update_begin(&tx_ring->tx_syncp);
5784			tx_ring->tx_stats.restart_queue++;
5785			u64_stats_update_end(&tx_ring->tx_syncp);
5786		}
5787	}
5788
5789	return !!budget;
5790}
5791
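/**
 * igb_rx_checksum - report hardware checksum offload results to the stack
 * @ring: ring the descriptor was received on
 * @rx_desc: advanced receive descriptor being processed
 * @skb: packet described by the descriptor
 **/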
5792static inline void igb_rx_checksum(struct igb_ring *ring,
5793				   union e1000_adv_rx_desc *rx_desc,
5794				   struct sk_buff *skb)
5795{
5796	skb_checksum_none_assert(skb);
5797
5798	/* Ignore Checksum bit is set */
5799	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5800		return;
5801
5802	/* Rx checksum disabled via ethtool */
5803	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5804		return;
5805
5806	/* TCP/UDP checksum error bit is set */
5807	if (igb_test_staterr(rx_desc,
5808			     E1000_RXDEXT_STATERR_TCPE |
5809			     E1000_RXDEXT_STATERR_IPE)) {
5810		/*
5811		 * work around an errata with SCTP packets where the TCPE (aka
5812		 * L4E) bit is set incorrectly on 64 byte (60 byte w/o crc)
5813		 * packets; let the stack verify the crc32c instead
5814		 */
5815		if (!((skb->len == 60) &&
5816		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5817			u64_stats_update_begin(&ring->rx_syncp);
5818			ring->rx_stats.csum_err++;
5819			u64_stats_update_end(&ring->rx_syncp);
5820		}
5821		/* let the stack verify checksum errors */
5822		return;
5823	}
5824	/* It must be a TCP or UDP packet with a valid checksum */
5825	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5826				      E1000_RXD_STAT_UDPCS))
5827		skb->ip_summed = CHECKSUM_UNNECESSARY;
5828
5829	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5830		le32_to_cpu(rx_desc->wb.upper.status_error));
5831}
5832
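/**
 * igb_rx_hwtstamp - retrieve the RX hardware time stamp for a packet
 * @q_vector: pointer to q_vector containing needed info
 * @rx_desc: advanced receive descriptor carrying the time stamp flags
 * @skb: packet to attach the time stamp to
 **/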
5833static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5834			    union e1000_adv_rx_desc *rx_desc,
5835			    struct sk_buff *skb)
5836{
5837	struct igb_adapter *adapter = q_vector->adapter;
5838	struct e1000_hw *hw = &adapter->hw;
5839	u64 regval;
5840
5841	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5842				       E1000_RXDADV_STAT_TS))
5843		return;
5844
5845	/*
5846	 * If this bit is set, then the RX registers contain the time stamp. No
5847	 * other packet will be time stamped until we read these registers, so
5848	 * read the registers to make them available again. Because only one
5849	 * packet can be time stamped at a time, we know that the register
5850	 * values must belong to this one here and therefore we don't need to
5851	 * compare any of the additional attributes stored for it.
5852	 *
5853	 * If nothing went wrong, then it should have a shared tx_flags that we
5854	 * can turn into a skb_shared_hwtstamps.
5855	 */
5856	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5857		u32 *stamp = (u32 *)skb->data;
5858		regval = le32_to_cpu(*(stamp + 2));
5859		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5860		skb_pull(skb, IGB_TS_HDR_LEN);
5861	} else {
5862		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5863			return;
5864
5865		regval = rd32(E1000_RXSTMPL);
5866		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5867	}
5868
5869	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5870}

5871static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5872{
5873	/* HW will not DMA in data larger than the given buffer, even if it
5874	 * parses the (NFS, of course) header to be larger.  In that case, it
5875	 * fills the header buffer and spills the rest into the page.
5876	 */
5877	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5878	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5879	if (hlen > IGB_RX_HDR_LEN)
5880		hlen = IGB_RX_HDR_LEN;
5881	return hlen;
5882}
5883
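/**
 * igb_clean_rx_irq - clean completed descriptors from the receive ring
 * @q_vector: pointer to q_vector containing the ring to clean
 * @budget: maximum number of received packets to process
 *
 * Returns true if the budget was not exhausted, i.e. the ring was
 * cleaned completely.
 **/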
5884static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5885{
5886	struct igb_ring *rx_ring = q_vector->rx.ring;
5887	union e1000_adv_rx_desc *rx_desc;
5888	const int current_node = numa_node_id();
5889	unsigned int total_bytes = 0, total_packets = 0;
5890	u16 cleaned_count = igb_desc_unused(rx_ring);
5891	u16 i = rx_ring->next_to_clean;
5892
5893	rx_desc = IGB_RX_DESC(rx_ring, i);
5894
5895	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5896		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5897		struct sk_buff *skb = buffer_info->skb;
5898		union e1000_adv_rx_desc *next_rxd;
5899
5900		buffer_info->skb = NULL;
5901		prefetch(skb->data);
5902
5903		i++;
5904		if (i == rx_ring->count)
5905			i = 0;
5906
5907		next_rxd = IGB_RX_DESC(rx_ring, i);
5908		prefetch(next_rxd);
5909
5910		/*
5911		 * This memory barrier is needed to keep us from reading
5912		 * any other fields out of the rx_desc until we know the
5913		 * RXD_STAT_DD bit is set
5914		 */
5915		rmb();
5916
5917		if (!skb_is_nonlinear(skb)) {
5918			__skb_put(skb, igb_get_hlen(rx_desc));
5919			dma_unmap_single(rx_ring->dev, buffer_info->dma,
5920					 IGB_RX_HDR_LEN,
5921					 DMA_FROM_DEVICE);
5922			buffer_info->dma = 0;
5923		}
5924
5925		if (rx_desc->wb.upper.length) {
5926			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5927
5928			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5929						buffer_info->page,
5930						buffer_info->page_offset,
5931						length);
5932
5933			skb->len += length;
5934			skb->data_len += length;
5935			skb->truesize += length;
5936
5937			if ((page_count(buffer_info->page) != 1) ||
5938			    (page_to_nid(buffer_info->page) != current_node))
5939				buffer_info->page = NULL;
5940			else
5941				get_page(buffer_info->page);
5942
5943			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5944				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
5945			buffer_info->page_dma = 0;
5946		}
5947
5948		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
5949			struct igb_rx_buffer *next_buffer;
5950			next_buffer = &rx_ring->rx_buffer_info[i];
5951			buffer_info->skb = next_buffer->skb;
5952			buffer_info->dma = next_buffer->dma;
5953			next_buffer->skb = skb;
5954			next_buffer->dma = 0;
5955			goto next_desc;
5956		}
5957
5958		if (igb_test_staterr(rx_desc,
5959				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
5960			dev_kfree_skb_any(skb);
5961			goto next_desc;
5962		}
5963
5964		igb_rx_hwtstamp(q_vector, rx_desc, skb);
5965		igb_rx_checksum(rx_ring, rx_desc, skb);
5966
5967		if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5968			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5969
5970			__vlan_hwaccel_put_tag(skb, vid);
5971		}
5972
5973		total_bytes += skb->len;
5974		total_packets++;
5975
5976		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5977
5978		napi_gro_receive(&q_vector->napi, skb);
5979
5980		budget--;
5981next_desc:
5982		if (!budget)
5983			break;
5984
5985		cleaned_count++;
5986		/* return some buffers to hardware, one at a time is too slow */
5987		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5988			igb_alloc_rx_buffers(rx_ring, cleaned_count);
5989			cleaned_count = 0;
5990		}
5991
5992		/* use prefetched values */
5993		rx_desc = next_rxd;
5994	}
5995
5996	rx_ring->next_to_clean = i;
5997	u64_stats_update_begin(&rx_ring->rx_syncp);
5998	rx_ring->rx_stats.packets += total_packets;
5999	rx_ring->rx_stats.bytes += total_bytes;
6000	u64_stats_update_end(&rx_ring->rx_syncp);
6001	q_vector->rx.total_packets += total_packets;
6002	q_vector->rx.total_bytes += total_bytes;
6003
6004	if (cleaned_count)
6005		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6006
6007	return !!budget;
6008}
6009
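/**
 * igb_alloc_mapped_skb - allocate and DMA map a header buffer skb
 * @rx_ring: ring the buffer belongs to
 * @bi: buffer info structure to populate
 *
 * Returns true once the buffer is ready for use by hardware.
 **/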
6010static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6011				 struct igb_rx_buffer *bi)
6012{
6013	struct sk_buff *skb = bi->skb;
6014	dma_addr_t dma = bi->dma;
6015
6016	if (dma)
6017		return true;
6018
6019	if (likely(!skb)) {
6020		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6021						IGB_RX_HDR_LEN);
6022		bi->skb = skb;
6023		if (!skb) {
6024			rx_ring->rx_stats.alloc_failed++;
6025			return false;
6026		}
6027
6028		/* initialize skb for ring */
6029		skb_record_rx_queue(skb, rx_ring->queue_index);
6030	}
6031
6032	dma = dma_map_single(rx_ring->dev, skb->data,
6033			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6034
6035	if (dma_mapping_error(rx_ring->dev, dma)) {
6036		rx_ring->rx_stats.alloc_failed++;
6037		return false;
6038	}
6039
6040	bi->dma = dma;
6041	return true;
6042}
6043
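/**
 * igb_alloc_mapped_page - allocate and DMA map a half page for packet data
 * @rx_ring: ring the buffer belongs to
 * @bi: buffer info structure to populate
 *
 * Returns true once the page half is mapped and ready for use by hardware.
 **/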
6044static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6045				  struct igb_rx_buffer *bi)
6046{
6047	struct page *page = bi->page;
6048	dma_addr_t page_dma = bi->page_dma;
6049	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6050
6051	if (page_dma)
6052		return true;
6053
6054	if (!page) {
6055		page = netdev_alloc_page(rx_ring->netdev);
6056		bi->page = page;
6057		if (unlikely(!page)) {
6058			rx_ring->rx_stats.alloc_failed++;
6059			return false;
6060		}
6061	}
6062
6063	page_dma = dma_map_page(rx_ring->dev, page,
6064				page_offset, PAGE_SIZE / 2,
6065				DMA_FROM_DEVICE);
6066
6067	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6068		rx_ring->rx_stats.alloc_failed++;
6069		return false;
6070	}
6071
6072	bi->page_dma = page_dma;
6073	bi->page_offset = page_offset;
6074	return true;
6075}
6076
6077/**
6078 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6079 * @rx_ring: pointer to the ring to place buffers on
 * @cleaned_count: number of buffers to replace
6080 **/
6081void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6082{
6083	union e1000_adv_rx_desc *rx_desc;
6084	struct igb_rx_buffer *bi;
6085	u16 i = rx_ring->next_to_use;
6086
6087	rx_desc = IGB_RX_DESC(rx_ring, i);
6088	bi = &rx_ring->rx_buffer_info[i];
6089	i -= rx_ring->count;
6090
6091	while (cleaned_count--) {
6092		if (!igb_alloc_mapped_skb(rx_ring, bi))
6093			break;
6094
6095		/* Refresh the desc even if buffer_addrs didn't change
6096		 * because each write-back erases this info. */
6097		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6098
6099		if (!igb_alloc_mapped_page(rx_ring, bi))
6100			break;
6101
6102		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6103
6104		rx_desc++;
6105		bi++;
6106		i++;
6107		if (unlikely(!i)) {
6108			rx_desc = IGB_RX_DESC(rx_ring, 0);
6109			bi = rx_ring->rx_buffer_info;
6110			i -= rx_ring->count;
6111		}
6112
6113		/* clear the hdr_addr for the next_to_use descriptor */
6114		rx_desc->read.hdr_addr = 0;
6115	}
6116
6117	i += rx_ring->count;
6118
6119	if (rx_ring->next_to_use != i) {
6120		rx_ring->next_to_use = i;
6121
6122		/* Force memory writes to complete before letting h/w
6123		 * know there are new descriptors to fetch.  (Only
6124		 * applicable for weak-ordered memory model archs,
6125		 * such as IA-64). */
6126		wmb();
6127		writel(i, rx_ring->tail);
6128	}
6129}
6130
6131/**
6132 * igb_mii_ioctl - handle MII ioctls for copper PHY access
6133 * @netdev: network interface device structure
6134 * @ifr: interface request containing the MII register data
6135 * @cmd: ioctl command (SIOCGMIIPHY and SIOCGMIIREG are supported)
6136 **/
6137static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6138{
6139	struct igb_adapter *adapter = netdev_priv(netdev);
6140	struct mii_ioctl_data *data = if_mii(ifr);
6141
6142	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6143		return -EOPNOTSUPP;
6144
6145	switch (cmd) {
6146	case SIOCGMIIPHY:
6147		data->phy_id = adapter->hw.phy.addr;
6148		break;
6149	case SIOCGMIIREG:
6150		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6151		                     &data->val_out))
6152			return -EIO;
6153		break;
6154	case SIOCSMIIREG:
6155	default:
6156		return -EOPNOTSUPP;
6157	}
6158	return 0;
6159}
6160
6161/**
6162 * igb_hwtstamp_ioctl - control hardware time stamping
6163 * @netdev: network interface device structure
6164 * @ifr: interface request holding the hwtstamp_config from user space
6165 * @cmd: ioctl command (SIOCSHWTSTAMP)
6166 *
6167 * Outgoing time stamping can be enabled and disabled. Play nice and
6168 * disable it when requested, although it shouldn't cause any overhead
6169 * when no packet needs it. At most one packet in the queue may be
6170 * marked for time stamping, otherwise it would be impossible to tell
6171 * for sure to which packet the hardware time stamp belongs.
6172 *
6173 * Incoming time stamping has to be configured via the hardware
6174 * filters. Not all combinations are supported, in particular event
6175 * type has to be specified. Matching the kind of event packet is
6176 * not supported, with the exception of "all V2 events regardless of
6177 * layer 2 or 4".
6178 *
6179 **/
6180static int igb_hwtstamp_ioctl(struct net_device *netdev,
6181			      struct ifreq *ifr, int cmd)
6182{
6183	struct igb_adapter *adapter = netdev_priv(netdev);
6184	struct e1000_hw *hw = &adapter->hw;
6185	struct hwtstamp_config config;
6186	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6187	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6188	u32 tsync_rx_cfg = 0;
6189	bool is_l4 = false;
6190	bool is_l2 = false;
6191	u32 regval;
6192
6193	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6194		return -EFAULT;
6195
6196	/* reserved for future extensions */
6197	if (config.flags)
6198		return -EINVAL;
6199
6200	switch (config.tx_type) {
6201	case HWTSTAMP_TX_OFF:
6202		tsync_tx_ctl = 0;
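		/* fall through */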
6203	case HWTSTAMP_TX_ON:
6204		break;
6205	default:
6206		return -ERANGE;
6207	}
6208
6209	switch (config.rx_filter) {
6210	case HWTSTAMP_FILTER_NONE:
6211		tsync_rx_ctl = 0;
6212		break;
6213	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6214	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6215	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6216	case HWTSTAMP_FILTER_ALL:
6217		/*
6218		 * register TSYNCRXCFG must be set, therefore it is not
6219		 * possible to time stamp both Sync and Delay_Req messages
6220		 * => fall back to time stamping all packets
6221		 */
6222		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6223		config.rx_filter = HWTSTAMP_FILTER_ALL;
6224		break;
6225	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6226		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6227		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6228		is_l4 = true;
6229		break;
6230	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6231		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6232		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6233		is_l4 = true;
6234		break;
6235	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6236	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6237		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6238		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6239		is_l2 = true;
6240		is_l4 = true;
6241		config.rx_filter = HWTSTAMP_FILTER_SOME;
6242		break;
6243	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6244	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6245		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6246		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6247		is_l2 = true;
6248		is_l4 = true;
6249		config.rx_filter = HWTSTAMP_FILTER_SOME;
6250		break;
6251	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6252	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6253	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6254		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6255		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6256		is_l2 = true;
6257		break;
6258	default:
6259		return -ERANGE;
6260	}
6261
6262	if (hw->mac.type == e1000_82575) {
6263		if (tsync_rx_ctl | tsync_tx_ctl)
6264			return -EINVAL;
6265		return 0;
6266	}
6267
6268	/*
6269	 * Per-packet timestamping only works if all packets are
6270	 * timestamped, so enable timestamping in all packets as
6271	 * long as one rx filter was configured.
6272	 */
6273	if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6274		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6275		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6276	}
6277
6278	/* enable/disable TX */
6279	regval = rd32(E1000_TSYNCTXCTL);
6280	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6281	regval |= tsync_tx_ctl;
6282	wr32(E1000_TSYNCTXCTL, regval);
6283
6284	/* enable/disable RX */
6285	regval = rd32(E1000_TSYNCRXCTL);
6286	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6287	regval |= tsync_rx_ctl;
6288	wr32(E1000_TSYNCRXCTL, regval);
6289
6290	/* define which PTP packets are time stamped */
6291	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6292
6293	/* define ethertype filter for timestamped packets */
6294	if (is_l2)
6295		wr32(E1000_ETQF(3),
6296		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6297		                 E1000_ETQF_1588 | /* enable timestamping */
6298		                 ETH_P_1588));     /* 1588 eth protocol type */
6299	else
6300		wr32(E1000_ETQF(3), 0);
6301
6302#define PTP_PORT 319
6303	/* L4 Queue Filter[3]: filter by destination port and protocol */
6304	if (is_l4) {
6305		u32 ftqf = (IPPROTO_UDP /* UDP */
6306			| E1000_FTQF_VF_BP /* VF not compared */
6307			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6308			| E1000_FTQF_MASK); /* mask all inputs */
6309		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6310
6311		wr32(E1000_IMIR(3), htons(PTP_PORT));
6312		wr32(E1000_IMIREXT(3),
6313		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6314		if (hw->mac.type == e1000_82576) {
6315			/* enable source port check */
6316			wr32(E1000_SPQF(3), htons(PTP_PORT));
6317			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6318		}
6319		wr32(E1000_FTQF(3), ftqf);
6320	} else {
6321		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6322	}
6323	wrfl();
6324
6325	adapter->hwtstamp_config = config;
6326
6327	/* clear TX/RX time stamp registers, just to be sure */
6328	regval = rd32(E1000_TXSTMPH);
6329	regval = rd32(E1000_RXSTMPH);
6330
6331	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6332		-EFAULT : 0;
6333}
6334
6335/**
6336 * igb_ioctl - dispatch device-specific ioctl requests
6337 * @netdev: network interface device structure
6338 * @ifr: interface request structure
6339 * @cmd: ioctl command
6340 **/
6341static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6342{
6343	switch (cmd) {
6344	case SIOCGMIIPHY:
6345	case SIOCGMIIREG:
6346	case SIOCSMIIREG:
6347		return igb_mii_ioctl(netdev, ifr, cmd);
6348	case SIOCSHWTSTAMP:
6349		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6350	default:
6351		return -EOPNOTSUPP;
6352	}
6353}
6354
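/**
 * igb_read_pcie_cap_reg - read a word from the PCIe capability structure
 * @hw: pointer to the HW structure
 * @reg: offset of the register within the PCIe capability
 * @value: where the 16-bit value read is stored
 **/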
6355s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6356{
6357	struct igb_adapter *adapter = hw->back;
6358	u16 cap_offset;
6359
6360	cap_offset = adapter->pdev->pcie_cap;
6361	if (!cap_offset)
6362		return -E1000_ERR_CONFIG;
6363
6364	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6365
6366	return 0;
6367}
6368
6369s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6370{
6371	struct igb_adapter *adapter = hw->back;
6372	u16 cap_offset;
6373
6374	cap_offset = adapter->pdev->pcie_cap;
6375	if (!cap_offset)
6376		return -E1000_ERR_CONFIG;
6377
6378	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6379
6380	return 0;
6381}
6382
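/**
 * igb_vlan_mode - enable or disable hardware VLAN tag stripping
 * @netdev: network interface device structure
 * @features: netdev feature flags; NETIF_F_HW_VLAN_RX selects stripping
 **/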
6383static void igb_vlan_mode(struct net_device *netdev, u32 features)
6384{
6385	struct igb_adapter *adapter = netdev_priv(netdev);
6386	struct e1000_hw *hw = &adapter->hw;
6387	u32 ctrl, rctl;
6388
6389	igb_irq_disable(adapter);
6390
6391	if (features & NETIF_F_HW_VLAN_RX) {
6392		/* enable VLAN tag insert/strip */
6393		ctrl = rd32(E1000_CTRL);
6394		ctrl |= E1000_CTRL_VME;
6395		wr32(E1000_CTRL, ctrl);
6396
6397		/* Disable CFI check */
6398		rctl = rd32(E1000_RCTL);
6399		rctl &= ~E1000_RCTL_CFIEN;
6400		wr32(E1000_RCTL, rctl);
6401	} else {
6402		/* disable VLAN tag insert/strip */
6403		ctrl = rd32(E1000_CTRL);
6404		ctrl &= ~E1000_CTRL_VME;
6405		wr32(E1000_CTRL, ctrl);
6406	}
6407
6408	igb_rlpml_set(adapter);
6409
6410	if (!test_bit(__IGB_DOWN, &adapter->state))
6411		igb_irq_enable(adapter);
6412}
6413
6414static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6415{
6416	struct igb_adapter *adapter = netdev_priv(netdev);
6417	struct e1000_hw *hw = &adapter->hw;
6418	int pf_id = adapter->vfs_allocated_count;
6419
6420	/* attempt to add filter to vlvf array */
6421	igb_vlvf_set(adapter, vid, true, pf_id);
6422
6423	/* add the filter since PF can receive vlans w/o entry in vlvf */
6424	igb_vfta_set(hw, vid, true);
6425
6426	set_bit(vid, adapter->active_vlans);
6427}
6428
6429static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6430{
6431	struct igb_adapter *adapter = netdev_priv(netdev);
6432	struct e1000_hw *hw = &adapter->hw;
6433	int pf_id = adapter->vfs_allocated_count;
6434	s32 err;
6435
6436	igb_irq_disable(adapter);
6437
6438	if (!test_bit(__IGB_DOWN, &adapter->state))
6439		igb_irq_enable(adapter);
6440
6441	/* remove vlan from VLVF table array */
6442	err = igb_vlvf_set(adapter, vid, false, pf_id);
6443
6444	/* if vid was not present in VLVF just remove it from table */
6445	if (err)
6446		igb_vfta_set(hw, vid, false);
6447
6448	clear_bit(vid, adapter->active_vlans);
6449}
6450
6451static void igb_restore_vlan(struct igb_adapter *adapter)
6452{
6453	u16 vid;
6454
6455	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6456		igb_vlan_rx_add_vid(adapter->netdev, vid);
6457}
6458
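/**
 * igb_set_spd_dplx - force a speed/duplex setting on the MAC
 * @adapter: board private structure
 * @spd: requested speed
 * @dplx: requested duplex
 *
 * Returns 0 on success or -EINVAL for unsupported combinations.
 **/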
6459int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6460{
6461	struct pci_dev *pdev = adapter->pdev;
6462	struct e1000_mac_info *mac = &adapter->hw.mac;
6463
6464	mac->autoneg = 0;
6465
6466	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6467	 * for the switch() below to work */
6468	if ((spd & 1) || (dplx & ~1))
6469		goto err_inval;
6470
6471	/* Fiber NICs only allow 1000 Mbps full duplex */
6472	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6473	    (spd != SPEED_1000 ||
6474	     dplx != DUPLEX_FULL))
6475		goto err_inval;
6476
6477	switch (spd + dplx) {
6478	case SPEED_10 + DUPLEX_HALF:
6479		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6480		break;
6481	case SPEED_10 + DUPLEX_FULL:
6482		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6483		break;
6484	case SPEED_100 + DUPLEX_HALF:
6485		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6486		break;
6487	case SPEED_100 + DUPLEX_FULL:
6488		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6489		break;
6490	case SPEED_1000 + DUPLEX_FULL:
6491		mac->autoneg = 1;
6492		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6493		break;
6494	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6495	default:
6496		goto err_inval;
6497	}
6498	return 0;
6499
6500err_inval:
6501	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6502	return -EINVAL;
6503}
6504
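/**
 * __igb_shutdown - common suspend/shutdown path
 * @pdev: PCI device being suspended or shut down
 * @enable_wake: set to true when wake-up should remain armed
 *
 * Stops the interface, programs the wake-up filters and releases the
 * hardware, leaving the caller to select the final power state.
 **/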
6505static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6506{
6507	struct net_device *netdev = pci_get_drvdata(pdev);
6508	struct igb_adapter *adapter = netdev_priv(netdev);
6509	struct e1000_hw *hw = &adapter->hw;
6510	u32 ctrl, rctl, status;
6511	u32 wufc = adapter->wol;
6512#ifdef CONFIG_PM
6513	int retval = 0;
6514#endif
6515
6516	netif_device_detach(netdev);
6517
6518	if (netif_running(netdev))
6519		igb_close(netdev);
6520
6521	igb_clear_interrupt_scheme(adapter);
6522
6523#ifdef CONFIG_PM
6524	retval = pci_save_state(pdev);
6525	if (retval)
6526		return retval;
6527#endif
6528
6529	status = rd32(E1000_STATUS);
6530	if (status & E1000_STATUS_LU)
6531		wufc &= ~E1000_WUFC_LNKC;
6532
6533	if (wufc) {
6534		igb_setup_rctl(adapter);
6535		igb_set_rx_mode(netdev);
6536
6537		/* turn on all-multi mode if wake on multicast is enabled */
6538		if (wufc & E1000_WUFC_MC) {
6539			rctl = rd32(E1000_RCTL);
6540			rctl |= E1000_RCTL_MPE;
6541			wr32(E1000_RCTL, rctl);
6542		}
6543
6544		ctrl = rd32(E1000_CTRL);
6545		/* advertise wake from D3Cold */
6546		#define E1000_CTRL_ADVD3WUC 0x00100000
6547		/* phy power management enable */
6548		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6549		ctrl |= E1000_CTRL_ADVD3WUC;
6550		wr32(E1000_CTRL, ctrl);
6551
6552		/* Allow time for pending master requests to run */
6553		igb_disable_pcie_master(hw);
6554
6555		wr32(E1000_WUC, E1000_WUC_PME_EN);
6556		wr32(E1000_WUFC, wufc);
6557	} else {
6558		wr32(E1000_WUC, 0);
6559		wr32(E1000_WUFC, 0);
6560	}
6561
6562	*enable_wake = wufc || adapter->en_mng_pt;
6563	if (!*enable_wake)
6564		igb_power_down_link(adapter);
6565	else
6566		igb_power_up_link(adapter);
6567
6568	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6569	 * would have already happened in close and is redundant. */
6570	igb_release_hw_control(adapter);
6571
6572	pci_disable_device(pdev);
6573
6574	return 0;
6575}
6576
6577#ifdef CONFIG_PM
6578static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6579{
6580	int retval;
6581	bool wake;
6582
6583	retval = __igb_shutdown(pdev, &wake);
6584	if (retval)
6585		return retval;
6586
6587	if (wake) {
6588		pci_prepare_to_sleep(pdev);
6589	} else {
6590		pci_wake_from_d3(pdev, false);
6591		pci_set_power_state(pdev, PCI_D3hot);
6592	}
6593
6594	return 0;
6595}
6596
6597static int igb_resume(struct pci_dev *pdev)
6598{
6599	struct net_device *netdev = pci_get_drvdata(pdev);
6600	struct igb_adapter *adapter = netdev_priv(netdev);
6601	struct e1000_hw *hw = &adapter->hw;
6602	int err;
6603
6604	pci_set_power_state(pdev, PCI_D0);
6605	pci_restore_state(pdev);
6606	pci_save_state(pdev);
6607
6608	err = pci_enable_device_mem(pdev);
6609	if (err) {
6610		dev_err(&pdev->dev,
6611			"igb: Cannot enable PCI device from suspend\n");
6612		return err;
6613	}
6614	pci_set_master(pdev);
6615
6616	pci_enable_wake(pdev, PCI_D3hot, 0);
6617	pci_enable_wake(pdev, PCI_D3cold, 0);
6618
6619	if (igb_init_interrupt_scheme(adapter)) {
6620		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6621		return -ENOMEM;
6622	}
6623
6624	igb_reset(adapter);
6625
6626	/* let the f/w know that the h/w is now under the control of the
6627	 * driver. */
6628	igb_get_hw_control(adapter);
6629
6630	wr32(E1000_WUS, ~0);
6631
6632	if (netif_running(netdev)) {
6633		err = igb_open(netdev);
6634		if (err)
6635			return err;
6636	}
6637
6638	netif_device_attach(netdev);
6639
6640	return 0;
6641}
6642#endif
6643
6644static void igb_shutdown(struct pci_dev *pdev)
6645{
6646	bool wake;
6647
6648	__igb_shutdown(pdev, &wake);
6649
6650	if (system_state == SYSTEM_POWER_OFF) {
6651		pci_wake_from_d3(pdev, wake);
6652		pci_set_power_state(pdev, PCI_D3hot);
6653	}
6654}
6655
6656#ifdef CONFIG_NET_POLL_CONTROLLER
6657/*
6658 * Polling 'interrupt' - used by things like netconsole to send skbs
6659 * without having to re-enable interrupts. It's not called while
6660 * the interrupt routine is executing.
6661 */
6662static void igb_netpoll(struct net_device *netdev)
6663{
6664	struct igb_adapter *adapter = netdev_priv(netdev);
6665	struct e1000_hw *hw = &adapter->hw;
6666	int i;
6667
6668	if (!adapter->msix_entries) {
6669		struct igb_q_vector *q_vector = adapter->q_vector[0];
6670		igb_irq_disable(adapter);
6671		napi_schedule(&q_vector->napi);
6672		return;
6673	}
6674
6675	for (i = 0; i < adapter->num_q_vectors; i++) {
6676		struct igb_q_vector *q_vector = adapter->q_vector[i];
6677		wr32(E1000_EIMC, q_vector->eims_value);
6678		napi_schedule(&q_vector->napi);
6679	}
6680}
6681#endif /* CONFIG_NET_POLL_CONTROLLER */
6682
6683/**
6684 * igb_io_error_detected - called when PCI error is detected
6685 * @pdev: Pointer to PCI device
6686 * @state: The current pci connection state
6687 *
6688 * This function is called after a PCI bus error affecting
6689 * this device has been detected.
6690 */
6691static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6692					      pci_channel_state_t state)
6693{
6694	struct net_device *netdev = pci_get_drvdata(pdev);
6695	struct igb_adapter *adapter = netdev_priv(netdev);
6696
6697	netif_device_detach(netdev);
6698
6699	if (state == pci_channel_io_perm_failure)
6700		return PCI_ERS_RESULT_DISCONNECT;
6701
6702	if (netif_running(netdev))
6703		igb_down(adapter);
6704	pci_disable_device(pdev);
6705
6706	/* Request a slot reset. */
6707	return PCI_ERS_RESULT_NEED_RESET;
6708}
6709
6710/**
6711 * igb_io_slot_reset - called after the pci bus has been reset.
6712 * @pdev: Pointer to PCI device
6713 *
6714 * Restart the card from scratch, as if from a cold-boot. Implementation
6715 * resembles the first-half of the igb_resume routine.
6716 */
6717static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6718{
6719	struct net_device *netdev = pci_get_drvdata(pdev);
6720	struct igb_adapter *adapter = netdev_priv(netdev);
6721	struct e1000_hw *hw = &adapter->hw;
6722	pci_ers_result_t result;
6723	int err;
6724
6725	if (pci_enable_device_mem(pdev)) {
6726		dev_err(&pdev->dev,
6727			"Cannot re-enable PCI device after reset.\n");
6728		result = PCI_ERS_RESULT_DISCONNECT;
6729	} else {
6730		pci_set_master(pdev);
6731		pci_restore_state(pdev);
6732		pci_save_state(pdev);
6733
6734		pci_enable_wake(pdev, PCI_D3hot, 0);
6735		pci_enable_wake(pdev, PCI_D3cold, 0);
6736
6737		igb_reset(adapter);
6738		wr32(E1000_WUS, ~0);
6739		result = PCI_ERS_RESULT_RECOVERED;
6740	}
6741
6742	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6743	if (err) {
6744		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6745		        "failed 0x%0x\n", err);
6746		/* non-fatal, continue */
6747	}
6748
6749	return result;
6750}
6751
6752/**
6753 * igb_io_resume - called when traffic can start flowing again.
6754 * @pdev: Pointer to PCI device
6755 *
6756 * This callback is called when the error recovery driver tells us that
6757 * its OK to resume normal operation. Implementation resembles the
6758 * second-half of the igb_resume routine.
6759 */
6760static void igb_io_resume(struct pci_dev *pdev)
6761{
6762	struct net_device *netdev = pci_get_drvdata(pdev);
6763	struct igb_adapter *adapter = netdev_priv(netdev);
6764
6765	if (netif_running(netdev)) {
6766		if (igb_up(adapter)) {
6767			dev_err(&pdev->dev, "igb_up failed after reset\n");
6768			return;
6769		}
6770	}
6771
6772	netif_device_attach(netdev);
6773
6774	/* let the f/w know that the h/w is now under the control of the
6775	 * driver. */
6776	igb_get_hw_control(adapter);
6777}
6778
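/**
 * igb_rar_set_qsel - write a MAC address into a receive address register
 * @adapter: board private structure
 * @addr: MAC address in network byte order
 * @index: receive address register to program
 * @qsel: pool/queue to associate with the address
 **/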
6779static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6780                             u8 qsel)
6781{
6782	u32 rar_low, rar_high;
6783	struct e1000_hw *hw = &adapter->hw;
6784
6785	/* HW expects these in little endian so we reverse the byte order
6786	 * from network order (big endian) to little endian
6787	 */
6788	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6789	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6790	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6791
6792	/* Indicate to hardware the Address is Valid. */
6793	rar_high |= E1000_RAH_AV;
6794
6795	if (hw->mac.type == e1000_82575)
6796		rar_high |= E1000_RAH_POOL_1 * qsel;
6797	else
6798		rar_high |= E1000_RAH_POOL_1 << qsel;
6799
6800	wr32(E1000_RAL(index), rar_low);
6801	wrfl();
6802	wr32(E1000_RAH(index), rar_high);
6803	wrfl();
6804}
6805
6806static int igb_set_vf_mac(struct igb_adapter *adapter,
6807                          int vf, unsigned char *mac_addr)
6808{
6809	struct e1000_hw *hw = &adapter->hw;
6810	/* VF MAC addresses start at the end of the receive addresses and move
6811	 * towards the first, so a collision should not be possible */
6812	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6813
6814	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6815
6816	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6817
6818	return 0;
6819}
6820
6821static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6822{
6823	struct igb_adapter *adapter = netdev_priv(netdev);
6824	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6825		return -EINVAL;
6826	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6827	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6828	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6829				      " change effective.");
6830	if (test_bit(__IGB_DOWN, &adapter->state)) {
6831		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6832			 " but the PF device is not up.\n");
6833		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6834			 " attempting to use the VF device.\n");
6835	}
6836	return igb_set_vf_mac(adapter, vf, mac);
6837}
6838
6839static int igb_link_mbps(int internal_link_speed)
6840{
6841	switch (internal_link_speed) {
6842	case SPEED_100:
6843		return 100;
6844	case SPEED_1000:
6845		return 1000;
6846	default:
6847		return 0;
6848	}
6849}
6850
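/**
 * igb_set_vf_rate_limit - program the per-VF transmit rate limiter
 * @hw: pointer to the HW structure
 * @vf: VF (and therefore queue) to limit
 * @tx_rate: transmit rate in Mbps, 0 disables the limit
 * @link_speed: current link speed in Mbps
 **/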
6851static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6852				  int link_speed)
6853{
6854	int rf_dec, rf_int;
6855	u32 bcnrc_val;
6856
6857	if (tx_rate != 0) {
6858		/* Calculate the rate factor values to set */
6859		rf_int = link_speed / tx_rate;
6860		rf_dec = (link_speed - (rf_int * tx_rate));
6861		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6862
6863		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6864		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6865		               E1000_RTTBCNRC_RF_INT_MASK);
6866		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6867	} else {
6868		bcnrc_val = 0;
6869	}
6870
6871	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6872	wr32(E1000_RTTBCNRC, bcnrc_val);
6873}
6874
6875static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6876{
6877	int actual_link_speed, i;
6878	bool reset_rate = false;
6879
6880	/* VF TX rate limit was not set or not supported */
6881	if ((adapter->vf_rate_link_speed == 0) ||
6882	    (adapter->hw.mac.type != e1000_82576))
6883		return;
6884
6885	actual_link_speed = igb_link_mbps(adapter->link_speed);
6886	if (actual_link_speed != adapter->vf_rate_link_speed) {
6887		reset_rate = true;
6888		adapter->vf_rate_link_speed = 0;
6889		dev_info(&adapter->pdev->dev,
6890		         "Link speed has been changed. VF Transmit "
6891		         "rate is disabled\n");
6892	}
6893
6894	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6895		if (reset_rate)
6896			adapter->vf_data[i].tx_rate = 0;
6897
6898		igb_set_vf_rate_limit(&adapter->hw, i,
6899		                      adapter->vf_data[i].tx_rate,
6900		                      actual_link_speed);
6901	}
6902}
6903
6904static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6905{
6906	struct igb_adapter *adapter = netdev_priv(netdev);
6907	struct e1000_hw *hw = &adapter->hw;
6908	int actual_link_speed;
6909
6910	if (hw->mac.type != e1000_82576)
6911		return -EOPNOTSUPP;
6912
6913	actual_link_speed = igb_link_mbps(adapter->link_speed);
6914	if ((vf >= adapter->vfs_allocated_count) ||
6915	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6916	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6917		return -EINVAL;
6918
6919	adapter->vf_rate_link_speed = actual_link_speed;
6920	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6921	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6922
6923	return 0;
6924}
6925
6926static int igb_ndo_get_vf_config(struct net_device *netdev,
6927				 int vf, struct ifla_vf_info *ivi)
6928{
6929	struct igb_adapter *adapter = netdev_priv(netdev);
6930	if (vf >= adapter->vfs_allocated_count)
6931		return -EINVAL;
6932	ivi->vf = vf;
6933	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6934	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6935	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6936	ivi->qos = adapter->vf_data[vf].pf_qos;
6937	return 0;
6938}
6939
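/**
 * igb_vmm_control - configure VMDq/SR-IOV related hardware features
 * @adapter: board private structure
 *
 * Enables VLAN tag handling for replication and, when VFs are allocated,
 * turns on loopback, replication and anti-spoofing in the PF.
 **/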
6940static void igb_vmm_control(struct igb_adapter *adapter)
6941{
6942	struct e1000_hw *hw = &adapter->hw;
6943	u32 reg;
6944
6945	switch (hw->mac.type) {
6946	case e1000_82575:
6947	default:
6948		/* replication is not supported for 82575 */
6949		return;
6950	case e1000_82576:
6951		/* notify HW that the MAC is adding vlan tags */
6952		reg = rd32(E1000_DTXCTL);
6953		reg |= E1000_DTXCTL_VLAN_ADDED;
6954		wr32(E1000_DTXCTL, reg);
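		/* fall through */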
6955	case e1000_82580:
6956		/* enable replication vlan tag stripping */
6957		reg = rd32(E1000_RPLOLR);
6958		reg |= E1000_RPLOLR_STRVLAN;
6959		wr32(E1000_RPLOLR, reg);
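		/* fall through */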
6960	case e1000_i350:
6961		/* none of the above registers are supported by i350 */
6962		break;
6963	}
6964
6965	if (adapter->vfs_allocated_count) {
6966		igb_vmdq_set_loopback_pf(hw, true);
6967		igb_vmdq_set_replication_pf(hw, true);
6968		igb_vmdq_set_anti_spoofing_pf(hw, true,
6969						adapter->vfs_allocated_count);
6970	} else {
6971		igb_vmdq_set_loopback_pf(hw, false);
6972		igb_vmdq_set_replication_pf(hw, false);
6973	}
6974}
6975
6976/* igb_main.c */
6977