igb_main.c revision 9ab64ba3c74540cfb8716232834df486bcc6120d
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define MAJ 3
#define MIN 0
#define BUILD 6
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};


static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};

/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	u16 i, n;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
		netdev->name,
		netdev->state,
		netdev->trans_start,
		netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		struct igb_tx_buffer *buffer_info;
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
			   (u64)buffer_info->dma,
			   buffer_info->length,
			   buffer_info->next_to_watch,
			   (u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
	 *   +--------------------------------------------------------------+
	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng  ntw timestamp        bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			struct igb_tx_buffer *buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, i);
			buffer_info = &tx_ring->tx_buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X  %p %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
				i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}

	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			   rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
	 *   | Checksum   Ident  |   |           |    | Type | Type |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
					  DUMP_PREFIX_ADDRESS,
					  16, 1,
					  phys_to_virt(
					    buffer_info->page_dma +
					    buffer_info->page_offset),
					  PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

		}
	}

exit:
	return;
}


/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
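	/*
	 * Illustrative note (editorial, not driver logic): on pre-82580
	 * parts shift stays 0, so the two reads below simply concatenate
	 * into stamp = ((u64)SYSTIMH << 32) | SYSTIML, a plain 64-bit
	 * cycle count.
	 */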
566	if (hw->mac.type >= e1000_82580) {
567		stamp = rd32(E1000_SYSTIMR) >> 8;
568		shift = IGB_82580_TSYNC_SHIFT;
569	}
570
571	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
572	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
573	return stamp;
574}
575
576/**
577 * igb_get_hw_dev - return device
578 * used by hardware layer to print debugging information
579 **/
580struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
581{
582	struct igb_adapter *adapter = hw->back;
583	return adapter->netdev;
584}
585
586/**
587 * igb_init_module - Driver Registration Routine
588 *
589 * igb_init_module is the first routine called when the driver is
590 * loaded. All it does is register with the PCI subsystem.
591 **/
592static int __init igb_init_module(void)
593{
594	int ret;
595	printk(KERN_INFO "%s - version %s\n",
596	       igb_driver_string, igb_driver_version);
597
598	printk(KERN_INFO "%s\n", igb_copyright);
599
600#ifdef CONFIG_IGB_DCA
601	dca_register_notify(&dca_notifier);
602#endif
603	ret = pci_register_driver(&igb_driver);
604	return ret;
605}
606
607module_init(igb_init_module);
608
609/**
610 * igb_exit_module - Driver Exit Cleanup Routine
611 *
612 * igb_exit_module is called just before the driver is removed
613 * from memory.
614 **/
615static void __exit igb_exit_module(void)
616{
617#ifdef CONFIG_IGB_DCA
618	dca_unregister_notify(&dca_notifier);
619#endif
620	pci_unregister_driver(&igb_driver);
621}
622
623module_exit(igb_exit_module);
624
625#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
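/*
 * Illustrative expansions of the macro above (editorial, not part of the
 * driver): Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1,
 * Q_IDX_82576(3) = 9, ... i.e. even indices land in queues 0..7 and odd
 * indices in queues 8..15, interleaving around the per-VF queue pairs.
 */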
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
				                               Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
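
/*
 * Worked example for the function above (illustrative): on an 82576 with
 * vfs_allocated_count = 2 and rss_queues = 2, the 82576 case sets
 * rx_ring[0]->reg_idx = 2 + Q_IDX_82576(0) = 2 and rx_ring[1]->reg_idx =
 * 2 + Q_IDX_82576(1) = 10, then falls through to the default case, which
 * assigns the Tx rings reg_idx values 2 and 3.
 */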
661
662static void igb_free_queues(struct igb_adapter *adapter)
663{
664	int i;
665
666	for (i = 0; i < adapter->num_tx_queues; i++) {
667		kfree(adapter->tx_ring[i]);
668		adapter->tx_ring[i] = NULL;
669	}
670	for (i = 0; i < adapter->num_rx_queues; i++) {
671		kfree(adapter->rx_ring[i]);
672		adapter->rx_ring[i] = NULL;
673	}
674	adapter->num_rx_queues = 0;
675	adapter->num_tx_queues = 0;
676}
677
678/**
679 * igb_alloc_queues - Allocate memory for all rings
680 * @adapter: board private structure to initialize
681 *
682 * We allocate one ring per queue at run-time since we don't know the
683 * number of queues at compile-time.
684 **/
685static int igb_alloc_queues(struct igb_adapter *adapter)
686{
687	struct igb_ring *ring;
688	int i;
689	int orig_node = adapter->node;
690
691	for (i = 0; i < adapter->num_tx_queues; i++) {
692		if (orig_node == -1) {
693			int cur_node = next_online_node(adapter->node);
694			if (cur_node == MAX_NUMNODES)
695				cur_node = first_online_node;
696			adapter->node = cur_node;
697		}
698		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699				    adapter->node);
700		if (!ring)
701			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702		if (!ring)
703			goto err;
704		ring->count = adapter->tx_ring_count;
705		ring->queue_index = i;
706		ring->dev = &adapter->pdev->dev;
707		ring->netdev = adapter->netdev;
708		ring->numa_node = adapter->node;
709		/* For 82575, context index must be unique per ring. */
710		if (adapter->hw.mac.type == e1000_82575)
711			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
712		adapter->tx_ring[i] = ring;
713	}
714	/* Restore the adapter's original node */
715	adapter->node = orig_node;
716
717	for (i = 0; i < adapter->num_rx_queues; i++) {
718		if (orig_node == -1) {
719			int cur_node = next_online_node(adapter->node);
720			if (cur_node == MAX_NUMNODES)
721				cur_node = first_online_node;
722			adapter->node = cur_node;
723		}
724		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725				    adapter->node);
726		if (!ring)
727			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728		if (!ring)
729			goto err;
730		ring->count = adapter->rx_ring_count;
731		ring->queue_index = i;
732		ring->dev = &adapter->pdev->dev;
733		ring->netdev = adapter->netdev;
734		ring->numa_node = adapter->node;
735		/* set flag indicating ring supports SCTP checksum offload */
736		if (adapter->hw.mac.type >= e1000_82576)
737			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
738		adapter->rx_ring[i] = ring;
739	}
740	/* Restore the adapter's original node */
741	adapter->node = orig_node;
742
743	igb_cache_ring_register(adapter);
744
745	return 0;
746
747err:
748	/* Restore the adapter's original node */
749	adapter->node = orig_node;
750	igb_free_queues(adapter);
751
752	return -ENOMEM;
753}

/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset within IVAR, should be a multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = array_rd32(E1000_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit */
	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

	array_wr32(E1000_IVAR0, index, ivar);
}
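
/*
 * Worked example (illustrative): for an 82580 Rx queue 5 on MSI-X vector
 * 3, igb_assign_vector() below calls igb_write_ivar(hw, 3, 5 >> 1,
 * (5 & 0x1) << 4), i.e. row index 2 and column offset 16, so bits 23:16
 * of IVAR(2) become 0x83 (vector 3 | E1000_IVAR_VALID).
 */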

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;
	u32 msixbm = 0;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/*
		 * 82576 uses a table that essentially consists of 2 columns
		 * with 8 rows.  The ordering is column-major so we use the
		 * lower 3 bits as the row index, and the 4th bit as the
		 * column offset.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue & 0x7,
				       (rx_queue & 0x8) << 1);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue & 0x7,
				       ((tx_queue & 0x8) << 1) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/*
		 * On 82580 and newer adapters the scheme is similar to 82576
		 * however instead of ordering column-major we have things
		 * ordered row-major.  So we traverse the table by using
		 * bit 0 as the column offset, and the remaining bits as the
		 * row index.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
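
/*
 * Example of the 82575 bitmask path above (illustrative): a vector
 * carrying Rx queue 1 and Tx queue 1 gets msixbm =
 * (E1000_EICR_RX_QUEUE0 << 1) | (E1000_EICR_TX_QUEUE0 << 1); the same
 * value is cached in eims_value and reused for the EIMS/EIMC writes in
 * igb_irq_enable()/igb_irq_disable().
 */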

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support */
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		                      E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
		                E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx.ring && q_vector->tx.ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
				q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
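
/*
 * Illustrative vector budget for the function above: with rss_queues = 4,
 * no VFs and IGB_FLAG_QUEUE_PAIRS clear, numvecs = 4 Rx + 4 Tx + 1 link
 * = 9 MSI-X vectors; with queue pairing set it drops to 4 + 1 = 5.
 */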

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;
	int orig_node = adapter->node;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
						adapter->num_tx_queues)) &&
		    (adapter->num_rx_queues == v_idx))
			adapter->node = orig_node;
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
					adapter->node);
		if (!q_vector)
			q_vector = kzalloc(sizeof(struct igb_q_vector),
					   GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	return 0;

err_out:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx.ring = adapter->rx_ring[ring_idx];
	q_vector->rx.ring->q_vector = q_vector;
	q_vector->rx.count++;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx.ring = adapter->tx_ring[ring_idx];
	q_vector->tx.ring->q_vector = q_vector;
	q_vector->tx.count++;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx.work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
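
/*
 * Mapping example (illustrative): with 4 Rx and 4 Tx queues and 8
 * q_vectors, Rx rings take vectors 0-3 and Tx rings take vectors 4-7;
 * with only 4 q_vectors (queue pairing), vector i carries both Tx ring i
 * and Rx ring i.
 */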

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	}

	igb_assign_vector(adapter->q_vector[0], 0);

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++)
			free_irq(adapter->msix_entries[vector++].vector,
				 adapter->q_vector[i]);
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}

/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
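	/*
	 * Example (illustrative): on a 256-entry ring igb_desc_unused()
	 * reports at most 255 free slots, so the initial fill below leaves
	 * one descriptor empty and a full ring can never be mistaken for
	 * an empty one.
	 */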
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}

/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}

/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}

void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_disable(&(adapter->q_vector[i]->napi));

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset */
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}

void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}

void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition the PBA for MTUs greater than 9K.
	 * CTRL.RST is required for the change to take effect.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the Tx FIFO also stores 16 bytes of information about each
		 * packet, but don't include the Ethernet FCS because the
		 * hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
			((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;
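
	/*
	 * Illustrative arithmetic for the watermarks above (assuming a
	 * hypothetical pba of 34 KB and a 1522 byte max frame):
	 * hwm = min(34816 * 9 / 10, 34816 - 2 * 1522) = 31334, so
	 * fc->high_water = 31334 & 0xFFF0 = 31328 and fc->low_water = 31312.
	 */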

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");
1720	if (hw->mac.type > e1000_82580) {
1721		if (adapter->flags & IGB_FLAG_DMAC) {
1722			u32 reg;
1723
1724			/*
1725			 * DMA Coalescing high water mark needs to be higher
1726			 * than * the * Rx threshold.  The Rx threshold is
1727			 * currently * pba - 6, so we * should use a high water
1728			 * mark of pba * - 4. */
1729			hwm = (pba - 4) << 10;
1730
1731			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1732			       & E1000_DMACR_DMACTHR_MASK);
1733
1734			/* transition to L0s or L1 if available */
1735			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1736
1737			/* watchdog timer of ~1000 usec, in 32 usec units (1000 >> 5 = 31) */
1738			reg |= (1000 >> 5);
1739			wr32(E1000_DMACR, reg);
1740
1741			/* no lower threshold to disable coalescing (smart FIFO);
1742			 * the threshold register is simply cleared to 0 */
1743			wr32(E1000_DMCRTRH, 0);
1744
1745			/* program the DMA coalescing high water mark computed above */
1746			wr32(E1000_FCRTC, hwm);
1747
1748			/*
1749			 * This sets the time to wait before requesting a
1750			 * transition to a low power state to the number of usecs
1751			 * needed to receive a 512 byte frame at gigabit line rate
1752			 */
1753			reg = rd32(E1000_DMCTLX);
1754			reg |= IGB_DMCTLX_DCFLUSH_DIS;
1755
1756			/* Delay 255 usec before entering Lx state. */
1757			reg |= 0xFF;
1758			wr32(E1000_DMCTLX, reg);
1759
1760			/* free space in Tx packet buffer to wake from DMAC */
1761			wr32(E1000_DMCTXTH,
1762			     (IGB_MIN_TXPBSIZE -
1763			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
1764			     >> 6);
1765
1766			/* make low power state decision controlled by DMAC */
1767			reg = rd32(E1000_PCIEMISC);
1768			reg |= E1000_PCIEMISC_LX_DECISION;
1769			wr32(E1000_PCIEMISC, reg);
1770		} /* end if IGB_FLAG_DMAC set */
1771	}
1772	if (hw->mac.type == e1000_82580) {
1773		u32 reg = rd32(E1000_PCIEMISC);
1774		wr32(E1000_PCIEMISC,
1775		                reg & ~E1000_PCIEMISC_LX_DECISION);
1776	}
1777	if (!netif_running(adapter->netdev))
1778		igb_power_down_link(adapter);
1779
1780	igb_update_mng_vlan(adapter);
1781
1782	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1783	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1784
1785	igb_get_phy_info(hw);
1786}
1787
1788static u32 igb_fix_features(struct net_device *netdev, u32 features)
1789{
1790	/*
1791	 * Since there is no support for separate rx/tx vlan accel
1792	 * enable/disable, make sure the tx flag always matches the rx flag.
1793	 */
1794	if (features & NETIF_F_HW_VLAN_RX)
1795		features |= NETIF_F_HW_VLAN_TX;
1796	else
1797		features &= ~NETIF_F_HW_VLAN_TX;
1798
1799	return features;
1800}
1801
1802static int igb_set_features(struct net_device *netdev, u32 features)
1803{
1804	u32 changed = netdev->features ^ features;
1805
1806	if (changed & NETIF_F_HW_VLAN_RX)
1807		igb_vlan_mode(netdev, features);
1808
1809	return 0;
1810}
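
/*
 * Note: the networking core calls ndo_fix_features to sanitize a requested
 * feature set before ndo_set_features applies it, which is why
 * igb_fix_features above only normalizes the VLAN flags and
 * igb_set_features only acts on the bits that changed.
 */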
1811
1812static const struct net_device_ops igb_netdev_ops = {
1813	.ndo_open		= igb_open,
1814	.ndo_stop		= igb_close,
1815	.ndo_start_xmit		= igb_xmit_frame,
1816	.ndo_get_stats64	= igb_get_stats64,
1817	.ndo_set_rx_mode	= igb_set_rx_mode,
1818	.ndo_set_mac_address	= igb_set_mac,
1819	.ndo_change_mtu		= igb_change_mtu,
1820	.ndo_do_ioctl		= igb_ioctl,
1821	.ndo_tx_timeout		= igb_tx_timeout,
1822	.ndo_validate_addr	= eth_validate_addr,
1823	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1824	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1825	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1826	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1827	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1828	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1829#ifdef CONFIG_NET_POLL_CONTROLLER
1830	.ndo_poll_controller	= igb_netpoll,
1831#endif
1832	.ndo_fix_features	= igb_fix_features,
1833	.ndo_set_features	= igb_set_features,
1834};
1835
1836/**
1837 * igb_probe - Device Initialization Routine
1838 * @pdev: PCI device information struct
1839 * @ent: entry in igb_pci_tbl
1840 *
1841 * Returns 0 on success, negative on failure
1842 *
1843 * igb_probe initializes an adapter identified by a pci_dev structure.
1844 * The OS initialization, configuring of the adapter private structure,
1845 * and a hardware reset occur.
1846 **/
1847static int __devinit igb_probe(struct pci_dev *pdev,
1848			       const struct pci_device_id *ent)
1849{
1850	struct net_device *netdev;
1851	struct igb_adapter *adapter;
1852	struct e1000_hw *hw;
1853	u16 eeprom_data = 0;
1854	s32 ret_val;
1855	static int global_quad_port_a; /* global quad port a indication */
1856	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1857	unsigned long mmio_start, mmio_len;
1858	int err, pci_using_dac;
1859	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1860	u8 part_str[E1000_PBANUM_LENGTH];
1861
1862	/* Catch broken hardware that put the wrong VF device ID in
1863	 * the PCIe SR-IOV capability.
1864	 */
1865	if (pdev->is_virtfn) {
1866		WARN(1, "%s (%hx:%hx) should not be a VF!\n",
1867		     pci_name(pdev), pdev->vendor, pdev->device);
1868		return -EINVAL;
1869	}
1870
1871	err = pci_enable_device_mem(pdev);
1872	if (err)
1873		return err;
1874
1875	pci_using_dac = 0;
1876	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1877	if (!err) {
1878		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1879		if (!err)
1880			pci_using_dac = 1;
1881	} else {
1882		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1883		if (err) {
1884			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1885			if (err) {
1886				dev_err(&pdev->dev, "No usable DMA "
1887					"configuration, aborting\n");
1888				goto err_dma;
1889			}
1890		}
1891	}
1892
1893	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1894	                                   IORESOURCE_MEM),
1895	                                   igb_driver_name);
1896	if (err)
1897		goto err_pci_reg;
1898
1899	pci_enable_pcie_error_reporting(pdev);
1900
1901	pci_set_master(pdev);
1902	pci_save_state(pdev);
1903
1904	err = -ENOMEM;
1905	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1906				   IGB_MAX_TX_QUEUES);
1907	if (!netdev)
1908		goto err_alloc_etherdev;
1909
1910	SET_NETDEV_DEV(netdev, &pdev->dev);
1911
1912	pci_set_drvdata(pdev, netdev);
1913	adapter = netdev_priv(netdev);
1914	adapter->netdev = netdev;
1915	adapter->pdev = pdev;
1916	hw = &adapter->hw;
1917	hw->back = adapter;
1918	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1919
1920	mmio_start = pci_resource_start(pdev, 0);
1921	mmio_len = pci_resource_len(pdev, 0);
1922
1923	err = -EIO;
1924	hw->hw_addr = ioremap(mmio_start, mmio_len);
1925	if (!hw->hw_addr)
1926		goto err_ioremap;
1927
1928	netdev->netdev_ops = &igb_netdev_ops;
1929	igb_set_ethtool_ops(netdev);
1930	netdev->watchdog_timeo = 5 * HZ;
1931
1932	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1933
1934	netdev->mem_start = mmio_start;
1935	netdev->mem_end = mmio_start + mmio_len;
1936
1937	/* PCI config space info */
1938	hw->vendor_id = pdev->vendor;
1939	hw->device_id = pdev->device;
1940	hw->revision_id = pdev->revision;
1941	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1942	hw->subsystem_device_id = pdev->subsystem_device;
1943
1944	/* Copy the default MAC, PHY and NVM function pointers */
1945	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1946	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1947	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1948	/* Initialize skew-specific constants */
1949	err = ei->get_invariants(hw);
1950	if (err)
1951		goto err_sw_init;
1952
1953	/* setup the private structure */
1954	err = igb_sw_init(adapter);
1955	if (err)
1956		goto err_sw_init;
1957
1958	igb_get_bus_info_pcie(hw);
1959
1960	hw->phy.autoneg_wait_to_complete = false;
1961
1962	/* Copper options */
1963	if (hw->phy.media_type == e1000_media_type_copper) {
1964		hw->phy.mdix = AUTO_ALL_MODES;
1965		hw->phy.disable_polarity_correction = false;
1966		hw->phy.ms_type = e1000_ms_hw_default;
1967	}
1968
1969	if (igb_check_reset_block(hw))
1970		dev_info(&pdev->dev,
1971			"PHY reset is blocked due to SOL/IDER session.\n");
1972
1973	/*
1974	 * features is initialized to 0 in allocation, it might have bits
1975	 * set by igb_sw_init so we should use an or instead of an
1976	 * assignment.
1977	 */
1978	netdev->features |= NETIF_F_SG |
1979			    NETIF_F_IP_CSUM |
1980			    NETIF_F_IPV6_CSUM |
1981			    NETIF_F_TSO |
1982			    NETIF_F_TSO6 |
1983			    NETIF_F_RXHASH |
1984			    NETIF_F_RXCSUM |
1985			    NETIF_F_HW_VLAN_RX |
1986			    NETIF_F_HW_VLAN_TX;
1987
1988	/* copy netdev features into list of user selectable features */
1989	netdev->hw_features |= netdev->features;
1990
1991	/* set this bit last since it cannot be part of hw_features */
1992	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1993
1994	netdev->vlan_features |= NETIF_F_TSO |
1995				 NETIF_F_TSO6 |
1996				 NETIF_F_IP_CSUM |
1997				 NETIF_F_IPV6_CSUM |
1998				 NETIF_F_SG;
1999
2000	if (pci_using_dac) {
2001		netdev->features |= NETIF_F_HIGHDMA;
2002		netdev->vlan_features |= NETIF_F_HIGHDMA;
2003	}
2004
2005	if (hw->mac.type >= e1000_82576) {
2006		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2007		netdev->features |= NETIF_F_SCTP_CSUM;
2008	}
2009
2010	netdev->priv_flags |= IFF_UNICAST_FLT;
2011
2012	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2013
2014	/* before reading the NVM, reset the controller to put the device in a
2015	 * known good starting state */
2016	hw->mac.ops.reset_hw(hw);
2017
2018	/* make sure the NVM is good */
2019	if (hw->nvm.ops.validate(hw) < 0) {
2020		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2021		err = -EIO;
2022		goto err_eeprom;
2023	}
2024
2025	/* copy the MAC address out of the NVM */
2026	if (hw->mac.ops.read_mac_addr(hw))
2027		dev_err(&pdev->dev, "NVM Read Error\n");
2028
2029	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2030	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2031
2032	if (!is_valid_ether_addr(netdev->perm_addr)) {
2033		dev_err(&pdev->dev, "Invalid MAC Address\n");
2034		err = -EIO;
2035		goto err_eeprom;
2036	}
2037
2038	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2039	            (unsigned long) adapter);
2040	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2041	            (unsigned long) adapter);
2042
2043	INIT_WORK(&adapter->reset_task, igb_reset_task);
2044	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2045
2046	/* Initialize link properties that are user-changeable */
2047	adapter->fc_autoneg = true;
2048	hw->mac.autoneg = true;
2049	hw->phy.autoneg_advertised = 0x2f;
2050
2051	hw->fc.requested_mode = e1000_fc_default;
2052	hw->fc.current_mode = e1000_fc_default;
2053
2054	igb_validate_mdi_setting(hw);
2055
2056	/* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
2057	 * enable the ACPI Magic Packet filter
2058	 */
2059
2060	if (hw->bus.func == 0)
2061		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2062	else if (hw->mac.type >= e1000_82580)
2063		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2064		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2065		                 &eeprom_data);
2066	else if (hw->bus.func == 1)
2067		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2068
2069	if (eeprom_data & eeprom_apme_mask)
2070		adapter->eeprom_wol |= E1000_WUFC_MAG;
2071
2072	/* now that we have the eeprom settings, apply the special cases where
2073	 * the eeprom may be wrong or the board simply won't support wake on
2074	 * lan on a particular port */
2075	switch (pdev->device) {
2076	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2077		adapter->eeprom_wol = 0;
2078		break;
2079	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2080	case E1000_DEV_ID_82576_FIBER:
2081	case E1000_DEV_ID_82576_SERDES:
2082		/* Wake events only supported on port A for dual fiber
2083		 * regardless of eeprom setting */
2084		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2085			adapter->eeprom_wol = 0;
2086		break;
2087	case E1000_DEV_ID_82576_QUAD_COPPER:
2088	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2089		/* if quad port adapter, disable WoL on all but port A */
2090		if (global_quad_port_a != 0)
2091			adapter->eeprom_wol = 0;
2092		else
2093			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2094		/* Reset for multiple quad port adapters */
2095		if (++global_quad_port_a == 4)
2096			global_quad_port_a = 0;
2097		break;
2098	}
2099
2100	/* initialize the wol settings based on the eeprom settings */
2101	adapter->wol = adapter->eeprom_wol;
2102	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2103
2104	/* reset the hardware with the new settings */
2105	igb_reset(adapter);
2106
2107	/* let the f/w know that the h/w is now under the control of the
2108	 * driver. */
2109	igb_get_hw_control(adapter);
2110
2111	strcpy(netdev->name, "eth%d");
2112	err = register_netdev(netdev);
2113	if (err)
2114		goto err_register;
2115
2116	/* carrier off reporting is important to ethtool even BEFORE open */
2117	netif_carrier_off(netdev);
2118
2119#ifdef CONFIG_IGB_DCA
2120	if (dca_add_requester(&pdev->dev) == 0) {
2121		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2122		dev_info(&pdev->dev, "DCA enabled\n");
2123		igb_setup_dca(adapter);
2124	}
2125
2126#endif
2127	/* do hw tstamp init after resetting */
2128	igb_init_hw_timer(adapter);
2129
2130	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2131	/* print bus type/speed/width info */
2132	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2133		 netdev->name,
2134		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2135		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2136		                                            "unknown"),
2137		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2138		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2139		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2140		   "unknown"),
2141		 netdev->dev_addr);
2142
2143	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2144	if (ret_val)
2145		strcpy(part_str, "Unknown");
2146	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2147	dev_info(&pdev->dev,
2148		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2149		adapter->msix_entries ? "MSI-X" :
2150		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2151		adapter->num_rx_queues, adapter->num_tx_queues);
2152	switch (hw->mac.type) {
2153	case e1000_i350:
2154		igb_set_eee_i350(hw);
2155		break;
2156	default:
2157		break;
2158	}
2159	return 0;
2160
2161err_register:
2162	igb_release_hw_control(adapter);
2163err_eeprom:
2164	if (!igb_check_reset_block(hw))
2165		igb_reset_phy(hw);
2166
2167	if (hw->flash_address)
2168		iounmap(hw->flash_address);
2169err_sw_init:
2170	igb_clear_interrupt_scheme(adapter);
2171	iounmap(hw->hw_addr);
2172err_ioremap:
2173	free_netdev(netdev);
2174err_alloc_etherdev:
2175	pci_release_selected_regions(pdev,
2176	                             pci_select_bars(pdev, IORESOURCE_MEM));
2177err_pci_reg:
2178err_dma:
2179	pci_disable_device(pdev);
2180	return err;
2181}
2182
2183/**
2184 * igb_remove - Device Removal Routine
2185 * @pdev: PCI device information struct
2186 *
2187 * igb_remove is called by the PCI subsystem to alert the driver
2188 * that it should release a PCI device.  This could be caused by a
2189 * Hot-Plug event, or because the driver is going to be removed from
2190 * memory.
2191 **/
2192static void __devexit igb_remove(struct pci_dev *pdev)
2193{
2194	struct net_device *netdev = pci_get_drvdata(pdev);
2195	struct igb_adapter *adapter = netdev_priv(netdev);
2196	struct e1000_hw *hw = &adapter->hw;
2197
2198	/*
2199	 * The watchdog timer may be rescheduled, so explicitly
2200	 * prevent it from being rescheduled again.
2201	 */
2202	set_bit(__IGB_DOWN, &adapter->state);
2203	del_timer_sync(&adapter->watchdog_timer);
2204	del_timer_sync(&adapter->phy_info_timer);
2205
2206	cancel_work_sync(&adapter->reset_task);
2207	cancel_work_sync(&adapter->watchdog_task);
2208
2209#ifdef CONFIG_IGB_DCA
2210	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2211		dev_info(&pdev->dev, "DCA disabled\n");
2212		dca_remove_requester(&pdev->dev);
2213		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2214		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2215	}
2216#endif
2217
2218	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2219	 * would have already happened in close and is redundant. */
2220	igb_release_hw_control(adapter);
2221
2222	unregister_netdev(netdev);
2223
2224	igb_clear_interrupt_scheme(adapter);
2225
2226#ifdef CONFIG_PCI_IOV
2227	/* reclaim resources allocated to VFs */
2228	if (adapter->vf_data) {
2229		/* disable iov and allow time for transactions to clear */
2230		pci_disable_sriov(pdev);
2231		msleep(500);
2232
2233		kfree(adapter->vf_data);
2234		adapter->vf_data = NULL;
2235		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2236		wrfl();
2237		msleep(100);
2238		dev_info(&pdev->dev, "IOV Disabled\n");
2239	}
2240#endif
2241
2242	iounmap(hw->hw_addr);
2243	if (hw->flash_address)
2244		iounmap(hw->flash_address);
2245	pci_release_selected_regions(pdev,
2246	                             pci_select_bars(pdev, IORESOURCE_MEM));
2247
2248	free_netdev(netdev);
2249
2250	pci_disable_pcie_error_reporting(pdev);
2251
2252	pci_disable_device(pdev);
2253}
2254
2255/**
2256 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2257 * @adapter: board private structure to initialize
2258 *
2259 * This function initializes the vf specific data storage and then attempts to
2260 * allocate the VFs.  The reason for ordering it this way is because it is much
2261 * allocate the VFs.  The reason for this ordering is that it is much more
2262 * expensive time-wise to disable SR-IOV than it is to allocate and free
2263 **/
2264static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2265{
2266#ifdef CONFIG_PCI_IOV
2267	struct pci_dev *pdev = adapter->pdev;
2268
2269	if (adapter->vfs_allocated_count) {
2270		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2271		                           sizeof(struct vf_data_storage),
2272		                           GFP_KERNEL);
2273		/* if allocation failed then we do not support SR-IOV */
2274		if (!adapter->vf_data) {
2275			adapter->vfs_allocated_count = 0;
2276			dev_err(&pdev->dev, "Unable to allocate memory for VF "
2277			        "Data Storage\n");
2278		}
2279	}
2280
2281	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2282		kfree(adapter->vf_data);
2283		adapter->vf_data = NULL;
2284#endif /* CONFIG_PCI_IOV */
2285		adapter->vfs_allocated_count = 0;
2286#ifdef CONFIG_PCI_IOV
2287	} else {
2288		unsigned char mac_addr[ETH_ALEN];
2289		int i;
2290		dev_info(&pdev->dev, "%d vfs allocated\n",
2291		         adapter->vfs_allocated_count);
2292		for (i = 0; i < adapter->vfs_allocated_count; i++) {
2293			random_ether_addr(mac_addr);
2294			igb_set_vf_mac(adapter, i, mac_addr);
2295		}
2296		/* DMA Coalescing is not supported in IOV mode. */
2297		if (adapter->flags & IGB_FLAG_DMAC)
2298			adapter->flags &= ~IGB_FLAG_DMAC;
2299	}
2300#endif /* CONFIG_PCI_IOV */
2301}
2302
2303
2304/**
2305 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2306 * @adapter: board private structure to initialize
2307 *
2308 * igb_init_hw_timer initializes the function pointers and values for the
2309 * hardware timer.
2310 **/
2311static void igb_init_hw_timer(struct igb_adapter *adapter)
2312{
2313	struct e1000_hw *hw = &adapter->hw;
2314
2315	switch (hw->mac.type) {
2316	case e1000_i350:
2317	case e1000_82580:
2318		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2319		adapter->cycles.read = igb_read_clock;
2320		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2321		adapter->cycles.mult = 1;
2322		/*
2323		 * The 82580 timesync advances the system timer by 8ns every 8ns,
2324		 * and the value cannot be shifted.  Instead we need to shift
2325		 * the registers to generate a 64bit timer value.  As a result
2326		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2327		 * 24 in order to generate a larger value for synchronization.
2328		 */
2329		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2330		/* disable system timer temporarily by setting bit 31 */
2331		wr32(E1000_TSAUXC, 0x80000000);
2332		wrfl();
2333
2334		/* Set registers so that rollover occurs soon to test this. */
2335		wr32(E1000_SYSTIMR, 0x00000000);
2336		wr32(E1000_SYSTIML, 0x80000000);
2337		wr32(E1000_SYSTIMH, 0x000000FF);
2338		wrfl();
2339
2340		/* enable system timer by clearing bit 31 */
2341		wr32(E1000_TSAUXC, 0x0);
2342		wrfl();
2343
2344		timecounter_init(&adapter->clock,
2345				 &adapter->cycles,
2346				 ktime_to_ns(ktime_get_real()));
2347		/*
2348		 * Synchronize our NIC clock against system wall clock. NIC
2349		 * time stamp reading requires ~3us per sample, and each sample
2350		 * proved stable even under load, so only 10 samples are
2351		 * required for each offset comparison.
2352		 */
2353		memset(&adapter->compare, 0, sizeof(adapter->compare));
2354		adapter->compare.source = &adapter->clock;
2355		adapter->compare.target = ktime_get_real;
2356		adapter->compare.num_samples = 10;
2357		timecompare_update(&adapter->compare, 0);
2358		break;
2359	case e1000_82576:
2360		/*
2361		 * Initialize hardware timer: we keep it running just in case
2362		 * some program needs it later on.
2363		 */
2364		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2365		adapter->cycles.read = igb_read_clock;
2366		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2367		adapter->cycles.mult = 1;
2368		/*
2369		 * Scale the NIC clock cycle by a large factor so that
2370		 * relatively small clock corrections can be added or
2371		 * subtracted at each clock tick. The drawbacks of a large
2372		 * factor are a) that the clock register overflows more quickly
2373		 * (not such a big deal) and b) that the increment per tick has
2374		 * to fit into 24 bits.  As a result we need to use a shift of
2375		 * 19 so we can fit a value of 16 into the TIMINCA register.
2376		 */
2377		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2378		wr32(E1000_TIMINCA,
2379		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2380		                (16 << IGB_82576_TSYNC_SHIFT));
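		/*
		 * Sanity check on the shift of 19: the counter thus advances
		 * by 16 << 19 every 16ns, and the timecounter conversion
		 * (cycles * mult) >> shift with mult = 1 turns that back
		 * into 16ns per tick.
		 */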
2381
2382		/* Set registers so that rollover occurs soon to test this. */
2383		wr32(E1000_SYSTIML, 0x00000000);
2384		wr32(E1000_SYSTIMH, 0xFF800000);
2385		wrfl();
2386
2387		timecounter_init(&adapter->clock,
2388				 &adapter->cycles,
2389				 ktime_to_ns(ktime_get_real()));
2390		/*
2391		 * Synchronize our NIC clock against system wall clock. NIC
2392		 * time stamp reading requires ~3us per sample, and each sample
2393		 * proved stable even under load, so only 10 samples are
2394		 * required for each offset comparison.
2395		 */
2396		memset(&adapter->compare, 0, sizeof(adapter->compare));
2397		adapter->compare.source = &adapter->clock;
2398		adapter->compare.target = ktime_get_real;
2399		adapter->compare.num_samples = 10;
2400		timecompare_update(&adapter->compare, 0);
2401		break;
2402	case e1000_82575:
2403		/* 82575 does not support timesync */
2404	default:
2405		break;
2406	}
2407
2408}
2409
2410/**
2411 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2412 * @adapter: board private structure to initialize
2413 *
2414 * igb_sw_init initializes the Adapter private data structure.
2415 * Fields are initialized based on PCI device information and
2416 * OS network device settings (MTU size).
2417 **/
2418static int __devinit igb_sw_init(struct igb_adapter *adapter)
2419{
2420	struct e1000_hw *hw = &adapter->hw;
2421	struct net_device *netdev = adapter->netdev;
2422	struct pci_dev *pdev = adapter->pdev;
2423
2424	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2425
2426	/* set default ring sizes */
2427	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2428	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2429
2430	/* set default ITR values */
2431	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2432	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2433
2434	/* set default work limits */
2435	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2436
2437	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2438				  VLAN_HLEN;
2439	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
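	/* e.g. the default 1500 byte MTU gives max_frame_size =
	 * 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522,
	 * and min_frame_size = 60 + 4 = 64 */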
2440
2441	adapter->node = -1;
2442
2443	spin_lock_init(&adapter->stats64_lock);
2444#ifdef CONFIG_PCI_IOV
2445	switch (hw->mac.type) {
2446	case e1000_82576:
2447	case e1000_i350:
2448		if (max_vfs > 7) {
2449			dev_warn(&pdev->dev,
2450				 "Maximum of 7 VFs per PF, using max\n");
2451			adapter->vfs_allocated_count = 7;
2452		} else
2453			adapter->vfs_allocated_count = max_vfs;
2454		break;
2455	default:
2456		break;
2457	}
2458#endif /* CONFIG_PCI_IOV */
2459	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2460	/* i350 cannot do RSS and SR-IOV at the same time */
2461	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2462		adapter->rss_queues = 1;
2463
2464	/*
2465	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2466	 * then we should combine the queues into a queue pair in order to
2467	 * conserve interrupts due to limited supply
2468	 */
2469	if ((adapter->rss_queues > 4) ||
2470	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2471		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
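	/* a queue pair shares one q_vector, and therefore one interrupt
	 * vector, between a Tx ring and an Rx ring */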
2472
2473	/* This call may decrease the number of queues */
2474	if (igb_init_interrupt_scheme(adapter)) {
2475		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2476		return -ENOMEM;
2477	}
2478
2479	igb_probe_vfs(adapter);
2480
2481	/* Explicitly disable IRQ since the NIC can be in any state. */
2482	igb_irq_disable(adapter);
2483
2484	if (hw->mac.type == e1000_i350)
2485		adapter->flags &= ~IGB_FLAG_DMAC;
2486
2487	set_bit(__IGB_DOWN, &adapter->state);
2488	return 0;
2489}
2490
2491/**
2492 * igb_open - Called when a network interface is made active
2493 * @netdev: network interface device structure
2494 *
2495 * Returns 0 on success, negative value on failure
2496 *
2497 * The open entry point is called when a network interface is made
2498 * active by the system (IFF_UP).  At this point all resources needed
2499 * for transmit and receive operations are allocated, the interrupt
2500 * handler is registered with the OS, the watchdog timer is started,
2501 * and the stack is notified that the interface is ready.
2502 **/
2503static int igb_open(struct net_device *netdev)
2504{
2505	struct igb_adapter *adapter = netdev_priv(netdev);
2506	struct e1000_hw *hw = &adapter->hw;
2507	int err;
2508	int i;
2509
2510	/* disallow open during test */
2511	if (test_bit(__IGB_TESTING, &adapter->state))
2512		return -EBUSY;
2513
2514	netif_carrier_off(netdev);
2515
2516	/* allocate transmit descriptors */
2517	err = igb_setup_all_tx_resources(adapter);
2518	if (err)
2519		goto err_setup_tx;
2520
2521	/* allocate receive descriptors */
2522	err = igb_setup_all_rx_resources(adapter);
2523	if (err)
2524		goto err_setup_rx;
2525
2526	igb_power_up_link(adapter);
2527
2528	/* before we allocate an interrupt, we must be ready to handle it.
2529	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2530	 * as soon as we call request_irq, so we have to set up our
2531	 * clean_rx handler before we do so.  */
2532	igb_configure(adapter);
2533
2534	err = igb_request_irq(adapter);
2535	if (err)
2536		goto err_req_irq;
2537
2538	/* From here on the code is the same as igb_up() */
2539	clear_bit(__IGB_DOWN, &adapter->state);
2540
2541	for (i = 0; i < adapter->num_q_vectors; i++)
2542		napi_enable(&(adapter->q_vector[i]->napi));
2543
2544	/* Clear any pending interrupts. */
2545	rd32(E1000_ICR);
2546
2547	igb_irq_enable(adapter);
2548
2549	/* notify VFs that reset has been completed */
2550	if (adapter->vfs_allocated_count) {
2551		u32 reg_data = rd32(E1000_CTRL_EXT);
2552		reg_data |= E1000_CTRL_EXT_PFRSTD;
2553		wr32(E1000_CTRL_EXT, reg_data);
2554	}
2555
2556	netif_tx_start_all_queues(netdev);
2557
2558	/* start the watchdog. */
2559	hw->mac.get_link_status = 1;
2560	schedule_work(&adapter->watchdog_task);
2561
2562	return 0;
2563
2564err_req_irq:
2565	igb_release_hw_control(adapter);
2566	igb_power_down_link(adapter);
2567	igb_free_all_rx_resources(adapter);
2568err_setup_rx:
2569	igb_free_all_tx_resources(adapter);
2570err_setup_tx:
2571	igb_reset(adapter);
2572
2573	return err;
2574}
2575
2576/**
2577 * igb_close - Disables a network interface
2578 * @netdev: network interface device structure
2579 *
2580 * Returns 0, this is not allowed to fail
2581 *
2582 * The close entry point is called when an interface is de-activated
2583 * by the OS.  The hardware is still under the driver's control, but
2584 * needs to be disabled.  A global MAC reset is issued to stop the
2585 * hardware, and all transmit and receive resources are freed.
2586 **/
2587static int igb_close(struct net_device *netdev)
2588{
2589	struct igb_adapter *adapter = netdev_priv(netdev);
2590
2591	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2592	igb_down(adapter);
2593
2594	igb_free_irq(adapter);
2595
2596	igb_free_all_tx_resources(adapter);
2597	igb_free_all_rx_resources(adapter);
2598
2599	return 0;
2600}
2601
2602/**
2603 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2604 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2605 *
2606 * Return 0 on success, negative on failure
2607 **/
2608int igb_setup_tx_resources(struct igb_ring *tx_ring)
2609{
2610	struct device *dev = tx_ring->dev;
2611	int orig_node = dev_to_node(dev);
2612	int size;
2613
2614	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2615	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2616	if (!tx_ring->tx_buffer_info)
2617		tx_ring->tx_buffer_info = vzalloc(size);
2618	if (!tx_ring->tx_buffer_info)
2619		goto err;
2620
2621	/* round up to nearest 4K */
2622	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2623	tx_ring->size = ALIGN(tx_ring->size, 4096);
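	/* e.g. assuming the default of 256 descriptors at 16 bytes each,
	 * the ring is exactly 4096 bytes and the ALIGN is a no-op */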
2624
2625	set_dev_node(dev, tx_ring->numa_node);
2626	tx_ring->desc = dma_alloc_coherent(dev,
2627					   tx_ring->size,
2628					   &tx_ring->dma,
2629					   GFP_KERNEL);
2630	set_dev_node(dev, orig_node);
2631	if (!tx_ring->desc)
2632		tx_ring->desc = dma_alloc_coherent(dev,
2633						   tx_ring->size,
2634						   &tx_ring->dma,
2635						   GFP_KERNEL);
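	/* both the buffer_info and descriptor allocations above try the
	 * ring's preferred NUMA node first and then fall back to any node */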
2636
2637	if (!tx_ring->desc)
2638		goto err;
2639
2640	tx_ring->next_to_use = 0;
2641	tx_ring->next_to_clean = 0;
2642
2643	return 0;
2644
2645err:
2646	vfree(tx_ring->tx_buffer_info);
2647	dev_err(dev,
2648		"Unable to allocate memory for the transmit descriptor ring\n");
2649	return -ENOMEM;
2650}
2651
2652/**
2653 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2654 *				  (Descriptors) for all queues
2655 * @adapter: board private structure
2656 *
2657 * Return 0 on success, negative on failure
2658 **/
2659static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2660{
2661	struct pci_dev *pdev = adapter->pdev;
2662	int i, err = 0;
2663
2664	for (i = 0; i < adapter->num_tx_queues; i++) {
2665		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2666		if (err) {
2667			dev_err(&pdev->dev,
2668				"Allocation for Tx Queue %u failed\n", i);
2669			for (i--; i >= 0; i--)
2670				igb_free_tx_resources(adapter->tx_ring[i]);
2671			break;
2672		}
2673	}
2674
2675	return err;
2676}
2677
2678/**
2679 * igb_setup_tctl - configure the transmit control registers
2680 * @adapter: Board private structure
2681 **/
2682void igb_setup_tctl(struct igb_adapter *adapter)
2683{
2684	struct e1000_hw *hw = &adapter->hw;
2685	u32 tctl;
2686
2687	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2688	wr32(E1000_TXDCTL(0), 0);
2689
2690	/* Program the Transmit Control Register */
2691	tctl = rd32(E1000_TCTL);
2692	tctl &= ~E1000_TCTL_CT;
2693	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2694		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2695
2696	igb_config_collision_dist(hw);
2697
2698	/* Enable transmits */
2699	tctl |= E1000_TCTL_EN;
2700
2701	wr32(E1000_TCTL, tctl);
2702}
2703
2704/**
2705 * igb_configure_tx_ring - Configure transmit ring after Reset
2706 * @adapter: board private structure
2707 * @ring: tx ring to configure
2708 *
2709 * Configure a transmit ring after a reset.
2710 **/
2711void igb_configure_tx_ring(struct igb_adapter *adapter,
2712                           struct igb_ring *ring)
2713{
2714	struct e1000_hw *hw = &adapter->hw;
2715	u32 txdctl = 0;
2716	u64 tdba = ring->dma;
2717	int reg_idx = ring->reg_idx;
2718
2719	/* disable the queue */
2720	wr32(E1000_TXDCTL(reg_idx), 0);
2721	wrfl();
2722	mdelay(10);
2723
2724	wr32(E1000_TDLEN(reg_idx),
2725	                ring->count * sizeof(union e1000_adv_tx_desc));
2726	wr32(E1000_TDBAL(reg_idx),
2727	                tdba & 0x00000000ffffffffULL);
2728	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2729
2730	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2731	wr32(E1000_TDH(reg_idx), 0);
2732	writel(0, ring->tail);
2733
2734	txdctl |= IGB_TX_PTHRESH;
2735	txdctl |= IGB_TX_HTHRESH << 8;
2736	txdctl |= IGB_TX_WTHRESH << 16;
2737
2738	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2739	wr32(E1000_TXDCTL(reg_idx), txdctl);
2740}
2741
2742/**
2743 * igb_configure_tx - Configure transmit Unit after Reset
2744 * @adapter: board private structure
2745 *
2746 * Configure the Tx unit of the MAC after a reset.
2747 **/
2748static void igb_configure_tx(struct igb_adapter *adapter)
2749{
2750	int i;
2751
2752	for (i = 0; i < adapter->num_tx_queues; i++)
2753		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2754}
2755
2756/**
2757 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2758 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2759 *
2760 * Returns 0 on success, negative on failure
2761 **/
2762int igb_setup_rx_resources(struct igb_ring *rx_ring)
2763{
2764	struct device *dev = rx_ring->dev;
2765	int orig_node = dev_to_node(dev);
2766	int size, desc_len;
2767
2768	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2769	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2770	if (!rx_ring->rx_buffer_info)
2771		rx_ring->rx_buffer_info = vzalloc(size);
2772	if (!rx_ring->rx_buffer_info)
2773		goto err;
2774
2775	desc_len = sizeof(union e1000_adv_rx_desc);
2776
2777	/* Round up to nearest 4K */
2778	rx_ring->size = rx_ring->count * desc_len;
2779	rx_ring->size = ALIGN(rx_ring->size, 4096);
2780
2781	set_dev_node(dev, rx_ring->numa_node);
2782	rx_ring->desc = dma_alloc_coherent(dev,
2783					   rx_ring->size,
2784					   &rx_ring->dma,
2785					   GFP_KERNEL);
2786	set_dev_node(dev, orig_node);
2787	if (!rx_ring->desc)
2788		rx_ring->desc = dma_alloc_coherent(dev,
2789						   rx_ring->size,
2790						   &rx_ring->dma,
2791						   GFP_KERNEL);
2792
2793	if (!rx_ring->desc)
2794		goto err;
2795
2796	rx_ring->next_to_clean = 0;
2797	rx_ring->next_to_use = 0;
2798
2799	return 0;
2800
2801err:
2802	vfree(rx_ring->rx_buffer_info);
2803	rx_ring->rx_buffer_info = NULL;
2804	dev_err(dev,
2805		"Unable to allocate memory for the receive descriptor ring\n");
2806	return -ENOMEM;
2807}
2808
2809/**
2810 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2811 *				  (Descriptors) for all queues
2812 * @adapter: board private structure
2813 *
2814 * Return 0 on success, negative on failure
2815 **/
2816static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2817{
2818	struct pci_dev *pdev = adapter->pdev;
2819	int i, err = 0;
2820
2821	for (i = 0; i < adapter->num_rx_queues; i++) {
2822		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2823		if (err) {
2824			dev_err(&pdev->dev,
2825				"Allocation for Rx Queue %u failed\n", i);
2826			for (i--; i >= 0; i--)
2827				igb_free_rx_resources(adapter->rx_ring[i]);
2828			break;
2829		}
2830	}
2831
2832	return err;
2833}
2834
2835/**
2836 * igb_setup_mrqc - configure the multiple receive queue control registers
2837 * @adapter: Board private structure
2838 **/
2839static void igb_setup_mrqc(struct igb_adapter *adapter)
2840{
2841	struct e1000_hw *hw = &adapter->hw;
2842	u32 mrqc, rxcsum;
2843	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2844	union e1000_reta {
2845		u32 dword;
2846		u8  bytes[4];
2847	} reta;
2848	static const u8 rsshash[40] = {
2849		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2850		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2851		0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2852		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2853
2854	/* Fill out hash function seeds */
2855	for (j = 0; j < 10; j++) {
2856		u32 rsskey = rsshash[(j * 4)];
2857		rsskey |= rsshash[(j * 4) + 1] << 8;
2858		rsskey |= rsshash[(j * 4) + 2] << 16;
2859		rsskey |= rsshash[(j * 4) + 3] << 24;
2860		array_wr32(E1000_RSSRK(0), j, rsskey);
2861	}
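	/* e.g. the first four seed bytes 0x6d 0x5a 0x56 0xda above pack
	 * little-endian into RSSRK(0) as 0xda565a6d */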
2862
2863	num_rx_queues = adapter->rss_queues;
2864
2865	if (adapter->vfs_allocated_count) {
2866		/* 82575 and 82576 support 2 RSS queues for VMDq */
2867		switch (hw->mac.type) {
2868		case e1000_i350:
2869		case e1000_82580:
2870			num_rx_queues = 1;
2871			shift = 0;
2872			break;
2873		case e1000_82576:
2874			shift = 3;
2875			num_rx_queues = 2;
2876			break;
2877		case e1000_82575:
2878			shift = 2;
2879			shift2 = 6;	/* fall through */
2880		default:
2881			break;
2882		}
2883	} else {
2884		if (hw->mac.type == e1000_82575)
2885			shift = 6;
2886	}
2887
2888	for (j = 0; j < (32 * 4); j++) {
2889		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2890		if (shift2)
2891			reta.bytes[j & 3] |= num_rx_queues << shift2;
2892		if ((j & 3) == 3)
2893			wr32(E1000_RETA(j >> 2), reta.dword);
2894	}
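	/* e.g. with num_rx_queues = 4 and shift = 0 the 128 redirection
	 * bytes cycle 0, 1, 2, 3, 0, ...; with VFs on 82576 (shift = 3,
	 * num_rx_queues = 2) they alternate between 0x00 and 0x08 */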
2895
2896	/*
2897	 * Disable raw packet checksumming so that RSS hash is placed in
2898	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2899	 * offloads as they are enabled by default
2900	 */
2901	rxcsum = rd32(E1000_RXCSUM);
2902	rxcsum |= E1000_RXCSUM_PCSD;
2903
2904	if (adapter->hw.mac.type >= e1000_82576)
2905		/* Enable Receive Checksum Offload for SCTP */
2906		rxcsum |= E1000_RXCSUM_CRCOFL;
2907
2908	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2909	wr32(E1000_RXCSUM, rxcsum);
2910
2911	/* If VMDq is enabled then we set the appropriate mode for that, else
2912	 * we default to RSS so that an RSS hash is calculated per packet even
2913	 * if we are only using one queue */
2914	if (adapter->vfs_allocated_count) {
2915		if (hw->mac.type > e1000_82575) {
2916			/* Set the default pool for the PF's first queue */
2917			u32 vtctl = rd32(E1000_VT_CTL);
2918			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2919				   E1000_VT_CTL_DISABLE_DEF_POOL);
2920			vtctl |= adapter->vfs_allocated_count <<
2921				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2922			wr32(E1000_VT_CTL, vtctl);
2923		}
2924		if (adapter->rss_queues > 1)
2925			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2926		else
2927			mrqc = E1000_MRQC_ENABLE_VMDQ;
2928	} else {
2929		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2930	}
2931	igb_vmm_control(adapter);
2932
2933	/*
2934	 * Generate RSS hash based on TCP port numbers and/or
2935	 * IPv4/v6 src and dst addresses since UDP cannot be
2936	 * hashed reliably due to IP fragmentation
2937	 */
2938	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2939		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2940		E1000_MRQC_RSS_FIELD_IPV6 |
2941		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2942		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2943
2944	wr32(E1000_MRQC, mrqc);
2945}
2946
2947/**
2948 * igb_setup_rctl - configure the receive control registers
2949 * @adapter: Board private structure
2950 **/
2951void igb_setup_rctl(struct igb_adapter *adapter)
2952{
2953	struct e1000_hw *hw = &adapter->hw;
2954	u32 rctl;
2955
2956	rctl = rd32(E1000_RCTL);
2957
2958	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2959	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2960
2961	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2962		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2963
2964	/*
2965	 * enable stripping of CRC. It's unlikely this will break BMC
2966	 * redirection as it did with e1000. Newer features require
2967	 * that the HW strips the CRC.
2968	 */
2969	rctl |= E1000_RCTL_SECRC;
2970
2971	/* disable store bad packets and clear size bits. */
2972	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2973
2974	/* enable LPE to prevent packets larger than max_frame_size */
2975	rctl |= E1000_RCTL_LPE;
2976
2977	/* disable queue 0 to prevent tail write w/o re-config */
2978	wr32(E1000_RXDCTL(0), 0);
2979
2980	/* Attention!!!  For SR-IOV PF driver operations you must enable
2981	 * queue drop for all VF and PF queues to prevent head of line blocking
2982	 * if an un-trusted VF does not provide descriptors to hardware.
2983	 */
2984	if (adapter->vfs_allocated_count) {
2985		/* set all queue drop enable bits */
2986		wr32(E1000_QDE, ALL_QUEUES);
2987	}
2988
2989	wr32(E1000_RCTL, rctl);
2990}
2991
2992static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2993                                   int vfn)
2994{
2995	struct e1000_hw *hw = &adapter->hw;
2996	u32 vmolr;
2997
2998	/* if this is a VF rather than the PF, check whether the VF has
2999	 * VLANs enabled and increase the size to allow for a vlan tag */
3000	if (vfn < adapter->vfs_allocated_count &&
3001	    adapter->vf_data[vfn].vlans_enabled)
3002		size += VLAN_TAG_SIZE;
3003
3004	vmolr = rd32(E1000_VMOLR(vfn));
3005	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3006	vmolr |= size | E1000_VMOLR_LPE;
3007	wr32(E1000_VMOLR(vfn), vmolr);
3008
3009	return 0;
3010}
3011
3012/**
3013 * igb_rlpml_set - set maximum receive packet size
3014 * @adapter: board private structure
3015 *
3016 * Configure maximum receivable packet size.
3017 **/
3018static void igb_rlpml_set(struct igb_adapter *adapter)
3019{
3020	u32 max_frame_size = adapter->max_frame_size;
3021	struct e1000_hw *hw = &adapter->hw;
3022	u16 pf_id = adapter->vfs_allocated_count;
3023
3024	if (pf_id) {
3025		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3026		/*
3027		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3028		 * to our max jumbo frame size, in case we need to enable
3029		 * jumbo frames on one of the rings later.
3030		 * This will not pass over-length frames into the default
3031		 * queue because it's gated by the VMOLR.RLPML.
3032		 */
3033		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3034	}
3035
3036	wr32(E1000_RLPML, max_frame_size);
3037}
3038
3039static inline void igb_set_vmolr(struct igb_adapter *adapter,
3040				 int vfn, bool aupe)
3041{
3042	struct e1000_hw *hw = &adapter->hw;
3043	u32 vmolr;
3044
3045	/*
3046	 * This register exists only on 82576 and newer, so on older
3047	 * hardware simply exit and do nothing
3048	 */
3049	if (hw->mac.type < e1000_82576)
3050		return;
3051
3052	vmolr = rd32(E1000_VMOLR(vfn));
3053	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3054	if (aupe)
3055		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3056	else
3057		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3058
3059	/* clear all bits that might not be set */
3060	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3061
3062	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3063		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3064	/*
3065	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3066	 * multicast packets
3067	 */
3068	if (vfn <= adapter->vfs_allocated_count)
3069		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3070
3071	wr32(E1000_VMOLR(vfn), vmolr);
3072}
3073
3074/**
3075 * igb_configure_rx_ring - Configure a receive ring after Reset
3076 * @adapter: board private structure
3077 * @ring: receive ring to be configured
3078 *
3079 * Configure the Rx unit of the MAC after a reset.
3080 **/
3081void igb_configure_rx_ring(struct igb_adapter *adapter,
3082                           struct igb_ring *ring)
3083{
3084	struct e1000_hw *hw = &adapter->hw;
3085	u64 rdba = ring->dma;
3086	int reg_idx = ring->reg_idx;
3087	u32 srrctl = 0, rxdctl = 0;
3088
3089	/* disable the queue */
3090	wr32(E1000_RXDCTL(reg_idx), 0);
3091
3092	/* Set DMA base address registers */
3093	wr32(E1000_RDBAL(reg_idx),
3094	     rdba & 0x00000000ffffffffULL);
3095	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3096	wr32(E1000_RDLEN(reg_idx),
3097	               ring->count * sizeof(union e1000_adv_rx_desc));
3098
3099	/* initialize head and tail */
3100	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3101	wr32(E1000_RDH(reg_idx), 0);
3102	writel(0, ring->tail);
3103
3104	/* set descriptor configuration */
3105	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3106#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3107	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3108#else
3109	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3110#endif
3111	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3112	if (hw->mac.type >= e1000_82580)
3113		srrctl |= E1000_SRRCTL_TIMESTAMP;
3114	/* Only set Drop Enable if we are supporting multiple queues */
3115	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3116		srrctl |= E1000_SRRCTL_DROP_EN;
3117
3118	wr32(E1000_SRRCTL(reg_idx), srrctl);
3119
3120	/* set filtering for VMDQ pools */
3121	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3122
3123	rxdctl |= IGB_RX_PTHRESH;
3124	rxdctl |= IGB_RX_HTHRESH << 8;
3125	rxdctl |= IGB_RX_WTHRESH << 16;
3126
3127	/* enable receive descriptor fetching */
3128	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3129	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3130}
3131
3132/**
3133 * igb_configure_rx - Configure receive Unit after Reset
3134 * @adapter: board private structure
3135 *
3136 * Configure the Rx unit of the MAC after a reset.
3137 **/
3138static void igb_configure_rx(struct igb_adapter *adapter)
3139{
3140	int i;
3141
3142	/* set UTA to appropriate mode */
3143	igb_set_uta(adapter);
3144
3145	/* set the correct pool for the PF default MAC address in entry 0 */
3146	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3147	                 adapter->vfs_allocated_count);
3148
3149	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3150	 * the Base and Length of the Rx Descriptor Ring */
3151	for (i = 0; i < adapter->num_rx_queues; i++)
3152		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3153}
3154
3155/**
3156 * igb_free_tx_resources - Free Tx Resources per Queue
3157 * @tx_ring: Tx descriptor ring for a specific queue
3158 *
3159 * Free all transmit software resources
3160 **/
3161void igb_free_tx_resources(struct igb_ring *tx_ring)
3162{
3163	igb_clean_tx_ring(tx_ring);
3164
3165	vfree(tx_ring->tx_buffer_info);
3166	tx_ring->tx_buffer_info = NULL;
3167
3168	/* if not set, then don't free */
3169	if (!tx_ring->desc)
3170		return;
3171
3172	dma_free_coherent(tx_ring->dev, tx_ring->size,
3173			  tx_ring->desc, tx_ring->dma);
3174
3175	tx_ring->desc = NULL;
3176}
3177
3178/**
3179 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3180 * @adapter: board private structure
3181 *
3182 * Free all transmit software resources
3183 **/
3184static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3185{
3186	int i;
3187
3188	for (i = 0; i < adapter->num_tx_queues; i++)
3189		igb_free_tx_resources(adapter->tx_ring[i]);
3190}
3191
3192void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3193				    struct igb_tx_buffer *tx_buffer)
3194{
3195	if (tx_buffer->skb) {
3196		dev_kfree_skb_any(tx_buffer->skb);
3197		if (tx_buffer->dma)
3198			dma_unmap_single(ring->dev,
3199					 tx_buffer->dma,
3200					 tx_buffer->length,
3201					 DMA_TO_DEVICE);
3202	} else if (tx_buffer->dma) {
3203		dma_unmap_page(ring->dev,
3204			       tx_buffer->dma,
3205			       tx_buffer->length,
3206			       DMA_TO_DEVICE);
3207	}
3208	tx_buffer->next_to_watch = NULL;
3209	tx_buffer->skb = NULL;
3210	tx_buffer->dma = 0;
3211	/* buffer_info must be completely set up in the transmit path */
3212}
3213
3214/**
3215 * igb_clean_tx_ring - Free Tx Buffers
3216 * @tx_ring: ring to be cleaned
3217 **/
3218static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3219{
3220	struct igb_tx_buffer *buffer_info;
3221	unsigned long size;
3222	u16 i;
3223
3224	if (!tx_ring->tx_buffer_info)
3225		return;
3226	/* Free all the Tx ring sk_buffs */
3227
3228	for (i = 0; i < tx_ring->count; i++) {
3229		buffer_info = &tx_ring->tx_buffer_info[i];
3230		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3231	}
3232
3233	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3234	memset(tx_ring->tx_buffer_info, 0, size);
3235
3236	/* Zero out the descriptor ring */
3237	memset(tx_ring->desc, 0, tx_ring->size);
3238
3239	tx_ring->next_to_use = 0;
3240	tx_ring->next_to_clean = 0;
3241}
3242
3243/**
3244 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3245 * @adapter: board private structure
3246 **/
3247static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3248{
3249	int i;
3250
3251	for (i = 0; i < adapter->num_tx_queues; i++)
3252		igb_clean_tx_ring(adapter->tx_ring[i]);
3253}
3254
3255/**
3256 * igb_free_rx_resources - Free Rx Resources
3257 * @rx_ring: ring to clean the resources from
3258 *
3259 * Free all receive software resources
3260 **/
3261void igb_free_rx_resources(struct igb_ring *rx_ring)
3262{
3263	igb_clean_rx_ring(rx_ring);
3264
3265	vfree(rx_ring->rx_buffer_info);
3266	rx_ring->rx_buffer_info = NULL;
3267
3268	/* if not set, then don't free */
3269	if (!rx_ring->desc)
3270		return;
3271
3272	dma_free_coherent(rx_ring->dev, rx_ring->size,
3273			  rx_ring->desc, rx_ring->dma);
3274
3275	rx_ring->desc = NULL;
3276}
3277
3278/**
3279 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3280 * @adapter: board private structure
3281 *
3282 * Free all receive software resources
3283 **/
3284static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3285{
3286	int i;
3287
3288	for (i = 0; i < adapter->num_rx_queues; i++)
3289		igb_free_rx_resources(adapter->rx_ring[i]);
3290}
3291
3292/**
3293 * igb_clean_rx_ring - Free Rx Buffers per Queue
3294 * @rx_ring: ring to free buffers from
3295 **/
3296static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3297{
3298	unsigned long size;
3299	u16 i;
3300
3301	if (!rx_ring->rx_buffer_info)
3302		return;
3303
3304	/* Free all the Rx ring sk_buffs */
3305	for (i = 0; i < rx_ring->count; i++) {
3306		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3307		if (buffer_info->dma) {
3308			dma_unmap_single(rx_ring->dev,
3309			                 buffer_info->dma,
3310					 IGB_RX_HDR_LEN,
3311					 DMA_FROM_DEVICE);
3312			buffer_info->dma = 0;
3313		}
3314
3315		if (buffer_info->skb) {
3316			dev_kfree_skb(buffer_info->skb);
3317			buffer_info->skb = NULL;
3318		}
3319		if (buffer_info->page_dma) {
3320			dma_unmap_page(rx_ring->dev,
3321			               buffer_info->page_dma,
3322				       PAGE_SIZE / 2,
3323				       DMA_FROM_DEVICE);
3324			buffer_info->page_dma = 0;
3325		}
3326		if (buffer_info->page) {
3327			put_page(buffer_info->page);
3328			buffer_info->page = NULL;
3329			buffer_info->page_offset = 0;
3330		}
3331	}
3332
3333	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3334	memset(rx_ring->rx_buffer_info, 0, size);
3335
3336	/* Zero out the descriptor ring */
3337	memset(rx_ring->desc, 0, rx_ring->size);
3338
3339	rx_ring->next_to_clean = 0;
3340	rx_ring->next_to_use = 0;
3341}
3342
3343/**
3344 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3345 * @adapter: board private structure
3346 **/
3347static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3348{
3349	int i;
3350
3351	for (i = 0; i < adapter->num_rx_queues; i++)
3352		igb_clean_rx_ring(adapter->rx_ring[i]);
3353}
3354
3355/**
3356 * igb_set_mac - Change the Ethernet Address of the NIC
3357 * @netdev: network interface device structure
3358 * @p: pointer to an address structure
3359 *
3360 * Returns 0 on success, negative on failure
3361 **/
3362static int igb_set_mac(struct net_device *netdev, void *p)
3363{
3364	struct igb_adapter *adapter = netdev_priv(netdev);
3365	struct e1000_hw *hw = &adapter->hw;
3366	struct sockaddr *addr = p;
3367
3368	if (!is_valid_ether_addr(addr->sa_data))
3369		return -EADDRNOTAVAIL;
3370
3371	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3372	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3373
3374	/* set the correct pool for the new PF MAC address in entry 0 */
3375	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3376	                 adapter->vfs_allocated_count);
3377
3378	return 0;
3379}
3380
3381/**
3382 * igb_write_mc_addr_list - write multicast addresses to MTA
3383 * @netdev: network interface device structure
3384 *
3385 * Writes multicast address list to the MTA hash table.
3386 * Returns: -ENOMEM on failure
3387 *                0 on no addresses written
3388 *                X on writing X addresses to MTA
3389 **/
3390static int igb_write_mc_addr_list(struct net_device *netdev)
3391{
3392	struct igb_adapter *adapter = netdev_priv(netdev);
3393	struct e1000_hw *hw = &adapter->hw;
3394	struct netdev_hw_addr *ha;
3395	u8  *mta_list;
3396	int i;
3397
3398	if (netdev_mc_empty(netdev)) {
3399		/* nothing to program, so clear mc list */
3400		igb_update_mc_addr_list(hw, NULL, 0);
3401		igb_restore_vf_multicasts(adapter);
3402		return 0;
3403	}
3404
3405	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3406	if (!mta_list)
3407		return -ENOMEM;
3408
3409	/* The shared function expects a packed array of only addresses. */
3410	i = 0;
3411	netdev_for_each_mc_addr(ha, netdev)
3412		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3413
3414	igb_update_mc_addr_list(hw, mta_list, i);
3415	kfree(mta_list);
3416
3417	return netdev_mc_count(netdev);
3418}
3419
3420/**
3421 * igb_write_uc_addr_list - write unicast addresses to RAR table
3422 * @netdev: network interface device structure
3423 *
3424 * Writes unicast address list to the RAR table.
3425 * Returns: -ENOMEM on failure/insufficient address space
3426 *                0 on no addresses written
3427 *                X on writing X addresses to the RAR table
3428 **/
3429static int igb_write_uc_addr_list(struct net_device *netdev)
3430{
3431	struct igb_adapter *adapter = netdev_priv(netdev);
3432	struct e1000_hw *hw = &adapter->hw;
3433	unsigned int vfn = adapter->vfs_allocated_count;
3434	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
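	/* the top of the RAR table holds one entry per VF MAC and entry 0
	 * holds the PF default MAC, hence (vfn + 1) entries are reserved */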
3435	int count = 0;
3436
3437	/* return -ENOMEM to indicate insufficient space for the addresses */
3438	if (netdev_uc_count(netdev) > rar_entries)
3439		return -ENOMEM;
3440
3441	if (!netdev_uc_empty(netdev) && rar_entries) {
3442		struct netdev_hw_addr *ha;
3443
3444		netdev_for_each_uc_addr(ha, netdev) {
3445			if (!rar_entries)
3446				break;
3447			igb_rar_set_qsel(adapter, ha->addr,
3448			                 rar_entries--,
3449			                 vfn);
3450			count++;
3451		}
3452	}
3453	/* zero out the remaining RAR entries not used above */
3454	for (; rar_entries > 0 ; rar_entries--) {
3455		wr32(E1000_RAH(rar_entries), 0);
3456		wr32(E1000_RAL(rar_entries), 0);
3457	}
3458	wrfl();
3459
3460	return count;
3461}
3462
3463/**
3464 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3465 * @netdev: network interface device structure
3466 *
3467 * The set_rx_mode entry point is called whenever the unicast or multicast
3468 * address lists or the network interface flags are updated.  This routine is
3469 * responsible for configuring the hardware for proper unicast, multicast,
3470 * promiscuous mode, and all-multi behavior.
3471 **/
3472static void igb_set_rx_mode(struct net_device *netdev)
3473{
3474	struct igb_adapter *adapter = netdev_priv(netdev);
3475	struct e1000_hw *hw = &adapter->hw;
3476	unsigned int vfn = adapter->vfs_allocated_count;
3477	u32 rctl, vmolr = 0;
3478	int count;
3479
3480	/* Check for Promiscuous and All Multicast modes */
3481	rctl = rd32(E1000_RCTL);
3482
3483	/* clear the affected bits */
3484	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3485
3486	if (netdev->flags & IFF_PROMISC) {
3487		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3488		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3489	} else {
3490		if (netdev->flags & IFF_ALLMULTI) {
3491			rctl |= E1000_RCTL_MPE;
3492			vmolr |= E1000_VMOLR_MPME;
3493		} else {
3494			/*
3495			 * Write addresses to the MTA; if the attempt fails,
3496			 * just turn on promiscuous mode so
3497			 * that we can at least receive multicast traffic
3498			 */
3499			count = igb_write_mc_addr_list(netdev);
3500			if (count < 0) {
3501				rctl |= E1000_RCTL_MPE;
3502				vmolr |= E1000_VMOLR_MPME;
3503			} else if (count) {
3504				vmolr |= E1000_VMOLR_ROMPE;
3505			}
3506		}
3507		/*
3508		 * Write addresses to available RAR registers; if there is not
3509		 * sufficient space to store all the addresses, enable
3510		 * unicast promiscuous mode
3511		 */
3512		count = igb_write_uc_addr_list(netdev);
3513		if (count < 0) {
3514			rctl |= E1000_RCTL_UPE;
3515			vmolr |= E1000_VMOLR_ROPE;
3516		}
3517		rctl |= E1000_RCTL_VFE;
3518	}
3519	wr32(E1000_RCTL, rctl);
3520
3521	/*
3522	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3523	 * the VMOLR to enable the appropriate modes.  Without this workaround
3524	 * we will have issues with VLAN tag stripping not being done for frames
3525	 * that are only arriving because we are the default pool
3526	 */
3527	if (hw->mac.type < e1000_82576)
3528		return;
3529
3530	vmolr |= rd32(E1000_VMOLR(vfn)) &
3531	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3532	wr32(E1000_VMOLR(vfn), vmolr);
3533	igb_restore_vf_multicasts(adapter);
3534}
3535
3536static void igb_check_wvbr(struct igb_adapter *adapter)
3537{
3538	struct e1000_hw *hw = &adapter->hw;
3539	u32 wvbr = 0;
3540
3541	switch (hw->mac.type) {
3542	case e1000_82576:
3543	case e1000_i350:
		wvbr = rd32(E1000_WVBR);
		if (!wvbr)
			return;
3546		break;
3547	default:
3548		break;
3549	}
3550
3551	adapter->wvbr |= wvbr;
3552}
3553
3554#define IGB_STAGGERED_QUEUE_OFFSET 8
3555
3556static void igb_spoof_check(struct igb_adapter *adapter)
3557{
3558	int j;
3559
3560	if (!adapter->wvbr)
3561		return;
3562
	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3564		if (adapter->wvbr & (1 << j) ||
3565		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3566			dev_warn(&adapter->pdev->dev,
3567				"Spoof event(s) detected on VF %d\n", j);
3568			adapter->wvbr &=
3569				~((1 << j) |
3570				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3571		}
3572	}
3573}
3574
3575/* Need to wait a few seconds after link up to get diagnostic information from
3576 * the phy */
3577static void igb_update_phy_info(unsigned long data)
3578{
3579	struct igb_adapter *adapter = (struct igb_adapter *) data;
3580	igb_get_phy_info(&adapter->hw);
3581}
3582
3583/**
3584 * igb_has_link - check shared code for link and determine up/down
3585 * @adapter: pointer to driver private info
3586 **/
3587bool igb_has_link(struct igb_adapter *adapter)
3588{
3589	struct e1000_hw *hw = &adapter->hw;
3590	bool link_active = false;
3591	s32 ret_val = 0;
3592
	/* get_link_status is set on LSC (link status change) interrupt or
	 * rx sequence error interrupt.  It then stays set, and link_active
	 * stays false, until e1000_check_for_link establishes link.
	 * This applies to copper adapters ONLY.
	 */
3598	switch (hw->phy.media_type) {
3599	case e1000_media_type_copper:
3600		if (hw->mac.get_link_status) {
3601			ret_val = hw->mac.ops.check_for_link(hw);
3602			link_active = !hw->mac.get_link_status;
3603		} else {
3604			link_active = true;
3605		}
3606		break;
3607	case e1000_media_type_internal_serdes:
3608		ret_val = hw->mac.ops.check_for_link(hw);
3609		link_active = hw->mac.serdes_has_link;
3610		break;
3611	default:
3612	case e1000_media_type_unknown:
3613		break;
3614	}
3615
3616	return link_active;
3617}
3618
3619static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3620{
3621	bool ret = false;
3622	u32 ctrl_ext, thstat;
3623
3624	/* check for thermal sensor event on i350, copper only */
3625	if (hw->mac.type == e1000_i350) {
3626		thstat = rd32(E1000_THSTAT);
3627		ctrl_ext = rd32(E1000_CTRL_EXT);
3628
3629		if ((hw->phy.media_type == e1000_media_type_copper) &&
3630		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3631			ret = !!(thstat & event);
3632		}
3633	}
3634
3635	return ret;
3636}
3637
3638/**
3639 * igb_watchdog - Timer Call-back
3640 * @data: pointer to adapter cast into an unsigned long
3641 **/
3642static void igb_watchdog(unsigned long data)
3643{
3644	struct igb_adapter *adapter = (struct igb_adapter *)data;
3645	/* Do the rest outside of interrupt context */
3646	schedule_work(&adapter->watchdog_task);
3647}
3648
3649static void igb_watchdog_task(struct work_struct *work)
3650{
3651	struct igb_adapter *adapter = container_of(work,
3652	                                           struct igb_adapter,
	                                           watchdog_task);
3654	struct e1000_hw *hw = &adapter->hw;
3655	struct net_device *netdev = adapter->netdev;
3656	u32 link;
3657	int i;
3658
3659	link = igb_has_link(adapter);
3660	if (link) {
3661		if (!netif_carrier_ok(netdev)) {
3662			u32 ctrl;
3663			hw->mac.ops.get_speed_and_duplex(hw,
3664			                                 &adapter->link_speed,
3665			                                 &adapter->link_duplex);
3666
3667			ctrl = rd32(E1000_CTRL);
			/* Link status message must follow this format */
3669			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3670				 "Flow Control: %s\n",
3671			       netdev->name,
3672			       adapter->link_speed,
3673			       adapter->link_duplex == FULL_DUPLEX ?
3674				 "Full Duplex" : "Half Duplex",
3675			       ((ctrl & E1000_CTRL_TFCE) &&
3676			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3677			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3678			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3679
3680			/* check for thermal sensor event */
3681			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3682				printk(KERN_INFO "igb: %s The network adapter "
3683						 "link speed was downshifted "
3684						 "because it overheated.\n",
3685						 netdev->name);
3686			}
3687
3688			/* adjust timeout factor according to speed/duplex */
3689			adapter->tx_timeout_factor = 1;
3690			switch (adapter->link_speed) {
3691			case SPEED_10:
3692				adapter->tx_timeout_factor = 14;
3693				break;
3694			case SPEED_100:
3695				/* maybe add some timeout factor ? */
3696				break;
3697			}
3698
3699			netif_carrier_on(netdev);
3700
3701			igb_ping_all_vfs(adapter);
3702			igb_check_vf_rate_limit(adapter);
3703
3704			/* link state has changed, schedule phy info update */
3705			if (!test_bit(__IGB_DOWN, &adapter->state))
3706				mod_timer(&adapter->phy_info_timer,
3707					  round_jiffies(jiffies + 2 * HZ));
3708		}
3709	} else {
3710		if (netif_carrier_ok(netdev)) {
3711			adapter->link_speed = 0;
3712			adapter->link_duplex = 0;
3713
3714			/* check for thermal sensor event */
3715			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3716				printk(KERN_ERR "igb: %s The network adapter "
3717						"was stopped because it "
3718						"overheated.\n",
3719						netdev->name);
3720			}
3721
			/* Link status message must follow this format */
3723			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3724			       netdev->name);
3725			netif_carrier_off(netdev);
3726
3727			igb_ping_all_vfs(adapter);
3728
3729			/* link state has changed, schedule phy info update */
3730			if (!test_bit(__IGB_DOWN, &adapter->state))
3731				mod_timer(&adapter->phy_info_timer,
3732					  round_jiffies(jiffies + 2 * HZ));
3733		}
3734	}
3735
3736	spin_lock(&adapter->stats64_lock);
3737	igb_update_stats(adapter, &adapter->stats64);
3738	spin_unlock(&adapter->stats64_lock);
3739
3740	for (i = 0; i < adapter->num_tx_queues; i++) {
3741		struct igb_ring *tx_ring = adapter->tx_ring[i];
3742		if (!netif_carrier_ok(netdev)) {
3743			/* We've lost link, so the controller stops DMA,
3744			 * but we've got queued Tx work that's never going
3745			 * to get done, so reset controller to flush Tx.
3746			 * (Do the reset outside of interrupt context). */
3747			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3748				adapter->tx_timeout_count++;
3749				schedule_work(&adapter->reset_task);
3750				/* return immediately since reset is imminent */
3751				return;
3752			}
3753		}
3754
3755		/* Force detection of hung controller every watchdog period */
3756		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3757	}
3758
3759	/* Cause software interrupt to ensure rx ring is cleaned */
3760	if (adapter->msix_entries) {
3761		u32 eics = 0;
3762		for (i = 0; i < adapter->num_q_vectors; i++)
3763			eics |= adapter->q_vector[i]->eims_value;
3764		wr32(E1000_EICS, eics);
3765	} else {
3766		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3767	}
3768
3769	igb_spoof_check(adapter);
3770
3771	/* Reset the timer */
3772	if (!test_bit(__IGB_DOWN, &adapter->state))
3773		mod_timer(&adapter->watchdog_timer,
3774			  round_jiffies(jiffies + 2 * HZ));
3775}
3776
3777enum latency_range {
3778	lowest_latency = 0,
3779	low_latency = 1,
3780	bulk_latency = 2,
3781	latency_invalid = 255
3782};
3783
3784/**
3785 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3786 *
 *      Stores a new ITR value based strictly on packet size.  This
3788 *      algorithm is less sophisticated than that used in igb_update_itr,
3789 *      due to the difficulty of synchronizing statistics across multiple
3790 *      receive rings.  The divisors and thresholds used by this function
3791 *      were determined based on theoretical maximum wire speed and testing
3792 *      data, in order to minimize response time while increasing bulk
3793 *      throughput.
3794 *      This functionality is controlled by the InterruptThrottleRate module
3795 *      parameter (see igb_param.c)
3796 *      NOTE:  This function is called only when operating in a multiqueue
3797 *             receive environment.
3798 * @q_vector: pointer to q_vector
3799 **/
3800static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3801{
3802	int new_val = q_vector->itr_val;
3803	int avg_wire_size = 0;
3804	struct igb_adapter *adapter = q_vector->adapter;
3805	unsigned int packets;
3806
3807	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3808	 * ints/sec - ITR timer value of 120 ticks.
3809	 */
3810	if (adapter->link_speed != SPEED_1000) {
3811		new_val = IGB_4K_ITR;
3812		goto set_itr_val;
3813	}
3814
3815	packets = q_vector->rx.total_packets;
3816	if (packets)
3817		avg_wire_size = q_vector->rx.total_bytes / packets;
3818
3819	packets = q_vector->tx.total_packets;
3820	if (packets)
3821		avg_wire_size = max_t(u32, avg_wire_size,
3822				      q_vector->tx.total_bytes / packets);
3823
3824	/* if avg_wire_size isn't set no work was done */
3825	if (!avg_wire_size)
3826		goto clear_counts;
3827
3828	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3829	avg_wire_size += 24;
3830
3831	/* Don't starve jumbo frames */
3832	avg_wire_size = min(avg_wire_size, 3000);
3833
3834	/* Give a little boost to mid-size frames */
3835	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3836		new_val = avg_wire_size / 3;
3837	else
3838		new_val = avg_wire_size / 2;
3839
3840	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3841	if (new_val < IGB_20K_ITR &&
3842	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3843	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3844		new_val = IGB_20K_ITR;
3845
3846set_itr_val:
3847	if (new_val != q_vector->itr_val) {
3848		q_vector->itr_val = new_val;
3849		q_vector->set_itr = 1;
3850	}
3851clear_counts:
3852	q_vector->rx.total_bytes = 0;
3853	q_vector->rx.total_packets = 0;
3854	q_vector->tx.total_bytes = 0;
3855	q_vector->tx.total_packets = 0;
3856}
3857
3858/**
3859 * igb_update_itr - update the dynamic ITR value based on statistics
3860 *      Stores a new ITR value based on packets and byte
3861 *      counts during the last interrupt.  The advantage of per interrupt
3862 *      computation is faster updates and more accurate ITR for the current
3863 *      traffic pattern.  Constants in this function were computed
3864 *      based on theoretical maximum wire speed and thresholds were set based
3865 *      on testing data as well as attempting to minimize response time
3866 *      while increasing bulk throughput.
 *      This functionality is controlled by the InterruptThrottleRate module
3868 *      parameter (see igb_param.c)
3869 *      NOTE:  These calculations are only valid when operating in a single-
3870 *             queue environment.
3871 * @q_vector: pointer to q_vector
3872 * @ring_container: ring info to update the itr for
3873 **/
3874static void igb_update_itr(struct igb_q_vector *q_vector,
3875			   struct igb_ring_container *ring_container)
3876{
3877	unsigned int packets = ring_container->total_packets;
3878	unsigned int bytes = ring_container->total_bytes;
3879	u8 itrval = ring_container->itr;
3880
3881	/* no packets, exit with status unchanged */
3882	if (packets == 0)
3883		return;
3884
3885	switch (itrval) {
3886	case lowest_latency:
3887		/* handle TSO and jumbo frames */
3888		if (bytes/packets > 8000)
3889			itrval = bulk_latency;
3890		else if ((packets < 5) && (bytes > 512))
3891			itrval = low_latency;
3892		break;
3893	case low_latency:  /* 50 usec aka 20000 ints/s */
3894		if (bytes > 10000) {
3895			/* this if handles the TSO accounting */
3896			if (bytes/packets > 8000) {
3897				itrval = bulk_latency;
3898			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3899				itrval = bulk_latency;
			} else if (packets > 35) {
3901				itrval = lowest_latency;
3902			}
3903		} else if (bytes/packets > 2000) {
3904			itrval = bulk_latency;
3905		} else if (packets <= 2 && bytes < 512) {
3906			itrval = lowest_latency;
3907		}
3908		break;
3909	case bulk_latency: /* 250 usec aka 4000 ints/s */
3910		if (bytes > 25000) {
3911			if (packets > 35)
3912				itrval = low_latency;
3913		} else if (bytes < 1500) {
3914			itrval = low_latency;
3915		}
3916		break;
3917	}
3918
3919	/* clear work counters since we have the values we need */
3920	ring_container->total_bytes = 0;
3921	ring_container->total_packets = 0;
3922
3923	/* write updated itr to ring container */
3924	ring_container->itr = itrval;
3925}
3926
3927static void igb_set_itr(struct igb_q_vector *q_vector)
3928{
3929	struct igb_adapter *adapter = q_vector->adapter;
3930	u32 new_itr = q_vector->itr_val;
3931	u8 current_itr = 0;
3932
3933	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3934	if (adapter->link_speed != SPEED_1000) {
3935		current_itr = 0;
3936		new_itr = IGB_4K_ITR;
3937		goto set_itr_now;
3938	}
3939
3940	igb_update_itr(q_vector, &q_vector->tx);
3941	igb_update_itr(q_vector, &q_vector->rx);
3942
3943	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3944
3945	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3946	if (current_itr == lowest_latency &&
3947	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3948	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3949		current_itr = low_latency;
3950
3951	switch (current_itr) {
3952	/* counts and packets in update_itr are dependent on these numbers */
3953	case lowest_latency:
3954		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3955		break;
3956	case low_latency:
3957		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3958		break;
3959	case bulk_latency:
3960		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3961		break;
3962	default:
3963		break;
3964	}
3965
3966set_itr_now:
3967	if (new_itr != q_vector->itr_val) {
3968		/* this attempts to bias the interrupt rate towards Bulk
3969		 * by adding intermediate steps when interrupt rate is
3970		 * increasing */
3971		new_itr = new_itr > q_vector->itr_val ?
3972		             max((new_itr * q_vector->itr_val) /
3973		                 (new_itr + (q_vector->itr_val >> 2)),
3974				 new_itr) :
3975			     new_itr;
3976		/* Don't write the value here; it resets the adapter's
3977		 * internal timer, and causes us to delay far longer than
3978		 * we should between interrupts.  Instead, we write the ITR
3979		 * value at the beginning of the next interrupt so the timing
3980		 * ends up being correct.
3981		 */
3982		q_vector->itr_val = new_itr;
3983		q_vector->set_itr = 1;
3984	}
3985}
3986
3987void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3988		     u32 type_tucmd, u32 mss_l4len_idx)
3989{
3990	struct e1000_adv_tx_context_desc *context_desc;
3991	u16 i = tx_ring->next_to_use;
3992
3993	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3994
3995	i++;
3996	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3997
3998	/* set bits to identify this as an advanced context descriptor */
3999	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4000
4001	/* For 82575, context index must be unique per ring. */
4002	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4003		mss_l4len_idx |= tx_ring->reg_idx << 4;
4004
4005	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4006	context_desc->seqnum_seed	= 0;
4007	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4008	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4009}
4010
4011static int igb_tso(struct igb_ring *tx_ring,
4012		   struct igb_tx_buffer *first,
4013		   u8 *hdr_len)
4014{
4015	struct sk_buff *skb = first->skb;
4016	u32 vlan_macip_lens, type_tucmd;
4017	u32 mss_l4len_idx, l4len;
4018
4019	if (!skb_is_gso(skb))
4020		return 0;
4021
4022	if (skb_header_cloned(skb)) {
4023		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4024		if (err)
4025			return err;
4026	}
4027
4028	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4029	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4030
4031	if (first->protocol == __constant_htons(ETH_P_IP)) {
4032		struct iphdr *iph = ip_hdr(skb);
4033		iph->tot_len = 0;
4034		iph->check = 0;
4035		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4036							 iph->daddr, 0,
4037							 IPPROTO_TCP,
4038							 0);
4039		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4040		first->tx_flags |= IGB_TX_FLAGS_TSO |
4041				   IGB_TX_FLAGS_CSUM |
4042				   IGB_TX_FLAGS_IPV4;
4043	} else if (skb_is_gso_v6(skb)) {
4044		ipv6_hdr(skb)->payload_len = 0;
4045		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4046						       &ipv6_hdr(skb)->daddr,
4047						       0, IPPROTO_TCP, 0);
4048		first->tx_flags |= IGB_TX_FLAGS_TSO |
4049				   IGB_TX_FLAGS_CSUM;
4050	}
4051
4052	/* compute header lengths */
4053	l4len = tcp_hdrlen(skb);
4054	*hdr_len = skb_transport_offset(skb) + l4len;
4055
4056	/* update gso size and bytecount with header size */
4057	first->gso_segs = skb_shinfo(skb)->gso_segs;
4058	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4059
4060	/* MSS L4LEN IDX */
4061	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4062	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4063
4064	/* VLAN MACLEN IPLEN */
4065	vlan_macip_lens = skb_network_header_len(skb);
4066	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4067	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4068
4069	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4070
4071	return 1;
4072}
4073
4074static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4075{
4076	struct sk_buff *skb = first->skb;
4077	u32 vlan_macip_lens = 0;
4078	u32 mss_l4len_idx = 0;
4079	u32 type_tucmd = 0;
4080
4081	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4082		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4083			return;
4084	} else {
4085		u8 l4_hdr = 0;
4086		switch (first->protocol) {
4087		case __constant_htons(ETH_P_IP):
4088			vlan_macip_lens |= skb_network_header_len(skb);
4089			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4090			l4_hdr = ip_hdr(skb)->protocol;
4091			break;
4092		case __constant_htons(ETH_P_IPV6):
4093			vlan_macip_lens |= skb_network_header_len(skb);
4094			l4_hdr = ipv6_hdr(skb)->nexthdr;
4095			break;
4096		default:
4097			if (unlikely(net_ratelimit())) {
4098				dev_warn(tx_ring->dev,
4099				 "partial checksum but proto=%x!\n",
4100				 first->protocol);
4101			}
4102			break;
4103		}
4104
4105		switch (l4_hdr) {
4106		case IPPROTO_TCP:
4107			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4108			mss_l4len_idx = tcp_hdrlen(skb) <<
4109					E1000_ADVTXD_L4LEN_SHIFT;
4110			break;
4111		case IPPROTO_SCTP:
4112			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4113			mss_l4len_idx = sizeof(struct sctphdr) <<
4114					E1000_ADVTXD_L4LEN_SHIFT;
4115			break;
4116		case IPPROTO_UDP:
4117			mss_l4len_idx = sizeof(struct udphdr) <<
4118					E1000_ADVTXD_L4LEN_SHIFT;
4119			break;
4120		default:
4121			if (unlikely(net_ratelimit())) {
4122				dev_warn(tx_ring->dev,
4123				 "partial checksum but l4 proto=%x!\n",
4124				 l4_hdr);
4125			}
4126			break;
4127		}
4128
4129		/* update TX checksum flag */
4130		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4131	}
4132
4133	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4134	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4135
4136	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4137}
4138
4139static __le32 igb_tx_cmd_type(u32 tx_flags)
4140{
4141	/* set type for advanced descriptor with frame checksum insertion */
4142	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4143				      E1000_ADVTXD_DCMD_IFCS |
4144				      E1000_ADVTXD_DCMD_DEXT);
4145
4146	/* set HW vlan bit if vlan is present */
4147	if (tx_flags & IGB_TX_FLAGS_VLAN)
4148		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4149
4150	/* set timestamp bit if present */
4151	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4152		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4153
4154	/* set segmentation bits for TSO */
4155	if (tx_flags & IGB_TX_FLAGS_TSO)
4156		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4157
4158	return cmd_type;
4159}
4160
4161static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4162				 union e1000_adv_tx_desc *tx_desc,
4163				 u32 tx_flags, unsigned int paylen)
4164{
4165	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4166
4167	/* 82575 requires a unique index per ring if any offload is enabled */
4168	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4169	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4170		olinfo_status |= tx_ring->reg_idx << 4;
4171
4172	/* insert L4 checksum */
4173	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4174		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4175
4176		/* insert IPv4 checksum */
4177		if (tx_flags & IGB_TX_FLAGS_IPV4)
4178			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4179	}
4180
4181	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4182}
4183
4184/*
4185 * The largest size we can write to the descriptor is 65535.  In order to
4186 * maintain a power of two alignment we have to limit ourselves to 32K.
4187 */
4188#define IGB_MAX_TXD_PWR	15
4189#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
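
/*
 * Example (editor's note): a 45000-byte contiguous mapping does not fit
 * in one descriptor under the 32K limit above, so igb_tx_map() below
 * emits one descriptor for the first 32768 bytes and a second for the
 * remaining 12232 bytes.
 */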
4190
4191static void igb_tx_map(struct igb_ring *tx_ring,
4192		       struct igb_tx_buffer *first,
4193		       const u8 hdr_len)
4194{
4195	struct sk_buff *skb = first->skb;
4196	struct igb_tx_buffer *tx_buffer_info;
4197	union e1000_adv_tx_desc *tx_desc;
4198	dma_addr_t dma;
4199	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4200	unsigned int data_len = skb->data_len;
4201	unsigned int size = skb_headlen(skb);
4202	unsigned int paylen = skb->len - hdr_len;
4203	__le32 cmd_type;
4204	u32 tx_flags = first->tx_flags;
4205	u16 i = tx_ring->next_to_use;
4206
4207	tx_desc = IGB_TX_DESC(tx_ring, i);
4208
4209	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4210	cmd_type = igb_tx_cmd_type(tx_flags);
4211
4212	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4213	if (dma_mapping_error(tx_ring->dev, dma))
4214		goto dma_error;
4215
4216	/* record length, and DMA address */
4217	first->length = size;
4218	first->dma = dma;
4219	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4220
4221	for (;;) {
4222		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4223			tx_desc->read.cmd_type_len =
4224				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4225
4226			i++;
4227			tx_desc++;
4228			if (i == tx_ring->count) {
4229				tx_desc = IGB_TX_DESC(tx_ring, 0);
4230				i = 0;
4231			}
4232
4233			dma += IGB_MAX_DATA_PER_TXD;
4234			size -= IGB_MAX_DATA_PER_TXD;
4235
4236			tx_desc->read.olinfo_status = 0;
4237			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4238		}
4239
4240		if (likely(!data_len))
4241			break;
4242
4243		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4244
4245		i++;
4246		tx_desc++;
4247		if (i == tx_ring->count) {
4248			tx_desc = IGB_TX_DESC(tx_ring, 0);
4249			i = 0;
4250		}
4251
4252		size = frag->size;
4253		data_len -= size;
4254
4255		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4256				   size, DMA_TO_DEVICE);
4257		if (dma_mapping_error(tx_ring->dev, dma))
4258			goto dma_error;
4259
4260		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4261		tx_buffer_info->length = size;
4262		tx_buffer_info->dma = dma;
4263
4264		tx_desc->read.olinfo_status = 0;
4265		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4266
4267		frag++;
4268	}
4269
4270	/* write last descriptor with RS and EOP bits */
4271	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4272	tx_desc->read.cmd_type_len = cmd_type;
4273
4274	/* set the timestamp */
4275	first->time_stamp = jiffies;
4276
4277	/*
4278	 * Force memory writes to complete before letting h/w know there
4279	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4280	 * memory model archs, such as IA-64).
4281	 *
4282	 * We also need this memory barrier to make certain all of the
4283	 * status bits have been updated before next_to_watch is written.
4284	 */
4285	wmb();
4286
4287	/* set next_to_watch value indicating a packet is present */
4288	first->next_to_watch = tx_desc;
4289
4290	i++;
4291	if (i == tx_ring->count)
4292		i = 0;
4293
4294	tx_ring->next_to_use = i;
4295
4296	writel(i, tx_ring->tail);
4297
	/* we need this if more than one processor can write to our tail
	 * at a time; it synchronizes IO on IA64/Altix systems */
4300	mmiowb();
4301
4302	return;
4303
4304dma_error:
4305	dev_err(tx_ring->dev, "TX DMA map failed\n");
4306
4307	/* clear dma mappings for failed tx_buffer_info map */
4308	for (;;) {
4309		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4310		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4311		if (tx_buffer_info == first)
4312			break;
4313		if (i == 0)
4314			i = tx_ring->count;
4315		i--;
4316	}
4317
4318	tx_ring->next_to_use = i;
4319}
4320
4321static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4322{
4323	struct net_device *netdev = tx_ring->netdev;
4324
4325	netif_stop_subqueue(netdev, tx_ring->queue_index);
4326
4327	/* Herbert's original patch had:
4328	 *  smp_mb__after_netif_stop_queue();
4329	 * but since that doesn't exist yet, just open code it. */
4330	smp_mb();
4331
	/* We need to check again in case another CPU has just
	 * made room available. */
4334	if (igb_desc_unused(tx_ring) < size)
4335		return -EBUSY;
4336
4337	/* A reprieve! */
4338	netif_wake_subqueue(netdev, tx_ring->queue_index);
4339
4340	u64_stats_update_begin(&tx_ring->tx_syncp2);
4341	tx_ring->tx_stats.restart_queue2++;
4342	u64_stats_update_end(&tx_ring->tx_syncp2);
4343
4344	return 0;
4345}
4346
4347static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4348{
4349	if (igb_desc_unused(tx_ring) >= size)
4350		return 0;
4351	return __igb_maybe_stop_tx(tx_ring, size);
4352}
4353
4354netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4355				struct igb_ring *tx_ring)
4356{
4357	struct igb_tx_buffer *first;
4358	int tso;
4359	u32 tx_flags = 0;
4360	__be16 protocol = vlan_get_protocol(skb);
4361	u8 hdr_len = 0;
4362
4363	/* need: 1 descriptor per page,
4364	 *       + 2 desc gap to keep tail from touching head,
4365	 *       + 1 desc for skb->data,
4366	 *       + 1 desc for context descriptor,
4367	 * otherwise try next time */
4368	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4369		/* this is a hard error */
4370		return NETDEV_TX_BUSY;
4371	}
4372
4373	/* record the location of the first descriptor for this packet */
4374	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4375	first->skb = skb;
4376	first->bytecount = skb->len;
4377	first->gso_segs = 1;
4378
4379	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4380		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4381		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4382	}
4383
4384	if (vlan_tx_tag_present(skb)) {
4385		tx_flags |= IGB_TX_FLAGS_VLAN;
4386		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4387	}
4388
4389	/* record initial flags and protocol */
4390	first->tx_flags = tx_flags;
4391	first->protocol = protocol;
4392
4393	tso = igb_tso(tx_ring, first, &hdr_len);
4394	if (tso < 0)
4395		goto out_drop;
4396	else if (!tso)
4397		igb_tx_csum(tx_ring, first);
4398
4399	igb_tx_map(tx_ring, first, hdr_len);
4400
4401	/* Make sure there is space in the ring for the next send. */
4402	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4403
4404	return NETDEV_TX_OK;
4405
4406out_drop:
4407	igb_unmap_and_free_tx_resource(tx_ring, first);
4408
4409	return NETDEV_TX_OK;
4410}
4411
4412static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4413						    struct sk_buff *skb)
4414{
4415	unsigned int r_idx = skb->queue_mapping;
4416
4417	if (r_idx >= adapter->num_tx_queues)
4418		r_idx = r_idx % adapter->num_tx_queues;
4419
4420	return adapter->tx_ring[r_idx];
4421}
4422
4423static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4424				  struct net_device *netdev)
4425{
4426	struct igb_adapter *adapter = netdev_priv(netdev);
4427
4428	if (test_bit(__IGB_DOWN, &adapter->state)) {
4429		dev_kfree_skb_any(skb);
4430		return NETDEV_TX_OK;
4431	}
4432
4433	if (skb->len <= 0) {
4434		dev_kfree_skb_any(skb);
4435		return NETDEV_TX_OK;
4436	}
4437
4438	/*
4439	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4440	 * in order to meet this minimum size requirement.
4441	 */
4442	if (skb->len < 17) {
4443		if (skb_padto(skb, 17))
4444			return NETDEV_TX_OK;
4445		skb->len = 17;
4446	}
4447
4448	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4449}
4450
4451/**
4452 * igb_tx_timeout - Respond to a Tx Hang
4453 * @netdev: network interface device structure
4454 **/
4455static void igb_tx_timeout(struct net_device *netdev)
4456{
4457	struct igb_adapter *adapter = netdev_priv(netdev);
4458	struct e1000_hw *hw = &adapter->hw;
4459
4460	/* Do the reset outside of interrupt context */
4461	adapter->tx_timeout_count++;
4462
4463	if (hw->mac.type >= e1000_82580)
4464		hw->dev_spec._82575.global_device_reset = true;
4465
4466	schedule_work(&adapter->reset_task);
4467	wr32(E1000_EICS,
4468	     (adapter->eims_enable_mask & ~adapter->eims_other));
4469}
4470
4471static void igb_reset_task(struct work_struct *work)
4472{
4473	struct igb_adapter *adapter;
4474	adapter = container_of(work, struct igb_adapter, reset_task);
4475
4476	igb_dump(adapter);
4477	netdev_err(adapter->netdev, "Reset adapter\n");
4478	igb_reinit_locked(adapter);
4479}
4480
4481/**
4482 * igb_get_stats64 - Get System Network Statistics
4483 * @netdev: network interface device structure
4484 * @stats: rtnl_link_stats64 pointer
4485 *
4486 **/
4487static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4488						 struct rtnl_link_stats64 *stats)
4489{
4490	struct igb_adapter *adapter = netdev_priv(netdev);
4491
4492	spin_lock(&adapter->stats64_lock);
4493	igb_update_stats(adapter, &adapter->stats64);
4494	memcpy(stats, &adapter->stats64, sizeof(*stats));
4495	spin_unlock(&adapter->stats64_lock);
4496
4497	return stats;
4498}
4499
4500/**
4501 * igb_change_mtu - Change the Maximum Transfer Unit
4502 * @netdev: network interface device structure
4503 * @new_mtu: new value for maximum frame size
4504 *
4505 * Returns 0 on success, negative on failure
4506 **/
4507static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4508{
4509	struct igb_adapter *adapter = netdev_priv(netdev);
4510	struct pci_dev *pdev = adapter->pdev;
4511	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4512
4513	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4514		dev_err(&pdev->dev, "Invalid MTU setting\n");
4515		return -EINVAL;
4516	}
4517
4518#define MAX_STD_JUMBO_FRAME_SIZE 9238
4519	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4520		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4521		return -EINVAL;
4522	}
4523
4524	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4525		msleep(1);
4526
4527	/* igb_down has a dependency on max_frame_size */
4528	adapter->max_frame_size = max_frame;
4529
4530	if (netif_running(netdev))
4531		igb_down(adapter);
4532
4533	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4534		 netdev->mtu, new_mtu);
4535	netdev->mtu = new_mtu;
4536
4537	if (netif_running(netdev))
4538		igb_up(adapter);
4539	else
4540		igb_reset(adapter);
4541
4542	clear_bit(__IGB_RESETTING, &adapter->state);
4543
4544	return 0;
4545}
4546
4547/**
4548 * igb_update_stats - Update the board statistics counters
4549 * @adapter: board private structure
4550 **/
4551
4552void igb_update_stats(struct igb_adapter *adapter,
4553		      struct rtnl_link_stats64 *net_stats)
4554{
4555	struct e1000_hw *hw = &adapter->hw;
4556	struct pci_dev *pdev = adapter->pdev;
4557	u32 reg, mpc;
4558	u16 phy_tmp;
4559	int i;
4560	u64 bytes, packets;
4561	unsigned int start;
4562	u64 _bytes, _packets;
4563
4564#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4565
4566	/*
4567	 * Prevent stats update while adapter is being reset, or if the pci
4568	 * connection is down.
4569	 */
4570	if (adapter->link_speed == 0)
4571		return;
4572	if (pci_channel_offline(pdev))
4573		return;
4574
4575	bytes = 0;
4576	packets = 0;
4577	for (i = 0; i < adapter->num_rx_queues; i++) {
4578		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4579		struct igb_ring *ring = adapter->rx_ring[i];
4580
4581		ring->rx_stats.drops += rqdpc_tmp;
4582		net_stats->rx_fifo_errors += rqdpc_tmp;
4583
4584		do {
4585			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4586			_bytes = ring->rx_stats.bytes;
4587			_packets = ring->rx_stats.packets;
4588		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4589		bytes += _bytes;
4590		packets += _packets;
4591	}
4592
4593	net_stats->rx_bytes = bytes;
4594	net_stats->rx_packets = packets;
4595
4596	bytes = 0;
4597	packets = 0;
4598	for (i = 0; i < adapter->num_tx_queues; i++) {
4599		struct igb_ring *ring = adapter->tx_ring[i];
4600		do {
4601			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4602			_bytes = ring->tx_stats.bytes;
4603			_packets = ring->tx_stats.packets;
4604		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4605		bytes += _bytes;
4606		packets += _packets;
4607	}
4608	net_stats->tx_bytes = bytes;
4609	net_stats->tx_packets = packets;
4610
4611	/* read stats registers */
4612	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4613	adapter->stats.gprc += rd32(E1000_GPRC);
4614	adapter->stats.gorc += rd32(E1000_GORCL);
4615	rd32(E1000_GORCH); /* clear GORCL */
4616	adapter->stats.bprc += rd32(E1000_BPRC);
4617	adapter->stats.mprc += rd32(E1000_MPRC);
4618	adapter->stats.roc += rd32(E1000_ROC);
4619
4620	adapter->stats.prc64 += rd32(E1000_PRC64);
4621	adapter->stats.prc127 += rd32(E1000_PRC127);
4622	adapter->stats.prc255 += rd32(E1000_PRC255);
4623	adapter->stats.prc511 += rd32(E1000_PRC511);
4624	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4625	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4626	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4627	adapter->stats.sec += rd32(E1000_SEC);
4628
4629	mpc = rd32(E1000_MPC);
4630	adapter->stats.mpc += mpc;
4631	net_stats->rx_fifo_errors += mpc;
4632	adapter->stats.scc += rd32(E1000_SCC);
4633	adapter->stats.ecol += rd32(E1000_ECOL);
4634	adapter->stats.mcc += rd32(E1000_MCC);
4635	adapter->stats.latecol += rd32(E1000_LATECOL);
4636	adapter->stats.dc += rd32(E1000_DC);
4637	adapter->stats.rlec += rd32(E1000_RLEC);
4638	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4639	adapter->stats.xontxc += rd32(E1000_XONTXC);
4640	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4641	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4642	adapter->stats.fcruc += rd32(E1000_FCRUC);
4643	adapter->stats.gptc += rd32(E1000_GPTC);
4644	adapter->stats.gotc += rd32(E1000_GOTCL);
4645	rd32(E1000_GOTCH); /* clear GOTCL */
4646	adapter->stats.rnbc += rd32(E1000_RNBC);
4647	adapter->stats.ruc += rd32(E1000_RUC);
4648	adapter->stats.rfc += rd32(E1000_RFC);
4649	adapter->stats.rjc += rd32(E1000_RJC);
4650	adapter->stats.tor += rd32(E1000_TORH);
4651	adapter->stats.tot += rd32(E1000_TOTH);
4652	adapter->stats.tpr += rd32(E1000_TPR);
4653
4654	adapter->stats.ptc64 += rd32(E1000_PTC64);
4655	adapter->stats.ptc127 += rd32(E1000_PTC127);
4656	adapter->stats.ptc255 += rd32(E1000_PTC255);
4657	adapter->stats.ptc511 += rd32(E1000_PTC511);
4658	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4659	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4660
4661	adapter->stats.mptc += rd32(E1000_MPTC);
4662	adapter->stats.bptc += rd32(E1000_BPTC);
4663
4664	adapter->stats.tpt += rd32(E1000_TPT);
4665	adapter->stats.colc += rd32(E1000_COLC);
4666
4667	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4668	/* read internal phy specific stats */
4669	reg = rd32(E1000_CTRL_EXT);
4670	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4671		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4672		adapter->stats.tncrs += rd32(E1000_TNCRS);
4673	}
4674
4675	adapter->stats.tsctc += rd32(E1000_TSCTC);
4676	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4677
4678	adapter->stats.iac += rd32(E1000_IAC);
4679	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4680	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4681	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4682	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4683	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4684	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4685	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4686	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4687
4688	/* Fill out the OS statistics structure */
4689	net_stats->multicast = adapter->stats.mprc;
4690	net_stats->collisions = adapter->stats.colc;
4691
4692	/* Rx Errors */
4693
4694	/* RLEC on some newer hardware can be incorrect so build
4695	 * our own version based on RUC and ROC */
4696	net_stats->rx_errors = adapter->stats.rxerrc +
4697		adapter->stats.crcerrs + adapter->stats.algnerrc +
4698		adapter->stats.ruc + adapter->stats.roc +
4699		adapter->stats.cexterr;
4700	net_stats->rx_length_errors = adapter->stats.ruc +
4701				      adapter->stats.roc;
4702	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4703	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4704	net_stats->rx_missed_errors = adapter->stats.mpc;
4705
4706	/* Tx Errors */
4707	net_stats->tx_errors = adapter->stats.ecol +
4708			       adapter->stats.latecol;
4709	net_stats->tx_aborted_errors = adapter->stats.ecol;
4710	net_stats->tx_window_errors = adapter->stats.latecol;
4711	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4712
4713	/* Tx Dropped needs to be maintained elsewhere */
4714
4715	/* Phy Stats */
4716	if (hw->phy.media_type == e1000_media_type_copper) {
4717		if ((adapter->link_speed == SPEED_1000) &&
4718		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4719			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4720			adapter->phy_stats.idle_errors += phy_tmp;
4721		}
4722	}
4723
4724	/* Management Stats */
4725	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4726	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4727	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4728
4729	/* OS2BMC Stats */
4730	reg = rd32(E1000_MANC);
4731	if (reg & E1000_MANC_EN_BMC2OS) {
4732		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4733		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4734		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4735		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4736	}
4737}
4738
4739static irqreturn_t igb_msix_other(int irq, void *data)
4740{
4741	struct igb_adapter *adapter = data;
4742	struct e1000_hw *hw = &adapter->hw;
4743	u32 icr = rd32(E1000_ICR);
4744	/* reading ICR causes bit 31 of EICR to be cleared */
4745
4746	if (icr & E1000_ICR_DRSTA)
4747		schedule_work(&adapter->reset_task);
4748
4749	if (icr & E1000_ICR_DOUTSYNC) {
4750		/* HW is reporting DMA is out of sync */
4751		adapter->stats.doosync++;
		/* The DMA Out of Sync is also an indication of a spoof event
4753		 * in IOV mode. Check the Wrong VM Behavior register to
4754		 * see if it is really a spoof event. */
4755		igb_check_wvbr(adapter);
4756	}
4757
4758	/* Check for a mailbox event */
4759	if (icr & E1000_ICR_VMMB)
4760		igb_msg_task(adapter);
4761
4762	if (icr & E1000_ICR_LSC) {
4763		hw->mac.get_link_status = 1;
4764		/* guard against interrupt when we're going down */
4765		if (!test_bit(__IGB_DOWN, &adapter->state))
4766			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4767	}
4768
4769	wr32(E1000_EIMS, adapter->eims_other);
4770
4771	return IRQ_HANDLED;
4772}
4773
4774static void igb_write_itr(struct igb_q_vector *q_vector)
4775{
4776	struct igb_adapter *adapter = q_vector->adapter;
4777	u32 itr_val = q_vector->itr_val & 0x7FFC;
4778
4779	if (!q_vector->set_itr)
4780		return;
4781
4782	if (!itr_val)
4783		itr_val = 0x4;
4784
4785	if (adapter->hw.mac.type == e1000_82575)
4786		itr_val |= itr_val << 16;
4787	else
4788		itr_val |= E1000_EITR_CNT_IGNR;
4789
4790	writel(itr_val, q_vector->itr_register);
4791	q_vector->set_itr = 0;
4792}
4793
4794static irqreturn_t igb_msix_ring(int irq, void *data)
4795{
4796	struct igb_q_vector *q_vector = data;
4797
4798	/* Write the ITR value calculated from the previous interrupt. */
4799	igb_write_itr(q_vector);
4800
4801	napi_schedule(&q_vector->napi);
4802
4803	return IRQ_HANDLED;
4804}
4805
4806#ifdef CONFIG_IGB_DCA
4807static void igb_update_dca(struct igb_q_vector *q_vector)
4808{
4809	struct igb_adapter *adapter = q_vector->adapter;
4810	struct e1000_hw *hw = &adapter->hw;
4811	int cpu = get_cpu();
4812
4813	if (q_vector->cpu == cpu)
4814		goto out_no_update;
4815
4816	if (q_vector->tx.ring) {
4817		int q = q_vector->tx.ring->reg_idx;
4818		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4819		if (hw->mac.type == e1000_82575) {
4820			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4821			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4822		} else {
4823			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4824			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4825			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4826		}
4827		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4828		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4829	}
4830	if (q_vector->rx.ring) {
4831		int q = q_vector->rx.ring->reg_idx;
4832		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4833		if (hw->mac.type == e1000_82575) {
4834			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4835			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4836		} else {
4837			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4838			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4839			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4840		}
4841		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4842		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4843		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4844		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4845	}
4846	q_vector->cpu = cpu;
4847out_no_update:
4848	put_cpu();
4849}
4850
4851static void igb_setup_dca(struct igb_adapter *adapter)
4852{
4853	struct e1000_hw *hw = &adapter->hw;
4854	int i;
4855
4856	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4857		return;
4858
4859	/* Always use CB2 mode, difference is masked in the CB driver. */
4860	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4861
4862	for (i = 0; i < adapter->num_q_vectors; i++) {
4863		adapter->q_vector[i]->cpu = -1;
4864		igb_update_dca(adapter->q_vector[i]);
4865	}
4866}
4867
4868static int __igb_notify_dca(struct device *dev, void *data)
4869{
4870	struct net_device *netdev = dev_get_drvdata(dev);
4871	struct igb_adapter *adapter = netdev_priv(netdev);
4872	struct pci_dev *pdev = adapter->pdev;
4873	struct e1000_hw *hw = &adapter->hw;
4874	unsigned long event = *(unsigned long *)data;
4875
4876	switch (event) {
4877	case DCA_PROVIDER_ADD:
4878		/* if already enabled, don't do it again */
4879		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4880			break;
4881		if (dca_add_requester(dev) == 0) {
4882			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4883			dev_info(&pdev->dev, "DCA enabled\n");
4884			igb_setup_dca(adapter);
4885			break;
4886		}
4887		/* Fall Through since DCA is disabled. */
4888	case DCA_PROVIDER_REMOVE:
4889		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4890			/* without this a class_device is left
4891			 * hanging around in the sysfs model */
4892			dca_remove_requester(dev);
4893			dev_info(&pdev->dev, "DCA disabled\n");
4894			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4895			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4896		}
4897		break;
4898	}
4899
4900	return 0;
4901}
4902
4903static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4904                          void *p)
4905{
4906	int ret_val;
4907
4908	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4909	                                 __igb_notify_dca);
4910
4911	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4912}
4913#endif /* CONFIG_IGB_DCA */
4914
4915static void igb_ping_all_vfs(struct igb_adapter *adapter)
4916{
4917	struct e1000_hw *hw = &adapter->hw;
4918	u32 ping;
4919	int i;
4920
	for (i = 0; i < adapter->vfs_allocated_count; i++) {
4922		ping = E1000_PF_CONTROL_MSG;
4923		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4924			ping |= E1000_VT_MSGTYPE_CTS;
4925		igb_write_mbx(hw, &ping, 1, i);
4926	}
4927}
4928
4929static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4930{
4931	struct e1000_hw *hw = &adapter->hw;
4932	u32 vmolr = rd32(E1000_VMOLR(vf));
4933	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4934
4935	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4936	                    IGB_VF_FLAG_MULTI_PROMISC);
4937	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4938
4939	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4940		vmolr |= E1000_VMOLR_MPME;
4941		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4942		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4943	} else {
4944		/*
4945		 * if we have hashes and we are clearing a multicast promisc
4946		 * flag we need to write the hashes to the MTA as this step
4947		 * was previously skipped
4948		 */
4949		if (vf_data->num_vf_mc_hashes > 30) {
4950			vmolr |= E1000_VMOLR_MPME;
4951		} else if (vf_data->num_vf_mc_hashes) {
4952			int j;
4953			vmolr |= E1000_VMOLR_ROMPE;
4954			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4955				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4956		}
4957	}
4958
4959	wr32(E1000_VMOLR(vf), vmolr);
4960
4961	/* there are flags left unprocessed, likely not supported */
4962	if (*msgbuf & E1000_VT_MSGINFO_MASK)
4963		return -EINVAL;
4964
	return 0;
}
4968
4969static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4970				  u32 *msgbuf, u32 vf)
4971{
4972	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4973	u16 *hash_list = (u16 *)&msgbuf[1];
4974	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4975	int i;
4976
	/* salt away the number of multicast addresses assigned
	 * to this VF for later use, to restore when the PF multicast
	 * list changes
	 */
4981	vf_data->num_vf_mc_hashes = n;
4982
4983	/* only up to 30 hash values supported */
4984	if (n > 30)
4985		n = 30;
4986
4987	/* store the hashes for later use */
4988	for (i = 0; i < n; i++)
4989		vf_data->vf_mc_hashes[i] = hash_list[i];
4990
4991	/* Flush and reset the mta with the new values */
4992	igb_set_rx_mode(adapter->netdev);
4993
4994	return 0;
4995}
4996
4997static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4998{
4999	struct e1000_hw *hw = &adapter->hw;
5000	struct vf_data_storage *vf_data;
5001	int i, j;
5002
5003	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5004		u32 vmolr = rd32(E1000_VMOLR(i));
5005		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5006
5007		vf_data = &adapter->vf_data[i];
5008
5009		if ((vf_data->num_vf_mc_hashes > 30) ||
5010		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5011			vmolr |= E1000_VMOLR_MPME;
5012		} else if (vf_data->num_vf_mc_hashes) {
5013			vmolr |= E1000_VMOLR_ROMPE;
5014			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5015				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5016		}
5017		wr32(E1000_VMOLR(i), vmolr);
5018	}
5019}
5020
5021static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5022{
5023	struct e1000_hw *hw = &adapter->hw;
5024	u32 pool_mask, reg, vid;
5025	int i;
5026
5027	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5028
5029	/* Find the vlan filter for this id */
5030	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5031		reg = rd32(E1000_VLVF(i));
5032
5033		/* remove the vf from the pool */
5034		reg &= ~pool_mask;
5035
5036		/* if pool is empty then remove entry from vfta */
5037		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5038		    (reg & E1000_VLVF_VLANID_ENABLE)) {
			vid = reg & E1000_VLVF_VLANID_MASK;
			igb_vfta_set(hw, vid, false);
			reg = 0;
5042		}
5043
5044		wr32(E1000_VLVF(i), reg);
5045	}
5046
5047	adapter->vf_data[vf].vlans_enabled = 0;
5048}
5049
5050static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5051{
5052	struct e1000_hw *hw = &adapter->hw;
5053	u32 reg, i;
5054
5055	/* The vlvf table only exists on 82576 hardware and newer */
5056	if (hw->mac.type < e1000_82576)
5057		return -1;
5058
5059	/* we only need to do this if VMDq is enabled */
5060	if (!adapter->vfs_allocated_count)
5061		return -1;
5062
5063	/* Find the vlan filter for this id */
5064	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5065		reg = rd32(E1000_VLVF(i));
5066		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5067		    vid == (reg & E1000_VLVF_VLANID_MASK))
5068			break;
5069	}
5070
5071	if (add) {
5072		if (i == E1000_VLVF_ARRAY_SIZE) {
5073			/* Did not find a matching VLAN ID entry that was
5074			 * enabled.  Search for a free filter entry, i.e.
5075			 * one without the enable bit set
5076			 */
5077			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5078				reg = rd32(E1000_VLVF(i));
5079				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5080					break;
5081			}
5082		}
5083		if (i < E1000_VLVF_ARRAY_SIZE) {
5084			/* Found an enabled/available entry */
5085			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5086
5087			/* if !enabled we need to set this up in vfta */
5088			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5089				/* add VID to filter table */
5090				igb_vfta_set(hw, vid, true);
5091				reg |= E1000_VLVF_VLANID_ENABLE;
5092			}
5093			reg &= ~E1000_VLVF_VLANID_MASK;
5094			reg |= vid;
5095			wr32(E1000_VLVF(i), reg);
5096
5097			/* do not modify RLPML for PF devices */
5098			if (vf >= adapter->vfs_allocated_count)
5099				return 0;
5100
5101			if (!adapter->vf_data[vf].vlans_enabled) {
5102				u32 size;
5103				reg = rd32(E1000_VMOLR(vf));
5104				size = reg & E1000_VMOLR_RLPML_MASK;
5105				size += 4;
5106				reg &= ~E1000_VMOLR_RLPML_MASK;
5107				reg |= size;
5108				wr32(E1000_VMOLR(vf), reg);
5109			}
5110
5111			adapter->vf_data[vf].vlans_enabled++;
5112		}
5113	} else {
5114		if (i < E1000_VLVF_ARRAY_SIZE) {
5115			/* remove vf from the pool */
5116			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5117			/* if pool is empty then remove entry from vfta */
5118			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5119				reg = 0;
5120				igb_vfta_set(hw, vid, false);
5121			}
5122			wr32(E1000_VLVF(i), reg);
5123
5124			/* do not modify RLPML for PF devices */
5125			if (vf >= adapter->vfs_allocated_count)
5126				return 0;
5127
5128			adapter->vf_data[vf].vlans_enabled--;
5129			if (!adapter->vf_data[vf].vlans_enabled) {
5130				u32 size;
5131				reg = rd32(E1000_VMOLR(vf));
5132				size = reg & E1000_VMOLR_RLPML_MASK;
5133				size -= 4;
5134				reg &= ~E1000_VMOLR_RLPML_MASK;
5135				reg |= size;
5136				wr32(E1000_VMOLR(vf), reg);
5137			}
5138		}
5139	}
5140	return 0;
5141}
5142
5143static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5144{
5145	struct e1000_hw *hw = &adapter->hw;
5146
5147	if (vid)
5148		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5149	else
5150		wr32(E1000_VMVIR(vf), 0);
5151}
5152
5153static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5154			       int vf, u16 vlan, u8 qos)
5155{
5156	int err = 0;
5157	struct igb_adapter *adapter = netdev_priv(netdev);
5158
5159	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5160		return -EINVAL;
5161	if (vlan || qos) {
5162		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5163		if (err)
5164			goto out;
5165		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5166		igb_set_vmolr(adapter, vf, !vlan);
5167		adapter->vf_data[vf].pf_vlan = vlan;
5168		adapter->vf_data[vf].pf_qos = qos;
5169		dev_info(&adapter->pdev->dev,
5170			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5171		if (test_bit(__IGB_DOWN, &adapter->state)) {
5172			dev_warn(&adapter->pdev->dev,
5173				 "The VF VLAN has been set,"
5174				 " but the PF device is not up.\n");
5175			dev_warn(&adapter->pdev->dev,
5176				 "Bring the PF device up before"
5177				 " attempting to use the VF device.\n");
5178		}
5179	} else {
5180		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5181				   false, vf);
5182		igb_set_vmvir(adapter, vlan, vf);
5183		igb_set_vmolr(adapter, vf, true);
5184		adapter->vf_data[vf].pf_vlan = 0;
5185		adapter->vf_data[vf].pf_qos = 0;
5186       }
5187out:
5188       return err;
5189}
5190
5191static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5192{
5193	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5194	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5195
5196	return igb_vlvf_set(adapter, vid, add, vf);
5197}
5198
5199static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5200{
5201	/* clear flags - except flag that indicates PF has set the MAC */
5202	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5203	adapter->vf_data[vf].last_nack = jiffies;
5204
5205	/* reset offloads to defaults */
5206	igb_set_vmolr(adapter, vf, true);
5207
5208	/* reset vlans for device */
5209	igb_clear_vf_vfta(adapter, vf);
5210	if (adapter->vf_data[vf].pf_vlan)
5211		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5212				    adapter->vf_data[vf].pf_vlan,
5213				    adapter->vf_data[vf].pf_qos);
5214	else
5215		igb_clear_vf_vfta(adapter, vf);
5216
5217	/* reset multicast table array for vf */
5218	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5219
5220	/* Flush and reset the mta with the new values */
5221	igb_set_rx_mode(adapter->netdev);
5222}
5223
5224static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5225{
5226	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5227
5228	/* generate a new mac address as we were hotplug removed/added */
5229	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5230		random_ether_addr(vf_mac);
5231
5232	/* process remaining reset events */
5233	igb_vf_reset(adapter, vf);
5234}
5235
5236static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5237{
5238	struct e1000_hw *hw = &adapter->hw;
5239	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5240	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5241	u32 reg, msgbuf[3];
5242	u8 *addr = (u8 *)(&msgbuf[1]);
5243
5244	/* process all the same items cleared in a function level reset */
5245	igb_vf_reset(adapter, vf);
5246
5247	/* set vf mac address */
5248	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5249
5250	/* enable transmit and receive for vf */
5251	reg = rd32(E1000_VFTE);
5252	wr32(E1000_VFTE, reg | (1 << vf));
5253	reg = rd32(E1000_VFRE);
5254	wr32(E1000_VFRE, reg | (1 << vf));
5255
5256	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5257
5258	/* reply to reset with ack and vf mac address */
5259	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5260	memcpy(addr, vf_mac, 6);
5261	igb_write_mbx(hw, msgbuf, 3, vf);
5262}
5263
5264static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5265{
5266	/*
5267	 * The VF MAC Address is stored in a packed array of bytes
5268	 * starting at the second 32 bit word of the msg array
5269	 */
	unsigned char *addr = (unsigned char *)&msg[1];
5271	int err = -1;
5272
5273	if (is_valid_ether_addr(addr))
5274		err = igb_set_vf_mac(adapter, vf, addr);
5275
5276	return err;
5277}
5278
5279static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5280{
5281	struct e1000_hw *hw = &adapter->hw;
5282	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5283	u32 msg = E1000_VT_MSGTYPE_NACK;
5284
5285	/* if device isn't clear to send it shouldn't be reading either */
5286	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5287	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5288		igb_write_mbx(hw, &msg, 1, vf);
5289		vf_data->last_nack = jiffies;
5290	}
5291}
5292
5293static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5294{
5295	struct pci_dev *pdev = adapter->pdev;
5296	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5297	struct e1000_hw *hw = &adapter->hw;
5298	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5299	s32 retval;
5300
5301	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5302
5303	if (retval) {
5304		/* if receive failed revoke VF CTS stats and restart init */
5305		dev_err(&pdev->dev, "Error receiving message from VF\n");
5306		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5307		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5308			return;
5309		goto out;
5310	}
5311
5312	/* this is a message we already processed, do nothing */
5313	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5314		return;
5315
5316	/*
5317	 * until the vf completes a reset it should not be
5318	 * allowed to start any configuration.
5319	 */
5320
5321	if (msgbuf[0] == E1000_VF_RESET) {
5322		igb_vf_reset_msg(adapter, vf);
5323		return;
5324	}
5325
5326	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5327		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5328			return;
5329		retval = -1;
5330		goto out;
5331	}
5332
5333	switch ((msgbuf[0] & 0xFFFF)) {
5334	case E1000_VF_SET_MAC_ADDR:
5335		retval = -EINVAL;
5336		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5337			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5338		else
5339			dev_warn(&pdev->dev,
5340				 "VF %d attempted to override administratively "
5341				 "set MAC address\nReload the VF driver to "
5342				 "resume operations\n", vf);
5343		break;
5344	case E1000_VF_SET_PROMISC:
5345		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5346		break;
5347	case E1000_VF_SET_MULTICAST:
5348		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5349		break;
5350	case E1000_VF_SET_LPE:
5351		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5352		break;
5353	case E1000_VF_SET_VLAN:
5354		retval = -1;
5355		if (vf_data->pf_vlan)
5356			dev_warn(&pdev->dev,
5357				 "VF %d attempted to override administratively "
5358				 "set VLAN tag\nReload the VF driver to "
5359				 "resume operations\n", vf);
5360		else
5361			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5362		break;
5363	default:
5364		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5365		retval = -1;
5366		break;
5367	}
5368
5369	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5370out:
5371	/* notify the VF of the results of what it sent us */
5372	if (retval)
5373		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5374	else
5375		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5376
5377	igb_write_mbx(hw, msgbuf, 1, vf);
5378}
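
/*
 * Mailbox word layout as handled above (summary of this file's usage;
 * the flag values live in e1000_mbx.h): the low 16 bits of msgbuf[0]
 * select the command, the upper bits carry handshake flags, e.g.:
 *
 *	cmd = msgbuf[0] & 0xFFFF;		(E1000_VF_SET_MAC_ADDR, ...)
 *	ack = msgbuf[0] & E1000_VT_MSGTYPE_ACK;
 *
 * which is why words that already contain ACK or NACK are dropped early:
 * they are replies that were already processed.
 */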
5379
5380static void igb_msg_task(struct igb_adapter *adapter)
5381{
5382	struct e1000_hw *hw = &adapter->hw;
5383	u32 vf;
5384
5385	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5386		/* process any reset requests */
5387		if (!igb_check_for_rst(hw, vf))
5388			igb_vf_reset_event(adapter, vf);
5389
5390		/* process any messages pending */
5391		if (!igb_check_for_msg(hw, vf))
5392			igb_rcv_msg_from_vf(adapter, vf);
5393
5394		/* process any acks */
5395		if (!igb_check_for_ack(hw, vf))
5396			igb_rcv_ack_from_vf(adapter, vf);
5397	}
5398}
5399
5400/**
5401 *  igb_set_uta - Set unicast filter table address
5402 *  @adapter: board private structure
5403 *
5404 *  The unicast table address is a register array of 32-bit registers.
5405 *  The table is meant to be used in a way similar to how the MTA is used;
5406 *  however, due to certain limitations in the hardware it is necessary to
5407 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5408 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
5409 **/
5410static void igb_set_uta(struct igb_adapter *adapter)
5411{
5412	struct e1000_hw *hw = &adapter->hw;
5413	int i;
5414
5415	/* The UTA table only exists on 82576 hardware and newer */
5416	if (hw->mac.type < e1000_82576)
5417		return;
5418
5419	/* we only need to do this if VMDq is enabled */
5420	if (!adapter->vfs_allocated_count)
5421		return;
5422
5423	for (i = 0; i < hw->mac.uta_reg_count; i++)
5424		array_wr32(E1000_UTA, i, ~0);
5425}
5426
5427/**
5428 * igb_intr_msi - Interrupt Handler
5429 * @irq: interrupt number
5430 * @data: pointer to a network interface device structure
5431 **/
5432static irqreturn_t igb_intr_msi(int irq, void *data)
5433{
5434	struct igb_adapter *adapter = data;
5435	struct igb_q_vector *q_vector = adapter->q_vector[0];
5436	struct e1000_hw *hw = &adapter->hw;
5437	/* read ICR disables interrupts using IAM */
5438	u32 icr = rd32(E1000_ICR);
5439
5440	igb_write_itr(q_vector);
5441
5442	if (icr & E1000_ICR_DRSTA)
5443		schedule_work(&adapter->reset_task);
5444
5445	if (icr & E1000_ICR_DOUTSYNC) {
5446		/* HW is reporting DMA is out of sync */
5447		adapter->stats.doosync++;
5448	}
5449
5450	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5451		hw->mac.get_link_status = 1;
5452		if (!test_bit(__IGB_DOWN, &adapter->state))
5453			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5454	}
5455
5456	napi_schedule(&q_vector->napi);
5457
5458	return IRQ_HANDLED;
5459}
5460
5461/**
5462 * igb_intr - Legacy Interrupt Handler
5463 * @irq: interrupt number
5464 * @data: pointer to a network interface device structure
5465 **/
5466static irqreturn_t igb_intr(int irq, void *data)
5467{
5468	struct igb_adapter *adapter = data;
5469	struct igb_q_vector *q_vector = adapter->q_vector[0];
5470	struct e1000_hw *hw = &adapter->hw;
5471	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5472	 * need for the IMC write */
5473	u32 icr = rd32(E1000_ICR);
5474
5475	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5476	 * not set, then the adapter didn't send an interrupt */
5477	if (!(icr & E1000_ICR_INT_ASSERTED))
5478		return IRQ_NONE;
5479
5480	igb_write_itr(q_vector);
5481
5482	if (icr & E1000_ICR_DRSTA)
5483		schedule_work(&adapter->reset_task);
5484
5485	if (icr & E1000_ICR_DOUTSYNC) {
5486		/* HW is reporting DMA is out of sync */
5487		adapter->stats.doosync++;
5488	}
5489
5490	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5491		hw->mac.get_link_status = 1;
5492		/* guard against interrupt when we're going down */
5493		if (!test_bit(__IGB_DOWN, &adapter->state))
5494			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5495	}
5496
5497	napi_schedule(&q_vector->napi);
5498
5499	return IRQ_HANDLED;
5500}
5501
5502void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5503{
5504	struct igb_adapter *adapter = q_vector->adapter;
5505	struct e1000_hw *hw = &adapter->hw;
5506
5507	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5508	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5509		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5510			igb_set_itr(q_vector);
5511		else
5512			igb_update_ring_itr(q_vector);
5513	}
5514
5515	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5516		if (adapter->msix_entries)
5517			wr32(E1000_EIMS, q_vector->eims_value);
5518		else
5519			igb_irq_enable(adapter);
5520	}
5521}
5522
5523/**
5524 * igb_poll - NAPI Rx polling callback
5525 * @napi: napi polling structure
5526 * @budget: count of how many packets we should handle
5527 **/
5528static int igb_poll(struct napi_struct *napi, int budget)
5529{
5530	struct igb_q_vector *q_vector = container_of(napi,
5531	                                             struct igb_q_vector,
5532	                                             napi);
5533	bool clean_complete = true;
5534
5535#ifdef CONFIG_IGB_DCA
5536	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5537		igb_update_dca(q_vector);
5538#endif
5539	if (q_vector->tx.ring)
5540		clean_complete = igb_clean_tx_irq(q_vector);
5541
5542	if (q_vector->rx.ring)
5543		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5544
5545	/* If all work not completed, return budget and keep polling */
5546	if (!clean_complete)
5547		return budget;
5548
5549	/* If not enough Rx work done, exit the polling mode */
5550	napi_complete(napi);
5551	igb_ring_irq_enable(q_vector);
5552
5553	return 0;
5554}
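
/*
 * NAPI contract recap (explanatory, not driver code): a poll handler
 * stays on the poll list by returning its full budget, and leaves it by
 * completing when it has drained the rings, which is the shape used
 * above:
 *
 *	if (!clean_complete)
 *		return budget;		(more work, keep polling)
 *	napi_complete(napi);		(drained, re-enable interrupts)
 *	return 0;
 */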
5555
5556/**
5557 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5558 * @adapter: board private structure
5559 * @shhwtstamps: timestamp structure to update
5560 * @regval: unsigned 64bit system time value.
5561 *
5562 * We need to convert the system time value stored in the RX/TXSTMP registers
5563 * into a hwtstamp which can be used by the upper level timestamping functions
5564 */
5565static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5566                                   struct skb_shared_hwtstamps *shhwtstamps,
5567                                   u64 regval)
5568{
5569	u64 ns;
5570
5571	/*
5572	 * The 82580 starts with 1 ns at bit 0 in RX/TXSTMPL, so shift it up by
5573	 * 24 bits to match the clock shift we set up earlier.
5574	 */
5575	if (adapter->hw.mac.type >= e1000_82580)
5576		regval <<= IGB_82580_TSYNC_SHIFT;
5577
5578	ns = timecounter_cyc2time(&adapter->clock, regval);
5579	timecompare_update(&adapter->compare, ns);
5580	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5581	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5582	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5583}
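
/*
 * Worked example (assuming IGB_82580_TSYNC_SHIFT is 24, matching the
 * clock shift mentioned above): the 82580 stamp registers count whole
 * nanoseconds, while the timecounter works in units of 2^-24 ns, so a
 * raw stamp of 1000 becomes
 *
 *	regval = 1000ULL << 24;		(16777216000 counter units)
 *
 * before timecounter_cyc2time() turns it back into nanoseconds.
 */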
5584
5585/**
5586 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5587 * @q_vector: pointer to q_vector containing needed info
5588 * @buffer_info: pointer to the igb_tx_buffer structure holding the skb
5589 *
5590 * If we were asked to do hardware stamping and such a time stamp is
5591 * available, then it must have been for this skb here because we only
5592 * allow one such packet into the queue.
5593 */
5594static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5595			    struct igb_tx_buffer *buffer_info)
5596{
5597	struct igb_adapter *adapter = q_vector->adapter;
5598	struct e1000_hw *hw = &adapter->hw;
5599	struct skb_shared_hwtstamps shhwtstamps;
5600	u64 regval;
5601
5602	/* if skb does not support hw timestamp or TX stamp not valid exit */
5603	/* exit if skb was not hw timestamped or the TX stamp is not valid */
5604	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5605		return;
5606
5607	regval = rd32(E1000_TXSTMPL);
5608	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5609
5610	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5611	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5612}
5613
5614/**
5615 * igb_clean_tx_irq - Reclaim resources after transmit completes
5616 * @q_vector: pointer to q_vector containing needed info
5617 * returns true if ring is completely cleaned
5618 **/
5619static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5620{
5621	struct igb_adapter *adapter = q_vector->adapter;
5622	struct igb_ring *tx_ring = q_vector->tx.ring;
5623	struct igb_tx_buffer *tx_buffer;
5624	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5625	unsigned int total_bytes = 0, total_packets = 0;
5626	unsigned int budget = q_vector->tx.work_limit;
5627	unsigned int i = tx_ring->next_to_clean;
5628
5629	if (test_bit(__IGB_DOWN, &adapter->state))
5630		return true;
5631
5632	tx_buffer = &tx_ring->tx_buffer_info[i];
5633	tx_desc = IGB_TX_DESC(tx_ring, i);
5634	i -= tx_ring->count;
5635
5636	for (; budget; budget--) {
5637		eop_desc = tx_buffer->next_to_watch;
5638
5639		/* prevent any other reads prior to eop_desc */
5640		rmb();
5641
5642		/* if next_to_watch is not set then there is no work pending */
5643		if (!eop_desc)
5644			break;
5645
5646		/* if DD is not set pending work has not been completed */
5647		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5648			break;
5649
5650		/* clear next_to_watch to prevent false hangs */
5651		tx_buffer->next_to_watch = NULL;
5652
5653		/* update the statistics for this packet */
5654		total_bytes += tx_buffer->bytecount;
5655		total_packets += tx_buffer->gso_segs;
5656
5657		/* retrieve hardware timestamp */
5658		igb_tx_hwtstamp(q_vector, tx_buffer);
5659
5660		/* free the skb */
5661		dev_kfree_skb_any(tx_buffer->skb);
5662		tx_buffer->skb = NULL;
5663
5664		/* unmap skb header data */
5665		dma_unmap_single(tx_ring->dev,
5666				 tx_buffer->dma,
5667				 tx_buffer->length,
5668				 DMA_TO_DEVICE);
5669
5670		/* clear last DMA location and unmap remaining buffers */
5671		while (tx_desc != eop_desc) {
5672			tx_buffer->dma = 0;
5673
5674			tx_buffer++;
5675			tx_desc++;
5676			i++;
5677			if (unlikely(!i)) {
5678				i -= tx_ring->count;
5679				tx_buffer = tx_ring->tx_buffer_info;
5680				tx_desc = IGB_TX_DESC(tx_ring, 0);
5681			}
5682
5683			/* unmap any remaining paged data */
5684			if (tx_buffer->dma) {
5685				dma_unmap_page(tx_ring->dev,
5686					       tx_buffer->dma,
5687					       tx_buffer->length,
5688					       DMA_TO_DEVICE);
5689			}
5690		}
5691
5692		/* clear last DMA location */
5693		tx_buffer->dma = 0;
5694
5695		/* move us one more past the eop_desc for start of next pkt */
5696		tx_buffer++;
5697		tx_desc++;
5698		i++;
5699		if (unlikely(!i)) {
5700			i -= tx_ring->count;
5701			tx_buffer = tx_ring->tx_buffer_info;
5702			tx_desc = IGB_TX_DESC(tx_ring, 0);
5703		}
5704	}
5705
5706	i += tx_ring->count;
5707	tx_ring->next_to_clean = i;
5708	u64_stats_update_begin(&tx_ring->tx_syncp);
5709	tx_ring->tx_stats.bytes += total_bytes;
5710	tx_ring->tx_stats.packets += total_packets;
5711	u64_stats_update_end(&tx_ring->tx_syncp);
5712	q_vector->tx.total_bytes += total_bytes;
5713	q_vector->tx.total_packets += total_packets;
5714
5715	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5716		struct e1000_hw *hw = &adapter->hw;
5717
5718		eop_desc = tx_buffer->next_to_watch;
5719
5720		/* Detect a transmit hang in hardware, this serializes the
5721		 * check with the clearing of time_stamp and movement of i */
5722		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5723		if (eop_desc &&
5724		    time_after(jiffies, tx_buffer->time_stamp +
5725			       (adapter->tx_timeout_factor * HZ)) &&
5726		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5727
5728			/* detected Tx unit hang */
5729			dev_err(tx_ring->dev,
5730				"Detected Tx Unit Hang\n"
5731				"  Tx Queue             <%d>\n"
5732				"  TDH                  <%x>\n"
5733				"  TDT                  <%x>\n"
5734				"  next_to_use          <%x>\n"
5735				"  next_to_clean        <%x>\n"
5736				"buffer_info[next_to_clean]\n"
5737				"  time_stamp           <%lx>\n"
5738				"  next_to_watch        <%p>\n"
5739				"  jiffies              <%lx>\n"
5740				"  desc.status          <%x>\n",
5741				tx_ring->queue_index,
5742				rd32(E1000_TDH(tx_ring->reg_idx)),
5743				readl(tx_ring->tail),
5744				tx_ring->next_to_use,
5745				tx_ring->next_to_clean,
5746				tx_buffer->time_stamp,
5747				eop_desc,
5748				jiffies,
5749				eop_desc->wb.status);
5750			netif_stop_subqueue(tx_ring->netdev,
5751					    tx_ring->queue_index);
5752
5753			/* we are about to reset, no point in enabling stuff */
5754			return true;
5755		}
5756	}
5757
5758	if (unlikely(total_packets &&
5759		     netif_carrier_ok(tx_ring->netdev) &&
5760		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5761		/* Make sure that anybody stopping the queue after this
5762		 * sees the new next_to_clean.
5763		 */
5764		smp_mb();
5765		if (__netif_subqueue_stopped(tx_ring->netdev,
5766					     tx_ring->queue_index) &&
5767		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5768			netif_wake_subqueue(tx_ring->netdev,
5769					    tx_ring->queue_index);
5770
5771			u64_stats_update_begin(&tx_ring->tx_syncp);
5772			tx_ring->tx_stats.restart_queue++;
5773			u64_stats_update_end(&tx_ring->tx_syncp);
5774		}
5775	}
5776
5777	return !!budget;
5778}
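
/*
 * Note on the ring-index idiom above (explanatory, not driver code):
 * biasing i by -tx_ring->count turns the wrap test into a cheap zero
 * check instead of a compare against the ring size.  For a 256-entry
 * ring:
 *
 *	i = next_to_clean - 256;	(runs from -256 up to -1)
 *	i++;
 *	if (!i)				(stepped past the last entry)
 *		i -= 256;		(back to the start of the ring)
 *
 * and the final i += tx_ring->count recovers the real index.
 */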
5779
5780static inline void igb_rx_checksum(struct igb_ring *ring,
5781				   union e1000_adv_rx_desc *rx_desc,
5782				   struct sk_buff *skb)
5783{
5784	skb_checksum_none_assert(skb);
5785
5786	/* Ignore Checksum bit is set */
5787	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5788		return;
5789
5790	/* Rx checksum disabled via ethtool */
5791	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5792		return;
5793
5794	/* TCP/UDP checksum error bit is set */
5795	if (igb_test_staterr(rx_desc,
5796			     E1000_RXDEXT_STATERR_TCPE |
5797			     E1000_RXDEXT_STATERR_IPE)) {
5798		/*
5799		 * work around errata with sctp packets where the TCPE aka
5800		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5801		 * packets, (aka let the stack check the crc32c)
5802		 */
5803		if (!((skb->len == 60) &&
5804		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5805			u64_stats_update_begin(&ring->rx_syncp);
5806			ring->rx_stats.csum_err++;
5807			u64_stats_update_end(&ring->rx_syncp);
5808		}
5809		/* let the stack verify checksum errors */
5810		return;
5811	}
5812	/* It must be a TCP or UDP packet with a valid checksum */
5813	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5814				      E1000_RXD_STAT_UDPCS))
5815		skb->ip_summed = CHECKSUM_UNNECESSARY;
5816
5817	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5818		le32_to_cpu(rx_desc->wb.upper.status_error));
5819}
5820
5821static inline void igb_rx_hash(struct igb_ring *ring,
5822			       union e1000_adv_rx_desc *rx_desc,
5823			       struct sk_buff *skb)
5824{
5825	if (ring->netdev->features & NETIF_F_RXHASH)
5826		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5827}
5828
5829static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5830			    union e1000_adv_rx_desc *rx_desc,
5831			    struct sk_buff *skb)
5832{
5833	struct igb_adapter *adapter = q_vector->adapter;
5834	struct e1000_hw *hw = &adapter->hw;
5835	u64 regval;
5836
5837	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5838				       E1000_RXDADV_STAT_TS))
5839		return;
5840
5841	/*
5842	 * If this bit is set, then the RX registers contain the time stamp. No
5843	 * other packet will be time stamped until we read these registers, so
5844	 * read the registers to make them available again. Because only one
5845	 * packet can be time stamped at a time, we know that the register
5846	 * values must belong to this one here and therefore we don't need to
5847	 * compare any of the additional attributes stored for it.
5848	 *
5849	 * If nothing went wrong, then it should have a shared tx_flags that we
5850	 * can turn into a skb_shared_hwtstamps.
5851	 */
5852	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5853		u32 *stamp = (u32 *)skb->data;
5854		regval = le32_to_cpu(*(stamp + 2));
5855		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5856		skb_pull(skb, IGB_TS_HDR_LEN);
5857	} else {
5858		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5859			return;
5860
5861		regval = rd32(E1000_RXSTMPL);
5862		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5863	}
5864
5865	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5866}

5867static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5868{
5869	/* HW will not DMA in data larger than the given buffer, even if it
5870	 * parses the (NFS, of course) header to be larger.  In that case, it
5871	 * fills the header buffer and spills the rest into the page.
5872	 */
5873	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5874	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5875	if (hlen > IGB_RX_HDR_LEN)
5876		hlen = IGB_RX_HDR_LEN;
5877	return hlen;
5878}
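
/*
 * Example decode (assuming the usual field definitions, mask 0x7FE0 and
 * shift 5 for the header length in hdr_info): a value of 0x0841 yields
 *
 *	(0x0841 & 0x7FE0) >> 5 == 0x42 == 66 bytes of header,
 *
 * which is then clamped to IGB_RX_HDR_LEN.
 */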
5879
5880static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5881{
5882	struct igb_ring *rx_ring = q_vector->rx.ring;
5883	union e1000_adv_rx_desc *rx_desc;
5884	const int current_node = numa_node_id();
5885	unsigned int total_bytes = 0, total_packets = 0;
5886	u16 cleaned_count = igb_desc_unused(rx_ring);
5887	u16 i = rx_ring->next_to_clean;
5888
5889	rx_desc = IGB_RX_DESC(rx_ring, i);
5890
5891	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5892		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5893		struct sk_buff *skb = buffer_info->skb;
5894		union e1000_adv_rx_desc *next_rxd;
5895
5896		buffer_info->skb = NULL;
5897		prefetch(skb->data);
5898
5899		i++;
5900		if (i == rx_ring->count)
5901			i = 0;
5902
5903		next_rxd = IGB_RX_DESC(rx_ring, i);
5904		prefetch(next_rxd);
5905
5906		/*
5907		 * This memory barrier is needed to keep us from reading
5908		 * any other fields out of the rx_desc until we know the
5909		 * RXD_STAT_DD bit is set
5910		 */
5911		rmb();
5912
5913		if (!skb_is_nonlinear(skb)) {
5914			__skb_put(skb, igb_get_hlen(rx_desc));
5915			dma_unmap_single(rx_ring->dev, buffer_info->dma,
5916					 IGB_RX_HDR_LEN,
5917					 DMA_FROM_DEVICE);
5918			buffer_info->dma = 0;
5919		}
5920
5921		if (rx_desc->wb.upper.length) {
5922			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5923
5924			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5925						buffer_info->page,
5926						buffer_info->page_offset,
5927						length);
5928
5929			skb->len += length;
5930			skb->data_len += length;
5931			skb->truesize += length;
5932
5933			if ((page_count(buffer_info->page) != 1) ||
5934			    (page_to_nid(buffer_info->page) != current_node))
5935				buffer_info->page = NULL;
5936			else
5937				get_page(buffer_info->page);
5938
5939			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5940				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
5941			buffer_info->page_dma = 0;
5942		}
5943
5944		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
5945			struct igb_rx_buffer *next_buffer;
5946			next_buffer = &rx_ring->rx_buffer_info[i];
5947			buffer_info->skb = next_buffer->skb;
5948			buffer_info->dma = next_buffer->dma;
5949			next_buffer->skb = skb;
5950			next_buffer->dma = 0;
5951			goto next_desc;
5952		}
5953
5954		if (igb_test_staterr(rx_desc,
5955				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
5956			dev_kfree_skb_any(skb);
5957			goto next_desc;
5958		}
5959
5960		igb_rx_hwtstamp(q_vector, rx_desc, skb);
5961		igb_rx_hash(rx_ring, rx_desc, skb);
5962		igb_rx_checksum(rx_ring, rx_desc, skb);
5963
5964		if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5965			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5966
5967			__vlan_hwaccel_put_tag(skb, vid);
5968		}
5969
5970		total_bytes += skb->len;
5971		total_packets++;
5972
5973		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5974
5975		napi_gro_receive(&q_vector->napi, skb);
5976
5977		budget--;
5978next_desc:
5979		if (!budget)
5980			break;
5981
5982		cleaned_count++;
5983		/* return some buffers to hardware, one at a time is too slow */
5984		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5985			igb_alloc_rx_buffers(rx_ring, cleaned_count);
5986			cleaned_count = 0;
5987		}
5988
5989		/* use prefetched values */
5990		rx_desc = next_rxd;
5991	}
5992
5993	rx_ring->next_to_clean = i;
5994	u64_stats_update_begin(&rx_ring->rx_syncp);
5995	rx_ring->rx_stats.packets += total_packets;
5996	rx_ring->rx_stats.bytes += total_bytes;
5997	u64_stats_update_end(&rx_ring->rx_syncp);
5998	q_vector->rx.total_packets += total_packets;
5999	q_vector->rx.total_bytes += total_bytes;
6000
6001	if (cleaned_count)
6002		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6003
6004	return !!budget;
6005}
6006
6007static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6008				 struct igb_rx_buffer *bi)
6009{
6010	struct sk_buff *skb = bi->skb;
6011	dma_addr_t dma = bi->dma;
6012
6013	if (dma)
6014		return true;
6015
6016	if (likely(!skb)) {
6017		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6018						IGB_RX_HDR_LEN);
6019		bi->skb = skb;
6020		if (!skb) {
6021			rx_ring->rx_stats.alloc_failed++;
6022			return false;
6023		}
6024
6025		/* initialize skb for ring */
6026		skb_record_rx_queue(skb, rx_ring->queue_index);
6027	}
6028
6029	dma = dma_map_single(rx_ring->dev, skb->data,
6030			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6031
6032	if (dma_mapping_error(rx_ring->dev, dma)) {
6033		rx_ring->rx_stats.alloc_failed++;
6034		return false;
6035	}
6036
6037	bi->dma = dma;
6038	return true;
6039}
6040
6041static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6042				  struct igb_rx_buffer *bi)
6043{
6044	struct page *page = bi->page;
6045	dma_addr_t page_dma = bi->page_dma;
6046	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6047
6048	if (page_dma)
6049		return true;
6050
6051	if (!page) {
6052		page = netdev_alloc_page(rx_ring->netdev);
6053		bi->page = page;
6054		if (unlikely(!page)) {
6055			rx_ring->rx_stats.alloc_failed++;
6056			return false;
6057		}
6058	}
6059
6060	page_dma = dma_map_page(rx_ring->dev, page,
6061				page_offset, PAGE_SIZE / 2,
6062				DMA_FROM_DEVICE);
6063
6064	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6065		rx_ring->rx_stats.alloc_failed++;
6066		return false;
6067	}
6068
6069	bi->page_dma = page_dma;
6070	bi->page_offset = page_offset;
6071	return true;
6072}
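
/*
 * The XOR of bi->page_offset above alternates between the two halves of
 * one page, so a single page can back two Rx buffers.  Trace for 4 KiB
 * pages:
 *
 *	offset 0    ->    0 ^ 2048 == 2048	(map the upper half)
 *	offset 2048 -> 2048 ^ 2048 == 0		(map the lower half)
 *
 * Together with the page_count()/page_to_nid() checks in the clean
 * routine, this lets a page be recycled unless the stack still holds a
 * reference or the page sits on a remote NUMA node.
 */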
6073
6074/**
6075 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6076 * @rx_ring: Rx descriptor ring to place the newly allocated buffers on
 * @cleaned_count: number of buffers to replace
6077 **/
6078void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6079{
6080	union e1000_adv_rx_desc *rx_desc;
6081	struct igb_rx_buffer *bi;
6082	u16 i = rx_ring->next_to_use;
6083
6084	rx_desc = IGB_RX_DESC(rx_ring, i);
6085	bi = &rx_ring->rx_buffer_info[i];
6086	i -= rx_ring->count;
6087
6088	while (cleaned_count--) {
6089		if (!igb_alloc_mapped_skb(rx_ring, bi))
6090			break;
6091
6092		/* Refresh the desc even if buffer_addrs didn't change
6093		 * because each write-back erases this info. */
6094		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6095
6096		if (!igb_alloc_mapped_page(rx_ring, bi))
6097			break;
6098
6099		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6100
6101		rx_desc++;
6102		bi++;
6103		i++;
6104		if (unlikely(!i)) {
6105			rx_desc = IGB_RX_DESC(rx_ring, 0);
6106			bi = rx_ring->rx_buffer_info;
6107			i -= rx_ring->count;
6108		}
6109
6110		/* clear the hdr_addr for the next_to_use descriptor */
6111		rx_desc->read.hdr_addr = 0;
6112	}
6113
6114	i += rx_ring->count;
6115
6116	if (rx_ring->next_to_use != i) {
6117		rx_ring->next_to_use = i;
6118
6119		/* Force memory writes to complete before letting h/w
6120		 * know there are new descriptors to fetch.  (Only
6121		 * applicable for weak-ordered memory model archs,
6122		 * such as IA-64). */
6123		wmb();
6124		writel(i, rx_ring->tail);
6125	}
6126}
6127
6128/**
6129 * igb_mii_ioctl - handle MII ioctls
6130 * @netdev: network interface device structure
6131 * @ifr: interface request structure holding the MII data
6132 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6133 **/
6134static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6135{
6136	struct igb_adapter *adapter = netdev_priv(netdev);
6137	struct mii_ioctl_data *data = if_mii(ifr);
6138
6139	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6140		return -EOPNOTSUPP;
6141
6142	switch (cmd) {
6143	case SIOCGMIIPHY:
6144		data->phy_id = adapter->hw.phy.addr;
6145		break;
6146	case SIOCGMIIREG:
6147		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6148		                     &data->val_out))
6149			return -EIO;
6150		break;
6151	case SIOCSMIIREG:
6152	default:
6153		return -EOPNOTSUPP;
6154	}
6155	return 0;
6156}
6157
6158/**
6159 * igb_hwtstamp_ioctl - control hardware time stamping
6160 * @netdev: network interface device structure
6161 * @ifr: interface request structure holding the hwtstamp_config
6162 * @cmd: ioctl command (SIOCSHWTSTAMP)
6163 *
6164 * Outgoing time stamping can be enabled and disabled. Play nice and
6165 * disable it when requested, although it shouldn't cause any overhead
6166 * when no packet needs it. At most one packet in the queue may be
6167 * marked for time stamping, otherwise it would be impossible to tell
6168 * for sure to which packet the hardware time stamp belongs.
6169 *
6170 * Incoming time stamping has to be configured via the hardware
6171 * filters. Not all combinations are supported, in particular the event
6172 * type has to be specified. Matching the kind of event packet is
6173 * not supported, with the exception of "all V2 events regardless of
6174 * layer 2 or 4".
6176 **/
6177static int igb_hwtstamp_ioctl(struct net_device *netdev,
6178			      struct ifreq *ifr, int cmd)
6179{
6180	struct igb_adapter *adapter = netdev_priv(netdev);
6181	struct e1000_hw *hw = &adapter->hw;
6182	struct hwtstamp_config config;
6183	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6184	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6185	u32 tsync_rx_cfg = 0;
6186	bool is_l4 = false;
6187	bool is_l2 = false;
6188	u32 regval;
6189
6190	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6191		return -EFAULT;
6192
6193	/* reserved for future extensions */
6194	if (config.flags)
6195		return -EINVAL;
6196
6197	switch (config.tx_type) {
6198	case HWTSTAMP_TX_OFF:
6199		tsync_tx_ctl = 0;
		/* fall through */
6200	case HWTSTAMP_TX_ON:
6201		break;
6202	default:
6203		return -ERANGE;
6204	}
6205
6206	switch (config.rx_filter) {
6207	case HWTSTAMP_FILTER_NONE:
6208		tsync_rx_ctl = 0;
6209		break;
6210	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6211	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6212	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6213	case HWTSTAMP_FILTER_ALL:
6214		/*
6215		 * register TSYNCRXCFG must be set, therefore it is not
6216		 * possible to time stamp both Sync and Delay_Req messages
6217		 * => fall back to time stamping all packets
6218		 */
6219		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6220		config.rx_filter = HWTSTAMP_FILTER_ALL;
6221		break;
6222	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6223		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6224		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6225		is_l4 = true;
6226		break;
6227	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6228		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6229		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6230		is_l4 = true;
6231		break;
6232	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6233	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6234		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6235		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6236		is_l2 = true;
6237		is_l4 = true;
6238		config.rx_filter = HWTSTAMP_FILTER_SOME;
6239		break;
6240	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6241	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6242		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6243		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6244		is_l2 = true;
6245		is_l4 = true;
6246		config.rx_filter = HWTSTAMP_FILTER_SOME;
6247		break;
6248	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6249	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6250	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6251		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6252		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6253		is_l2 = true;
6254		break;
6255	default:
6256		return -ERANGE;
6257	}
6258
6259	if (hw->mac.type == e1000_82575) {
6260		if (tsync_rx_ctl || tsync_tx_ctl)
6261			return -EINVAL;
6262		return 0;
6263	}
6264
6265	/*
6266	 * Per-packet timestamping only works if all packets are
6267	 * timestamped, so enable timestamping in all packets as
6268	 * long as one rx filter was configured.
6269	 */
6270	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6271		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6272		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6273	}
6274
6275	/* enable/disable TX */
6276	regval = rd32(E1000_TSYNCTXCTL);
6277	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6278	regval |= tsync_tx_ctl;
6279	wr32(E1000_TSYNCTXCTL, regval);
6280
6281	/* enable/disable RX */
6282	regval = rd32(E1000_TSYNCRXCTL);
6283	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6284	regval |= tsync_rx_ctl;
6285	wr32(E1000_TSYNCRXCTL, regval);
6286
6287	/* define which PTP packets are time stamped */
6288	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6289
6290	/* define ethertype filter for timestamped packets */
6291	if (is_l2)
6292		wr32(E1000_ETQF(3),
6293		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6294		                 E1000_ETQF_1588 | /* enable timestamping */
6295		                 ETH_P_1588));     /* 1588 eth protocol type */
6296	else
6297		wr32(E1000_ETQF(3), 0);
6298
6299#define PTP_PORT 319
6300	/* L4 Queue Filter[3]: filter by destination port and protocol */
6301	if (is_l4) {
6302		u32 ftqf = (IPPROTO_UDP /* UDP */
6303			| E1000_FTQF_VF_BP /* VF not compared */
6304			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6305			| E1000_FTQF_MASK); /* mask all inputs */
6306		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6307
6308		wr32(E1000_IMIR(3), htons(PTP_PORT));
6309		wr32(E1000_IMIREXT(3),
6310		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6311		if (hw->mac.type == e1000_82576) {
6312			/* enable source port check */
6313			wr32(E1000_SPQF(3), htons(PTP_PORT));
6314			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6315		}
6316		wr32(E1000_FTQF(3), ftqf);
6317	} else {
6318		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6319	}
6320	wrfl();
6321
6322	adapter->hwtstamp_config = config;
6323
6324	/* clear TX/RX time stamp registers, just to be sure */
6325	regval = rd32(E1000_TXSTMPH);
6326	regval = rd32(E1000_RXSTMPH);
6327
6328	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6329		-EFAULT : 0;
6330}
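
/*
 * Minimal user-space sketch of driving this ioctl (hypothetical device
 * name, error handling omitted; sock is any open AF_INET socket):
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(sock, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter reports what was actually enabled, e.g.
 * HWTSTAMP_FILTER_ALL on 82580 and newer parts.
 */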
6331
6332/**
6333 * igb_ioctl -
6334 * @netdev:
6335 * @ifreq:
6336 * @cmd:
6337 **/
6338static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6339{
6340	switch (cmd) {
6341	case SIOCGMIIPHY:
6342	case SIOCGMIIREG:
6343	case SIOCSMIIREG:
6344		return igb_mii_ioctl(netdev, ifr, cmd);
6345	case SIOCSHWTSTAMP:
6346		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6347	default:
6348		return -EOPNOTSUPP;
6349	}
6350}
6351
6352s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6353{
6354	struct igb_adapter *adapter = hw->back;
6355	u16 cap_offset;
6356
6357	cap_offset = adapter->pdev->pcie_cap;
6358	if (!cap_offset)
6359		return -E1000_ERR_CONFIG;
6360
6361	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6362
6363	return 0;
6364}
6365
6366s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6367{
6368	struct igb_adapter *adapter = hw->back;
6369	u16 cap_offset;
6370
6371	cap_offset = adapter->pdev->pcie_cap;
6372	if (!cap_offset)
6373		return -E1000_ERR_CONFIG;
6374
6375	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6376
6377	return 0;
6378}
6379
6380static void igb_vlan_mode(struct net_device *netdev, u32 features)
6381{
6382	struct igb_adapter *adapter = netdev_priv(netdev);
6383	struct e1000_hw *hw = &adapter->hw;
6384	u32 ctrl, rctl;
6385	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6386
6387	if (enable) {
6388		/* enable VLAN tag insert/strip */
6389		ctrl = rd32(E1000_CTRL);
6390		ctrl |= E1000_CTRL_VME;
6391		wr32(E1000_CTRL, ctrl);
6392
6393		/* Disable CFI check */
6394		rctl = rd32(E1000_RCTL);
6395		rctl &= ~E1000_RCTL_CFIEN;
6396		wr32(E1000_RCTL, rctl);
6397	} else {
6398		/* disable VLAN tag insert/strip */
6399		ctrl = rd32(E1000_CTRL);
6400		ctrl &= ~E1000_CTRL_VME;
6401		wr32(E1000_CTRL, ctrl);
6402	}
6403
6404	igb_rlpml_set(adapter);
6405}
6406
6407static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6408{
6409	struct igb_adapter *adapter = netdev_priv(netdev);
6410	struct e1000_hw *hw = &adapter->hw;
6411	int pf_id = adapter->vfs_allocated_count;
6412
6413	/* attempt to add filter to vlvf array */
6414	igb_vlvf_set(adapter, vid, true, pf_id);
6415
6416	/* add the filter since PF can receive vlans w/o entry in vlvf */
6417	igb_vfta_set(hw, vid, true);
6418
6419	set_bit(vid, adapter->active_vlans);
6420}
6421
6422static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6423{
6424	struct igb_adapter *adapter = netdev_priv(netdev);
6425	struct e1000_hw *hw = &adapter->hw;
6426	int pf_id = adapter->vfs_allocated_count;
6427	s32 err;
6428
6429	/* remove vlan from VLVF table array */
6430	err = igb_vlvf_set(adapter, vid, false, pf_id);
6431
6432	/* if vid was not present in VLVF just remove it from table */
6433	if (err)
6434		igb_vfta_set(hw, vid, false);
6435
6436	clear_bit(vid, adapter->active_vlans);
6437}
6438
6439static void igb_restore_vlan(struct igb_adapter *adapter)
6440{
6441	u16 vid;
6442
6443	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6444
6445	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6446		igb_vlan_rx_add_vid(adapter->netdev, vid);
6447}
6448
6449int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6450{
6451	struct pci_dev *pdev = adapter->pdev;
6452	struct e1000_mac_info *mac = &adapter->hw.mac;
6453
6454	mac->autoneg = 0;
6455
6456	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6457	 * for the switch() below to work */
6458	if ((spd & 1) || (dplx & ~1))
6459		goto err_inval;
6460
6461	/* Fiber NICs only allow 1000 Mbps full duplex */
6462	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6463	    (spd != SPEED_1000 ||
6464	     dplx != DUPLEX_FULL))
6465		goto err_inval;
6466
6467	switch (spd + dplx) {
6468	case SPEED_10 + DUPLEX_HALF:
6469		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6470		break;
6471	case SPEED_10 + DUPLEX_FULL:
6472		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6473		break;
6474	case SPEED_100 + DUPLEX_HALF:
6475		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6476		break;
6477	case SPEED_100 + DUPLEX_FULL:
6478		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6479		break;
6480	case SPEED_1000 + DUPLEX_FULL:
6481		mac->autoneg = 1;
6482		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6483		break;
6484	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6485	default:
6486		goto err_inval;
6487	}
6488	return 0;
6489
6490err_inval:
6491	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6492	return -EINVAL;
6493}
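
/*
 * Why switching on spd + dplx is unambiguous (explanatory note): the
 * guard above rejects any spd with bit 0 set and any dplx other than
 * 0 or 1, so every supported combination sums to a distinct value:
 *
 *	SPEED_10  + DUPLEX_HALF == 10	SPEED_10  + DUPLEX_FULL == 11
 *	SPEED_100 + DUPLEX_HALF == 100	SPEED_100 + DUPLEX_FULL == 101
 *	SPEED_1000 + DUPLEX_FULL == 1001
 */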
6494
6495static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6496{
6497	struct net_device *netdev = pci_get_drvdata(pdev);
6498	struct igb_adapter *adapter = netdev_priv(netdev);
6499	struct e1000_hw *hw = &adapter->hw;
6500	u32 ctrl, rctl, status;
6501	u32 wufc = adapter->wol;
6502#ifdef CONFIG_PM
6503	int retval = 0;
6504#endif
6505
6506	netif_device_detach(netdev);
6507
6508	if (netif_running(netdev))
6509		igb_close(netdev);
6510
6511	igb_clear_interrupt_scheme(adapter);
6512
6513#ifdef CONFIG_PM
6514	retval = pci_save_state(pdev);
6515	if (retval)
6516		return retval;
6517#endif
6518
6519	status = rd32(E1000_STATUS);
6520	if (status & E1000_STATUS_LU)
6521		wufc &= ~E1000_WUFC_LNKC;
6522
6523	if (wufc) {
6524		igb_setup_rctl(adapter);
6525		igb_set_rx_mode(netdev);
6526
6527		/* turn on all-multi mode if wake on multicast is enabled */
6528		if (wufc & E1000_WUFC_MC) {
6529			rctl = rd32(E1000_RCTL);
6530			rctl |= E1000_RCTL_MPE;
6531			wr32(E1000_RCTL, rctl);
6532		}
6533
6534		ctrl = rd32(E1000_CTRL);
6535		/* advertise wake from D3Cold */
6536		#define E1000_CTRL_ADVD3WUC 0x00100000
6537		/* phy power management enable */
6538		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6539		ctrl |= E1000_CTRL_ADVD3WUC;
6540		wr32(E1000_CTRL, ctrl);
6541
6542		/* Allow time for pending master requests to run */
6543		igb_disable_pcie_master(hw);
6544
6545		wr32(E1000_WUC, E1000_WUC_PME_EN);
6546		wr32(E1000_WUFC, wufc);
6547	} else {
6548		wr32(E1000_WUC, 0);
6549		wr32(E1000_WUFC, 0);
6550	}
6551
6552	*enable_wake = wufc || adapter->en_mng_pt;
6553	if (!*enable_wake)
6554		igb_power_down_link(adapter);
6555	else
6556		igb_power_up_link(adapter);
6557
6558	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6559	 * would have already happened in close and is redundant. */
6560	igb_release_hw_control(adapter);
6561
6562	pci_disable_device(pdev);
6563
6564	return 0;
6565}
6566
6567#ifdef CONFIG_PM
6568static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6569{
6570	int retval;
6571	bool wake;
6572
6573	retval = __igb_shutdown(pdev, &wake);
6574	if (retval)
6575		return retval;
6576
6577	if (wake) {
6578		pci_prepare_to_sleep(pdev);
6579	} else {
6580		pci_wake_from_d3(pdev, false);
6581		pci_set_power_state(pdev, PCI_D3hot);
6582	}
6583
6584	return 0;
6585}
6586
6587static int igb_resume(struct pci_dev *pdev)
6588{
6589	struct net_device *netdev = pci_get_drvdata(pdev);
6590	struct igb_adapter *adapter = netdev_priv(netdev);
6591	struct e1000_hw *hw = &adapter->hw;
6592	u32 err;
6593
6594	pci_set_power_state(pdev, PCI_D0);
6595	pci_restore_state(pdev);
6596	pci_save_state(pdev);
6597
6598	err = pci_enable_device_mem(pdev);
6599	if (err) {
6600		dev_err(&pdev->dev,
6601			"igb: Cannot enable PCI device from suspend\n");
6602		return err;
6603	}
6604	pci_set_master(pdev);
6605
6606	pci_enable_wake(pdev, PCI_D3hot, 0);
6607	pci_enable_wake(pdev, PCI_D3cold, 0);
6608
6609	if (igb_init_interrupt_scheme(adapter)) {
6610		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6611		return -ENOMEM;
6612	}
6613
6614	igb_reset(adapter);
6615
6616	/* let the f/w know that the h/w is now under the control of the
6617	 * driver. */
6618	igb_get_hw_control(adapter);
6619
6620	wr32(E1000_WUS, ~0);
6621
6622	if (netif_running(netdev)) {
6623		err = igb_open(netdev);
6624		if (err)
6625			return err;
6626	}
6627
6628	netif_device_attach(netdev);
6629
6630	return 0;
6631}
6632#endif
6633
6634static void igb_shutdown(struct pci_dev *pdev)
6635{
6636	bool wake;
6637
6638	__igb_shutdown(pdev, &wake);
6639
6640	if (system_state == SYSTEM_POWER_OFF) {
6641		pci_wake_from_d3(pdev, wake);
6642		pci_set_power_state(pdev, PCI_D3hot);
6643	}
6644}
6645
6646#ifdef CONFIG_NET_POLL_CONTROLLER
6647/*
6648 * Polling 'interrupt' - used by things like netconsole to send skbs
6649 * without having to re-enable interrupts. It's not called while
6650 * the interrupt routine is executing.
6651 */
6652static void igb_netpoll(struct net_device *netdev)
6653{
6654	struct igb_adapter *adapter = netdev_priv(netdev);
6655	struct e1000_hw *hw = &adapter->hw;
6656	struct igb_q_vector *q_vector;
6657	int i;
6658
6659	for (i = 0; i < adapter->num_q_vectors; i++) {
6660		q_vector = adapter->q_vector[i];
6661		if (adapter->msix_entries)
6662			wr32(E1000_EIMC, q_vector->eims_value);
6663		else
6664			igb_irq_disable(adapter);
6665		napi_schedule(&q_vector->napi);
6666	}
6667}
6668#endif /* CONFIG_NET_POLL_CONTROLLER */
6669
6670/**
6671 * igb_io_error_detected - called when PCI error is detected
6672 * @pdev: Pointer to PCI device
6673 * @state: The current pci connection state
6674 *
6675 * This function is called after a PCI bus error affecting
6676 * this device has been detected.
6677 */
6678static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6679					      pci_channel_state_t state)
6680{
6681	struct net_device *netdev = pci_get_drvdata(pdev);
6682	struct igb_adapter *adapter = netdev_priv(netdev);
6683
6684	netif_device_detach(netdev);
6685
6686	if (state == pci_channel_io_perm_failure)
6687		return PCI_ERS_RESULT_DISCONNECT;
6688
6689	if (netif_running(netdev))
6690		igb_down(adapter);
6691	pci_disable_device(pdev);
6692
6693	/* Request a slot reset. */
6694	return PCI_ERS_RESULT_NEED_RESET;
6695}
6696
6697/**
6698 * igb_io_slot_reset - called after the pci bus has been reset.
6699 * @pdev: Pointer to PCI device
6700 *
6701 * Restart the card from scratch, as if from a cold-boot. Implementation
6702 * resembles the first-half of the igb_resume routine.
6703 */
6704static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6705{
6706	struct net_device *netdev = pci_get_drvdata(pdev);
6707	struct igb_adapter *adapter = netdev_priv(netdev);
6708	struct e1000_hw *hw = &adapter->hw;
6709	pci_ers_result_t result;
6710	int err;
6711
6712	if (pci_enable_device_mem(pdev)) {
6713		dev_err(&pdev->dev,
6714			"Cannot re-enable PCI device after reset.\n");
6715		result = PCI_ERS_RESULT_DISCONNECT;
6716	} else {
6717		pci_set_master(pdev);
6718		pci_restore_state(pdev);
6719		pci_save_state(pdev);
6720
6721		pci_enable_wake(pdev, PCI_D3hot, 0);
6722		pci_enable_wake(pdev, PCI_D3cold, 0);
6723
6724		igb_reset(adapter);
6725		wr32(E1000_WUS, ~0);
6726		result = PCI_ERS_RESULT_RECOVERED;
6727	}
6728
6729	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6730	if (err) {
6731		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6732		        "failed 0x%0x\n", err);
6733		/* non-fatal, continue */
6734	}
6735
6736	return result;
6737}
6738
6739/**
6740 * igb_io_resume - called when traffic can start flowing again.
6741 * @pdev: Pointer to PCI device
6742 *
6743 * This callback is called when the error recovery driver tells us that
6744 * its OK to resume normal operation. Implementation resembles the
6745 * second-half of the igb_resume routine.
6746 */
6747static void igb_io_resume(struct pci_dev *pdev)
6748{
6749	struct net_device *netdev = pci_get_drvdata(pdev);
6750	struct igb_adapter *adapter = netdev_priv(netdev);
6751
6752	if (netif_running(netdev)) {
6753		if (igb_up(adapter)) {
6754			dev_err(&pdev->dev, "igb_up failed after reset\n");
6755			return;
6756		}
6757	}
6758
6759	netif_device_attach(netdev);
6760
6761	/* let the f/w know that the h/w is now under the control of the
6762	 * driver. */
6763	igb_get_hw_control(adapter);
6764}
6765
6766static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6767                             u8 qsel)
6768{
6769	u32 rar_low, rar_high;
6770	struct e1000_hw *hw = &adapter->hw;
6771
6772	/* HW expects these in little endian so we reverse the byte order
6773	 * from network order (big endian) to little endian
6774	 */
6775	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6776	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6777	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6778
6779	/* Indicate to hardware the Address is Valid. */
6780	rar_high |= E1000_RAH_AV;
6781
6782	if (hw->mac.type == e1000_82575)
6783		rar_high |= E1000_RAH_POOL_1 * qsel;
6784	else
6785		rar_high |= E1000_RAH_POOL_1 << qsel;
6786
6787	wr32(E1000_RAL(index), rar_low);
6788	wrfl();
6789	wr32(E1000_RAH(index), rar_high);
6790	wrfl();
6791}
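
/*
 * Packing example (illustrative): for the MAC 00:1b:21:aa:bb:cc the
 * writes above produce
 *
 *	rar_low  = 0xaa211b00;			(addr[3..0])
 *	rar_high = 0x0000ccbb | E1000_RAH_AV | pool bits;
 *
 * i.e. byte 0 of the address lands in the least significant byte of RAL,
 * giving the little-endian layout the hardware expects.
 */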
6792
6793static int igb_set_vf_mac(struct igb_adapter *adapter,
6794                          int vf, unsigned char *mac_addr)
6795{
6796	struct e1000_hw *hw = &adapter->hw;
6797	/* VF MAC addresses start at the end of the receive addresses and move
6798	 * towards the first; as a result a collision should not be possible */
6799	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6800
6801	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6802
6803	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6804
6805	return 0;
6806}
6807
6808static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6809{
6810	struct igb_adapter *adapter = netdev_priv(netdev);
6811	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6812		return -EINVAL;
6813	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6814	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6815	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6816				      " change effective.\n");
6817	if (test_bit(__IGB_DOWN, &adapter->state)) {
6818		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6819			 " but the PF device is not up.\n");
6820		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6821			 " attempting to use the VF device.\n");
6822	}
6823	return igb_set_vf_mac(adapter, vf, mac);
6824}
6825
6826static int igb_link_mbps(int internal_link_speed)
6827{
6828	switch (internal_link_speed) {
6829	case SPEED_100:
6830		return 100;
6831	case SPEED_1000:
6832		return 1000;
6833	default:
6834		return 0;
6835	}
6836}
6837
6838static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6839				  int link_speed)
6840{
6841	int rf_dec, rf_int;
6842	u32 bcnrc_val;
6843
6844	if (tx_rate != 0) {
6845		/* Calculate the rate factor values to set */
6846		rf_int = link_speed / tx_rate;
6847		rf_dec = (link_speed - (rf_int * tx_rate));
6848		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6849
6850		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6851		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6852		               E1000_RTTBCNRC_RF_INT_MASK);
6853		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6854	} else {
6855		bcnrc_val = 0;
6856	}
6857
6858	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6859	wr32(E1000_RTTBCNRC, bcnrc_val);
6860}
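
/*
 * Worked example (assuming E1000_RTTBCNRC_RF_INT_SHIFT is 14): limiting
 * a VF to 300 Mbps on a 1000 Mbps link needs rate factor 1000/300:
 *
 *	rf_int = 1000 / 300 = 3
 *	rf_dec = ((1000 - 3 * 300) << 14) / 300 = 5461
 *
 * i.e. the fixed-point value 3 + 5461/16384 ~= 3.333, programmed into
 * RTTBCNRC together with the rate-scheduler enable bit.
 */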
6861
6862static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6863{
6864	int actual_link_speed, i;
6865	bool reset_rate = false;
6866
6867	/* VF TX rate limit was not set or not supported */
6868	if ((adapter->vf_rate_link_speed == 0) ||
6869	    (adapter->hw.mac.type != e1000_82576))
6870		return;
6871
6872	actual_link_speed = igb_link_mbps(adapter->link_speed);
6873	if (actual_link_speed != adapter->vf_rate_link_speed) {
6874		reset_rate = true;
6875		adapter->vf_rate_link_speed = 0;
6876		dev_info(&adapter->pdev->dev,
6877		         "Link speed has been changed. VF Transmit "
6878		         "rate is disabled\n");
6879	}
6880
6881	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6882		if (reset_rate)
6883			adapter->vf_data[i].tx_rate = 0;
6884
6885		igb_set_vf_rate_limit(&adapter->hw, i,
6886		                      adapter->vf_data[i].tx_rate,
6887		                      actual_link_speed);
6888	}
6889}
6890
6891static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6892{
6893	struct igb_adapter *adapter = netdev_priv(netdev);
6894	struct e1000_hw *hw = &adapter->hw;
6895	int actual_link_speed;
6896
6897	if (hw->mac.type != e1000_82576)
6898		return -EOPNOTSUPP;
6899
6900	actual_link_speed = igb_link_mbps(adapter->link_speed);
6901	if ((vf >= adapter->vfs_allocated_count) ||
6902	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6903	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6904		return -EINVAL;
6905
6906	adapter->vf_rate_link_speed = actual_link_speed;
6907	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6908	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6909
6910	return 0;
6911}
6912
6913static int igb_ndo_get_vf_config(struct net_device *netdev,
6914				 int vf, struct ifla_vf_info *ivi)
6915{
6916	struct igb_adapter *adapter = netdev_priv(netdev);
6917	if (vf >= adapter->vfs_allocated_count)
6918		return -EINVAL;
6919	ivi->vf = vf;
6920	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6921	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6922	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6923	ivi->qos = adapter->vf_data[vf].pf_qos;
6924	return 0;
6925}
6926
6927static void igb_vmm_control(struct igb_adapter *adapter)
6928{
6929	struct e1000_hw *hw = &adapter->hw;
6930	u32 reg;
6931
6932	switch (hw->mac.type) {
6933	case e1000_82575:
6934	default:
6935		/* replication is not supported for 82575 */
6936		return;
6937	case e1000_82576:
6938		/* notify HW that the MAC is adding vlan tags */
6939		reg = rd32(E1000_DTXCTL);
6940		reg |= E1000_DTXCTL_VLAN_ADDED;
6941		wr32(E1000_DTXCTL, reg);
		/* fall through */
6942	case e1000_82580:
6943		/* enable replication vlan tag stripping */
6944		reg = rd32(E1000_RPLOLR);
6945		reg |= E1000_RPLOLR_STRVLAN;
6946		wr32(E1000_RPLOLR, reg);
		/* fall through */
6947	case e1000_i350:
6948		/* none of the above registers are supported by i350 */
6949		break;
6950	}
6951
6952	if (adapter->vfs_allocated_count) {
6953		igb_vmdq_set_loopback_pf(hw, true);
6954		igb_vmdq_set_replication_pf(hw, true);
6955		igb_vmdq_set_anti_spoofing_pf(hw, true,
6956						adapter->vfs_allocated_count);
6957	} else {
6958		igb_vmdq_set_loopback_pf(hw, false);
6959		igb_vmdq_set_replication_pf(hw, false);
6960	}
6961}
6962
6963/* igb_main.c */
6964