igb_main.c revision 11ba69e876e1141fa4b11a7c0efb256a8df9ae7d
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2011 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/bitops.h>
32#include <linux/vmalloc.h>
33#include <linux/pagemap.h>
34#include <linux/netdevice.h>
35#include <linux/ipv6.h>
36#include <linux/slab.h>
37#include <net/checksum.h>
38#include <net/ip6_checksum.h>
39#include <linux/net_tstamp.h>
40#include <linux/mii.h>
41#include <linux/ethtool.h>
42#include <linux/if.h>
43#include <linux/if_vlan.h>
44#include <linux/pci.h>
45#include <linux/pci-aspm.h>
46#include <linux/delay.h>
47#include <linux/interrupt.h>
48#include <linux/ip.h>
49#include <linux/tcp.h>
50#include <linux/sctp.h>
51#include <linux/if_ether.h>
52#include <linux/aer.h>
53#include <linux/prefetch.h>
54#ifdef CONFIG_IGB_DCA
55#include <linux/dca.h>
56#endif
57#include "igb.h"
58
59#define MAJ 3
60#define MIN 2
61#define BUILD 10
62#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63__stringify(BUILD) "-k"
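/* e.g. with MAJ = 3, MIN = 2 and BUILD = 10 above, DRV_VERSION expands to "3.2.10-k" */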
64char igb_driver_name[] = "igb";
65char igb_driver_version[] = DRV_VERSION;
66static const char igb_driver_string[] =
67				"Intel(R) Gigabit Ethernet Network Driver";
68static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70static const struct e1000_info *igb_info_tbl[] = {
71	[board_82575] = &e1000_82575_info,
72};
73
74static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100	/* required last entry */
101	{0, }
102};
103
104MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106void igb_reset(struct igb_adapter *);
107static int igb_setup_all_tx_resources(struct igb_adapter *);
108static int igb_setup_all_rx_resources(struct igb_adapter *);
109static void igb_free_all_tx_resources(struct igb_adapter *);
110static void igb_free_all_rx_resources(struct igb_adapter *);
111static void igb_setup_mrqc(struct igb_adapter *);
112static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113static void __devexit igb_remove(struct pci_dev *pdev);
114static void igb_init_hw_timer(struct igb_adapter *adapter);
115static int igb_sw_init(struct igb_adapter *);
116static int igb_open(struct net_device *);
117static int igb_close(struct net_device *);
118static void igb_configure_tx(struct igb_adapter *);
119static void igb_configure_rx(struct igb_adapter *);
120static void igb_clean_all_tx_rings(struct igb_adapter *);
121static void igb_clean_all_rx_rings(struct igb_adapter *);
122static void igb_clean_tx_ring(struct igb_ring *);
123static void igb_clean_rx_ring(struct igb_ring *);
124static void igb_set_rx_mode(struct net_device *);
125static void igb_update_phy_info(unsigned long);
126static void igb_watchdog(unsigned long);
127static void igb_watchdog_task(struct work_struct *);
128static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130						 struct rtnl_link_stats64 *stats);
131static int igb_change_mtu(struct net_device *, int);
132static int igb_set_mac(struct net_device *, void *);
133static void igb_set_uta(struct igb_adapter *adapter);
134static irqreturn_t igb_intr(int irq, void *);
135static irqreturn_t igb_intr_msi(int irq, void *);
136static irqreturn_t igb_msix_other(int irq, void *);
137static irqreturn_t igb_msix_ring(int irq, void *);
138#ifdef CONFIG_IGB_DCA
139static void igb_update_dca(struct igb_q_vector *);
140static void igb_setup_dca(struct igb_adapter *);
141#endif /* CONFIG_IGB_DCA */
142static int igb_poll(struct napi_struct *, int);
143static bool igb_clean_tx_irq(struct igb_q_vector *);
144static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146static void igb_tx_timeout(struct net_device *);
147static void igb_reset_task(struct work_struct *);
148static void igb_vlan_mode(struct net_device *netdev, u32 features);
149static void igb_vlan_rx_add_vid(struct net_device *, u16);
150static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151static void igb_restore_vlan(struct igb_adapter *);
152static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
153static void igb_ping_all_vfs(struct igb_adapter *);
154static void igb_msg_task(struct igb_adapter *);
155static void igb_vmm_control(struct igb_adapter *);
156static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160			       int vf, u16 vlan, u8 qos);
161static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163				 struct ifla_vf_info *ivi);
164static void igb_check_vf_rate_limit(struct igb_adapter *);
165
166#ifdef CONFIG_PM
167static int igb_suspend(struct pci_dev *, pm_message_t);
168static int igb_resume(struct pci_dev *);
169#endif
170static void igb_shutdown(struct pci_dev *);
171#ifdef CONFIG_IGB_DCA
172static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
173static struct notifier_block dca_notifier = {
174	.notifier_call	= igb_notify_dca,
175	.next		= NULL,
176	.priority	= 0
177};
178#endif
179#ifdef CONFIG_NET_POLL_CONTROLLER
180/* for netdump / net console */
181static void igb_netpoll(struct net_device *);
182#endif
183#ifdef CONFIG_PCI_IOV
184static unsigned int max_vfs = 0;
185module_param(max_vfs, uint, 0);
186MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
187                 "per physical function");
188#endif /* CONFIG_PCI_IOV */
189
190static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191		     pci_channel_state_t);
192static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193static void igb_io_resume(struct pci_dev *);
194
195static struct pci_error_handlers igb_err_handler = {
196	.error_detected = igb_io_error_detected,
197	.slot_reset = igb_io_slot_reset,
198	.resume = igb_io_resume,
199};
200
201
202static struct pci_driver igb_driver = {
203	.name     = igb_driver_name,
204	.id_table = igb_pci_tbl,
205	.probe    = igb_probe,
206	.remove   = __devexit_p(igb_remove),
207#ifdef CONFIG_PM
208	/* Power Management Hooks */
209	.suspend  = igb_suspend,
210	.resume   = igb_resume,
211#endif
212	.shutdown = igb_shutdown,
213	.err_handler = &igb_err_handler
214};
215
216MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218MODULE_LICENSE("GPL");
219MODULE_VERSION(DRV_VERSION);
220
221struct igb_reg_info {
222	u32 ofs;
223	char *name;
224};
225
226static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228	/* General Registers */
229	{E1000_CTRL, "CTRL"},
230	{E1000_STATUS, "STATUS"},
231	{E1000_CTRL_EXT, "CTRL_EXT"},
232
233	/* Interrupt Registers */
234	{E1000_ICR, "ICR"},
235
236	/* RX Registers */
237	{E1000_RCTL, "RCTL"},
238	{E1000_RDLEN(0), "RDLEN"},
239	{E1000_RDH(0), "RDH"},
240	{E1000_RDT(0), "RDT"},
241	{E1000_RXDCTL(0), "RXDCTL"},
242	{E1000_RDBAL(0), "RDBAL"},
243	{E1000_RDBAH(0), "RDBAH"},
244
245	/* TX Registers */
246	{E1000_TCTL, "TCTL"},
247	{E1000_TDBAL(0), "TDBAL"},
248	{E1000_TDBAH(0), "TDBAH"},
249	{E1000_TDLEN(0), "TDLEN"},
250	{E1000_TDH(0), "TDH"},
251	{E1000_TDT(0), "TDT"},
252	{E1000_TXDCTL(0), "TXDCTL"},
253	{E1000_TDFH, "TDFH"},
254	{E1000_TDFT, "TDFT"},
255	{E1000_TDFHS, "TDFHS"},
256	{E1000_TDFPC, "TDFPC"},
257
258	/* List Terminator */
259	{}
260};
261
262/*
263 * igb_regdump - register printout routine
264 */
265static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266{
267	int n = 0;
268	char rname[16];
269	u32 regs[8];
270
271	switch (reginfo->ofs) {
272	case E1000_RDLEN(0):
273		for (n = 0; n < 4; n++)
274			regs[n] = rd32(E1000_RDLEN(n));
275		break;
276	case E1000_RDH(0):
277		for (n = 0; n < 4; n++)
278			regs[n] = rd32(E1000_RDH(n));
279		break;
280	case E1000_RDT(0):
281		for (n = 0; n < 4; n++)
282			regs[n] = rd32(E1000_RDT(n));
283		break;
284	case E1000_RXDCTL(0):
285		for (n = 0; n < 4; n++)
286			regs[n] = rd32(E1000_RXDCTL(n));
287		break;
288	case E1000_RDBAL(0):
289		for (n = 0; n < 4; n++)
290			regs[n] = rd32(E1000_RDBAL(n));
291		break;
292	case E1000_RDBAH(0):
293		for (n = 0; n < 4; n++)
294			regs[n] = rd32(E1000_RDBAH(n));
295		break;
296	case E1000_TDBAL(0):
297		for (n = 0; n < 4; n++)
298			regs[n] = rd32(E1000_TDBAL(n));
299		break;
300	case E1000_TDBAH(0):
301		for (n = 0; n < 4; n++)
302			regs[n] = rd32(E1000_TDBAH(n));
303		break;
304	case E1000_TDLEN(0):
305		for (n = 0; n < 4; n++)
306			regs[n] = rd32(E1000_TDLEN(n));
307		break;
308	case E1000_TDH(0):
309		for (n = 0; n < 4; n++)
310			regs[n] = rd32(E1000_TDH(n));
311		break;
312	case E1000_TDT(0):
313		for (n = 0; n < 4; n++)
314			regs[n] = rd32(E1000_TDT(n));
315		break;
316	case E1000_TXDCTL(0):
317		for (n = 0; n < 4; n++)
318			regs[n] = rd32(E1000_TXDCTL(n));
319		break;
320	default:
321		printk(KERN_INFO "%-15s %08x\n",
322			reginfo->name, rd32(reginfo->ofs));
323		return;
324	}
325
326	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327	printk(KERN_INFO "%-15s ", rname);
328	for (n = 0; n < 4; n++)
329		printk(KERN_CONT "%08x ", regs[n]);
330	printk(KERN_CONT "\n");
331}
332
333/*
334 * igb_dump - Print registers, tx-rings and rx-rings
335 */
336static void igb_dump(struct igb_adapter *adapter)
337{
338	struct net_device *netdev = adapter->netdev;
339	struct e1000_hw *hw = &adapter->hw;
340	struct igb_reg_info *reginfo;
341	struct igb_ring *tx_ring;
342	union e1000_adv_tx_desc *tx_desc;
343	struct my_u0 { u64 a; u64 b; } *u0;
344	struct igb_ring *rx_ring;
345	union e1000_adv_rx_desc *rx_desc;
346	u32 staterr;
347	u16 i, n;
348
349	if (!netif_msg_hw(adapter))
350		return;
351
352	/* Print netdevice Info */
353	if (netdev) {
354		dev_info(&adapter->pdev->dev, "Net device Info\n");
355		printk(KERN_INFO "Device Name     state            "
356			"trans_start      last_rx\n");
357		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
358		netdev->name,
359		netdev->state,
360		netdev->trans_start,
361		netdev->last_rx);
362	}
363
364	/* Print Registers */
365	dev_info(&adapter->pdev->dev, "Register Dump\n");
366	printk(KERN_INFO " Register Name   Value\n");
367	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
368	     reginfo->name; reginfo++) {
369		igb_regdump(hw, reginfo);
370	}
371
372	/* Print TX Ring Summary */
373	if (!netdev || !netif_running(netdev))
374		goto exit;
375
376	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
377	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
378		" leng ntw timestamp\n");
379	for (n = 0; n < adapter->num_tx_queues; n++) {
380		struct igb_tx_buffer *buffer_info;
381		tx_ring = adapter->tx_ring[n];
382		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
383		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
384			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
385			   (u64)buffer_info->dma,
386			   buffer_info->length,
387			   buffer_info->next_to_watch,
388			   (u64)buffer_info->time_stamp);
389	}
390
391	/* Print TX Rings */
392	if (!netif_msg_tx_done(adapter))
393		goto rx_ring_summary;
394
395	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
396
397	/* Transmit Descriptor Formats
398	 *
399	 * Advanced Transmit Descriptor
400	 *   +--------------------------------------------------------------+
401	 * 0 |         Buffer Address [63:0]                                |
402	 *   +--------------------------------------------------------------+
403	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
404	 *   +--------------------------------------------------------------+
405	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
406	 */
407
408	for (n = 0; n < adapter->num_tx_queues; n++) {
409		tx_ring = adapter->tx_ring[n];
410		printk(KERN_INFO "------------------------------------\n");
411		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
412		printk(KERN_INFO "------------------------------------\n");
413		printk(KERN_INFO "T [desc]     [address 63:0  ] "
414			"[PlPOCIStDDM Ln] [bi->dma       ] "
415			"leng  ntw timestamp        bi->skb\n");
416
417		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
418			struct igb_tx_buffer *buffer_info;
419			tx_desc = IGB_TX_DESC(tx_ring, i);
420			buffer_info = &tx_ring->tx_buffer_info[i];
421			u0 = (struct my_u0 *)tx_desc;
422			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
423				" %04X  %p %016llX %p", i,
424				le64_to_cpu(u0->a),
425				le64_to_cpu(u0->b),
426				(u64)buffer_info->dma,
427				buffer_info->length,
428				buffer_info->next_to_watch,
429				(u64)buffer_info->time_stamp,
430				buffer_info->skb);
431			if (i == tx_ring->next_to_use &&
432				i == tx_ring->next_to_clean)
433				printk(KERN_CONT " NTC/U\n");
434			else if (i == tx_ring->next_to_use)
435				printk(KERN_CONT " NTU\n");
436			else if (i == tx_ring->next_to_clean)
437				printk(KERN_CONT " NTC\n");
438			else
439				printk(KERN_CONT "\n");
440
441			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
442				print_hex_dump(KERN_INFO, "",
443					DUMP_PREFIX_ADDRESS,
444					16, 1, phys_to_virt(buffer_info->dma),
445					buffer_info->length, true);
446		}
447	}
448
449	/* Print RX Rings Summary */
450rx_ring_summary:
451	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
452	printk(KERN_INFO "Queue [NTU] [NTC]\n");
453	for (n = 0; n < adapter->num_rx_queues; n++) {
454		rx_ring = adapter->rx_ring[n];
455		printk(KERN_INFO " %5d %5X %5X\n", n,
456			   rx_ring->next_to_use, rx_ring->next_to_clean);
457	}
458
459	/* Print RX Rings */
460	if (!netif_msg_rx_status(adapter))
461		goto exit;
462
463	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
464
465	/* Advanced Receive Descriptor (Read) Format
466	 *    63                                           1        0
467	 *    +-----------------------------------------------------+
468	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
469	 *    +----------------------------------------------+------+
470	 *  8 |       Header Buffer Address [63:1]           |  DD  |
471	 *    +-----------------------------------------------------+
472	 *
473	 *
474	 * Advanced Receive Descriptor (Write-Back) Format
475	 *
476	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
477	 *   +------------------------------------------------------+
478	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
479	 *   | Checksum   Ident  |   |           |    | Type | Type |
480	 *   +------------------------------------------------------+
481	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
482	 *   +------------------------------------------------------+
483	 *   63       48 47    32 31            20 19               0
484	 */
485
486	for (n = 0; n < adapter->num_rx_queues; n++) {
487		rx_ring = adapter->rx_ring[n];
488		printk(KERN_INFO "------------------------------------\n");
489		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
490		printk(KERN_INFO "------------------------------------\n");
491		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
492			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
493			"<-- Adv Rx Read format\n");
494		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
495			"[vl er S cks ln] ---------------- [bi->skb] "
496			"<-- Adv Rx Write-Back format\n");
497
498		for (i = 0; i < rx_ring->count; i++) {
499			struct igb_rx_buffer *buffer_info;
500			buffer_info = &rx_ring->rx_buffer_info[i];
501			rx_desc = IGB_RX_DESC(rx_ring, i);
502			u0 = (struct my_u0 *)rx_desc;
503			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
504			if (staterr & E1000_RXD_STAT_DD) {
505				/* Descriptor Done */
506				printk(KERN_INFO "RWB[0x%03X]     %016llX "
507					"%016llX ---------------- %p", i,
508					le64_to_cpu(u0->a),
509					le64_to_cpu(u0->b),
510					buffer_info->skb);
511			} else {
512				printk(KERN_INFO "R  [0x%03X]     %016llX "
513					"%016llX %016llX %p", i,
514					le64_to_cpu(u0->a),
515					le64_to_cpu(u0->b),
516					(u64)buffer_info->dma,
517					buffer_info->skb);
518
519				if (netif_msg_pktdata(adapter)) {
520					print_hex_dump(KERN_INFO, "",
521						DUMP_PREFIX_ADDRESS,
522						16, 1,
523						phys_to_virt(buffer_info->dma),
524						IGB_RX_HDR_LEN, true);
525					print_hex_dump(KERN_INFO, "",
526					  DUMP_PREFIX_ADDRESS,
527					  16, 1,
528					  phys_to_virt(
529					    buffer_info->page_dma +
530					    buffer_info->page_offset),
531					  PAGE_SIZE/2, true);
532				}
533			}
534
535			if (i == rx_ring->next_to_use)
536				printk(KERN_CONT " NTU\n");
537			else if (i == rx_ring->next_to_clean)
538				printk(KERN_CONT " NTC\n");
539			else
540				printk(KERN_CONT "\n");
541
542		}
543	}
544
545exit:
546	return;
547}
548
549
550/**
551 * igb_read_clock - read raw cycle counter (to be used by time counter)
552 */
553static cycle_t igb_read_clock(const struct cyclecounter *tc)
554{
555	struct igb_adapter *adapter =
556		container_of(tc, struct igb_adapter, cycles);
557	struct e1000_hw *hw = &adapter->hw;
558	u64 stamp = 0;
559	int shift = 0;
560
561	/*
562	 * The timestamp latches on lowest register read. For the 82580
563	 * the lowest register is SYSTIMR instead of SYSTIML.  However, since we
564	 * never adjusted TIMINCA, SYSTIMR just reads as all 0s, so ignore it.
565	 */
566	if (hw->mac.type >= e1000_82580) {
567		stamp = rd32(E1000_SYSTIMR) >> 8;
568		shift = IGB_82580_TSYNC_SHIFT;
569	}
570
571	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
572	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
573	return stamp;
574}
575
576/**
577 * igb_get_hw_dev - return device
578 * used by hardware layer to print debugging information
579 **/
580struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
581{
582	struct igb_adapter *adapter = hw->back;
583	return adapter->netdev;
584}
585
586/**
587 * igb_init_module - Driver Registration Routine
588 *
589 * igb_init_module is the first routine called when the driver is
590 * loaded. All it does is register with the PCI subsystem.
591 **/
592static int __init igb_init_module(void)
593{
594	int ret;
595	printk(KERN_INFO "%s - version %s\n",
596	       igb_driver_string, igb_driver_version);
597
598	printk(KERN_INFO "%s\n", igb_copyright);
599
600#ifdef CONFIG_IGB_DCA
601	dca_register_notify(&dca_notifier);
602#endif
603	ret = pci_register_driver(&igb_driver);
604	return ret;
605}
606
607module_init(igb_init_module);
608
609/**
610 * igb_exit_module - Driver Exit Cleanup Routine
611 *
612 * igb_exit_module is called just before the driver is removed
613 * from memory.
614 **/
615static void __exit igb_exit_module(void)
616{
617#ifdef CONFIG_IGB_DCA
618	dca_unregister_notify(&dca_notifier);
619#endif
620	pci_unregister_driver(&igb_driver);
621}
622
623module_exit(igb_exit_module);
624
625#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
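/*
 * Q_IDX_82576(i) interleaves indices as 0, 8, 1, 9, 2, 10, ... for
 * i = 0, 1, 2, 3, 4, 5, matching the VF queue pairing described in
 * igb_cache_ring_register() below (VF n owns queues n and n + 8).
 */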
626/**
627 * igb_cache_ring_register - Descriptor ring to register mapping
628 * @adapter: board private structure to initialize
629 *
630 * Once we know the feature-set enabled for the device, we'll cache
631 * the register offset the descriptor ring is assigned to.
632 **/
633static void igb_cache_ring_register(struct igb_adapter *adapter)
634{
635	int i = 0, j = 0;
636	u32 rbase_offset = adapter->vfs_allocated_count;
637
638	switch (adapter->hw.mac.type) {
639	case e1000_82576:
640		/* The queues are allocated for virtualization such that VF 0
641		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
642		 * In order to avoid collision we start at the first free queue
643		 * and continue consuming queues in the same sequence
644		 */
645		if (adapter->vfs_allocated_count) {
646			for (; i < adapter->rss_queues; i++)
647				adapter->rx_ring[i]->reg_idx = rbase_offset +
648				                               Q_IDX_82576(i);
649		}
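		/* Fall through - remaining queues get consecutive indices */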
650	case e1000_82575:
651	case e1000_82580:
652	case e1000_i350:
653	default:
654		for (; i < adapter->num_rx_queues; i++)
655			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
656		for (; j < adapter->num_tx_queues; j++)
657			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
658		break;
659	}
660}
661
662static void igb_free_queues(struct igb_adapter *adapter)
663{
664	int i;
665
666	for (i = 0; i < adapter->num_tx_queues; i++) {
667		kfree(adapter->tx_ring[i]);
668		adapter->tx_ring[i] = NULL;
669	}
670	for (i = 0; i < adapter->num_rx_queues; i++) {
671		kfree(adapter->rx_ring[i]);
672		adapter->rx_ring[i] = NULL;
673	}
674	adapter->num_rx_queues = 0;
675	adapter->num_tx_queues = 0;
676}
677
678/**
679 * igb_alloc_queues - Allocate memory for all rings
680 * @adapter: board private structure to initialize
681 *
682 * We allocate one ring per queue at run-time since we don't know the
683 * number of queues at compile-time.
684 **/
685static int igb_alloc_queues(struct igb_adapter *adapter)
686{
687	struct igb_ring *ring;
688	int i;
689	int orig_node = adapter->node;
690
691	for (i = 0; i < adapter->num_tx_queues; i++) {
692		if (orig_node == -1) {
693			int cur_node = next_online_node(adapter->node);
694			if (cur_node == MAX_NUMNODES)
695				cur_node = first_online_node;
696			adapter->node = cur_node;
697		}
698		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699				    adapter->node);
700		if (!ring)
701			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702		if (!ring)
703			goto err;
704		ring->count = adapter->tx_ring_count;
705		ring->queue_index = i;
706		ring->dev = &adapter->pdev->dev;
707		ring->netdev = adapter->netdev;
708		ring->numa_node = adapter->node;
709		/* For 82575, context index must be unique per ring. */
710		if (adapter->hw.mac.type == e1000_82575)
711			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
712		adapter->tx_ring[i] = ring;
713	}
714	/* Restore the adapter's original node */
715	adapter->node = orig_node;
716
717	for (i = 0; i < adapter->num_rx_queues; i++) {
718		if (orig_node == -1) {
719			int cur_node = next_online_node(adapter->node);
720			if (cur_node == MAX_NUMNODES)
721				cur_node = first_online_node;
722			adapter->node = cur_node;
723		}
724		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725				    adapter->node);
726		if (!ring)
727			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728		if (!ring)
729			goto err;
730		ring->count = adapter->rx_ring_count;
731		ring->queue_index = i;
732		ring->dev = &adapter->pdev->dev;
733		ring->netdev = adapter->netdev;
734		ring->numa_node = adapter->node;
735		/* set flag indicating ring supports SCTP checksum offload */
736		if (adapter->hw.mac.type >= e1000_82576)
737			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
738
739		/* On i350, loopback VLAN packets have the tag byte-swapped. */
740		if (adapter->hw.mac.type == e1000_i350)
741			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
742
743		adapter->rx_ring[i] = ring;
744	}
745	/* Restore the adapter's original node */
746	adapter->node = orig_node;
747
748	igb_cache_ring_register(adapter);
749
750	return 0;
751
752err:
753	/* Restore the adapter's original node */
754	adapter->node = orig_node;
755	igb_free_queues(adapter);
756
757	return -ENOMEM;
758}
759
760/**
761 *  igb_write_ivar - configure ivar for given MSI-X vector
762 *  @hw: pointer to the HW structure
763 *  @msix_vector: vector number we are allocating to a given ring
764 *  @index: row index of IVAR register to write within IVAR table
765  *  @offset: column offset within IVAR, should be a multiple of 8
766 *
767 *  This function is intended to handle the writing of the IVAR register
768 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
769  *  each containing a cause allocation for an Rx and Tx ring, and a
770 *  variable number of rows depending on the number of queues supported.
771 **/
772static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
773			   int index, int offset)
774{
775	u32 ivar = array_rd32(E1000_IVAR0, index);
776
777	/* clear any bits that are currently set */
778	ivar &= ~((u32)0xFF << offset);
779
780	/* write vector and valid bit */
781	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
782
783	array_wr32(E1000_IVAR0, index, ivar);
784}
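/*
 * For example, igb_write_ivar(hw, 5, 2, 8) clears bits 15:8 of IVAR
 * register 2 and writes (5 | E1000_IVAR_VALID) into that byte field.
 */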
785
786#define IGB_N0_QUEUE -1
787static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
788{
789	struct igb_adapter *adapter = q_vector->adapter;
790	struct e1000_hw *hw = &adapter->hw;
791	int rx_queue = IGB_N0_QUEUE;
792	int tx_queue = IGB_N0_QUEUE;
793	u32 msixbm = 0;
794
795	if (q_vector->rx.ring)
796		rx_queue = q_vector->rx.ring->reg_idx;
797	if (q_vector->tx.ring)
798		tx_queue = q_vector->tx.ring->reg_idx;
799
800	switch (hw->mac.type) {
801	case e1000_82575:
802		/* The 82575 assigns vectors using a bitmask, which matches the
803		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
804		   or more queues to a vector, we write the appropriate bits
805		   into the MSIXBM register for that vector. */
806		if (rx_queue > IGB_N0_QUEUE)
807			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
808		if (tx_queue > IGB_N0_QUEUE)
809			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
810		if (!adapter->msix_entries && msix_vector == 0)
811			msixbm |= E1000_EIMS_OTHER;
812		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
813		q_vector->eims_value = msixbm;
814		break;
815	case e1000_82576:
816		/*
817		 * 82576 uses a table that essentially consists of 2 columns
818		 * with 8 rows.  The ordering is column-major so we use the
819		 * lower 3 bits as the row index, and the 4th bit as the
820		 * column offset.
821		 */
822		if (rx_queue > IGB_N0_QUEUE)
823			igb_write_ivar(hw, msix_vector,
824				       rx_queue & 0x7,
825				       (rx_queue & 0x8) << 1);
826		if (tx_queue > IGB_N0_QUEUE)
827			igb_write_ivar(hw, msix_vector,
828				       tx_queue & 0x7,
829				       ((tx_queue & 0x8) << 1) + 8);
830		q_vector->eims_value = 1 << msix_vector;
831		break;
832	case e1000_82580:
833	case e1000_i350:
834		/*
835		 * On 82580 and newer adapters the scheme is similar to 82576
836		 * however instead of ordering column-major we have things
837		 * ordered row-major.  So we traverse the table by using
838		 * bit 0 as the column offset, and the remaining bits as the
839		 * row index.
840		 */
841		if (rx_queue > IGB_N0_QUEUE)
842			igb_write_ivar(hw, msix_vector,
843				       rx_queue >> 1,
844				       (rx_queue & 0x1) << 4);
845		if (tx_queue > IGB_N0_QUEUE)
846			igb_write_ivar(hw, msix_vector,
847				       tx_queue >> 1,
848				       ((tx_queue & 0x1) << 4) + 8);
849		q_vector->eims_value = 1 << msix_vector;
850		break;
851	default:
852		BUG();
853		break;
854	}
855
856	/* add q_vector eims value to global eims_enable_mask */
857	adapter->eims_enable_mask |= q_vector->eims_value;
858
859	/* configure q_vector to set itr on first interrupt */
860	q_vector->set_itr = 1;
861}
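/*
 * Worked examples for the IVAR addressing above: on 82576, Rx queue 10
 * uses row 10 & 0x7 = 2 and column offset (10 & 0x8) << 1 = 16; on
 * 82580/i350, Rx queue 5 uses row 5 >> 1 = 2 and column offset
 * (5 & 0x1) << 4 = 16.
 */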
862
863/**
864 * igb_configure_msix - Configure MSI-X hardware
865 *
866 * igb_configure_msix sets up the hardware to properly
867 * generate MSI-X interrupts.
868 **/
869static void igb_configure_msix(struct igb_adapter *adapter)
870{
871	u32 tmp;
872	int i, vector = 0;
873	struct e1000_hw *hw = &adapter->hw;
874
875	adapter->eims_enable_mask = 0;
876
877	/* set vector for other causes, i.e. link changes */
878	switch (hw->mac.type) {
879	case e1000_82575:
880		tmp = rd32(E1000_CTRL_EXT);
881		/* enable MSI-X PBA support*/
882		tmp |= E1000_CTRL_EXT_PBA_CLR;
883
884		/* Auto-Mask interrupts upon ICR read. */
885		tmp |= E1000_CTRL_EXT_EIAME;
886		tmp |= E1000_CTRL_EXT_IRCA;
887
888		wr32(E1000_CTRL_EXT, tmp);
889
890		/* enable msix_other interrupt */
891		array_wr32(E1000_MSIXBM(0), vector++,
892		                      E1000_EIMS_OTHER);
893		adapter->eims_other = E1000_EIMS_OTHER;
894
895		break;
896
897	case e1000_82576:
898	case e1000_82580:
899	case e1000_i350:
900		/* Turn on MSI-X capability first, or our settings
901		 * won't stick.  And it will take days to debug. */
902		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
903		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
904		                E1000_GPIE_NSICR);
905
906		/* enable msix_other interrupt */
907		adapter->eims_other = 1 << vector;
908		tmp = (vector++ | E1000_IVAR_VALID) << 8;
909
910		wr32(E1000_IVAR_MISC, tmp);
911		break;
912	default:
913		/* do nothing, since nothing else supports MSI-X */
914		break;
915	} /* switch (hw->mac.type) */
916
917	adapter->eims_enable_mask |= adapter->eims_other;
918
919	for (i = 0; i < adapter->num_q_vectors; i++)
920		igb_assign_vector(adapter->q_vector[i], vector++);
921
922	wrfl();
923}
924
925/**
926 * igb_request_msix - Initialize MSI-X interrupts
927 *
928 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
929 * kernel.
930 **/
931static int igb_request_msix(struct igb_adapter *adapter)
932{
933	struct net_device *netdev = adapter->netdev;
934	struct e1000_hw *hw = &adapter->hw;
935	int i, err = 0, vector = 0;
936
937	err = request_irq(adapter->msix_entries[vector].vector,
938	                  igb_msix_other, 0, netdev->name, adapter);
939	if (err)
940		goto out;
941	vector++;
942
943	for (i = 0; i < adapter->num_q_vectors; i++) {
944		struct igb_q_vector *q_vector = adapter->q_vector[i];
945
946		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
947
948		if (q_vector->rx.ring && q_vector->tx.ring)
949			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
950				q_vector->rx.ring->queue_index);
951		else if (q_vector->tx.ring)
952			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
953				q_vector->tx.ring->queue_index);
954		else if (q_vector->rx.ring)
955			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
956				q_vector->rx.ring->queue_index);
957		else
958			sprintf(q_vector->name, "%s-unused", netdev->name);
959
960		err = request_irq(adapter->msix_entries[vector].vector,
961		                  igb_msix_ring, 0, q_vector->name,
962		                  q_vector);
963		if (err)
964			goto out;
965		vector++;
966	}
967
968	igb_configure_msix(adapter);
969	return 0;
970out:
971	return err;
972}
973
974static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
975{
976	if (adapter->msix_entries) {
977		pci_disable_msix(adapter->pdev);
978		kfree(adapter->msix_entries);
979		adapter->msix_entries = NULL;
980	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
981		pci_disable_msi(adapter->pdev);
982	}
983}
984
985/**
986 * igb_free_q_vectors - Free memory allocated for interrupt vectors
987 * @adapter: board private structure to initialize
988 *
989 * This function frees the memory allocated to the q_vectors.  In addition if
990 * NAPI is enabled it will delete any references to the NAPI struct prior
991 * to freeing the q_vector.
992 **/
993static void igb_free_q_vectors(struct igb_adapter *adapter)
994{
995	int v_idx;
996
997	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
998		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
999		adapter->q_vector[v_idx] = NULL;
1000		if (!q_vector)
1001			continue;
1002		netif_napi_del(&q_vector->napi);
1003		kfree(q_vector);
1004	}
1005	adapter->num_q_vectors = 0;
1006}
1007
1008/**
1009 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1010 *
1011 * This function resets the device so that it has 0 rx queues, tx queues, and
1012 * MSI-X interrupts allocated.
1013 */
1014static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1015{
1016	igb_free_queues(adapter);
1017	igb_free_q_vectors(adapter);
1018	igb_reset_interrupt_capability(adapter);
1019}
1020
1021/**
1022 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1023 *
1024 * Attempt to configure interrupts using the best available
1025 * capabilities of the hardware and kernel.
1026 **/
1027static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1028{
1029	int err;
1030	int numvecs, i;
1031
1032	/* Number of supported queues. */
1033	adapter->num_rx_queues = adapter->rss_queues;
1034	if (adapter->vfs_allocated_count)
1035		adapter->num_tx_queues = 1;
1036	else
1037		adapter->num_tx_queues = adapter->rss_queues;
1038
1039	/* start with one vector for every rx queue */
1040	numvecs = adapter->num_rx_queues;
1041
1042	/* if tx handler is separate add 1 for every tx queue */
1043	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1044		numvecs += adapter->num_tx_queues;
1045
1046	/* store the number of vectors reserved for queues */
1047	adapter->num_q_vectors = numvecs;
1048
1049	/* add 1 vector for link status interrupts */
1050	numvecs++;
1051	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1052					GFP_KERNEL);
1053	if (!adapter->msix_entries)
1054		goto msi_only;
1055
1056	for (i = 0; i < numvecs; i++)
1057		adapter->msix_entries[i].entry = i;
1058
1059	err = pci_enable_msix(adapter->pdev,
1060			      adapter->msix_entries,
1061			      numvecs);
1062	if (err == 0)
1063		goto out;
1064
1065	igb_reset_interrupt_capability(adapter);
1066
1067	/* If we can't do MSI-X, try MSI */
1068msi_only:
1069#ifdef CONFIG_PCI_IOV
1070	/* disable SR-IOV for non MSI-X configurations */
1071	if (adapter->vf_data) {
1072		struct e1000_hw *hw = &adapter->hw;
1073		/* disable iov and allow time for transactions to clear */
1074		pci_disable_sriov(adapter->pdev);
1075		msleep(500);
1076
1077		kfree(adapter->vf_data);
1078		adapter->vf_data = NULL;
1079		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1080		wrfl();
1081		msleep(100);
1082		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1083	}
1084#endif
1085	adapter->vfs_allocated_count = 0;
1086	adapter->rss_queues = 1;
1087	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1088	adapter->num_rx_queues = 1;
1089	adapter->num_tx_queues = 1;
1090	adapter->num_q_vectors = 1;
1091	if (!pci_enable_msi(adapter->pdev))
1092		adapter->flags |= IGB_FLAG_HAS_MSI;
1093out:
1094	/* Notify the stack of the (possibly) reduced queue counts. */
1095	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1096	return netif_set_real_num_rx_queues(adapter->netdev,
1097					    adapter->num_rx_queues);
1098}
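/*
 * Example vector budget for igb_set_interrupt_capability above: with
 * rss_queues = 4, no VFs and IGB_FLAG_QUEUE_PAIRS clear, numvecs is
 * 4 Rx + 4 Tx + 1 link/other = 9 MSI-X vectors requested.
 */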
1099
1100/**
1101 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1102 * @adapter: board private structure to initialize
1103 *
1104 * We allocate one q_vector per queue interrupt.  If allocation fails we
1105 * return -ENOMEM.
1106 **/
1107static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1108{
1109	struct igb_q_vector *q_vector;
1110	struct e1000_hw *hw = &adapter->hw;
1111	int v_idx;
1112	int orig_node = adapter->node;
1113
1114	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1115		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1116						adapter->num_tx_queues)) &&
1117		    (adapter->num_rx_queues == v_idx))
1118			adapter->node = orig_node;
1119		if (orig_node == -1) {
1120			int cur_node = next_online_node(adapter->node);
1121			if (cur_node == MAX_NUMNODES)
1122				cur_node = first_online_node;
1123			adapter->node = cur_node;
1124		}
1125		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1126					adapter->node);
1127		if (!q_vector)
1128			q_vector = kzalloc(sizeof(struct igb_q_vector),
1129					   GFP_KERNEL);
1130		if (!q_vector)
1131			goto err_out;
1132		q_vector->adapter = adapter;
1133		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1134		q_vector->itr_val = IGB_START_ITR;
1135		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1136		adapter->q_vector[v_idx] = q_vector;
1137	}
1138	/* Restore the adapter's original node */
1139	adapter->node = orig_node;
1140
1141	return 0;
1142
1143err_out:
1144	/* Restore the adapter's original node */
1145	adapter->node = orig_node;
1146	igb_free_q_vectors(adapter);
1147	return -ENOMEM;
1148}
1149
1150static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1151                                      int ring_idx, int v_idx)
1152{
1153	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1154
1155	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1156	q_vector->rx.ring->q_vector = q_vector;
1157	q_vector->rx.count++;
1158	q_vector->itr_val = adapter->rx_itr_setting;
1159	if (q_vector->itr_val && q_vector->itr_val <= 3)
1160		q_vector->itr_val = IGB_START_ITR;
1161}
1162
1163static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1164                                      int ring_idx, int v_idx)
1165{
1166	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1167
1168	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1169	q_vector->tx.ring->q_vector = q_vector;
1170	q_vector->tx.count++;
1171	q_vector->itr_val = adapter->tx_itr_setting;
1172	q_vector->tx.work_limit = adapter->tx_work_limit;
1173	if (q_vector->itr_val && q_vector->itr_val <= 3)
1174		q_vector->itr_val = IGB_START_ITR;
1175}
1176
1177/**
1178 * igb_map_ring_to_vector - maps allocated queues to vectors
1179 *
1180 * This function maps the recently allocated queues to vectors.
1181 **/
1182static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1183{
1184	int i;
1185	int v_idx = 0;
1186
1187	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1188	    (adapter->num_q_vectors < adapter->num_tx_queues))
1189		return -ENOMEM;
1190
1191	if (adapter->num_q_vectors >=
1192	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1193		for (i = 0; i < adapter->num_rx_queues; i++)
1194			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1195		for (i = 0; i < adapter->num_tx_queues; i++)
1196			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1197	} else {
1198		for (i = 0; i < adapter->num_rx_queues; i++) {
1199			if (i < adapter->num_tx_queues)
1200				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1201			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1202		}
1203		for (; i < adapter->num_tx_queues; i++)
1204			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1205	}
1206	return 0;
1207}
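/*
 * Mapping example: with 4 Rx and 4 Tx queues but only 4 q_vectors
 * (queue pairs), vector n services both Tx ring n and Rx ring n; with
 * 8 q_vectors every ring gets a vector of its own.
 */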
1208
1209/**
1210 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1211 *
1212 * This function initializes the interrupts and allocates all of the queues.
1213 **/
1214static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1215{
1216	struct pci_dev *pdev = adapter->pdev;
1217	int err;
1218
1219	err = igb_set_interrupt_capability(adapter);
1220	if (err)
1221		return err;
1222
1223	err = igb_alloc_q_vectors(adapter);
1224	if (err) {
1225		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1226		goto err_alloc_q_vectors;
1227	}
1228
1229	err = igb_alloc_queues(adapter);
1230	if (err) {
1231		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1232		goto err_alloc_queues;
1233	}
1234
1235	err = igb_map_ring_to_vector(adapter);
1236	if (err) {
1237		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1238		goto err_map_queues;
1239	}
1240
1241
1242	return 0;
1243err_map_queues:
1244	igb_free_queues(adapter);
1245err_alloc_queues:
1246	igb_free_q_vectors(adapter);
1247err_alloc_q_vectors:
1248	igb_reset_interrupt_capability(adapter);
1249	return err;
1250}
1251
1252/**
1253 * igb_request_irq - initialize interrupts
1254 *
1255 * Attempts to configure interrupts using the best available
1256 * capabilities of the hardware and kernel.
1257 **/
1258static int igb_request_irq(struct igb_adapter *adapter)
1259{
1260	struct net_device *netdev = adapter->netdev;
1261	struct pci_dev *pdev = adapter->pdev;
1262	int err = 0;
1263
1264	if (adapter->msix_entries) {
1265		err = igb_request_msix(adapter);
1266		if (!err)
1267			goto request_done;
1268		/* fall back to MSI */
1269		igb_clear_interrupt_scheme(adapter);
1270		if (!pci_enable_msi(pdev))
1271			adapter->flags |= IGB_FLAG_HAS_MSI;
1272		igb_free_all_tx_resources(adapter);
1273		igb_free_all_rx_resources(adapter);
1274		adapter->num_tx_queues = 1;
1275		adapter->num_rx_queues = 1;
1276		adapter->num_q_vectors = 1;
1277		err = igb_alloc_q_vectors(adapter);
1278		if (err) {
1279			dev_err(&pdev->dev,
1280			        "Unable to allocate memory for vectors\n");
1281			goto request_done;
1282		}
1283		err = igb_alloc_queues(adapter);
1284		if (err) {
1285			dev_err(&pdev->dev,
1286			        "Unable to allocate memory for queues\n");
1287			igb_free_q_vectors(adapter);
1288			goto request_done;
1289		}
1290		igb_setup_all_tx_resources(adapter);
1291		igb_setup_all_rx_resources(adapter);
1292	}
1293
1294	igb_assign_vector(adapter->q_vector[0], 0);
1295
1296	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1297		err = request_irq(pdev->irq, igb_intr_msi, 0,
1298				  netdev->name, adapter);
1299		if (!err)
1300			goto request_done;
1301
1302		/* fall back to legacy interrupts */
1303		igb_reset_interrupt_capability(adapter);
1304		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1305	}
1306
1307	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1308			  netdev->name, adapter);
1309
1310	if (err)
1311		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1312			err);
1313
1314request_done:
1315	return err;
1316}
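/*
 * Note the fallback order in igb_request_irq above: MSI-X first, then a
 * single MSI vector with the queues collapsed to one, then legacy
 * shared INTx.
 */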
1317
1318static void igb_free_irq(struct igb_adapter *adapter)
1319{
1320	if (adapter->msix_entries) {
1321		int vector = 0, i;
1322
1323		free_irq(adapter->msix_entries[vector++].vector, adapter);
1324
1325		for (i = 0; i < adapter->num_q_vectors; i++)
1326			free_irq(adapter->msix_entries[vector++].vector,
1327				 adapter->q_vector[i]);
1328	} else {
1329		free_irq(adapter->pdev->irq, adapter);
1330	}
1331}
1332
1333/**
1334 * igb_irq_disable - Mask off interrupt generation on the NIC
1335 * @adapter: board private structure
1336 **/
1337static void igb_irq_disable(struct igb_adapter *adapter)
1338{
1339	struct e1000_hw *hw = &adapter->hw;
1340
1341	/*
1342	 * we need to be careful when disabling interrupts.  The VFs are also
1343	 * mapped into these registers, so clearing the bits can cause
1344	 * issues for the VF drivers; we only need to clear the bits we set
1345	 */
1346	if (adapter->msix_entries) {
1347		u32 regval = rd32(E1000_EIAM);
1348		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1349		wr32(E1000_EIMC, adapter->eims_enable_mask);
1350		regval = rd32(E1000_EIAC);
1351		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1352	}
1353
1354	wr32(E1000_IAM, 0);
1355	wr32(E1000_IMC, ~0);
1356	wrfl();
1357	if (adapter->msix_entries) {
1358		int i;
1359		for (i = 0; i < adapter->num_q_vectors; i++)
1360			synchronize_irq(adapter->msix_entries[i].vector);
1361	} else {
1362		synchronize_irq(adapter->pdev->irq);
1363	}
1364}
1365
1366/**
1367 * igb_irq_enable - Enable default interrupt generation settings
1368 * @adapter: board private structure
1369 **/
1370static void igb_irq_enable(struct igb_adapter *adapter)
1371{
1372	struct e1000_hw *hw = &adapter->hw;
1373
1374	if (adapter->msix_entries) {
1375		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1376		u32 regval = rd32(E1000_EIAC);
1377		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1378		regval = rd32(E1000_EIAM);
1379		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1380		wr32(E1000_EIMS, adapter->eims_enable_mask);
1381		if (adapter->vfs_allocated_count) {
1382			wr32(E1000_MBVFIMR, 0xFF);
1383			ims |= E1000_IMS_VMMB;
1384		}
1385		wr32(E1000_IMS, ims);
1386	} else {
1387		wr32(E1000_IMS, IMS_ENABLE_MASK |
1388				E1000_IMS_DRSTA);
1389		wr32(E1000_IAM, IMS_ENABLE_MASK |
1390				E1000_IMS_DRSTA);
1391	}
1392}
1393
1394static void igb_update_mng_vlan(struct igb_adapter *adapter)
1395{
1396	struct e1000_hw *hw = &adapter->hw;
1397	u16 vid = adapter->hw.mng_cookie.vlan_id;
1398	u16 old_vid = adapter->mng_vlan_id;
1399
1400	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1401		/* add VID to filter table */
1402		igb_vfta_set(hw, vid, true);
1403		adapter->mng_vlan_id = vid;
1404	} else {
1405		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1406	}
1407
1408	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1409	    (vid != old_vid) &&
1410	    !test_bit(old_vid, adapter->active_vlans)) {
1411		/* remove VID from filter table */
1412		igb_vfta_set(hw, old_vid, false);
1413	}
1414}
1415
1416/**
1417 * igb_release_hw_control - release control of the h/w to f/w
1418 * @adapter: address of board private structure
1419 *
1420 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1421 * For ASF and Pass Through versions of f/w this means that the
1422 * driver is no longer loaded.
1423 *
1424 **/
1425static void igb_release_hw_control(struct igb_adapter *adapter)
1426{
1427	struct e1000_hw *hw = &adapter->hw;
1428	u32 ctrl_ext;
1429
1430	/* Let firmware take over control of h/w */
1431	ctrl_ext = rd32(E1000_CTRL_EXT);
1432	wr32(E1000_CTRL_EXT,
1433			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1434}
1435
1436/**
1437 * igb_get_hw_control - get control of the h/w from f/w
1438 * @adapter: address of board private structure
1439 *
1440 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1441 * For ASF and Pass Through versions of f/w this means that
1442 * the driver is loaded.
1443 *
1444 **/
1445static void igb_get_hw_control(struct igb_adapter *adapter)
1446{
1447	struct e1000_hw *hw = &adapter->hw;
1448	u32 ctrl_ext;
1449
1450	/* Let firmware know the driver has taken over */
1451	ctrl_ext = rd32(E1000_CTRL_EXT);
1452	wr32(E1000_CTRL_EXT,
1453			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1454}
1455
1456/**
1457 * igb_configure - configure the hardware for RX and TX
1458 * @adapter: private board structure
1459 **/
1460static void igb_configure(struct igb_adapter *adapter)
1461{
1462	struct net_device *netdev = adapter->netdev;
1463	int i;
1464
1465	igb_get_hw_control(adapter);
1466	igb_set_rx_mode(netdev);
1467
1468	igb_restore_vlan(adapter);
1469
1470	igb_setup_tctl(adapter);
1471	igb_setup_mrqc(adapter);
1472	igb_setup_rctl(adapter);
1473
1474	igb_configure_tx(adapter);
1475	igb_configure_rx(adapter);
1476
1477	igb_rx_fifo_flush_82575(&adapter->hw);
1478
1479	/* call igb_desc_unused which always leaves
1480	 * at least 1 descriptor unused to make sure
1481	 * next_to_use != next_to_clean */
1482	for (i = 0; i < adapter->num_rx_queues; i++) {
1483		struct igb_ring *ring = adapter->rx_ring[i];
1484		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1485	}
1486}
1487
1488/**
1489 * igb_power_up_link - Power up the phy/serdes link
1490 * @adapter: address of board private structure
1491 **/
1492void igb_power_up_link(struct igb_adapter *adapter)
1493{
1494	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1495		igb_power_up_phy_copper(&adapter->hw);
1496	else
1497		igb_power_up_serdes_link_82575(&adapter->hw);
1498}
1499
1500/**
1501 * igb_power_down_link - Power down the phy/serdes link
1502 * @adapter: address of board private structure
1503 */
1504static void igb_power_down_link(struct igb_adapter *adapter)
1505{
1506	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1507		igb_power_down_phy_copper_82575(&adapter->hw);
1508	else
1509		igb_shutdown_serdes_link_82575(&adapter->hw);
1510}
1511
1512/**
1513 * igb_up - Open the interface and prepare it to handle traffic
1514 * @adapter: board private structure
1515 **/
1516int igb_up(struct igb_adapter *adapter)
1517{
1518	struct e1000_hw *hw = &adapter->hw;
1519	int i;
1520
1521	/* hardware has been reset, we need to reload some things */
1522	igb_configure(adapter);
1523
1524	clear_bit(__IGB_DOWN, &adapter->state);
1525
1526	for (i = 0; i < adapter->num_q_vectors; i++)
1527		napi_enable(&(adapter->q_vector[i]->napi));
1528
1529	if (adapter->msix_entries)
1530		igb_configure_msix(adapter);
1531	else
1532		igb_assign_vector(adapter->q_vector[0], 0);
1533
1534	/* Clear any pending interrupts. */
1535	rd32(E1000_ICR);
1536	igb_irq_enable(adapter);
1537
1538	/* notify VFs that reset has been completed */
1539	if (adapter->vfs_allocated_count) {
1540		u32 reg_data = rd32(E1000_CTRL_EXT);
1541		reg_data |= E1000_CTRL_EXT_PFRSTD;
1542		wr32(E1000_CTRL_EXT, reg_data);
1543	}
1544
1545	netif_tx_start_all_queues(adapter->netdev);
1546
1547	/* start the watchdog. */
1548	hw->mac.get_link_status = 1;
1549	schedule_work(&adapter->watchdog_task);
1550
1551	return 0;
1552}
1553
1554void igb_down(struct igb_adapter *adapter)
1555{
1556	struct net_device *netdev = adapter->netdev;
1557	struct e1000_hw *hw = &adapter->hw;
1558	u32 tctl, rctl;
1559	int i;
1560
1561	/* signal that we're down so the interrupt handler does not
1562	 * reschedule our watchdog timer */
1563	set_bit(__IGB_DOWN, &adapter->state);
1564
1565	/* disable receives in the hardware */
1566	rctl = rd32(E1000_RCTL);
1567	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1568	/* flush and sleep below */
1569
1570	netif_tx_stop_all_queues(netdev);
1571
1572	/* disable transmits in the hardware */
1573	tctl = rd32(E1000_TCTL);
1574	tctl &= ~E1000_TCTL_EN;
1575	wr32(E1000_TCTL, tctl);
1576	/* flush both disables and wait for them to finish */
1577	wrfl();
1578	msleep(10);
1579
1580	for (i = 0; i < adapter->num_q_vectors; i++)
1581		napi_disable(&(adapter->q_vector[i]->napi));
1582
1583	igb_irq_disable(adapter);
1584
1585	del_timer_sync(&adapter->watchdog_timer);
1586	del_timer_sync(&adapter->phy_info_timer);
1587
1588	netif_carrier_off(netdev);
1589
1590	/* record the stats before reset*/
1591	spin_lock(&adapter->stats64_lock);
1592	igb_update_stats(adapter, &adapter->stats64);
1593	spin_unlock(&adapter->stats64_lock);
1594
1595	adapter->link_speed = 0;
1596	adapter->link_duplex = 0;
1597
1598	if (!pci_channel_offline(adapter->pdev))
1599		igb_reset(adapter);
1600	igb_clean_all_tx_rings(adapter);
1601	igb_clean_all_rx_rings(adapter);
1602#ifdef CONFIG_IGB_DCA
1603
1604	/* since we reset the hardware DCA settings were cleared */
1605	igb_setup_dca(adapter);
1606#endif
1607}
1608
1609void igb_reinit_locked(struct igb_adapter *adapter)
1610{
1611	WARN_ON(in_interrupt());
1612	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1613		msleep(1);
1614	igb_down(adapter);
1615	igb_up(adapter);
1616	clear_bit(__IGB_RESETTING, &adapter->state);
1617}
1618
1619void igb_reset(struct igb_adapter *adapter)
1620{
1621	struct pci_dev *pdev = adapter->pdev;
1622	struct e1000_hw *hw = &adapter->hw;
1623	struct e1000_mac_info *mac = &hw->mac;
1624	struct e1000_fc_info *fc = &hw->fc;
1625	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1626	u16 hwm;
1627
1628	/* Repartition PBA for MTUs greater than 9k.
1629	 * CTRL.RST is required for this to take effect.
1630	 */
1631	switch (mac->type) {
1632	case e1000_i350:
1633	case e1000_82580:
1634		pba = rd32(E1000_RXPBS);
1635		pba = igb_rxpbs_adjust_82580(pba);
1636		break;
1637	case e1000_82576:
1638		pba = rd32(E1000_RXPBS);
1639		pba &= E1000_RXPBS_SIZE_MASK_82576;
1640		break;
1641	case e1000_82575:
1642	default:
1643		pba = E1000_PBA_34K;
1644		break;
1645	}
1646
1647	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1648	    (mac->type < e1000_82576)) {
1649		/* adjust PBA for jumbo frames */
1650		wr32(E1000_PBA, pba);
1651
1652		/* To maintain wire speed transmits, the Tx FIFO should be
1653		 * large enough to accommodate two full transmit packets,
1654		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1655		 * the Rx FIFO should be large enough to accommodate at least
1656		 * one full receive packet and is similarly rounded up and
1657		 * expressed in KB. */
1658		pba = rd32(E1000_PBA);
1659		/* upper 16 bits has Tx packet buffer allocation size in KB */
1660		tx_space = pba >> 16;
1661		/* lower 16 bits has Rx packet buffer allocation size in KB */
1662		pba &= 0xffff;
1663		/* the Tx FIFO also stores 16 bytes of information about the Tx
1664		 * packet, but don't include the Ethernet FCS because hardware appends it */
1665		min_tx_space = (adapter->max_frame_size +
1666				sizeof(union e1000_adv_tx_desc) -
1667				ETH_FCS_LEN) * 2;
1668		min_tx_space = ALIGN(min_tx_space, 1024);
1669		min_tx_space >>= 10;
1670		/* software strips receive CRC, so leave room for it */
1671		min_rx_space = adapter->max_frame_size;
1672		min_rx_space = ALIGN(min_rx_space, 1024);
1673		min_rx_space >>= 10;
1674
1675		/* If current Tx allocation is less than the min Tx FIFO size,
1676		 * and the min Tx FIFO size is less than the current Rx FIFO
1677		 * allocation, take space away from current Rx allocation */
1678		if (tx_space < min_tx_space &&
1679		    ((min_tx_space - tx_space) < pba)) {
1680			pba = pba - (min_tx_space - tx_space);
1681
1682			/* if short on rx space, rx wins and must trump tx
1683			 * adjustment */
1684			if (pba < min_rx_space)
1685				pba = min_rx_space;
1686		}
1687		wr32(E1000_PBA, pba);
1688	}
1689
1690	/* flow control settings */
1691	/* The high water mark must be low enough to fit one full frame
1692	 * (or the size used for early receive) above it in the Rx FIFO.
1693	 * Set it to the lower of:
1694	 * - 90% of the Rx FIFO size, or
1695	 * - the full Rx FIFO size minus one full frame */
1696	hwm = min(((pba << 10) * 9 / 10),
1697			((pba << 10) - 2 * adapter->max_frame_size));
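	/*
	 * Example, assuming a 1522-byte max frame: with pba = E1000_PBA_34K
	 * (34 KB), hwm = min(34816 * 9 / 10, 34816 - 2 * 1522) = 31334, and
	 * high_water becomes 31328 after the 16-byte alignment below.
	 */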
1698
1699	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1700	fc->low_water = fc->high_water - 16;
1701	fc->pause_time = 0xFFFF;
1702	fc->send_xon = 1;
1703	fc->current_mode = fc->requested_mode;
1704
1705	/* disable receive for all VFs and wait one second */
1706	if (adapter->vfs_allocated_count) {
1707		int i;
1708		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1709			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1710
1711		/* ping all the active vfs to let them know we are going down */
1712		igb_ping_all_vfs(adapter);
1713
1714		/* disable transmits and receives */
1715		wr32(E1000_VFRE, 0);
1716		wr32(E1000_VFTE, 0);
1717	}
1718
1719	/* Allow time for pending master requests to run */
1720	hw->mac.ops.reset_hw(hw);
1721	wr32(E1000_WUC, 0);
1722
1723	if (hw->mac.ops.init_hw(hw))
1724		dev_err(&pdev->dev, "Hardware Error\n");
1725	if (hw->mac.type > e1000_82580) {
1726		if (adapter->flags & IGB_FLAG_DMAC) {
1727			u32 reg;
1728
1729			/*
1730			 * DMA Coalescing high water mark needs to be higher
1731			 * than the Rx threshold.  The Rx threshold is
1732			 * currently pba - 6, so we should use a high water
1733			 * mark of pba - 4. */
1734			hwm = (pba - 4) << 10;
1735
1736			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1737			       & E1000_DMACR_DMACTHR_MASK);
1738
1739			/* transition to L0s or L1 if available */
1740			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1741
1742			/* watchdog timer = 1000 usec, expressed in 32 usec intervals */
1743			reg |= (1000 >> 5);
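			/* 1000 >> 5 == 31, i.e. roughly 1000 usec expressed
			 * in 32 usec units (31 * 32 = 992 usec) */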
1744			wr32(E1000_DMACR, reg);
1745
1746			/* no lower threshold to disable coalescing (smart FIFO);
1747			 * UTRESH = 0 */
1748			wr32(E1000_DMCRTRH, 0);
1749
1750			/* write the DMA Coalescing high water mark computed above */
1751			wr32(E1000_FCRTC, hwm);
1752
1753			/*
1754			 * This sets the time to wait before requesting a
1755			 * transition to a low power state to the number of usecs
1756			 * needed to receive a 512 byte frame at gigabit line rate.
1757			 */
1758			reg = rd32(E1000_DMCTLX);
1759			reg |= IGB_DMCTLX_DCFLUSH_DIS;
1760
1761			/* Delay 255 usec before entering Lx state. */
1762			reg |= 0xFF;
1763			wr32(E1000_DMCTLX, reg);
1764
1765			/* free space in Tx packet buffer to wake from DMAC */
1766			wr32(E1000_DMCTXTH,
1767			     (IGB_MIN_TXPBSIZE -
1768			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
1769			     >> 6);
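			/* the >> 6 expresses the wake threshold in 64-byte
			 * units (assumed granularity of DMCTXTH) */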
1770
1771			/* make low power state decision controlled by DMAC */
1772			reg = rd32(E1000_PCIEMISC);
1773			reg |= E1000_PCIEMISC_LX_DECISION;
1774			wr32(E1000_PCIEMISC, reg);
1775		} /* end if IGB_FLAG_DMAC set */
1776	}
1777	if (hw->mac.type == e1000_82580) {
1778		u32 reg = rd32(E1000_PCIEMISC);
1779		wr32(E1000_PCIEMISC,
1780		                reg & ~E1000_PCIEMISC_LX_DECISION);
1781	}
1782	if (!netif_running(adapter->netdev))
1783		igb_power_down_link(adapter);
1784
1785	igb_update_mng_vlan(adapter);
1786
1787	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1788	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1789
1790	igb_get_phy_info(hw);
1791}
1792
1793static u32 igb_fix_features(struct net_device *netdev, u32 features)
1794{
1795	/*
1796	 * Since there is no support for separate rx/tx vlan accel
1797	 * enable/disable, make sure the Tx flag is always in the same state as Rx.
1798	 */
1799	if (features & NETIF_F_HW_VLAN_RX)
1800		features |= NETIF_F_HW_VLAN_TX;
1801	else
1802		features &= ~NETIF_F_HW_VLAN_TX;
1803
1804	return features;
1805}
1806
1807static int igb_set_features(struct net_device *netdev, u32 features)
1808{
1809	u32 changed = netdev->features ^ features;
1810
1811	if (changed & NETIF_F_HW_VLAN_RX)
1812		igb_vlan_mode(netdev, features);
1813
1814	return 0;
1815}
1816
1817static const struct net_device_ops igb_netdev_ops = {
1818	.ndo_open		= igb_open,
1819	.ndo_stop		= igb_close,
1820	.ndo_start_xmit		= igb_xmit_frame,
1821	.ndo_get_stats64	= igb_get_stats64,
1822	.ndo_set_rx_mode	= igb_set_rx_mode,
1823	.ndo_set_mac_address	= igb_set_mac,
1824	.ndo_change_mtu		= igb_change_mtu,
1825	.ndo_do_ioctl		= igb_ioctl,
1826	.ndo_tx_timeout		= igb_tx_timeout,
1827	.ndo_validate_addr	= eth_validate_addr,
1828	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1829	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1830	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1831	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1832	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1833	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1834#ifdef CONFIG_NET_POLL_CONTROLLER
1835	.ndo_poll_controller	= igb_netpoll,
1836#endif
1837	.ndo_fix_features	= igb_fix_features,
1838	.ndo_set_features	= igb_set_features,
1839};
1840
1841/**
1842 * igb_probe - Device Initialization Routine
1843 * @pdev: PCI device information struct
1844 * @ent: entry in igb_pci_tbl
1845 *
1846 * Returns 0 on success, negative on failure
1847 *
1848 * igb_probe initializes an adapter identified by a pci_dev structure.
1849 * The OS initialization, configuring of the adapter private structure,
1850 * and a hardware reset occur.
1851 **/
1852static int __devinit igb_probe(struct pci_dev *pdev,
1853			       const struct pci_device_id *ent)
1854{
1855	struct net_device *netdev;
1856	struct igb_adapter *adapter;
1857	struct e1000_hw *hw;
1858	u16 eeprom_data = 0;
1859	s32 ret_val;
1860	static int global_quad_port_a; /* global quad port a indication */
1861	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1862	unsigned long mmio_start, mmio_len;
1863	int err, pci_using_dac;
1864	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1865	u8 part_str[E1000_PBANUM_LENGTH];
1866
1867	/* Catch broken hardware that put the wrong VF device ID in
1868	 * the PCIe SR-IOV capability.
1869	 */
1870	if (pdev->is_virtfn) {
1871		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1872		     pci_name(pdev), pdev->vendor, pdev->device);
1873		return -EINVAL;
1874	}
1875
1876	err = pci_enable_device_mem(pdev);
1877	if (err)
1878		return err;
1879
1880	pci_using_dac = 0;
1881	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1882	if (!err) {
1883		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1884		if (!err)
1885			pci_using_dac = 1;
1886	} else {
1887		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1888		if (err) {
1889			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1890			if (err) {
1891				dev_err(&pdev->dev, "No usable DMA "
1892					"configuration, aborting\n");
1893				goto err_dma;
1894			}
1895		}
1896	}
1897
1898	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1899	                                   IORESOURCE_MEM),
1900	                                   igb_driver_name);
1901	if (err)
1902		goto err_pci_reg;
1903
1904	pci_enable_pcie_error_reporting(pdev);
1905
1906	pci_set_master(pdev);
1907	pci_save_state(pdev);
1908
1909	err = -ENOMEM;
1910	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1911				   IGB_MAX_TX_QUEUES);
1912	if (!netdev)
1913		goto err_alloc_etherdev;
1914
1915	SET_NETDEV_DEV(netdev, &pdev->dev);
1916
1917	pci_set_drvdata(pdev, netdev);
1918	adapter = netdev_priv(netdev);
1919	adapter->netdev = netdev;
1920	adapter->pdev = pdev;
1921	hw = &adapter->hw;
1922	hw->back = adapter;
1923	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1924
1925	mmio_start = pci_resource_start(pdev, 0);
1926	mmio_len = pci_resource_len(pdev, 0);
1927
1928	err = -EIO;
1929	hw->hw_addr = ioremap(mmio_start, mmio_len);
1930	if (!hw->hw_addr)
1931		goto err_ioremap;
1932
1933	netdev->netdev_ops = &igb_netdev_ops;
1934	igb_set_ethtool_ops(netdev);
1935	netdev->watchdog_timeo = 5 * HZ;
1936
1937	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1938
1939	netdev->mem_start = mmio_start;
1940	netdev->mem_end = mmio_start + mmio_len;
1941
1942	/* PCI config space info */
1943	hw->vendor_id = pdev->vendor;
1944	hw->device_id = pdev->device;
1945	hw->revision_id = pdev->revision;
1946	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1947	hw->subsystem_device_id = pdev->subsystem_device;
1948
1949	/* Copy the default MAC, PHY and NVM function pointers */
1950	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1951	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1952	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1953	/* Initialize skew-specific constants */
1954	err = ei->get_invariants(hw);
1955	if (err)
1956		goto err_sw_init;
1957
1958	/* setup the private structure */
1959	err = igb_sw_init(adapter);
1960	if (err)
1961		goto err_sw_init;
1962
1963	igb_get_bus_info_pcie(hw);
1964
1965	hw->phy.autoneg_wait_to_complete = false;
1966
1967	/* Copper options */
1968	if (hw->phy.media_type == e1000_media_type_copper) {
1969		hw->phy.mdix = AUTO_ALL_MODES;
1970		hw->phy.disable_polarity_correction = false;
1971		hw->phy.ms_type = e1000_ms_hw_default;
1972	}
1973
1974	if (igb_check_reset_block(hw))
1975		dev_info(&pdev->dev,
1976			"PHY reset is blocked due to SOL/IDER session.\n");
1977
1978	/*
1979	 * features is initialized to 0 during allocation, but it might have
1980	 * bits set by igb_sw_init, so we should use an OR instead of an
1981	 * assignment.
1982	 */
1983	netdev->features |= NETIF_F_SG |
1984			    NETIF_F_IP_CSUM |
1985			    NETIF_F_IPV6_CSUM |
1986			    NETIF_F_TSO |
1987			    NETIF_F_TSO6 |
1988			    NETIF_F_RXHASH |
1989			    NETIF_F_RXCSUM |
1990			    NETIF_F_HW_VLAN_RX |
1991			    NETIF_F_HW_VLAN_TX;
1992
1993	/* copy netdev features into list of user selectable features */
1994	netdev->hw_features |= netdev->features;
1995
1996	/* set this bit last since it cannot be part of hw_features */
1997	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1998
1999	netdev->vlan_features |= NETIF_F_TSO |
2000				 NETIF_F_TSO6 |
2001				 NETIF_F_IP_CSUM |
2002				 NETIF_F_IPV6_CSUM |
2003				 NETIF_F_SG;
2004
2005	if (pci_using_dac) {
2006		netdev->features |= NETIF_F_HIGHDMA;
2007		netdev->vlan_features |= NETIF_F_HIGHDMA;
2008	}
2009
2010	if (hw->mac.type >= e1000_82576) {
2011		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2012		netdev->features |= NETIF_F_SCTP_CSUM;
2013	}
2014
2015	netdev->priv_flags |= IFF_UNICAST_FLT;
2016
2017	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2018
2019	/* before reading the NVM, reset the controller to put the device in a
2020	 * known good starting state */
2021	hw->mac.ops.reset_hw(hw);
2022
2023	/* make sure the NVM is good */
2024	if (hw->nvm.ops.validate(hw) < 0) {
2025		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2026		err = -EIO;
2027		goto err_eeprom;
2028	}
2029
2030	/* copy the MAC address out of the NVM */
2031	if (hw->mac.ops.read_mac_addr(hw))
2032		dev_err(&pdev->dev, "NVM Read Error\n");
2033
2034	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2035	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2036
2037	if (!is_valid_ether_addr(netdev->perm_addr)) {
2038		dev_err(&pdev->dev, "Invalid MAC Address\n");
2039		err = -EIO;
2040		goto err_eeprom;
2041	}
2042
2043	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2044	            (unsigned long) adapter);
2045	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2046	            (unsigned long) adapter);
2047
2048	INIT_WORK(&adapter->reset_task, igb_reset_task);
2049	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2050
2051	/* Initialize link properties that are user-changeable */
2052	adapter->fc_autoneg = true;
2053	hw->mac.autoneg = true;
2054	hw->phy.autoneg_advertised = 0x2f;
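	/* 0x2f is assumed to be the usual e1000 advertisement mask:
	 * 10/100 half and full duplex plus 1000 full duplex */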
2055
2056	hw->fc.requested_mode = e1000_fc_default;
2057	hw->fc.current_mode = e1000_fc_default;
2058
2059	igb_validate_mdi_setting(hw);
2060
2061	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2062	 * enable the ACPI Magic Packet filter
2063	 */
2064
2065	if (hw->bus.func == 0)
2066		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2067	else if (hw->mac.type >= e1000_82580)
2068		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2069		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2070		                 &eeprom_data);
2071	else if (hw->bus.func == 1)
2072		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2073
2074	if (eeprom_data & eeprom_apme_mask)
2075		adapter->eeprom_wol |= E1000_WUFC_MAG;
2076
2077	/* now that we have the eeprom settings, apply the special cases where
2078	 * the eeprom may be wrong or the board simply won't support wake on
2079	 * lan on a particular port */
2080	switch (pdev->device) {
2081	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2082		adapter->eeprom_wol = 0;
2083		break;
2084	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2085	case E1000_DEV_ID_82576_FIBER:
2086	case E1000_DEV_ID_82576_SERDES:
2087		/* Wake events only supported on port A for dual fiber
2088		 * regardless of eeprom setting */
2089		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2090			adapter->eeprom_wol = 0;
2091		break;
2092	case E1000_DEV_ID_82576_QUAD_COPPER:
2093	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2094		/* if quad port adapter, disable WoL on all but port A */
2095		if (global_quad_port_a != 0)
2096			adapter->eeprom_wol = 0;
2097		else
2098			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2099		/* Reset for multiple quad port adapters */
2100		if (++global_quad_port_a == 4)
2101			global_quad_port_a = 0;
2102		break;
2103	}
2104
2105	/* initialize the wol settings based on the eeprom settings */
2106	adapter->wol = adapter->eeprom_wol;
2107	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2108
2109	/* reset the hardware with the new settings */
2110	igb_reset(adapter);
2111
2112	/* let the f/w know that the h/w is now under the control of the
2113	 * driver. */
2114	igb_get_hw_control(adapter);
2115
2116	strcpy(netdev->name, "eth%d");
2117	err = register_netdev(netdev);
2118	if (err)
2119		goto err_register;
2120
2121	/* carrier off reporting is important to ethtool even BEFORE open */
2122	netif_carrier_off(netdev);
2123
2124#ifdef CONFIG_IGB_DCA
2125	if (dca_add_requester(&pdev->dev) == 0) {
2126		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2127		dev_info(&pdev->dev, "DCA enabled\n");
2128		igb_setup_dca(adapter);
2129	}
2130
2131#endif
2132	/* do hw tstamp init after resetting */
2133	igb_init_hw_timer(adapter);
2134
2135	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2136	/* print bus type/speed/width info */
2137	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2138		 netdev->name,
2139		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2140		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2141		                                            "unknown"),
2142		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2143		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2144		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2145		   "unknown"),
2146		 netdev->dev_addr);
2147
2148	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2149	if (ret_val)
2150		strcpy(part_str, "Unknown");
2151	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2152	dev_info(&pdev->dev,
2153		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2154		adapter->msix_entries ? "MSI-X" :
2155		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2156		adapter->num_rx_queues, adapter->num_tx_queues);
2157	switch (hw->mac.type) {
2158	case e1000_i350:
2159		igb_set_eee_i350(hw);
2160		break;
2161	default:
2162		break;
2163	}
2164	return 0;
2165
2166err_register:
2167	igb_release_hw_control(adapter);
2168err_eeprom:
2169	if (!igb_check_reset_block(hw))
2170		igb_reset_phy(hw);
2171
2172	if (hw->flash_address)
2173		iounmap(hw->flash_address);
2174err_sw_init:
2175	igb_clear_interrupt_scheme(adapter);
2176	iounmap(hw->hw_addr);
2177err_ioremap:
2178	free_netdev(netdev);
2179err_alloc_etherdev:
2180	pci_release_selected_regions(pdev,
2181	                             pci_select_bars(pdev, IORESOURCE_MEM));
2182err_pci_reg:
2183err_dma:
2184	pci_disable_device(pdev);
2185	return err;
2186}
2187
2188/**
2189 * igb_remove - Device Removal Routine
2190 * @pdev: PCI device information struct
2191 *
2192 * igb_remove is called by the PCI subsystem to alert the driver
2193 * that it should release a PCI device.  This could be caused by a
2194 * Hot-Plug event, or because the driver is going to be removed from
2195 * memory.
2196 **/
2197static void __devexit igb_remove(struct pci_dev *pdev)
2198{
2199	struct net_device *netdev = pci_get_drvdata(pdev);
2200	struct igb_adapter *adapter = netdev_priv(netdev);
2201	struct e1000_hw *hw = &adapter->hw;
2202
2203	/*
2204	 * The watchdog timer may be rescheduled, so explicitly
2205	 * disable watchdog from being rescheduled.
2206	 */
2207	set_bit(__IGB_DOWN, &adapter->state);
2208	del_timer_sync(&adapter->watchdog_timer);
2209	del_timer_sync(&adapter->phy_info_timer);
2210
2211	cancel_work_sync(&adapter->reset_task);
2212	cancel_work_sync(&adapter->watchdog_task);
2213
2214#ifdef CONFIG_IGB_DCA
2215	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2216		dev_info(&pdev->dev, "DCA disabled\n");
2217		dca_remove_requester(&pdev->dev);
2218		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2219		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2220	}
2221#endif
2222
2223	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2224	 * would have already happened in close and is redundant. */
2225	igb_release_hw_control(adapter);
2226
2227	unregister_netdev(netdev);
2228
2229	igb_clear_interrupt_scheme(adapter);
2230
2231#ifdef CONFIG_PCI_IOV
2232	/* reclaim resources allocated to VFs */
2233	if (adapter->vf_data) {
2234		/* disable iov and allow time for transactions to clear */
2235		pci_disable_sriov(pdev);
2236		msleep(500);
2237
2238		kfree(adapter->vf_data);
2239		adapter->vf_data = NULL;
2240		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2241		wrfl();
2242		msleep(100);
2243		dev_info(&pdev->dev, "IOV Disabled\n");
2244	}
2245#endif
2246
2247	iounmap(hw->hw_addr);
2248	if (hw->flash_address)
2249		iounmap(hw->flash_address);
2250	pci_release_selected_regions(pdev,
2251	                             pci_select_bars(pdev, IORESOURCE_MEM));
2252
2253	free_netdev(netdev);
2254
2255	pci_disable_pcie_error_reporting(pdev);
2256
2257	pci_disable_device(pdev);
2258}
2259
2260/**
2261 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2262 * @adapter: board private structure to initialize
2263 *
2264 * This function initializes the vf specific data storage and then attempts to
2265 * allocate the VFs.  The reason for ordering it this way is because it is much
2266 * more expensive time wise to disable SR-IOV than it is to allocate and free
2267 * the memory for the VFs.
2268 **/
2269static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2270{
2271#ifdef CONFIG_PCI_IOV
2272	struct pci_dev *pdev = adapter->pdev;
2273
2274	if (adapter->vfs_allocated_count) {
2275		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2276		                           sizeof(struct vf_data_storage),
2277		                           GFP_KERNEL);
2278		/* if allocation failed then we do not support SR-IOV */
2279		if (!adapter->vf_data) {
2280			adapter->vfs_allocated_count = 0;
2281			dev_err(&pdev->dev, "Unable to allocate memory for VF "
2282			        "Data Storage\n");
2283		}
2284	}
2285
2286	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2287		kfree(adapter->vf_data);
2288		adapter->vf_data = NULL;
2289#endif /* CONFIG_PCI_IOV */
2290		adapter->vfs_allocated_count = 0;
2291#ifdef CONFIG_PCI_IOV
2292	} else {
2293		unsigned char mac_addr[ETH_ALEN];
2294		int i;
2295		dev_info(&pdev->dev, "%d vfs allocated\n",
2296		         adapter->vfs_allocated_count);
2297		for (i = 0; i < adapter->vfs_allocated_count; i++) {
2298			random_ether_addr(mac_addr);
2299			igb_set_vf_mac(adapter, i, mac_addr);
2300		}
2301		/* DMA Coalescing is not supported in IOV mode. */
2302		if (adapter->flags & IGB_FLAG_DMAC)
2303			adapter->flags &= ~IGB_FLAG_DMAC;
2304	}
2305#endif /* CONFIG_PCI_IOV */
2306}
2307
2308
2309/**
2310 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2311 * @adapter: board private structure to initialize
2312 *
2313 * igb_init_hw_timer initializes the function pointer and values for the hw
2314 * timer found in hardware.
2315 **/
2316static void igb_init_hw_timer(struct igb_adapter *adapter)
2317{
2318	struct e1000_hw *hw = &adapter->hw;
2319
2320	switch (hw->mac.type) {
2321	case e1000_i350:
2322	case e1000_82580:
2323		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2324		adapter->cycles.read = igb_read_clock;
2325		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2326		adapter->cycles.mult = 1;
2327		/*
2328		 * The 82580 timesync updates the system timer in 8ns increments,
2329		 * and the value cannot be shifted.  Instead we need to shift
2330		 * the registers to generate a 64bit timer value.  As a result
2331		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2332		 * 24 in order to generate a larger value for synchronization.
2333		 */
2334		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2335		/* disable system timer temporarily by setting bit 31 */
2336		wr32(E1000_TSAUXC, 0x80000000);
2337		wrfl();
2338
2339		/* Set registers so that rollover occurs soon to test this. */
2340		wr32(E1000_SYSTIMR, 0x00000000);
2341		wr32(E1000_SYSTIML, 0x80000000);
2342		wr32(E1000_SYSTIMH, 0x000000FF);
2343		wrfl();
2344
2345		/* enable system timer by clearing bit 31 */
2346		wr32(E1000_TSAUXC, 0x0);
2347		wrfl();
2348
2349		timecounter_init(&adapter->clock,
2350				 &adapter->cycles,
2351				 ktime_to_ns(ktime_get_real()));
2352		/*
2353		 * Synchronize our NIC clock against system wall clock. NIC
2354		 * time stamp reading requires ~3us per sample, and each sample
2355		 * was pretty stable even under load, so only 10 samples are
2356		 * required for each offset comparison.
2357		 */
2358		memset(&adapter->compare, 0, sizeof(adapter->compare));
2359		adapter->compare.source = &adapter->clock;
2360		adapter->compare.target = ktime_get_real;
2361		adapter->compare.num_samples = 10;
2362		timecompare_update(&adapter->compare, 0);
2363		break;
2364	case e1000_82576:
2365		/*
2366		 * Initialize hardware timer: we keep it running just in case
2367		 * that some program needs it later on.
2368		 */
2369		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2370		adapter->cycles.read = igb_read_clock;
2371		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2372		adapter->cycles.mult = 1;
2373		/*
2374		 * Scale the NIC clock cycle by a large factor so that
2375		 * relatively small clock corrections can be added or
2376		 * subtracted at each clock tick. The drawbacks of a large
2377		 * factor are a) that the clock register overflows more quickly
2378		 * (not such a big deal) and b) that the increment per tick has
2379		 * to fit into 24 bits.  As a result we need to use a shift of
2380		 * 19 so we can fit a value of 16 into the TIMINCA register.
2381		 */
2382		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2383		wr32(E1000_TIMINCA,
2384		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2385		                (16 << IGB_82576_TSYNC_SHIFT));
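		/* Sanity check on the shift chosen above: 16 << 19 is
		 * 0x800000, which still fits in TIMINCA's 24-bit increment
		 * field; a larger shift would overflow it. */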
2386
2387		/* Set registers so that rollover occurs soon to test this. */
2388		wr32(E1000_SYSTIML, 0x00000000);
2389		wr32(E1000_SYSTIMH, 0xFF800000);
2390		wrfl();
2391
2392		timecounter_init(&adapter->clock,
2393				 &adapter->cycles,
2394				 ktime_to_ns(ktime_get_real()));
2395		/*
2396		 * Synchronize our NIC clock against system wall clock. NIC
2397		 * time stamp reading requires ~3us per sample, and each sample
2398		 * was pretty stable even under load, so only 10 samples are
2399		 * required for each offset comparison.
2400		 */
2401		memset(&adapter->compare, 0, sizeof(adapter->compare));
2402		adapter->compare.source = &adapter->clock;
2403		adapter->compare.target = ktime_get_real;
2404		adapter->compare.num_samples = 10;
2405		timecompare_update(&adapter->compare, 0);
2406		break;
2407	case e1000_82575:
2408		/* 82575 does not support timesync */
2409	default:
2410		break;
2411	}
2412
2413}
2414
2415/**
2416 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2417 * @adapter: board private structure to initialize
2418 *
2419 * igb_sw_init initializes the Adapter private data structure.
2420 * Fields are initialized based on PCI device information and
2421 * OS network device settings (MTU size).
2422 **/
2423static int __devinit igb_sw_init(struct igb_adapter *adapter)
2424{
2425	struct e1000_hw *hw = &adapter->hw;
2426	struct net_device *netdev = adapter->netdev;
2427	struct pci_dev *pdev = adapter->pdev;
2428
2429	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2430
2431	/* set default ring sizes */
2432	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2433	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2434
2435	/* set default ITR values */
2436	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2437	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2438
2439	/* set default work limits */
2440	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2441
2442	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2443				  VLAN_HLEN;
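	/* e.g. with the default 1500-byte MTU this is 1500 + 14 + 4 + 4 = 1522 */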
2444	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2445
2446	adapter->node = -1;
2447
2448	spin_lock_init(&adapter->stats64_lock);
2449#ifdef CONFIG_PCI_IOV
2450	switch (hw->mac.type) {
2451	case e1000_82576:
2452	case e1000_i350:
2453		if (max_vfs > 7) {
2454			dev_warn(&pdev->dev,
2455				 "Maximum of 7 VFs per PF, using max\n");
2456			adapter->vfs_allocated_count = 7;
2457		} else
2458			adapter->vfs_allocated_count = max_vfs;
2459		break;
2460	default:
2461		break;
2462	}
2463#endif /* CONFIG_PCI_IOV */
2464	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2465	/* i350 cannot do RSS and SR-IOV at the same time */
2466	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2467		adapter->rss_queues = 1;
2468
2469	/*
2470	 * if rss_queues > 4, or if more than 6 VFs are going to be allocated
2471	 * while more than one RSS queue is in use, combine the queues into
2472	 * queue pairs in order to conserve the limited supply of interrupts
2473	 */
2474	if ((adapter->rss_queues > 4) ||
2475	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2476		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2477
2478	/* This call may decrease the number of queues */
2479	if (igb_init_interrupt_scheme(adapter)) {
2480		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2481		return -ENOMEM;
2482	}
2483
2484	igb_probe_vfs(adapter);
2485
2486	/* Explicitly disable IRQ since the NIC can be in any state. */
2487	igb_irq_disable(adapter);
2488
2489	if (hw->mac.type == e1000_i350)
2490		adapter->flags &= ~IGB_FLAG_DMAC;
2491
2492	set_bit(__IGB_DOWN, &adapter->state);
2493	return 0;
2494}
2495
2496/**
2497 * igb_open - Called when a network interface is made active
2498 * @netdev: network interface device structure
2499 *
2500 * Returns 0 on success, negative value on failure
2501 *
2502 * The open entry point is called when a network interface is made
2503 * active by the system (IFF_UP).  At this point all resources needed
2504 * for transmit and receive operations are allocated, the interrupt
2505 * handler is registered with the OS, the watchdog timer is started,
2506 * and the stack is notified that the interface is ready.
2507 **/
2508static int igb_open(struct net_device *netdev)
2509{
2510	struct igb_adapter *adapter = netdev_priv(netdev);
2511	struct e1000_hw *hw = &adapter->hw;
2512	int err;
2513	int i;
2514
2515	/* disallow open during test */
2516	if (test_bit(__IGB_TESTING, &adapter->state))
2517		return -EBUSY;
2518
2519	netif_carrier_off(netdev);
2520
2521	/* allocate transmit descriptors */
2522	err = igb_setup_all_tx_resources(adapter);
2523	if (err)
2524		goto err_setup_tx;
2525
2526	/* allocate receive descriptors */
2527	err = igb_setup_all_rx_resources(adapter);
2528	if (err)
2529		goto err_setup_rx;
2530
2531	igb_power_up_link(adapter);
2532
2533	/* before we allocate an interrupt, we must be ready to handle it.
2534	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2535	 * as soon as we call request_irq, so we have to set up our
2536	 * clean_rx handler before we do so.  */
2537	igb_configure(adapter);
2538
2539	err = igb_request_irq(adapter);
2540	if (err)
2541		goto err_req_irq;
2542
2543	/* From here on the code is the same as igb_up() */
2544	clear_bit(__IGB_DOWN, &adapter->state);
2545
2546	for (i = 0; i < adapter->num_q_vectors; i++)
2547		napi_enable(&(adapter->q_vector[i]->napi));
2548
2549	/* Clear any pending interrupts. */
2550	rd32(E1000_ICR);
2551
2552	igb_irq_enable(adapter);
2553
2554	/* notify VFs that reset has been completed */
2555	if (adapter->vfs_allocated_count) {
2556		u32 reg_data = rd32(E1000_CTRL_EXT);
2557		reg_data |= E1000_CTRL_EXT_PFRSTD;
2558		wr32(E1000_CTRL_EXT, reg_data);
2559	}
2560
2561	netif_tx_start_all_queues(netdev);
2562
2563	/* start the watchdog. */
2564	hw->mac.get_link_status = 1;
2565	schedule_work(&adapter->watchdog_task);
2566
2567	return 0;
2568
2569err_req_irq:
2570	igb_release_hw_control(adapter);
2571	igb_power_down_link(adapter);
2572	igb_free_all_rx_resources(adapter);
2573err_setup_rx:
2574	igb_free_all_tx_resources(adapter);
2575err_setup_tx:
2576	igb_reset(adapter);
2577
2578	return err;
2579}
2580
2581/**
2582 * igb_close - Disables a network interface
2583 * @netdev: network interface device structure
2584 *
2585 * Returns 0, this is not allowed to fail
2586 *
2587 * The close entry point is called when an interface is de-activated
2588 * by the OS.  The hardware is still under the driver's control, but
2589 * needs to be disabled.  A global MAC reset is issued to stop the
2590 * hardware, and all transmit and receive resources are freed.
2591 **/
2592static int igb_close(struct net_device *netdev)
2593{
2594	struct igb_adapter *adapter = netdev_priv(netdev);
2595
2596	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2597	igb_down(adapter);
2598
2599	igb_free_irq(adapter);
2600
2601	igb_free_all_tx_resources(adapter);
2602	igb_free_all_rx_resources(adapter);
2603
2604	return 0;
2605}
2606
2607/**
2608 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2609 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2610 *
2611 * Return 0 on success, negative on failure
2612 **/
2613int igb_setup_tx_resources(struct igb_ring *tx_ring)
2614{
2615	struct device *dev = tx_ring->dev;
2616	int orig_node = dev_to_node(dev);
2617	int size;
2618
2619	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2620	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2621	if (!tx_ring->tx_buffer_info)
2622		tx_ring->tx_buffer_info = vzalloc(size);
2623	if (!tx_ring->tx_buffer_info)
2624		goto err;
2625
2626	/* round up to nearest 4K */
2627	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2628	tx_ring->size = ALIGN(tx_ring->size, 4096);
2629
2630	set_dev_node(dev, tx_ring->numa_node);
2631	tx_ring->desc = dma_alloc_coherent(dev,
2632					   tx_ring->size,
2633					   &tx_ring->dma,
2634					   GFP_KERNEL);
2635	set_dev_node(dev, orig_node);
2636	if (!tx_ring->desc)
2637		tx_ring->desc = dma_alloc_coherent(dev,
2638						   tx_ring->size,
2639						   &tx_ring->dma,
2640						   GFP_KERNEL);
2641
2642	if (!tx_ring->desc)
2643		goto err;
2644
2645	tx_ring->next_to_use = 0;
2646	tx_ring->next_to_clean = 0;
2647
2648	return 0;
2649
2650err:
2651	vfree(tx_ring->tx_buffer_info);
2652	dev_err(dev,
2653		"Unable to allocate memory for the transmit descriptor ring\n");
2654	return -ENOMEM;
2655}
2656
2657/**
2658 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2659 *				  (Descriptors) for all queues
2660 * @adapter: board private structure
2661 *
2662 * Return 0 on success, negative on failure
2663 **/
2664static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2665{
2666	struct pci_dev *pdev = adapter->pdev;
2667	int i, err = 0;
2668
2669	for (i = 0; i < adapter->num_tx_queues; i++) {
2670		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2671		if (err) {
2672			dev_err(&pdev->dev,
2673				"Allocation for Tx Queue %u failed\n", i);
2674			for (i--; i >= 0; i--)
2675				igb_free_tx_resources(adapter->tx_ring[i]);
2676			break;
2677		}
2678	}
2679
2680	return err;
2681}
2682
2683/**
2684 * igb_setup_tctl - configure the transmit control registers
2685 * @adapter: Board private structure
2686 **/
2687void igb_setup_tctl(struct igb_adapter *adapter)
2688{
2689	struct e1000_hw *hw = &adapter->hw;
2690	u32 tctl;
2691
2692	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2693	wr32(E1000_TXDCTL(0), 0);
2694
2695	/* Program the Transmit Control Register */
2696	tctl = rd32(E1000_TCTL);
2697	tctl &= ~E1000_TCTL_CT;
2698	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2699		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2700
2701	igb_config_collision_dist(hw);
2702
2703	/* Enable transmits */
2704	tctl |= E1000_TCTL_EN;
2705
2706	wr32(E1000_TCTL, tctl);
2707}
2708
2709/**
2710 * igb_configure_tx_ring - Configure transmit ring after Reset
2711 * @adapter: board private structure
2712 * @ring: tx ring to configure
2713 *
2714 * Configure a transmit ring after a reset.
2715 **/
2716void igb_configure_tx_ring(struct igb_adapter *adapter,
2717                           struct igb_ring *ring)
2718{
2719	struct e1000_hw *hw = &adapter->hw;
2720	u32 txdctl = 0;
2721	u64 tdba = ring->dma;
2722	int reg_idx = ring->reg_idx;
2723
2724	/* disable the queue */
2725	wr32(E1000_TXDCTL(reg_idx), 0);
2726	wrfl();
2727	mdelay(10);
2728
2729	wr32(E1000_TDLEN(reg_idx),
2730	                ring->count * sizeof(union e1000_adv_tx_desc));
2731	wr32(E1000_TDBAL(reg_idx),
2732	                tdba & 0x00000000ffffffffULL);
2733	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2734
2735	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2736	wr32(E1000_TDH(reg_idx), 0);
2737	writel(0, ring->tail);
2738
2739	txdctl |= IGB_TX_PTHRESH;
2740	txdctl |= IGB_TX_HTHRESH << 8;
2741	txdctl |= IGB_TX_WTHRESH << 16;
2742
2743	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2744	wr32(E1000_TXDCTL(reg_idx), txdctl);
2745}
2746
2747/**
2748 * igb_configure_tx - Configure transmit Unit after Reset
2749 * @adapter: board private structure
2750 *
2751 * Configure the Tx unit of the MAC after a reset.
2752 **/
2753static void igb_configure_tx(struct igb_adapter *adapter)
2754{
2755	int i;
2756
2757	for (i = 0; i < adapter->num_tx_queues; i++)
2758		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2759}
2760
2761/**
2762 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2763 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2764 *
2765 * Returns 0 on success, negative on failure
2766 **/
2767int igb_setup_rx_resources(struct igb_ring *rx_ring)
2768{
2769	struct device *dev = rx_ring->dev;
2770	int orig_node = dev_to_node(dev);
2771	int size, desc_len;
2772
2773	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2774	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2775	if (!rx_ring->rx_buffer_info)
2776		rx_ring->rx_buffer_info = vzalloc(size);
2777	if (!rx_ring->rx_buffer_info)
2778		goto err;
2779
2780	desc_len = sizeof(union e1000_adv_rx_desc);
2781
2782	/* Round up to nearest 4K */
2783	rx_ring->size = rx_ring->count * desc_len;
2784	rx_ring->size = ALIGN(rx_ring->size, 4096);
2785
2786	set_dev_node(dev, rx_ring->numa_node);
2787	rx_ring->desc = dma_alloc_coherent(dev,
2788					   rx_ring->size,
2789					   &rx_ring->dma,
2790					   GFP_KERNEL);
2791	set_dev_node(dev, orig_node);
2792	if (!rx_ring->desc)
2793		rx_ring->desc = dma_alloc_coherent(dev,
2794						   rx_ring->size,
2795						   &rx_ring->dma,
2796						   GFP_KERNEL);
2797
2798	if (!rx_ring->desc)
2799		goto err;
2800
2801	rx_ring->next_to_clean = 0;
2802	rx_ring->next_to_use = 0;
2803
2804	return 0;
2805
2806err:
2807	vfree(rx_ring->rx_buffer_info);
2808	rx_ring->rx_buffer_info = NULL;
2809	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2810		" ring\n");
2811	return -ENOMEM;
2812}
2813
2814/**
2815 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2816 *				  (Descriptors) for all queues
2817 * @adapter: board private structure
2818 *
2819 * Return 0 on success, negative on failure
2820 **/
2821static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2822{
2823	struct pci_dev *pdev = adapter->pdev;
2824	int i, err = 0;
2825
2826	for (i = 0; i < adapter->num_rx_queues; i++) {
2827		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2828		if (err) {
2829			dev_err(&pdev->dev,
2830				"Allocation for Rx Queue %u failed\n", i);
2831			for (i--; i >= 0; i--)
2832				igb_free_rx_resources(adapter->rx_ring[i]);
2833			break;
2834		}
2835	}
2836
2837	return err;
2838}
2839
2840/**
2841 * igb_setup_mrqc - configure the multiple receive queue control registers
2842 * @adapter: Board private structure
2843 **/
2844static void igb_setup_mrqc(struct igb_adapter *adapter)
2845{
2846	struct e1000_hw *hw = &adapter->hw;
2847	u32 mrqc, rxcsum;
2848	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2849	union e1000_reta {
2850		u32 dword;
2851		u8  bytes[4];
2852	} reta;
2853	static const u8 rsshash[40] = {
2854		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2855		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2856		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2857		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2858
2859	/* Fill out hash function seeds */
2860	for (j = 0; j < 10; j++) {
2861		u32 rsskey = rsshash[(j * 4)];
2862		rsskey |= rsshash[(j * 4) + 1] << 8;
2863		rsskey |= rsshash[(j * 4) + 2] << 16;
2864		rsskey |= rsshash[(j * 4) + 3] << 24;
2865		array_wr32(E1000_RSSRK(0), j, rsskey);
2866	}
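	/*
	 * For example, the first register holds the first four key bytes
	 * packed little-endian:
	 * RSSRK(0) = 0x6d | 0x5a << 8 | 0x56 << 16 | 0xda << 24 = 0xda565a6d
	 */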
2867
2868	num_rx_queues = adapter->rss_queues;
2869
2870	if (adapter->vfs_allocated_count) {
2871		/* 82575 and 82576 support 2 RSS queues for VMDq */
2872		switch (hw->mac.type) {
2873		case e1000_i350:
2874		case e1000_82580:
2875			num_rx_queues = 1;
2876			shift = 0;
2877			break;
2878		case e1000_82576:
2879			shift = 3;
2880			num_rx_queues = 2;
2881			break;
2882		case e1000_82575:
2883			shift = 2;
2884			shift2 = 6;
2885		default:
2886			break;
2887		}
2888	} else {
2889		if (hw->mac.type == e1000_82575)
2890			shift = 6;
2891	}
2892
2893	for (j = 0; j < (32 * 4); j++) {
2894		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2895		if (shift2)
2896			reta.bytes[j & 3] |= num_rx_queues << shift2;
2897		if ((j & 3) == 3)
2898			wr32(E1000_RETA(j >> 2), reta.dword);
2899	}
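	/*
	 * With typical defaults (e.g. rss_queues = 4, shift = 0, shift2 = 0)
	 * the loop above cycles the 128 redirection entries through queue
	 * indices 0, 1, 2, 3, 0, 1, ... packing four entries per RETA dword.
	 */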
2900
2901	/*
2902	 * Disable raw packet checksumming so that RSS hash is placed in
2903	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2904	 * offloads as they are enabled by default
2905	 */
2906	rxcsum = rd32(E1000_RXCSUM);
2907	rxcsum |= E1000_RXCSUM_PCSD;
2908
2909	if (adapter->hw.mac.type >= e1000_82576)
2910		/* Enable Receive Checksum Offload for SCTP */
2911		rxcsum |= E1000_RXCSUM_CRCOFL;
2912
2913	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2914	wr32(E1000_RXCSUM, rxcsum);
2915
2916	/* If VMDq is enabled then we set the appropriate mode for that, else
2917	 * we default to RSS so that an RSS hash is calculated per packet even
2918	 * if we are only using one queue */
2919	if (adapter->vfs_allocated_count) {
2920		if (hw->mac.type > e1000_82575) {
2921			/* Set the default pool for the PF's first queue */
2922			u32 vtctl = rd32(E1000_VT_CTL);
2923			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2924				   E1000_VT_CTL_DISABLE_DEF_POOL);
2925			vtctl |= adapter->vfs_allocated_count <<
2926				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2927			wr32(E1000_VT_CTL, vtctl);
2928		}
2929		if (adapter->rss_queues > 1)
2930			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2931		else
2932			mrqc = E1000_MRQC_ENABLE_VMDQ;
2933	} else {
2934		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2935	}
2936	igb_vmm_control(adapter);
2937
2938	/*
2939	 * Generate RSS hash based on TCP port numbers and/or
2940	 * IPv4/v6 src and dst addresses since UDP cannot be
2941	 * hashed reliably due to IP fragmentation
2942	 */
2943	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2944		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2945		E1000_MRQC_RSS_FIELD_IPV6 |
2946		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2947		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2948
2949	wr32(E1000_MRQC, mrqc);
2950}
2951
2952/**
2953 * igb_setup_rctl - configure the receive control registers
2954 * @adapter: Board private structure
2955 **/
2956void igb_setup_rctl(struct igb_adapter *adapter)
2957{
2958	struct e1000_hw *hw = &adapter->hw;
2959	u32 rctl;
2960
2961	rctl = rd32(E1000_RCTL);
2962
2963	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2964	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2965
2966	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2967		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2968
2969	/*
2970	 * enable stripping of CRC. It's unlikely this will break BMC
2971	 * redirection as it did with e1000. Newer features require
2972	 * that the HW strips the CRC.
2973	 */
2974	rctl |= E1000_RCTL_SECRC;
2975
2976	/* disable store bad packets and clear size bits. */
2977	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2978
2979	/* enable LPE to prevent packets larger than max_frame_size */
2980	rctl |= E1000_RCTL_LPE;
2981
2982	/* disable queue 0 to prevent tail write w/o re-config */
2983	wr32(E1000_RXDCTL(0), 0);
2984
2985	/* Attention!!!  For SR-IOV PF driver operations you must enable
2986	 * queue drop for all VF and PF queues to prevent head of line blocking
2987	 * if an un-trusted VF does not provide descriptors to hardware.
2988	 */
2989	if (adapter->vfs_allocated_count) {
2990		/* set all queue drop enable bits */
2991		wr32(E1000_QDE, ALL_QUEUES);
2992	}
2993
2994	wr32(E1000_RCTL, rctl);
2995}
2996
2997static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2998                                   int vfn)
2999{
3000	struct e1000_hw *hw = &adapter->hw;
3001	u32 vmolr;
3002
3003	/* if it isn't the PF, check to see if VFs are enabled and
3004	 * increase the size to support vlan tags */
3005	if (vfn < adapter->vfs_allocated_count &&
3006	    adapter->vf_data[vfn].vlans_enabled)
3007		size += VLAN_TAG_SIZE;
3008
3009	vmolr = rd32(E1000_VMOLR(vfn));
3010	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3011	vmolr |= size | E1000_VMOLR_LPE;
3012	wr32(E1000_VMOLR(vfn), vmolr);
3013
3014	return 0;
3015}
3016
3017/**
3018 * igb_rlpml_set - set maximum receive packet size
3019 * @adapter: board private structure
3020 *
3021 * Configure maximum receivable packet size.
3022 **/
3023static void igb_rlpml_set(struct igb_adapter *adapter)
3024{
3025	u32 max_frame_size = adapter->max_frame_size;
3026	struct e1000_hw *hw = &adapter->hw;
3027	u16 pf_id = adapter->vfs_allocated_count;
3028
3029	if (pf_id) {
3030		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3031		/*
3032		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3033		 * to our max jumbo frame size, in case we need to enable
3034		 * jumbo frames on one of the rings later.
3035		 * This will not pass over-length frames into the default
3036		 * queue because it's gated by the VMOLR.RLPML.
3037		 */
3038		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3039	}
3040
3041	wr32(E1000_RLPML, max_frame_size);
3042}
3043
3044static inline void igb_set_vmolr(struct igb_adapter *adapter,
3045				 int vfn, bool aupe)
3046{
3047	struct e1000_hw *hw = &adapter->hw;
3048	u32 vmolr;
3049
3050	/*
3051	 * This register exists only on 82576 and newer, so if we are on older
3052	 * hardware we should exit and do nothing
3053	 */
3054	if (hw->mac.type < e1000_82576)
3055		return;
3056
3057	vmolr = rd32(E1000_VMOLR(vfn));
3058	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3059	if (aupe)
3060		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3061	else
3062		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3063
3064	/* clear all bits that might not be set */
3065	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3066
3067	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3068		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3069	/*
3070	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3071	 * multicast packets
3072	 */
3073	if (vfn <= adapter->vfs_allocated_count)
3074		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3075
3076	wr32(E1000_VMOLR(vfn), vmolr);
3077}
3078
3079/**
3080 * igb_configure_rx_ring - Configure a receive ring after Reset
3081 * @adapter: board private structure
3082 * @ring: receive ring to be configured
3083 *
3084 * Configure the Rx unit of the MAC after a reset.
3085 **/
3086void igb_configure_rx_ring(struct igb_adapter *adapter,
3087                           struct igb_ring *ring)
3088{
3089	struct e1000_hw *hw = &adapter->hw;
3090	u64 rdba = ring->dma;
3091	int reg_idx = ring->reg_idx;
3092	u32 srrctl = 0, rxdctl = 0;
3093
3094	/* disable the queue */
3095	wr32(E1000_RXDCTL(reg_idx), 0);
3096
3097	/* Set DMA base address registers */
3098	wr32(E1000_RDBAL(reg_idx),
3099	     rdba & 0x00000000ffffffffULL);
3100	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3101	wr32(E1000_RDLEN(reg_idx),
3102	               ring->count * sizeof(union e1000_adv_rx_desc));
3103
3104	/* initialize head and tail */
3105	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3106	wr32(E1000_RDH(reg_idx), 0);
3107	writel(0, ring->tail);
3108
3109	/* set descriptor configuration */
3110	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3111#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3112	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3113#else
3114	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3115#endif
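	/* i.e. each Rx packet buffer is half a page (2 KB with 4 KB pages),
	 * capped at 16 KB on architectures with very large pages */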
3116	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3117	if (hw->mac.type >= e1000_82580)
3118		srrctl |= E1000_SRRCTL_TIMESTAMP;
3119	/* Only set Drop Enable if we are supporting multiple queues */
3120	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3121		srrctl |= E1000_SRRCTL_DROP_EN;
3122
3123	wr32(E1000_SRRCTL(reg_idx), srrctl);
3124
3125	/* set filtering for VMDQ pools */
3126	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3127
3128	rxdctl |= IGB_RX_PTHRESH;
3129	rxdctl |= IGB_RX_HTHRESH << 8;
3130	rxdctl |= IGB_RX_WTHRESH << 16;
3131
3132	/* enable receive descriptor fetching */
3133	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3134	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3135}
3136
3137/**
3138 * igb_configure_rx - Configure receive Unit after Reset
3139 * @adapter: board private structure
3140 *
3141 * Configure the Rx unit of the MAC after a reset.
3142 **/
3143static void igb_configure_rx(struct igb_adapter *adapter)
3144{
3145	int i;
3146
3147	/* set UTA to appropriate mode */
3148	igb_set_uta(adapter);
3149
3150	/* set the correct pool for the PF default MAC address in entry 0 */
3151	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3152	                 adapter->vfs_allocated_count);
3153
3154	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3155	 * the Base and Length of the Rx Descriptor Ring */
3156	for (i = 0; i < adapter->num_rx_queues; i++)
3157		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3158}
3159
3160/**
3161 * igb_free_tx_resources - Free Tx Resources per Queue
3162 * @tx_ring: Tx descriptor ring for a specific queue
3163 *
3164 * Free all transmit software resources
3165 **/
3166void igb_free_tx_resources(struct igb_ring *tx_ring)
3167{
3168	igb_clean_tx_ring(tx_ring);
3169
3170	vfree(tx_ring->tx_buffer_info);
3171	tx_ring->tx_buffer_info = NULL;
3172
3173	/* if not set, then don't free */
3174	if (!tx_ring->desc)
3175		return;
3176
3177	dma_free_coherent(tx_ring->dev, tx_ring->size,
3178			  tx_ring->desc, tx_ring->dma);
3179
3180	tx_ring->desc = NULL;
3181}
3182
3183/**
3184 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3185 * @adapter: board private structure
3186 *
3187 * Free all transmit software resources
3188 **/
3189static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3190{
3191	int i;
3192
3193	for (i = 0; i < adapter->num_tx_queues; i++)
3194		igb_free_tx_resources(adapter->tx_ring[i]);
3195}
3196
3197void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3198				    struct igb_tx_buffer *tx_buffer)
3199{
3200	if (tx_buffer->skb) {
3201		dev_kfree_skb_any(tx_buffer->skb);
3202		if (tx_buffer->dma)
3203			dma_unmap_single(ring->dev,
3204					 tx_buffer->dma,
3205					 tx_buffer->length,
3206					 DMA_TO_DEVICE);
3207	} else if (tx_buffer->dma) {
3208		dma_unmap_page(ring->dev,
3209			       tx_buffer->dma,
3210			       tx_buffer->length,
3211			       DMA_TO_DEVICE);
3212	}
3213	tx_buffer->next_to_watch = NULL;
3214	tx_buffer->skb = NULL;
3215	tx_buffer->dma = 0;
3216	/* buffer_info must be completely set up in the transmit path */
3217}
3218
3219/**
3220 * igb_clean_tx_ring - Free Tx Buffers
3221 * @tx_ring: ring to be cleaned
3222 **/
3223static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3224{
3225	struct igb_tx_buffer *buffer_info;
3226	unsigned long size;
3227	u16 i;
3228
3229	if (!tx_ring->tx_buffer_info)
3230		return;
3231	/* Free all the Tx ring sk_buffs */
3232
3233	for (i = 0; i < tx_ring->count; i++) {
3234		buffer_info = &tx_ring->tx_buffer_info[i];
3235		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3236	}
3237
3238	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3239	memset(tx_ring->tx_buffer_info, 0, size);
3240
3241	/* Zero out the descriptor ring */
3242	memset(tx_ring->desc, 0, tx_ring->size);
3243
3244	tx_ring->next_to_use = 0;
3245	tx_ring->next_to_clean = 0;
3246}
3247
3248/**
3249 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3250 * @adapter: board private structure
3251 **/
3252static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3253{
3254	int i;
3255
3256	for (i = 0; i < adapter->num_tx_queues; i++)
3257		igb_clean_tx_ring(adapter->tx_ring[i]);
3258}
3259
3260/**
3261 * igb_free_rx_resources - Free Rx Resources
3262 * @rx_ring: ring to clean the resources from
3263 *
3264 * Free all receive software resources
3265 **/
3266void igb_free_rx_resources(struct igb_ring *rx_ring)
3267{
3268	igb_clean_rx_ring(rx_ring);
3269
3270	vfree(rx_ring->rx_buffer_info);
3271	rx_ring->rx_buffer_info = NULL;
3272
3273	/* if not set, then don't free */
3274	if (!rx_ring->desc)
3275		return;
3276
3277	dma_free_coherent(rx_ring->dev, rx_ring->size,
3278			  rx_ring->desc, rx_ring->dma);
3279
3280	rx_ring->desc = NULL;
3281}
3282
3283/**
3284 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3285 * @adapter: board private structure
3286 *
3287 * Free all receive software resources
3288 **/
3289static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3290{
3291	int i;
3292
3293	for (i = 0; i < adapter->num_rx_queues; i++)
3294		igb_free_rx_resources(adapter->rx_ring[i]);
3295}
3296
3297/**
3298 * igb_clean_rx_ring - Free Rx Buffers per Queue
3299 * @rx_ring: ring to free buffers from
3300 **/
3301static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3302{
3303	unsigned long size;
3304	u16 i;
3305
3306	if (!rx_ring->rx_buffer_info)
3307		return;
3308
3309	/* Free all the Rx ring sk_buffs */
3310	for (i = 0; i < rx_ring->count; i++) {
3311		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3312		if (buffer_info->dma) {
3313			dma_unmap_single(rx_ring->dev,
3314			                 buffer_info->dma,
3315					 IGB_RX_HDR_LEN,
3316					 DMA_FROM_DEVICE);
3317			buffer_info->dma = 0;
3318		}
3319
3320		if (buffer_info->skb) {
3321			dev_kfree_skb(buffer_info->skb);
3322			buffer_info->skb = NULL;
3323		}
3324		if (buffer_info->page_dma) {
3325			dma_unmap_page(rx_ring->dev,
3326			               buffer_info->page_dma,
3327				       PAGE_SIZE / 2,
3328				       DMA_FROM_DEVICE);
3329			buffer_info->page_dma = 0;
3330		}
3331		if (buffer_info->page) {
3332			put_page(buffer_info->page);
3333			buffer_info->page = NULL;
3334			buffer_info->page_offset = 0;
3335		}
3336	}
3337
3338	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3339	memset(rx_ring->rx_buffer_info, 0, size);
3340
3341	/* Zero out the descriptor ring */
3342	memset(rx_ring->desc, 0, rx_ring->size);
3343
3344	rx_ring->next_to_clean = 0;
3345	rx_ring->next_to_use = 0;
3346}
3347
3348/**
3349 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3350 * @adapter: board private structure
3351 **/
3352static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3353{
3354	int i;
3355
3356	for (i = 0; i < adapter->num_rx_queues; i++)
3357		igb_clean_rx_ring(adapter->rx_ring[i]);
3358}
3359
3360/**
3361 * igb_set_mac - Change the Ethernet Address of the NIC
3362 * @netdev: network interface device structure
3363 * @p: pointer to an address structure
3364 *
3365 * Returns 0 on success, negative on failure
3366 **/
3367static int igb_set_mac(struct net_device *netdev, void *p)
3368{
3369	struct igb_adapter *adapter = netdev_priv(netdev);
3370	struct e1000_hw *hw = &adapter->hw;
3371	struct sockaddr *addr = p;
3372
3373	if (!is_valid_ether_addr(addr->sa_data))
3374		return -EADDRNOTAVAIL;
3375
3376	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3377	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3378
3379	/* set the correct pool for the new PF MAC address in entry 0 */
3380	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3381	                 adapter->vfs_allocated_count);
3382
3383	return 0;
3384}
3385
3386/**
3387 * igb_write_mc_addr_list - write multicast addresses to MTA
3388 * @netdev: network interface device structure
3389 *
3390 * Writes multicast address list to the MTA hash table.
3391 * Returns: -ENOMEM on failure
3392 *                0 on no addresses written
3393 *                X on writing X addresses to MTA
3394 **/
3395static int igb_write_mc_addr_list(struct net_device *netdev)
3396{
3397	struct igb_adapter *adapter = netdev_priv(netdev);
3398	struct e1000_hw *hw = &adapter->hw;
3399	struct netdev_hw_addr *ha;
3400	u8  *mta_list;
3401	int i;
3402
3403	if (netdev_mc_empty(netdev)) {
3404		/* nothing to program, so clear mc list */
3405		igb_update_mc_addr_list(hw, NULL, 0);
3406		igb_restore_vf_multicasts(adapter);
3407		return 0;
3408	}
3409
3410	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3411	if (!mta_list)
3412		return -ENOMEM;
3413
3414	/* The shared function expects a packed array of only addresses. */
3415	i = 0;
3416	netdev_for_each_mc_addr(ha, netdev)
3417		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3418
3419	igb_update_mc_addr_list(hw, mta_list, i);
3420	kfree(mta_list);
3421
3422	return netdev_mc_count(netdev);
3423}
3424
3425/**
3426 * igb_write_uc_addr_list - write unicast addresses to RAR table
3427 * @netdev: network interface device structure
3428 *
3429 * Writes unicast address list to the RAR table.
3430 * Returns: -ENOMEM on failure/insufficient address space
3431 *                0 on no addresses written
3432 *                X on writing X addresses to the RAR table
3433 **/
3434static int igb_write_uc_addr_list(struct net_device *netdev)
3435{
3436	struct igb_adapter *adapter = netdev_priv(netdev);
3437	struct e1000_hw *hw = &adapter->hw;
3438	unsigned int vfn = adapter->vfs_allocated_count;
3439	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3440	int count = 0;
3441
3442	/* return ENOMEM indicating insufficient memory for addresses */
3443	if (netdev_uc_count(netdev) > rar_entries)
3444		return -ENOMEM;
3445
3446	if (!netdev_uc_empty(netdev) && rar_entries) {
3447		struct netdev_hw_addr *ha;
3448
3449		netdev_for_each_uc_addr(ha, netdev) {
3450			if (!rar_entries)
3451				break;
3452			igb_rar_set_qsel(adapter, ha->addr,
3453			                 rar_entries--,
3454			                 vfn);
3455			count++;
3456		}
3457	}
3458	/* write the addresses in reverse order to avoid write combining */
3459	for (; rar_entries > 0 ; rar_entries--) {
3460		wr32(E1000_RAH(rar_entries), 0);
3461		wr32(E1000_RAL(rar_entries), 0);
3462	}
3463	wrfl();
3464
3465	return count;
3466}
3467
3468/**
3469 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3470 * @netdev: network interface device structure
3471 *
3472 * The set_rx_mode entry point is called whenever the unicast or multicast
3473 * address lists or the network interface flags are updated.  This routine is
3474 * responsible for configuring the hardware for proper unicast, multicast,
3475 * promiscuous mode, and all-multi behavior.
3476 **/
3477static void igb_set_rx_mode(struct net_device *netdev)
3478{
3479	struct igb_adapter *adapter = netdev_priv(netdev);
3480	struct e1000_hw *hw = &adapter->hw;
3481	unsigned int vfn = adapter->vfs_allocated_count;
3482	u32 rctl, vmolr = 0;
3483	int count;
3484
3485	/* Check for Promiscuous and All Multicast modes */
3486	rctl = rd32(E1000_RCTL);
3487
3488	/* clear the affected bits */
3489	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3490
3491	if (netdev->flags & IFF_PROMISC) {
3492		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3493		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3494	} else {
3495		if (netdev->flags & IFF_ALLMULTI) {
3496			rctl |= E1000_RCTL_MPE;
3497			vmolr |= E1000_VMOLR_MPME;
3498		} else {
3499			/*
3500			 * Write addresses to the MTA; if the attempt fails
3501			 * then we should just turn on promiscuous mode so
3502			 * that we can at least receive multicast traffic
3503			 */
3504			count = igb_write_mc_addr_list(netdev);
3505			if (count < 0) {
3506				rctl |= E1000_RCTL_MPE;
3507				vmolr |= E1000_VMOLR_MPME;
3508			} else if (count) {
3509				vmolr |= E1000_VMOLR_ROMPE;
3510			}
3511		}
3512		/*
3513		 * Write addresses to available RAR registers; if there is not
3514		 * sufficient space to store all the addresses then enable
3515		 * unicast promiscuous mode
3516		 */
3517		count = igb_write_uc_addr_list(netdev);
3518		if (count < 0) {
3519			rctl |= E1000_RCTL_UPE;
3520			vmolr |= E1000_VMOLR_ROPE;
3521		}
3522		rctl |= E1000_RCTL_VFE;
3523	}
3524	wr32(E1000_RCTL, rctl);
3525
3526	/*
3527	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3528	 * the VMOLR to enable the appropriate modes.  Without this workaround
3529	 * we will have issues with VLAN tag stripping not being done for frames
3530	 * that are only arriving because we are the default pool
3531	 */
3532	if (hw->mac.type < e1000_82576)
3533		return;
3534
3535	vmolr |= rd32(E1000_VMOLR(vfn)) &
3536	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3537	wr32(E1000_VMOLR(vfn), vmolr);
3538	igb_restore_vf_multicasts(adapter);
3539}
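/* Summary of the policy above, for reference:
 *   IFF_PROMISC            -> RCTL.UPE + MPE, VMOLR ROPE + MPME
 *   IFF_ALLMULTI           -> RCTL.MPE, VMOLR MPME
 *   MTA write fails (< 0)  -> RCTL.MPE, VMOLR MPME
 *   RAR write fails (< 0)  -> RCTL.UPE, VMOLR ROPE
 * RCTL.VFE (VLAN filtering) is enabled in every non-IFF_PROMISC case, and the
 * VMOLR bits are only applied on 82576 and later parts.
 */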
3540
3541static void igb_check_wvbr(struct igb_adapter *adapter)
3542{
3543	struct e1000_hw *hw = &adapter->hw;
3544	u32 wvbr = 0;
3545
3546	switch (hw->mac.type) {
3547	case e1000_82576:
3548	case e1000_i350:
3549		if (!(wvbr = rd32(E1000_WVBR)))
3550			return;
3551		break;
3552	default:
3553		break;
3554	}
3555
3556	adapter->wvbr |= wvbr;
3557}
3558
3559#define IGB_STAGGERED_QUEUE_OFFSET 8
3560
3561static void igb_spoof_check(struct igb_adapter *adapter)
3562{
3563	int j;
3564
3565	if (!adapter->wvbr)
3566		return;
3567
3568	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3569		if (adapter->wvbr & (1 << j) ||
3570		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3571			dev_warn(&adapter->pdev->dev,
3572				"Spoof event(s) detected on VF %d\n", j);
3573			adapter->wvbr &=
3574				~((1 << j) |
3575				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3576		}
3577	}
3578}
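/* Note: judging only from the loop above, WVBR appears to report one class of
 * anti-spoof event in bit j and a second class in bit
 * (j + IGB_STAGGERED_QUEUE_OFFSET) for VF j, e.g. a set bit 10 would flag
 * VF 2.  This is an interpretation of the bit layout used by this code, not a
 * statement taken from the datasheet.
 */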
3579
3580/* Need to wait a few seconds after link up to get diagnostic information from
3581 * the phy */
3582static void igb_update_phy_info(unsigned long data)
3583{
3584	struct igb_adapter *adapter = (struct igb_adapter *) data;
3585	igb_get_phy_info(&adapter->hw);
3586}
3587
3588/**
3589 * igb_has_link - check shared code for link and determine up/down
3590 * @adapter: pointer to driver private info
3591 **/
3592bool igb_has_link(struct igb_adapter *adapter)
3593{
3594	struct e1000_hw *hw = &adapter->hw;
3595	bool link_active = false;
3596	s32 ret_val = 0;
3597
3598	/* get_link_status is set on LSC (link status) interrupt or
3599	 * rx sequence error interrupt.  get_link_status will stay
3600	 * set until e1000_check_for_link establishes link, and this
3601	 * applies to copper adapters ONLY
3602	 */
3603	switch (hw->phy.media_type) {
3604	case e1000_media_type_copper:
3605		if (hw->mac.get_link_status) {
3606			ret_val = hw->mac.ops.check_for_link(hw);
3607			link_active = !hw->mac.get_link_status;
3608		} else {
3609			link_active = true;
3610		}
3611		break;
3612	case e1000_media_type_internal_serdes:
3613		ret_val = hw->mac.ops.check_for_link(hw);
3614		link_active = hw->mac.serdes_has_link;
3615		break;
3616	default:
3617	case e1000_media_type_unknown:
3618		break;
3619	}
3620
3621	return link_active;
3622}
3623
3624static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3625{
3626	bool ret = false;
3627	u32 ctrl_ext, thstat;
3628
3629	/* check for thermal sensor event on i350, copper only */
3630	if (hw->mac.type == e1000_i350) {
3631		thstat = rd32(E1000_THSTAT);
3632		ctrl_ext = rd32(E1000_CTRL_EXT);
3633
3634		if ((hw->phy.media_type == e1000_media_type_copper) &&
3635		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3636			ret = !!(thstat & event);
3637		}
3638	}
3639
3640	return ret;
3641}
3642
3643/**
3644 * igb_watchdog - Timer Call-back
3645 * @data: pointer to adapter cast into an unsigned long
3646 **/
3647static void igb_watchdog(unsigned long data)
3648{
3649	struct igb_adapter *adapter = (struct igb_adapter *)data;
3650	/* Do the rest outside of interrupt context */
3651	schedule_work(&adapter->watchdog_task);
3652}
3653
3654static void igb_watchdog_task(struct work_struct *work)
3655{
3656	struct igb_adapter *adapter = container_of(work,
3657	                                           struct igb_adapter,
3658                                                   watchdog_task);
3659	struct e1000_hw *hw = &adapter->hw;
3660	struct net_device *netdev = adapter->netdev;
3661	u32 link;
3662	int i;
3663
3664	link = igb_has_link(adapter);
3665	if (link) {
3666		if (!netif_carrier_ok(netdev)) {
3667			u32 ctrl;
3668			hw->mac.ops.get_speed_and_duplex(hw,
3669			                                 &adapter->link_speed,
3670			                                 &adapter->link_duplex);
3671
3672			ctrl = rd32(E1000_CTRL);
3673			/* Link status message must follow this format */
3674			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3675				 "Flow Control: %s\n",
3676			       netdev->name,
3677			       adapter->link_speed,
3678			       adapter->link_duplex == FULL_DUPLEX ?
3679				 "Full Duplex" : "Half Duplex",
3680			       ((ctrl & E1000_CTRL_TFCE) &&
3681			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3682			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3683			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3684
3685			/* check for thermal sensor event */
3686			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3687				printk(KERN_INFO "igb: %s The network adapter "
3688						 "link speed was downshifted "
3689						 "because it overheated.\n",
3690						 netdev->name);
3691			}
3692
3693			/* adjust timeout factor according to speed/duplex */
3694			adapter->tx_timeout_factor = 1;
3695			switch (adapter->link_speed) {
3696			case SPEED_10:
3697				adapter->tx_timeout_factor = 14;
3698				break;
3699			case SPEED_100:
3700				/* maybe add some timeout factor ? */
3701				break;
3702			}
3703
3704			netif_carrier_on(netdev);
3705
3706			igb_ping_all_vfs(adapter);
3707			igb_check_vf_rate_limit(adapter);
3708
3709			/* link state has changed, schedule phy info update */
3710			if (!test_bit(__IGB_DOWN, &adapter->state))
3711				mod_timer(&adapter->phy_info_timer,
3712					  round_jiffies(jiffies + 2 * HZ));
3713		}
3714	} else {
3715		if (netif_carrier_ok(netdev)) {
3716			adapter->link_speed = 0;
3717			adapter->link_duplex = 0;
3718
3719			/* check for thermal sensor event */
3720			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3721				printk(KERN_ERR "igb: %s The network adapter "
3722						"was stopped because it "
3723						"overheated.\n",
3724						netdev->name);
3725			}
3726
3727			/* Link status message must follow this format */
3728			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3729			       netdev->name);
3730			netif_carrier_off(netdev);
3731
3732			igb_ping_all_vfs(adapter);
3733
3734			/* link state has changed, schedule phy info update */
3735			if (!test_bit(__IGB_DOWN, &adapter->state))
3736				mod_timer(&adapter->phy_info_timer,
3737					  round_jiffies(jiffies + 2 * HZ));
3738		}
3739	}
3740
3741	spin_lock(&adapter->stats64_lock);
3742	igb_update_stats(adapter, &adapter->stats64);
3743	spin_unlock(&adapter->stats64_lock);
3744
3745	for (i = 0; i < adapter->num_tx_queues; i++) {
3746		struct igb_ring *tx_ring = adapter->tx_ring[i];
3747		if (!netif_carrier_ok(netdev)) {
3748			/* We've lost link, so the controller stops DMA,
3749			 * but we've got queued Tx work that's never going
3750			 * to get done, so reset controller to flush Tx.
3751			 * (Do the reset outside of interrupt context). */
3752			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3753				adapter->tx_timeout_count++;
3754				schedule_work(&adapter->reset_task);
3755				/* return immediately since reset is imminent */
3756				return;
3757			}
3758		}
3759
3760		/* Force detection of hung controller every watchdog period */
3761		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3762	}
3763
3764	/* Cause software interrupt to ensure rx ring is cleaned */
3765	if (adapter->msix_entries) {
3766		u32 eics = 0;
3767		for (i = 0; i < adapter->num_q_vectors; i++)
3768			eics |= adapter->q_vector[i]->eims_value;
3769		wr32(E1000_EICS, eics);
3770	} else {
3771		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3772	}
3773
3774	igb_spoof_check(adapter);
3775
3776	/* Reset the timer */
3777	if (!test_bit(__IGB_DOWN, &adapter->state))
3778		mod_timer(&adapter->watchdog_timer,
3779			  round_jiffies(jiffies + 2 * HZ));
3780}
3781
3782enum latency_range {
3783	lowest_latency = 0,
3784	low_latency = 1,
3785	bulk_latency = 2,
3786	latency_invalid = 255
3787};
3788
3789/**
3790 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3791 * @q_vector: pointer to q_vector
3792 *
3793 *      Stores a new ITR value based strictly on packet size.  This
3794 *      algorithm is less sophisticated than that used in igb_update_itr,
3795 *      due to the difficulty of synchronizing statistics across multiple
3796 *      receive rings.  The divisors and thresholds used by this function
3797 *      were determined based on theoretical maximum wire speed and testing
3798 *      data, in order to minimize response time while increasing bulk
3799 *      throughput.
3800 *      This functionality is controlled by the InterruptThrottleRate module
3801 *      parameter (see igb_param.c)
3802 *      NOTE:  This function is called only when operating in a multiqueue
3803 *             receive environment.
3804 **/
3805static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3806{
3807	int new_val = q_vector->itr_val;
3808	int avg_wire_size = 0;
3809	struct igb_adapter *adapter = q_vector->adapter;
3810	unsigned int packets;
3811
3812	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3813	 * ints/sec - ITR timer value of 120 ticks.
3814	 */
3815	if (adapter->link_speed != SPEED_1000) {
3816		new_val = IGB_4K_ITR;
3817		goto set_itr_val;
3818	}
3819
3820	packets = q_vector->rx.total_packets;
3821	if (packets)
3822		avg_wire_size = q_vector->rx.total_bytes / packets;
3823
3824	packets = q_vector->tx.total_packets;
3825	if (packets)
3826		avg_wire_size = max_t(u32, avg_wire_size,
3827				      q_vector->tx.total_bytes / packets);
3828
3829	/* if avg_wire_size isn't set no work was done */
3830	if (!avg_wire_size)
3831		goto clear_counts;
3832
3833	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3834	avg_wire_size += 24;
3835
3836	/* Don't starve jumbo frames */
3837	avg_wire_size = min(avg_wire_size, 3000);
3838
3839	/* Give a little boost to mid-size frames */
3840	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3841		new_val = avg_wire_size / 3;
3842	else
3843		new_val = avg_wire_size / 2;
3844
3845	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3846	if (new_val < IGB_20K_ITR &&
3847	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3848	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3849		new_val = IGB_20K_ITR;
3850
3851set_itr_val:
3852	if (new_val != q_vector->itr_val) {
3853		q_vector->itr_val = new_val;
3854		q_vector->set_itr = 1;
3855	}
3856clear_counts:
3857	q_vector->rx.total_bytes = 0;
3858	q_vector->rx.total_packets = 0;
3859	q_vector->tx.total_bytes = 0;
3860	q_vector->tx.total_packets = 0;
3861}
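/* Worked example of the heuristic above (illustrative numbers only, ignoring
 * the conservative-mode floor): with 20000 rx bytes over 100 packets,
 * avg_wire_size = 200, +24 overhead = 224; since 224 is not in the (300, 1200)
 * mid-size band, new_val = 224 / 2 = 112.  For 800-byte frames, 824 falls in
 * the band, so new_val = 824 / 3 = 274, i.e. a longer interval and a lower
 * interrupt rate for bulkier traffic.
 */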
3862
3863/**
3864 * igb_update_itr - update the dynamic ITR value based on statistics
3865 * @q_vector: pointer to q_vector
3866 * @ring_container: ring info to update the itr for
3867 *      Stores a new ITR value based on packet and byte
3868 *      counts during the last interrupt.  The advantage of per interrupt
3869 *      computation is faster updates and more accurate ITR for the current
3870 *      traffic pattern.  Constants in this function were computed
3871 *      based on theoretical maximum wire speed and thresholds were set based
3872 *      on testing data as well as attempting to minimize response time
3873 *      while increasing bulk throughput.
3874 *      This functionality is controlled by the InterruptThrottleRate module
3875 *      parameter (see igb_param.c)
3876 *      NOTE:  These calculations are only valid when operating in a single-
3877 *             queue environment.
3878 **/
3879static void igb_update_itr(struct igb_q_vector *q_vector,
3880			   struct igb_ring_container *ring_container)
3881{
3882	unsigned int packets = ring_container->total_packets;
3883	unsigned int bytes = ring_container->total_bytes;
3884	u8 itrval = ring_container->itr;
3885
3886	/* no packets, exit with status unchanged */
3887	if (packets == 0)
3888		return;
3889
3890	switch (itrval) {
3891	case lowest_latency:
3892		/* handle TSO and jumbo frames */
3893		if (bytes/packets > 8000)
3894			itrval = bulk_latency;
3895		else if ((packets < 5) && (bytes > 512))
3896			itrval = low_latency;
3897		break;
3898	case low_latency:  /* 50 usec aka 20000 ints/s */
3899		if (bytes > 10000) {
3900			/* this if handles the TSO accounting */
3901			if (bytes/packets > 8000) {
3902				itrval = bulk_latency;
3903			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3904				itrval = bulk_latency;
3905			} else if (packets > 35) {
3906				itrval = lowest_latency;
3907			}
3908		} else if (bytes/packets > 2000) {
3909			itrval = bulk_latency;
3910		} else if (packets <= 2 && bytes < 512) {
3911			itrval = lowest_latency;
3912		}
3913		break;
3914	case bulk_latency: /* 250 usec aka 4000 ints/s */
3915		if (bytes > 25000) {
3916			if (packets > 35)
3917				itrval = low_latency;
3918		} else if (bytes < 1500) {
3919			itrval = low_latency;
3920		}
3921		break;
3922	}
3923
3924	/* clear work counters since we have the values we need */
3925	ring_container->total_bytes = 0;
3926	ring_container->total_packets = 0;
3927
3928	/* write updated itr to ring container */
3929	ring_container->itr = itrval;
3930}
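/* Worked example of the state machine above (illustrative numbers only): in
 * the low_latency state, an interrupt that cleaned 20 packets / 30000 bytes
 * has bytes/packets = 1500 > 1200, so itrval moves to bulk_latency; 40 small
 * packets totalling 12000 bytes (300 bytes each) would instead move it to
 * lowest_latency because packets > 35.
 */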
3931
3932static void igb_set_itr(struct igb_q_vector *q_vector)
3933{
3934	struct igb_adapter *adapter = q_vector->adapter;
3935	u32 new_itr = q_vector->itr_val;
3936	u8 current_itr = 0;
3937
3938	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3939	if (adapter->link_speed != SPEED_1000) {
3940		current_itr = 0;
3941		new_itr = IGB_4K_ITR;
3942		goto set_itr_now;
3943	}
3944
3945	igb_update_itr(q_vector, &q_vector->tx);
3946	igb_update_itr(q_vector, &q_vector->rx);
3947
3948	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3949
3950	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3951	if (current_itr == lowest_latency &&
3952	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3953	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3954		current_itr = low_latency;
3955
3956	switch (current_itr) {
3957	/* counts and packets in update_itr are dependent on these numbers */
3958	case lowest_latency:
3959		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3960		break;
3961	case low_latency:
3962		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3963		break;
3964	case bulk_latency:
3965		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3966		break;
3967	default:
3968		break;
3969	}
3970
3971set_itr_now:
3972	if (new_itr != q_vector->itr_val) {
3973		/* this attempts to bias the interrupt rate towards Bulk
3974		 * by adding intermediate steps when interrupt rate is
3975		 * increasing */
3976		new_itr = new_itr > q_vector->itr_val ?
3977		             max((new_itr * q_vector->itr_val) /
3978		                 (new_itr + (q_vector->itr_val >> 2)),
3979				 new_itr) :
3980			     new_itr;
3981		/* Don't write the value here; it resets the adapter's
3982		 * internal timer, and causes us to delay far longer than
3983		 * we should between interrupts.  Instead, we write the ITR
3984		 * value at the beginning of the next interrupt so the timing
3985		 * ends up being correct.
3986		 */
3987		q_vector->itr_val = new_itr;
3988		q_vector->set_itr = 1;
3989	}
3990}
3991
3992void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3993		     u32 type_tucmd, u32 mss_l4len_idx)
3994{
3995	struct e1000_adv_tx_context_desc *context_desc;
3996	u16 i = tx_ring->next_to_use;
3997
3998	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3999
4000	i++;
4001	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4002
4003	/* set bits to identify this as an advanced context descriptor */
4004	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4005
4006	/* For 82575, context index must be unique per ring. */
4007	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4008		mss_l4len_idx |= tx_ring->reg_idx << 4;
4009
4010	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4011	context_desc->seqnum_seed	= 0;
4012	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4013	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4014}
4015
4016static int igb_tso(struct igb_ring *tx_ring,
4017		   struct igb_tx_buffer *first,
4018		   u8 *hdr_len)
4019{
4020	struct sk_buff *skb = first->skb;
4021	u32 vlan_macip_lens, type_tucmd;
4022	u32 mss_l4len_idx, l4len;
4023
4024	if (!skb_is_gso(skb))
4025		return 0;
4026
4027	if (skb_header_cloned(skb)) {
4028		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4029		if (err)
4030			return err;
4031	}
4032
4033	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4034	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4035
4036	if (first->protocol == __constant_htons(ETH_P_IP)) {
4037		struct iphdr *iph = ip_hdr(skb);
4038		iph->tot_len = 0;
4039		iph->check = 0;
4040		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4041							 iph->daddr, 0,
4042							 IPPROTO_TCP,
4043							 0);
4044		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4045		first->tx_flags |= IGB_TX_FLAGS_TSO |
4046				   IGB_TX_FLAGS_CSUM |
4047				   IGB_TX_FLAGS_IPV4;
4048	} else if (skb_is_gso_v6(skb)) {
4049		ipv6_hdr(skb)->payload_len = 0;
4050		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4051						       &ipv6_hdr(skb)->daddr,
4052						       0, IPPROTO_TCP, 0);
4053		first->tx_flags |= IGB_TX_FLAGS_TSO |
4054				   IGB_TX_FLAGS_CSUM;
4055	}
4056
4057	/* compute header lengths */
4058	l4len = tcp_hdrlen(skb);
4059	*hdr_len = skb_transport_offset(skb) + l4len;
4060
4061	/* update gso size and bytecount with header size */
4062	first->gso_segs = skb_shinfo(skb)->gso_segs;
4063	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4064
4065	/* MSS L4LEN IDX */
4066	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4067	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4068
4069	/* VLAN MACLEN IPLEN */
4070	vlan_macip_lens = skb_network_header_len(skb);
4071	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4072	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4073
4074	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4075
4076	return 1;
4077}
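/* Illustrative accounting for the bytecount adjustment above (hypothetical
 * sizes): a TSO skb with hdr_len = 54 (14 eth + 20 IP + 20 TCP), gso_size =
 * 1448 and 5 segments has skb->len = 54 + 5 * 1448 = 7294; adding
 * (5 - 1) * 54 = 216 gives 7510 = 5 * (1448 + 54), i.e. the bytes that will
 * actually appear on the wire once the hardware replicates the headers.
 */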
4078
4079static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4080{
4081	struct sk_buff *skb = first->skb;
4082	u32 vlan_macip_lens = 0;
4083	u32 mss_l4len_idx = 0;
4084	u32 type_tucmd = 0;
4085
4086	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4087		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4088			return;
4089	} else {
4090		u8 l4_hdr = 0;
4091		switch (first->protocol) {
4092		case __constant_htons(ETH_P_IP):
4093			vlan_macip_lens |= skb_network_header_len(skb);
4094			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4095			l4_hdr = ip_hdr(skb)->protocol;
4096			break;
4097		case __constant_htons(ETH_P_IPV6):
4098			vlan_macip_lens |= skb_network_header_len(skb);
4099			l4_hdr = ipv6_hdr(skb)->nexthdr;
4100			break;
4101		default:
4102			if (unlikely(net_ratelimit())) {
4103				dev_warn(tx_ring->dev,
4104				 "partial checksum but proto=%x!\n",
4105				 first->protocol);
4106			}
4107			break;
4108		}
4109
4110		switch (l4_hdr) {
4111		case IPPROTO_TCP:
4112			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4113			mss_l4len_idx = tcp_hdrlen(skb) <<
4114					E1000_ADVTXD_L4LEN_SHIFT;
4115			break;
4116		case IPPROTO_SCTP:
4117			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4118			mss_l4len_idx = sizeof(struct sctphdr) <<
4119					E1000_ADVTXD_L4LEN_SHIFT;
4120			break;
4121		case IPPROTO_UDP:
4122			mss_l4len_idx = sizeof(struct udphdr) <<
4123					E1000_ADVTXD_L4LEN_SHIFT;
4124			break;
4125		default:
4126			if (unlikely(net_ratelimit())) {
4127				dev_warn(tx_ring->dev,
4128				 "partial checksum but l4 proto=%x!\n",
4129				 l4_hdr);
4130			}
4131			break;
4132		}
4133
4134		/* update TX checksum flag */
4135		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4136	}
4137
4138	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4139	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4140
4141	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4142}
4143
4144static __le32 igb_tx_cmd_type(u32 tx_flags)
4145{
4146	/* set type for advanced descriptor with frame checksum insertion */
4147	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4148				      E1000_ADVTXD_DCMD_IFCS |
4149				      E1000_ADVTXD_DCMD_DEXT);
4150
4151	/* set HW vlan bit if vlan is present */
4152	if (tx_flags & IGB_TX_FLAGS_VLAN)
4153		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4154
4155	/* set timestamp bit if present */
4156	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4157		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4158
4159	/* set segmentation bits for TSO */
4160	if (tx_flags & IGB_TX_FLAGS_TSO)
4161		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4162
4163	return cmd_type;
4164}
4165
4166static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4167				 union e1000_adv_tx_desc *tx_desc,
4168				 u32 tx_flags, unsigned int paylen)
4169{
4170	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4171
4172	/* 82575 requires a unique index per ring if any offload is enabled */
4173	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4174	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4175		olinfo_status |= tx_ring->reg_idx << 4;
4176
4177	/* insert L4 checksum */
4178	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4179		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4180
4181		/* insert IPv4 checksum */
4182		if (tx_flags & IGB_TX_FLAGS_IPV4)
4183			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4184	}
4185
4186	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4187}
4188
4189/*
4190 * The largest size we can write to the descriptor is 65535.  In order to
4191 * maintain a power of two alignment we have to limit ourselves to 32K.
4192 */
4193#define IGB_MAX_TXD_PWR	15
4194#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
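/* Consequence for igb_tx_map() below: a buffer larger than 32K is split
 * across several descriptors, e.g. a hypothetical 48K fragment becomes one
 * 32K descriptor followed by one 16K descriptor (the inner while loop peels
 * off IGB_MAX_DATA_PER_TXD-sized chunks until the remainder fits).
 */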
4195
4196static void igb_tx_map(struct igb_ring *tx_ring,
4197		       struct igb_tx_buffer *first,
4198		       const u8 hdr_len)
4199{
4200	struct sk_buff *skb = first->skb;
4201	struct igb_tx_buffer *tx_buffer_info;
4202	union e1000_adv_tx_desc *tx_desc;
4203	dma_addr_t dma;
4204	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4205	unsigned int data_len = skb->data_len;
4206	unsigned int size = skb_headlen(skb);
4207	unsigned int paylen = skb->len - hdr_len;
4208	__le32 cmd_type;
4209	u32 tx_flags = first->tx_flags;
4210	u16 i = tx_ring->next_to_use;
4211
4212	tx_desc = IGB_TX_DESC(tx_ring, i);
4213
4214	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4215	cmd_type = igb_tx_cmd_type(tx_flags);
4216
4217	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4218	if (dma_mapping_error(tx_ring->dev, dma))
4219		goto dma_error;
4220
4221	/* record length, and DMA address */
4222	first->length = size;
4223	first->dma = dma;
4224	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4225
4226	for (;;) {
4227		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4228			tx_desc->read.cmd_type_len =
4229				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4230
4231			i++;
4232			tx_desc++;
4233			if (i == tx_ring->count) {
4234				tx_desc = IGB_TX_DESC(tx_ring, 0);
4235				i = 0;
4236			}
4237
4238			dma += IGB_MAX_DATA_PER_TXD;
4239			size -= IGB_MAX_DATA_PER_TXD;
4240
4241			tx_desc->read.olinfo_status = 0;
4242			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4243		}
4244
4245		if (likely(!data_len))
4246			break;
4247
4248		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4249
4250		i++;
4251		tx_desc++;
4252		if (i == tx_ring->count) {
4253			tx_desc = IGB_TX_DESC(tx_ring, 0);
4254			i = 0;
4255		}
4256
4257		size = frag->size;
4258		data_len -= size;
4259
4260		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4261				   size, DMA_TO_DEVICE);
4262		if (dma_mapping_error(tx_ring->dev, dma))
4263			goto dma_error;
4264
4265		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4266		tx_buffer_info->length = size;
4267		tx_buffer_info->dma = dma;
4268
4269		tx_desc->read.olinfo_status = 0;
4270		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4271
4272		frag++;
4273	}
4274
4275	/* write last descriptor with RS and EOP bits */
4276	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4277	tx_desc->read.cmd_type_len = cmd_type;
4278
4279	/* set the timestamp */
4280	first->time_stamp = jiffies;
4281
4282	/*
4283	 * Force memory writes to complete before letting h/w know there
4284	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4285	 * memory model archs, such as IA-64).
4286	 *
4287	 * We also need this memory barrier to make certain all of the
4288	 * status bits have been updated before next_to_watch is written.
4289	 */
4290	wmb();
4291
4292	/* set next_to_watch value indicating a packet is present */
4293	first->next_to_watch = tx_desc;
4294
4295	i++;
4296	if (i == tx_ring->count)
4297		i = 0;
4298
4299	tx_ring->next_to_use = i;
4300
4301	writel(i, tx_ring->tail);
4302
4303	/* we need this if more than one processor can write to our tail
4304	 * at a time; it synchronizes IO on IA64/Altix systems */
4305	mmiowb();
4306
4307	return;
4308
4309dma_error:
4310	dev_err(tx_ring->dev, "TX DMA map failed\n");
4311
4312	/* clear dma mappings for failed tx_buffer_info map */
4313	for (;;) {
4314		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4315		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4316		if (tx_buffer_info == first)
4317			break;
4318		if (i == 0)
4319			i = tx_ring->count;
4320		i--;
4321	}
4322
4323	tx_ring->next_to_use = i;
4324}
4325
4326static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4327{
4328	struct net_device *netdev = tx_ring->netdev;
4329
4330	netif_stop_subqueue(netdev, tx_ring->queue_index);
4331
4332	/* Herbert's original patch had:
4333	 *  smp_mb__after_netif_stop_queue();
4334	 * but since that doesn't exist yet, just open code it. */
4335	smp_mb();
4336
4337	/* We need to check again in case another CPU has just
4338	 * made room available. */
4339	if (igb_desc_unused(tx_ring) < size)
4340		return -EBUSY;
4341
4342	/* A reprieve! */
4343	netif_wake_subqueue(netdev, tx_ring->queue_index);
4344
4345	u64_stats_update_begin(&tx_ring->tx_syncp2);
4346	tx_ring->tx_stats.restart_queue2++;
4347	u64_stats_update_end(&tx_ring->tx_syncp2);
4348
4349	return 0;
4350}
4351
4352static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4353{
4354	if (igb_desc_unused(tx_ring) >= size)
4355		return 0;
4356	return __igb_maybe_stop_tx(tx_ring, size);
4357}
4358
4359netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4360				struct igb_ring *tx_ring)
4361{
4362	struct igb_tx_buffer *first;
4363	int tso;
4364	u32 tx_flags = 0;
4365	__be16 protocol = vlan_get_protocol(skb);
4366	u8 hdr_len = 0;
4367
4368	/* need: 1 descriptor per page,
4369	 *       + 2 desc gap to keep tail from touching head,
4370	 *       + 1 desc for skb->data,
4371	 *       + 1 desc for context descriptor,
4372	 * otherwise try next time */
4373	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4374		/* this is a hard error */
4375		return NETDEV_TX_BUSY;
4376	}
4377
4378	/* record the location of the first descriptor for this packet */
4379	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4380	first->skb = skb;
4381	first->bytecount = skb->len;
4382	first->gso_segs = 1;
4383
4384	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4385		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4386		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4387	}
4388
4389	if (vlan_tx_tag_present(skb)) {
4390		tx_flags |= IGB_TX_FLAGS_VLAN;
4391		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4392	}
4393
4394	/* record initial flags and protocol */
4395	first->tx_flags = tx_flags;
4396	first->protocol = protocol;
4397
4398	tso = igb_tso(tx_ring, first, &hdr_len);
4399	if (tso < 0)
4400		goto out_drop;
4401	else if (!tso)
4402		igb_tx_csum(tx_ring, first);
4403
4404	igb_tx_map(tx_ring, first, hdr_len);
4405
4406	/* Make sure there is space in the ring for the next send. */
4407	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4408
4409	return NETDEV_TX_OK;
4410
4411out_drop:
4412	igb_unmap_and_free_tx_resource(tx_ring, first);
4413
4414	return NETDEV_TX_OK;
4415}
4416
4417static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4418						    struct sk_buff *skb)
4419{
4420	unsigned int r_idx = skb->queue_mapping;
4421
4422	if (r_idx >= adapter->num_tx_queues)
4423		r_idx = r_idx % adapter->num_tx_queues;
4424
4425	return adapter->tx_ring[r_idx];
4426}
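/* Example (hypothetical values): with num_tx_queues = 4, an skb whose
 * queue_mapping is 6 is transmitted on tx_ring[6 % 4] = tx_ring[2], so
 * out-of-range mappings still land on a valid ring.
 */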
4427
4428static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4429				  struct net_device *netdev)
4430{
4431	struct igb_adapter *adapter = netdev_priv(netdev);
4432
4433	if (test_bit(__IGB_DOWN, &adapter->state)) {
4434		dev_kfree_skb_any(skb);
4435		return NETDEV_TX_OK;
4436	}
4437
4438	if (skb->len <= 0) {
4439		dev_kfree_skb_any(skb);
4440		return NETDEV_TX_OK;
4441	}
4442
4443	/*
4444	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4445	 * in order to meet this minimum size requirement.
4446	 */
4447	if (skb->len < 17) {
4448		if (skb_padto(skb, 17))
4449			return NETDEV_TX_OK;
4450		skb->len = 17;
4451	}
4452
4453	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4454}
4455
4456/**
4457 * igb_tx_timeout - Respond to a Tx Hang
4458 * @netdev: network interface device structure
4459 **/
4460static void igb_tx_timeout(struct net_device *netdev)
4461{
4462	struct igb_adapter *adapter = netdev_priv(netdev);
4463	struct e1000_hw *hw = &adapter->hw;
4464
4465	/* Do the reset outside of interrupt context */
4466	adapter->tx_timeout_count++;
4467
4468	if (hw->mac.type >= e1000_82580)
4469		hw->dev_spec._82575.global_device_reset = true;
4470
4471	schedule_work(&adapter->reset_task);
4472	wr32(E1000_EICS,
4473	     (adapter->eims_enable_mask & ~adapter->eims_other));
4474}
4475
4476static void igb_reset_task(struct work_struct *work)
4477{
4478	struct igb_adapter *adapter;
4479	adapter = container_of(work, struct igb_adapter, reset_task);
4480
4481	igb_dump(adapter);
4482	netdev_err(adapter->netdev, "Reset adapter\n");
4483	igb_reinit_locked(adapter);
4484}
4485
4486/**
4487 * igb_get_stats64 - Get System Network Statistics
4488 * @netdev: network interface device structure
4489 * @stats: rtnl_link_stats64 pointer
4490 *
4491 **/
4492static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4493						 struct rtnl_link_stats64 *stats)
4494{
4495	struct igb_adapter *adapter = netdev_priv(netdev);
4496
4497	spin_lock(&adapter->stats64_lock);
4498	igb_update_stats(adapter, &adapter->stats64);
4499	memcpy(stats, &adapter->stats64, sizeof(*stats));
4500	spin_unlock(&adapter->stats64_lock);
4501
4502	return stats;
4503}
4504
4505/**
4506 * igb_change_mtu - Change the Maximum Transfer Unit
4507 * @netdev: network interface device structure
4508 * @new_mtu: new value for maximum frame size
4509 *
4510 * Returns 0 on success, negative on failure
4511 **/
4512static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4513{
4514	struct igb_adapter *adapter = netdev_priv(netdev);
4515	struct pci_dev *pdev = adapter->pdev;
4516	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4517
4518	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4519		dev_err(&pdev->dev, "Invalid MTU setting\n");
4520		return -EINVAL;
4521	}
4522
4523#define MAX_STD_JUMBO_FRAME_SIZE 9238
4524	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4525		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4526		return -EINVAL;
4527	}
4528
4529	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4530		msleep(1);
4531
4532	/* igb_down has a dependency on max_frame_size */
4533	adapter->max_frame_size = max_frame;
4534
4535	if (netif_running(netdev))
4536		igb_down(adapter);
4537
4538	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4539		 netdev->mtu, new_mtu);
4540	netdev->mtu = new_mtu;
4541
4542	if (netif_running(netdev))
4543		igb_up(adapter);
4544	else
4545		igb_reset(adapter);
4546
4547	clear_bit(__IGB_RESETTING, &adapter->state);
4548
4549	return 0;
4550}
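/* Frame size arithmetic used above (standard header sizes): for the default
 * MTU of 1500, max_frame = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) +
 * VLAN_HLEN (4) = 1522; the MAX_STD_JUMBO_FRAME_SIZE limit of 9238 therefore
 * corresponds to an MTU of 9238 - 22 = 9216, which matches the error message.
 */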
4551
4552/**
4553 * igb_update_stats - Update the board statistics counters
4554 * @adapter: board private structure
4555 **/
4556
4557void igb_update_stats(struct igb_adapter *adapter,
4558		      struct rtnl_link_stats64 *net_stats)
4559{
4560	struct e1000_hw *hw = &adapter->hw;
4561	struct pci_dev *pdev = adapter->pdev;
4562	u32 reg, mpc;
4563	u16 phy_tmp;
4564	int i;
4565	u64 bytes, packets;
4566	unsigned int start;
4567	u64 _bytes, _packets;
4568
4569#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4570
4571	/*
4572	 * Prevent stats update while adapter is being reset, or if the pci
4573	 * connection is down.
4574	 */
4575	if (adapter->link_speed == 0)
4576		return;
4577	if (pci_channel_offline(pdev))
4578		return;
4579
4580	bytes = 0;
4581	packets = 0;
4582	for (i = 0; i < adapter->num_rx_queues; i++) {
4583		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4584		struct igb_ring *ring = adapter->rx_ring[i];
4585
4586		ring->rx_stats.drops += rqdpc_tmp;
4587		net_stats->rx_fifo_errors += rqdpc_tmp;
4588
4589		do {
4590			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4591			_bytes = ring->rx_stats.bytes;
4592			_packets = ring->rx_stats.packets;
4593		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4594		bytes += _bytes;
4595		packets += _packets;
4596	}
4597
4598	net_stats->rx_bytes = bytes;
4599	net_stats->rx_packets = packets;
4600
4601	bytes = 0;
4602	packets = 0;
4603	for (i = 0; i < adapter->num_tx_queues; i++) {
4604		struct igb_ring *ring = adapter->tx_ring[i];
4605		do {
4606			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4607			_bytes = ring->tx_stats.bytes;
4608			_packets = ring->tx_stats.packets;
4609		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4610		bytes += _bytes;
4611		packets += _packets;
4612	}
4613	net_stats->tx_bytes = bytes;
4614	net_stats->tx_packets = packets;
4615
4616	/* read stats registers */
4617	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4618	adapter->stats.gprc += rd32(E1000_GPRC);
4619	adapter->stats.gorc += rd32(E1000_GORCL);
4620	rd32(E1000_GORCH); /* clear GORCL */
4621	adapter->stats.bprc += rd32(E1000_BPRC);
4622	adapter->stats.mprc += rd32(E1000_MPRC);
4623	adapter->stats.roc += rd32(E1000_ROC);
4624
4625	adapter->stats.prc64 += rd32(E1000_PRC64);
4626	adapter->stats.prc127 += rd32(E1000_PRC127);
4627	adapter->stats.prc255 += rd32(E1000_PRC255);
4628	adapter->stats.prc511 += rd32(E1000_PRC511);
4629	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4630	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4631	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4632	adapter->stats.sec += rd32(E1000_SEC);
4633
4634	mpc = rd32(E1000_MPC);
4635	adapter->stats.mpc += mpc;
4636	net_stats->rx_fifo_errors += mpc;
4637	adapter->stats.scc += rd32(E1000_SCC);
4638	adapter->stats.ecol += rd32(E1000_ECOL);
4639	adapter->stats.mcc += rd32(E1000_MCC);
4640	adapter->stats.latecol += rd32(E1000_LATECOL);
4641	adapter->stats.dc += rd32(E1000_DC);
4642	adapter->stats.rlec += rd32(E1000_RLEC);
4643	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4644	adapter->stats.xontxc += rd32(E1000_XONTXC);
4645	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4646	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4647	adapter->stats.fcruc += rd32(E1000_FCRUC);
4648	adapter->stats.gptc += rd32(E1000_GPTC);
4649	adapter->stats.gotc += rd32(E1000_GOTCL);
4650	rd32(E1000_GOTCH); /* clear GOTCL */
4651	adapter->stats.rnbc += rd32(E1000_RNBC);
4652	adapter->stats.ruc += rd32(E1000_RUC);
4653	adapter->stats.rfc += rd32(E1000_RFC);
4654	adapter->stats.rjc += rd32(E1000_RJC);
4655	adapter->stats.tor += rd32(E1000_TORH);
4656	adapter->stats.tot += rd32(E1000_TOTH);
4657	adapter->stats.tpr += rd32(E1000_TPR);
4658
4659	adapter->stats.ptc64 += rd32(E1000_PTC64);
4660	adapter->stats.ptc127 += rd32(E1000_PTC127);
4661	adapter->stats.ptc255 += rd32(E1000_PTC255);
4662	adapter->stats.ptc511 += rd32(E1000_PTC511);
4663	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4664	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4665
4666	adapter->stats.mptc += rd32(E1000_MPTC);
4667	adapter->stats.bptc += rd32(E1000_BPTC);
4668
4669	adapter->stats.tpt += rd32(E1000_TPT);
4670	adapter->stats.colc += rd32(E1000_COLC);
4671
4672	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4673	/* read internal phy specific stats */
4674	reg = rd32(E1000_CTRL_EXT);
4675	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4676		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4677		adapter->stats.tncrs += rd32(E1000_TNCRS);
4678	}
4679
4680	adapter->stats.tsctc += rd32(E1000_TSCTC);
4681	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4682
4683	adapter->stats.iac += rd32(E1000_IAC);
4684	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4685	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4686	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4687	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4688	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4689	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4690	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4691	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4692
4693	/* Fill out the OS statistics structure */
4694	net_stats->multicast = adapter->stats.mprc;
4695	net_stats->collisions = adapter->stats.colc;
4696
4697	/* Rx Errors */
4698
4699	/* RLEC on some newer hardware can be incorrect so build
4700	 * our own version based on RUC and ROC */
4701	net_stats->rx_errors = adapter->stats.rxerrc +
4702		adapter->stats.crcerrs + adapter->stats.algnerrc +
4703		adapter->stats.ruc + adapter->stats.roc +
4704		adapter->stats.cexterr;
4705	net_stats->rx_length_errors = adapter->stats.ruc +
4706				      adapter->stats.roc;
4707	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4708	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4709	net_stats->rx_missed_errors = adapter->stats.mpc;
4710
4711	/* Tx Errors */
4712	net_stats->tx_errors = adapter->stats.ecol +
4713			       adapter->stats.latecol;
4714	net_stats->tx_aborted_errors = adapter->stats.ecol;
4715	net_stats->tx_window_errors = adapter->stats.latecol;
4716	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4717
4718	/* Tx Dropped needs to be maintained elsewhere */
4719
4720	/* Phy Stats */
4721	if (hw->phy.media_type == e1000_media_type_copper) {
4722		if ((adapter->link_speed == SPEED_1000) &&
4723		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4724			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4725			adapter->phy_stats.idle_errors += phy_tmp;
4726		}
4727	}
4728
4729	/* Management Stats */
4730	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4731	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4732	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4733
4734	/* OS2BMC Stats */
4735	reg = rd32(E1000_MANC);
4736	if (reg & E1000_MANC_EN_BMC2OS) {
4737		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4738		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4739		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4740		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4741	}
4742}
4743
4744static irqreturn_t igb_msix_other(int irq, void *data)
4745{
4746	struct igb_adapter *adapter = data;
4747	struct e1000_hw *hw = &adapter->hw;
4748	u32 icr = rd32(E1000_ICR);
4749	/* reading ICR causes bit 31 of EICR to be cleared */
4750
4751	if (icr & E1000_ICR_DRSTA)
4752		schedule_work(&adapter->reset_task);
4753
4754	if (icr & E1000_ICR_DOUTSYNC) {
4755		/* HW is reporting DMA is out of sync */
4756		adapter->stats.doosync++;
4757		/* The DMA Out of Sync is also an indication of a spoof event
4758		 * in IOV mode. Check the Wrong VM Behavior register to
4759		 * see if it is really a spoof event. */
4760		igb_check_wvbr(adapter);
4761	}
4762
4763	/* Check for a mailbox event */
4764	if (icr & E1000_ICR_VMMB)
4765		igb_msg_task(adapter);
4766
4767	if (icr & E1000_ICR_LSC) {
4768		hw->mac.get_link_status = 1;
4769		/* guard against interrupt when we're going down */
4770		if (!test_bit(__IGB_DOWN, &adapter->state))
4771			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4772	}
4773
4774	wr32(E1000_EIMS, adapter->eims_other);
4775
4776	return IRQ_HANDLED;
4777}
4778
4779static void igb_write_itr(struct igb_q_vector *q_vector)
4780{
4781	struct igb_adapter *adapter = q_vector->adapter;
4782	u32 itr_val = q_vector->itr_val & 0x7FFC;
4783
4784	if (!q_vector->set_itr)
4785		return;
4786
4787	if (!itr_val)
4788		itr_val = 0x4;
4789
4790	if (adapter->hw.mac.type == e1000_82575)
4791		itr_val |= itr_val << 16;
4792	else
4793		itr_val |= E1000_EITR_CNT_IGNR;
4794
4795	writel(itr_val, q_vector->itr_register);
4796	q_vector->set_itr = 0;
4797}
4798
4799static irqreturn_t igb_msix_ring(int irq, void *data)
4800{
4801	struct igb_q_vector *q_vector = data;
4802
4803	/* Write the ITR value calculated from the previous interrupt. */
4804	igb_write_itr(q_vector);
4805
4806	napi_schedule(&q_vector->napi);
4807
4808	return IRQ_HANDLED;
4809}
4810
4811#ifdef CONFIG_IGB_DCA
4812static void igb_update_dca(struct igb_q_vector *q_vector)
4813{
4814	struct igb_adapter *adapter = q_vector->adapter;
4815	struct e1000_hw *hw = &adapter->hw;
4816	int cpu = get_cpu();
4817
4818	if (q_vector->cpu == cpu)
4819		goto out_no_update;
4820
4821	if (q_vector->tx.ring) {
4822		int q = q_vector->tx.ring->reg_idx;
4823		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4824		if (hw->mac.type == e1000_82575) {
4825			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4826			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4827		} else {
4828			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4829			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4830			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4831		}
4832		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4833		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4834	}
4835	if (q_vector->rx.ring) {
4836		int q = q_vector->rx.ring->reg_idx;
4837		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4838		if (hw->mac.type == e1000_82575) {
4839			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4840			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4841		} else {
4842			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4843			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4844			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4845		}
4846		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4847		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4848		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4849		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4850	}
4851	q_vector->cpu = cpu;
4852out_no_update:
4853	put_cpu();
4854}
4855
4856static void igb_setup_dca(struct igb_adapter *adapter)
4857{
4858	struct e1000_hw *hw = &adapter->hw;
4859	int i;
4860
4861	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4862		return;
4863
4864	/* Always use CB2 mode, difference is masked in the CB driver. */
4865	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4866
4867	for (i = 0; i < adapter->num_q_vectors; i++) {
4868		adapter->q_vector[i]->cpu = -1;
4869		igb_update_dca(adapter->q_vector[i]);
4870	}
4871}
4872
4873static int __igb_notify_dca(struct device *dev, void *data)
4874{
4875	struct net_device *netdev = dev_get_drvdata(dev);
4876	struct igb_adapter *adapter = netdev_priv(netdev);
4877	struct pci_dev *pdev = adapter->pdev;
4878	struct e1000_hw *hw = &adapter->hw;
4879	unsigned long event = *(unsigned long *)data;
4880
4881	switch (event) {
4882	case DCA_PROVIDER_ADD:
4883		/* if already enabled, don't do it again */
4884		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4885			break;
4886		if (dca_add_requester(dev) == 0) {
4887			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4888			dev_info(&pdev->dev, "DCA enabled\n");
4889			igb_setup_dca(adapter);
4890			break;
4891		}
4892		/* Fall Through since DCA is disabled. */
4893	case DCA_PROVIDER_REMOVE:
4894		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4895			/* without this a class_device is left
4896			 * hanging around in the sysfs model */
4897			dca_remove_requester(dev);
4898			dev_info(&pdev->dev, "DCA disabled\n");
4899			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4900			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4901		}
4902		break;
4903	}
4904
4905	return 0;
4906}
4907
4908static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4909                          void *p)
4910{
4911	int ret_val;
4912
4913	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4914	                                 __igb_notify_dca);
4915
4916	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4917}
4918#endif /* CONFIG_IGB_DCA */
4919
4920static void igb_ping_all_vfs(struct igb_adapter *adapter)
4921{
4922	struct e1000_hw *hw = &adapter->hw;
4923	u32 ping;
4924	int i;
4925
4926	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4927		ping = E1000_PF_CONTROL_MSG;
4928		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4929			ping |= E1000_VT_MSGTYPE_CTS;
4930		igb_write_mbx(hw, &ping, 1, i);
4931	}
4932}
4933
4934static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4935{
4936	struct e1000_hw *hw = &adapter->hw;
4937	u32 vmolr = rd32(E1000_VMOLR(vf));
4938	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4939
4940	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4941	                    IGB_VF_FLAG_MULTI_PROMISC);
4942	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4943
4944	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4945		vmolr |= E1000_VMOLR_MPME;
4946		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4947		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4948	} else {
4949		/*
4950		 * if we have hashes and we are clearing the multicast promisc
4951		 * flag, we need to write the hashes to the MTA, as this step
4952		 * was previously skipped
4953		 */
4954		if (vf_data->num_vf_mc_hashes > 30) {
4955			vmolr |= E1000_VMOLR_MPME;
4956		} else if (vf_data->num_vf_mc_hashes) {
4957			int j;
4958			vmolr |= E1000_VMOLR_ROMPE;
4959			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4960				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4961		}
4962	}
4963
4964	wr32(E1000_VMOLR(vf), vmolr);
4965
4966	/* there are flags left unprocessed, likely not supported */
4967	if (*msgbuf & E1000_VT_MSGINFO_MASK)
4968		return -EINVAL;
4969
4970	return 0;
4971
4972}
4973
4974static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4975				  u32 *msgbuf, u32 vf)
4976{
4977	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4978	u16 *hash_list = (u16 *)&msgbuf[1];
4979	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4980	int i;
4981
4982	/* salt away the number of multicast addresses assigned
4983	 * to this VF for later use to restore when the PF multi cast
4984	 * to this VF for later use to restore when the PF multicast
4985	 */
4986	vf_data->num_vf_mc_hashes = n;
4987
4988	/* only up to 30 hash values supported */
4989	if (n > 30)
4990		n = 30;
4991
4992	/* store the hashes for later use */
4993	for (i = 0; i < n; i++)
4994		vf_data->vf_mc_hashes[i] = hash_list[i];
4995
4996	/* Flush and reset the mta with the new values */
4997	igb_set_rx_mode(adapter->netdev);
4998
4999	return 0;
5000}
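/* Mailbox layout assumed by the parser above: the low 16 bits of msgbuf[0]
 * carry E1000_VF_SET_MULTICAST, the MSGINFO field carries the hash count n,
 * and msgbuf[1..] holds the packed 16-bit multicast hash values; only the
 * first 30 hashes are stored, and a VF reporting more than 30 is handled via
 * multicast promiscuous (see igb_set_vf_promisc/igb_restore_vf_multicasts).
 */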
5001
5002static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5003{
5004	struct e1000_hw *hw = &adapter->hw;
5005	struct vf_data_storage *vf_data;
5006	int i, j;
5007
5008	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5009		u32 vmolr = rd32(E1000_VMOLR(i));
5010		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5011
5012		vf_data = &adapter->vf_data[i];
5013
5014		if ((vf_data->num_vf_mc_hashes > 30) ||
5015		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5016			vmolr |= E1000_VMOLR_MPME;
5017		} else if (vf_data->num_vf_mc_hashes) {
5018			vmolr |= E1000_VMOLR_ROMPE;
5019			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5020				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5021		}
5022		wr32(E1000_VMOLR(i), vmolr);
5023	}
5024}
5025
5026static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5027{
5028	struct e1000_hw *hw = &adapter->hw;
5029	u32 pool_mask, reg, vid;
5030	int i;
5031
5032	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5033
5034	/* Walk every VLAN filter entry and remove this VF from its pool */
5035	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5036		reg = rd32(E1000_VLVF(i));
5037
5038		/* remove the vf from the pool */
5039		reg &= ~pool_mask;
5040
5041		/* if pool is empty then remove entry from vfta */
5042		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5043		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5044			vid = reg & E1000_VLVF_VLANID_MASK;
5045			igb_vfta_set(hw, vid, false);
5046			reg = 0;
5047		}
5048
5049		wr32(E1000_VLVF(i), reg);
5050	}
5051
5052	adapter->vf_data[vf].vlans_enabled = 0;
5053}
5054
5055static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5056{
5057	struct e1000_hw *hw = &adapter->hw;
5058	u32 reg, i;
5059
5060	/* The vlvf table only exists on 82576 hardware and newer */
5061	if (hw->mac.type < e1000_82576)
5062		return -1;
5063
5064	/* we only need to do this if VMDq is enabled */
5065	if (!adapter->vfs_allocated_count)
5066		return -1;
5067
5068	/* Find the vlan filter for this id */
5069	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5070		reg = rd32(E1000_VLVF(i));
5071		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5072		    vid == (reg & E1000_VLVF_VLANID_MASK))
5073			break;
5074	}
5075
5076	if (add) {
5077		if (i == E1000_VLVF_ARRAY_SIZE) {
5078			/* Did not find a matching VLAN ID entry that was
5079			 * enabled.  Search for a free filter entry, i.e.
5080			 * one without the enable bit set
5081			 */
5082			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5083				reg = rd32(E1000_VLVF(i));
5084				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5085					break;
5086			}
5087		}
5088		if (i < E1000_VLVF_ARRAY_SIZE) {
5089			/* Found an enabled/available entry */
5090			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5091
5092			/* if !enabled we need to set this up in vfta */
5093			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5094				/* add VID to filter table */
5095				igb_vfta_set(hw, vid, true);
5096				reg |= E1000_VLVF_VLANID_ENABLE;
5097			}
5098			reg &= ~E1000_VLVF_VLANID_MASK;
5099			reg |= vid;
5100			wr32(E1000_VLVF(i), reg);
5101
5102			/* do not modify RLPML for PF devices */
5103			if (vf >= adapter->vfs_allocated_count)
5104				return 0;
5105
5106			if (!adapter->vf_data[vf].vlans_enabled) {
5107				u32 size;
5108				reg = rd32(E1000_VMOLR(vf));
5109				size = reg & E1000_VMOLR_RLPML_MASK;
5110				size += 4;
5111				reg &= ~E1000_VMOLR_RLPML_MASK;
5112				reg |= size;
5113				wr32(E1000_VMOLR(vf), reg);
5114			}
5115
5116			adapter->vf_data[vf].vlans_enabled++;
5117		}
5118	} else {
5119		if (i < E1000_VLVF_ARRAY_SIZE) {
5120			/* remove vf from the pool */
5121			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5122			/* if pool is empty then remove entry from vfta */
5123			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5124				reg = 0;
5125				igb_vfta_set(hw, vid, false);
5126			}
5127			wr32(E1000_VLVF(i), reg);
5128
5129			/* do not modify RLPML for PF devices */
5130			if (vf >= adapter->vfs_allocated_count)
5131				return 0;
5132
5133			adapter->vf_data[vf].vlans_enabled--;
5134			if (!adapter->vf_data[vf].vlans_enabled) {
5135				u32 size;
5136				reg = rd32(E1000_VMOLR(vf));
5137				size = reg & E1000_VMOLR_RLPML_MASK;
5138				size -= 4;
5139				reg &= ~E1000_VMOLR_RLPML_MASK;
5140				reg |= size;
5141				wr32(E1000_VMOLR(vf), reg);
5142			}
5143		}
5144	}
5145	return 0;
5146}
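/* Side effect worth noting: the RLPML adjustment above only grows the VF's
 * maximum receive packet length by 4 bytes when its first VLAN is enabled
 * (and shrinks it again when the last one is removed); e.g. a hypothetical
 * VMOLR RLPML of 1522 becomes 1526 so a tagged maximum-size frame still fits.
 */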
5147
5148static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5149{
5150	struct e1000_hw *hw = &adapter->hw;
5151
5152	if (vid)
5153		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5154	else
5155		wr32(E1000_VMVIR(vf), 0);
5156}
5157
5158static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5159			       int vf, u16 vlan, u8 qos)
5160{
5161	int err = 0;
5162	struct igb_adapter *adapter = netdev_priv(netdev);
5163
5164	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5165		return -EINVAL;
5166	if (vlan || qos) {
5167		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5168		if (err)
5169			goto out;
5170		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5171		igb_set_vmolr(adapter, vf, !vlan);
5172		adapter->vf_data[vf].pf_vlan = vlan;
5173		adapter->vf_data[vf].pf_qos = qos;
5174		dev_info(&adapter->pdev->dev,
5175			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5176		if (test_bit(__IGB_DOWN, &adapter->state)) {
5177			dev_warn(&adapter->pdev->dev,
5178				 "The VF VLAN has been set,"
5179				 " but the PF device is not up.\n");
5180			dev_warn(&adapter->pdev->dev,
5181				 "Bring the PF device up before"
5182				 " attempting to use the VF device.\n");
5183		}
5184	} else {
5185		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5186				   false, vf);
5187		igb_set_vmvir(adapter, vlan, vf);
5188		igb_set_vmolr(adapter, vf, true);
5189		adapter->vf_data[vf].pf_vlan = 0;
5190		adapter->vf_data[vf].pf_qos = 0;
5191	}
5192out:
5193	return err;
5194}
5195
5196static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5197{
5198	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5199	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5200
5201	return igb_vlvf_set(adapter, vid, add, vf);
5202}
5203
5204static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5205{
5206	/* clear flags - except flag that indicates PF has set the MAC */
5207	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5208	adapter->vf_data[vf].last_nack = jiffies;
5209
5210	/* reset offloads to defaults */
5211	igb_set_vmolr(adapter, vf, true);
5212
5213	/* reset vlans for device */
5214	igb_clear_vf_vfta(adapter, vf);
5215	if (adapter->vf_data[vf].pf_vlan)
5216		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5217				    adapter->vf_data[vf].pf_vlan,
5218				    adapter->vf_data[vf].pf_qos);
5219	else
5220		igb_clear_vf_vfta(adapter, vf);
5221
5222	/* reset multicast table array for vf */
5223	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5224
5225	/* Flush and reset the mta with the new values */
5226	igb_set_rx_mode(adapter->netdev);
5227}
5228
5229static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5230{
5231	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5232
5233	/* generate a new mac address as we were hotplug removed/added */
5234	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5235		random_ether_addr(vf_mac);
5236
5237	/* process remaining reset events */
5238	igb_vf_reset(adapter, vf);
5239}
5240
5241static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5242{
5243	struct e1000_hw *hw = &adapter->hw;
5244	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5245	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5246	u32 reg, msgbuf[3];
5247	u8 *addr = (u8 *)(&msgbuf[1]);
5248
5249	/* process all the same items cleared in a function level reset */
5250	igb_vf_reset(adapter, vf);
5251
5252	/* set vf mac address */
5253	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5254
5255	/* enable transmit and receive for vf */
5256	reg = rd32(E1000_VFTE);
5257	wr32(E1000_VFTE, reg | (1 << vf));
5258	reg = rd32(E1000_VFRE);
5259	wr32(E1000_VFRE, reg | (1 << vf));
5260
5261	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5262
5263	/* reply to reset with ack and vf mac address */
5264	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5265	memcpy(addr, vf_mac, 6);
5266	igb_write_mbx(hw, msgbuf, 3, vf);
5267}
5268
5269static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5270{
5271	/*
5272	 * The VF MAC Address is stored in a packed array of bytes
5273	 * starting at the second 32 bit word of the msg array
5274	 */
5275	unsigned char *addr = (unsigned char *)&msg[1];
5276	int err = -1;
5277
5278	if (is_valid_ether_addr(addr))
5279		err = igb_set_vf_mac(adapter, vf, addr);
5280
5281	return err;
5282}
5283
5284static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5285{
5286	struct e1000_hw *hw = &adapter->hw;
5287	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5288	u32 msg = E1000_VT_MSGTYPE_NACK;
5289
5290	/* if device isn't clear to send it shouldn't be reading either */
5291	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5292	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5293		igb_write_mbx(hw, &msg, 1, vf);
5294		vf_data->last_nack = jiffies;
5295	}
5296}
5297
5298static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5299{
5300	struct pci_dev *pdev = adapter->pdev;
5301	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5302	struct e1000_hw *hw = &adapter->hw;
5303	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5304	s32 retval;
5305
5306	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5307
5308	if (retval) {
5309		/* if receive failed revoke VF CTS stats and restart init */
5310		dev_err(&pdev->dev, "Error receiving message from VF\n");
5311		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5312		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5313			return;
5314		goto out;
5315	}
5316
5317	/* this is a message we already processed, do nothing */
5318	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5319		return;
5320
5321	/*
5322	 * until the vf completes a reset it should not be
5323	 * allowed to start any configuration.
5324	 */
5325
5326	if (msgbuf[0] == E1000_VF_RESET) {
5327		igb_vf_reset_msg(adapter, vf);
5328		return;
5329	}
5330
5331	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5332		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5333			return;
5334		retval = -1;
5335		goto out;
5336	}
5337
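	/*
	 * The low 16 bits of msgbuf[0] carry the VF message ID; the upper
	 * bits hold the ACK/NACK/CTS status flags that the PF ORs in before
	 * replying.
	 */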
5338	switch ((msgbuf[0] & 0xFFFF)) {
5339	case E1000_VF_SET_MAC_ADDR:
5340		retval = -EINVAL;
5341		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5342			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5343		else
5344			dev_warn(&pdev->dev,
5345				 "VF %d attempted to override administratively "
5346				 "set MAC address\nReload the VF driver to "
5347				 "resume operations\n", vf);
5348		break;
5349	case E1000_VF_SET_PROMISC:
5350		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5351		break;
5352	case E1000_VF_SET_MULTICAST:
5353		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5354		break;
5355	case E1000_VF_SET_LPE:
5356		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5357		break;
5358	case E1000_VF_SET_VLAN:
5359		retval = -1;
5360		if (vf_data->pf_vlan)
5361			dev_warn(&pdev->dev,
5362				 "VF %d attempted to override administratively "
5363				 "set VLAN tag\nReload the VF driver to "
5364				 "resume operations\n", vf);
5365		else
5366			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5367		break;
5368	default:
5369		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5370		retval = -1;
5371		break;
5372	}
5373
5374	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5375out:
5376	/* notify the VF of the results of what it sent us */
5377	if (retval)
5378		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5379	else
5380		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5381
5382	igb_write_mbx(hw, msgbuf, 1, vf);
5383}
5384
5385static void igb_msg_task(struct igb_adapter *adapter)
5386{
5387	struct e1000_hw *hw = &adapter->hw;
5388	u32 vf;
5389
5390	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5391		/* process any reset requests */
5392		if (!igb_check_for_rst(hw, vf))
5393			igb_vf_reset_event(adapter, vf);
5394
5395		/* process any messages pending */
5396		if (!igb_check_for_msg(hw, vf))
5397			igb_rcv_msg_from_vf(adapter, vf);
5398
5399		/* process any acks */
5400		if (!igb_check_for_ack(hw, vf))
5401			igb_rcv_ack_from_vf(adapter, vf);
5402	}
5403}
5404
5405/**
5406 *  igb_set_uta - Set unicast filter table address
5407 *  @adapter: board private structure
5408 *
5409 *  The unicast table address is a register array of 32-bit registers.
5410 *  The table is meant to be used in a way similar to how the MTA is used;
5411 *  however, due to certain limitations in the hardware it is necessary to
5412 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5413 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5414 **/
5415static void igb_set_uta(struct igb_adapter *adapter)
5416{
5417	struct e1000_hw *hw = &adapter->hw;
5418	int i;
5419
5420	/* The UTA table only exists on 82576 hardware and newer */
5421	if (hw->mac.type < e1000_82576)
5422		return;
5423
5424	/* we only need to do this if VMDq is enabled */
5425	if (!adapter->vfs_allocated_count)
5426		return;
5427
5428	for (i = 0; i < hw->mac.uta_reg_count; i++)
5429		array_wr32(E1000_UTA, i, ~0);
5430}
5431
5432/**
5433 * igb_intr_msi - Interrupt Handler
5434 * @irq: interrupt number
5435 * @data: pointer to a network interface device structure
5436 **/
5437static irqreturn_t igb_intr_msi(int irq, void *data)
5438{
5439	struct igb_adapter *adapter = data;
5440	struct igb_q_vector *q_vector = adapter->q_vector[0];
5441	struct e1000_hw *hw = &adapter->hw;
5442	/* read ICR disables interrupts using IAM */
5443	u32 icr = rd32(E1000_ICR);
5444
5445	igb_write_itr(q_vector);
5446
5447	if (icr & E1000_ICR_DRSTA)
5448		schedule_work(&adapter->reset_task);
5449
5450	if (icr & E1000_ICR_DOUTSYNC) {
5451		/* HW is reporting DMA is out of sync */
5452		adapter->stats.doosync++;
5453	}
5454
5455	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5456		hw->mac.get_link_status = 1;
5457		if (!test_bit(__IGB_DOWN, &adapter->state))
5458			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5459	}
5460
5461	napi_schedule(&q_vector->napi);
5462
5463	return IRQ_HANDLED;
5464}
5465
5466/**
5467 * igb_intr - Legacy Interrupt Handler
5468 * @irq: interrupt number
5469 * @data: pointer to a network interface device structure
5470 **/
5471static irqreturn_t igb_intr(int irq, void *data)
5472{
5473	struct igb_adapter *adapter = data;
5474	struct igb_q_vector *q_vector = adapter->q_vector[0];
5475	struct e1000_hw *hw = &adapter->hw;
5476	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5477	 * need for the IMC write */
5478	u32 icr = rd32(E1000_ICR);
5479
5480	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5481	 * not set, then the adapter didn't send an interrupt */
5482	if (!(icr & E1000_ICR_INT_ASSERTED))
5483		return IRQ_NONE;
5484
5485	igb_write_itr(q_vector);
5486
5487	if (icr & E1000_ICR_DRSTA)
5488		schedule_work(&adapter->reset_task);
5489
5490	if (icr & E1000_ICR_DOUTSYNC) {
5491		/* HW is reporting DMA is out of sync */
5492		adapter->stats.doosync++;
5493	}
5494
5495	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5496		hw->mac.get_link_status = 1;
5497		/* guard against interrupt when we're going down */
5498		if (!test_bit(__IGB_DOWN, &adapter->state))
5499			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5500	}
5501
5502	napi_schedule(&q_vector->napi);
5503
5504	return IRQ_HANDLED;
5505}
5506
5507void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5508{
5509	struct igb_adapter *adapter = q_vector->adapter;
5510	struct e1000_hw *hw = &adapter->hw;
5511
5512	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5513	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5514		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5515			igb_set_itr(q_vector);
5516		else
5517			igb_update_ring_itr(q_vector);
5518	}
5519
5520	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5521		if (adapter->msix_entries)
5522			wr32(E1000_EIMS, q_vector->eims_value);
5523		else
5524			igb_irq_enable(adapter);
5525	}
5526}
5527
5528/**
5529 * igb_poll - NAPI Rx polling callback
5530 * @napi: napi polling structure
5531 * @budget: count of how many packets we should handle
5532 **/
5533static int igb_poll(struct napi_struct *napi, int budget)
5534{
5535	struct igb_q_vector *q_vector = container_of(napi,
5536	                                             struct igb_q_vector,
5537	                                             napi);
5538	bool clean_complete = true;
5539
5540#ifdef CONFIG_IGB_DCA
5541	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5542		igb_update_dca(q_vector);
5543#endif
5544	if (q_vector->tx.ring)
5545		clean_complete = igb_clean_tx_irq(q_vector);
5546
5547	if (q_vector->rx.ring)
5548		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5549
5550	/* If all work not completed, return budget and keep polling */
5551	if (!clean_complete)
5552		return budget;
5553
5554	/* If not enough Rx work done, exit the polling mode */
5555	napi_complete(napi);
5556	igb_ring_irq_enable(q_vector);
5557
5558	return 0;
5559}
5560
5561/**
5562 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5563 * @adapter: board private structure
5564 * @shhwtstamps: timestamp structure to update
5565 * @regval: unsigned 64bit system time value.
5566 *
5567 * We need to convert the system time value stored in the RX/TXSTMP registers
5568 * into a hwtstamp which can be used by the upper level timestamping functions
5569 */
5570static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5571                                   struct skb_shared_hwtstamps *shhwtstamps,
5572                                   u64 regval)
5573{
5574	u64 ns;
5575
5576	/*
5577	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5578	 * bit 24 to match the clock shift we set up earlier.
5579	 */
5580	if (adapter->hw.mac.type >= e1000_82580)
5581		regval <<= IGB_82580_TSYNC_SHIFT;
5582
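	/*
	 * timecounter_cyc2time() converts the raw cycle count into
	 * nanoseconds; timecompare keeps the hardware clock correlated with
	 * system time so a syststamp can be reported as well.
	 */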
5583	ns = timecounter_cyc2time(&adapter->clock, regval);
5584	timecompare_update(&adapter->compare, ns);
5585	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5586	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5587	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5588}
5589
5590/**
5591 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5592 * @q_vector: pointer to q_vector containing needed info
5593 * @buffer: pointer to igb_tx_buffer structure
5594 *
5595 * If we were asked to do hardware stamping and such a time stamp is
5596 * available, then it must have been for this skb here because we only
5597 * allow one such packet into the queue.
5598 */
5599static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5600			    struct igb_tx_buffer *buffer_info)
5601{
5602	struct igb_adapter *adapter = q_vector->adapter;
5603	struct e1000_hw *hw = &adapter->hw;
5604	struct skb_shared_hwtstamps shhwtstamps;
5605	u64 regval;
5606
5607	/* if skb does not support hw timestamp or TX stamp not valid exit */
5608	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5609	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5610		return;
5611
5612	regval = rd32(E1000_TXSTMPL);
5613	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5614
5615	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5616	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5617}
5618
5619/**
5620 * igb_clean_tx_irq - Reclaim resources after transmit completes
5621 * @q_vector: pointer to q_vector containing needed info
5622 * returns true if ring is completely cleaned
5623 **/
5624static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5625{
5626	struct igb_adapter *adapter = q_vector->adapter;
5627	struct igb_ring *tx_ring = q_vector->tx.ring;
5628	struct igb_tx_buffer *tx_buffer;
5629	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5630	unsigned int total_bytes = 0, total_packets = 0;
5631	unsigned int budget = q_vector->tx.work_limit;
5632	unsigned int i = tx_ring->next_to_clean;
5633
5634	if (test_bit(__IGB_DOWN, &adapter->state))
5635		return true;
5636
5637	tx_buffer = &tx_ring->tx_buffer_info[i];
5638	tx_desc = IGB_TX_DESC(tx_ring, i);
5639	i -= tx_ring->count;
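	/*
	 * i is biased by -tx_ring->count so that the increments below reach
	 * zero exactly when the clean index wraps; the real index is
	 * restored by adding count back after the loop.
	 */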
5640
5641	for (; budget; budget--) {
5642		eop_desc = tx_buffer->next_to_watch;
5643
5644		/* prevent any other reads prior to eop_desc */
5645		rmb();
5646
5647		/* if next_to_watch is not set then there is no work pending */
5648		if (!eop_desc)
5649			break;
5650
5651		/* if DD is not set pending work has not been completed */
5652		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5653			break;
5654
5655		/* clear next_to_watch to prevent false hangs */
5656		tx_buffer->next_to_watch = NULL;
5657
5658		/* update the statistics for this packet */
5659		total_bytes += tx_buffer->bytecount;
5660		total_packets += tx_buffer->gso_segs;
5661
5662		/* retrieve hardware timestamp */
5663		igb_tx_hwtstamp(q_vector, tx_buffer);
5664
5665		/* free the skb */
5666		dev_kfree_skb_any(tx_buffer->skb);
5667		tx_buffer->skb = NULL;
5668
5669		/* unmap skb header data */
5670		dma_unmap_single(tx_ring->dev,
5671				 tx_buffer->dma,
5672				 tx_buffer->length,
5673				 DMA_TO_DEVICE);
5674
5675		/* clear last DMA location and unmap remaining buffers */
5676		while (tx_desc != eop_desc) {
5677			tx_buffer->dma = 0;
5678
5679			tx_buffer++;
5680			tx_desc++;
5681			i++;
5682			if (unlikely(!i)) {
5683				i -= tx_ring->count;
5684				tx_buffer = tx_ring->tx_buffer_info;
5685				tx_desc = IGB_TX_DESC(tx_ring, 0);
5686			}
5687
5688			/* unmap any remaining paged data */
5689			if (tx_buffer->dma) {
5690				dma_unmap_page(tx_ring->dev,
5691					       tx_buffer->dma,
5692					       tx_buffer->length,
5693					       DMA_TO_DEVICE);
5694			}
5695		}
5696
5697		/* clear last DMA location */
5698		tx_buffer->dma = 0;
5699
5700		/* move us one more past the eop_desc for start of next pkt */
5701		tx_buffer++;
5702		tx_desc++;
5703		i++;
5704		if (unlikely(!i)) {
5705			i -= tx_ring->count;
5706			tx_buffer = tx_ring->tx_buffer_info;
5707			tx_desc = IGB_TX_DESC(tx_ring, 0);
5708		}
5709	}
5710
5711	i += tx_ring->count;
5712	tx_ring->next_to_clean = i;
5713	u64_stats_update_begin(&tx_ring->tx_syncp);
5714	tx_ring->tx_stats.bytes += total_bytes;
5715	tx_ring->tx_stats.packets += total_packets;
5716	u64_stats_update_end(&tx_ring->tx_syncp);
5717	q_vector->tx.total_bytes += total_bytes;
5718	q_vector->tx.total_packets += total_packets;
5719
5720	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5721		struct e1000_hw *hw = &adapter->hw;
5722
5723		eop_desc = tx_buffer->next_to_watch;
5724
5725		/* Detect a transmit hang in hardware, this serializes the
5726		 * check with the clearing of time_stamp and movement of i */
5727		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5728		if (eop_desc &&
5729		    time_after(jiffies, tx_buffer->time_stamp +
5730			       (adapter->tx_timeout_factor * HZ)) &&
5731		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5732
5733			/* detected Tx unit hang */
5734			dev_err(tx_ring->dev,
5735				"Detected Tx Unit Hang\n"
5736				"  Tx Queue             <%d>\n"
5737				"  TDH                  <%x>\n"
5738				"  TDT                  <%x>\n"
5739				"  next_to_use          <%x>\n"
5740				"  next_to_clean        <%x>\n"
5741				"buffer_info[next_to_clean]\n"
5742				"  time_stamp           <%lx>\n"
5743				"  next_to_watch        <%p>\n"
5744				"  jiffies              <%lx>\n"
5745				"  desc.status          <%x>\n",
5746				tx_ring->queue_index,
5747				rd32(E1000_TDH(tx_ring->reg_idx)),
5748				readl(tx_ring->tail),
5749				tx_ring->next_to_use,
5750				tx_ring->next_to_clean,
5751				tx_buffer->time_stamp,
5752				eop_desc,
5753				jiffies,
5754				eop_desc->wb.status);
5755			netif_stop_subqueue(tx_ring->netdev,
5756					    tx_ring->queue_index);
5757
5758			/* we are about to reset, no point in enabling stuff */
5759			return true;
5760		}
5761	}
5762
5763	if (unlikely(total_packets &&
5764		     netif_carrier_ok(tx_ring->netdev) &&
5765		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5766		/* Make sure that anybody stopping the queue after this
5767		 * sees the new next_to_clean.
5768		 */
5769		smp_mb();
5770		if (__netif_subqueue_stopped(tx_ring->netdev,
5771					     tx_ring->queue_index) &&
5772		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5773			netif_wake_subqueue(tx_ring->netdev,
5774					    tx_ring->queue_index);
5775
5776			u64_stats_update_begin(&tx_ring->tx_syncp);
5777			tx_ring->tx_stats.restart_queue++;
5778			u64_stats_update_end(&tx_ring->tx_syncp);
5779		}
5780	}
5781
5782	return !!budget;
5783}
5784
5785static inline void igb_rx_checksum(struct igb_ring *ring,
5786				   union e1000_adv_rx_desc *rx_desc,
5787				   struct sk_buff *skb)
5788{
5789	skb_checksum_none_assert(skb);
5790
5791	/* Ignore Checksum bit is set */
5792	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5793		return;
5794
5795	/* Rx checksum disabled via ethtool */
5796	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5797		return;
5798
5799	/* TCP/UDP checksum error bit is set */
5800	if (igb_test_staterr(rx_desc,
5801			     E1000_RXDEXT_STATERR_TCPE |
5802			     E1000_RXDEXT_STATERR_IPE)) {
5803		/*
5804		 * work around errata with sctp packets where the TCPE aka
5805		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5806		 * packets (i.e. let the stack verify the crc32c instead)
5807		 */
5808		if (!((skb->len == 60) &&
5809		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5810			u64_stats_update_begin(&ring->rx_syncp);
5811			ring->rx_stats.csum_err++;
5812			u64_stats_update_end(&ring->rx_syncp);
5813		}
5814		/* let the stack verify checksum errors */
5815		return;
5816	}
5817	/* It must be a TCP or UDP packet with a valid checksum */
5818	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5819				      E1000_RXD_STAT_UDPCS))
5820		skb->ip_summed = CHECKSUM_UNNECESSARY;
5821
5822	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5823		le32_to_cpu(rx_desc->wb.upper.status_error));
5824}
5825
5826static inline void igb_rx_hash(struct igb_ring *ring,
5827			       union e1000_adv_rx_desc *rx_desc,
5828			       struct sk_buff *skb)
5829{
5830	if (ring->netdev->features & NETIF_F_RXHASH)
5831		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5832}
5833
5834static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5835			    union e1000_adv_rx_desc *rx_desc,
5836			    struct sk_buff *skb)
5837{
5838	struct igb_adapter *adapter = q_vector->adapter;
5839	struct e1000_hw *hw = &adapter->hw;
5840	u64 regval;
5841
5842	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5843				       E1000_RXDADV_STAT_TS))
5844		return;
5845
5846	/*
5847	 * If this bit is set, then the RX registers contain the time stamp. No
5848	 * other packet will be time stamped until we read these registers, so
5849	 * read the registers to make them available again. Because only one
5850	 * packet can be time stamped at a time, we know that the register
5851	 * values must belong to this one here and therefore we don't need to
5852	 * compare any of the additional attributes stored for it.
5853	 *
5854	 * If nothing went wrong, then it should have a shared tx_flags that we
5855	 * can turn into a skb_shared_hwtstamps.
5856	 */
5857	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5858		u32 *stamp = (u32 *)skb->data;
5859		regval = le32_to_cpu(*(stamp + 2));
5860		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5861		skb_pull(skb, IGB_TS_HDR_LEN);
5862	} else {
5863		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5864			return;
5865
5866		regval = rd32(E1000_RXSTMPL);
5867		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5868	}
5869
5870	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5871}
5872
5873static void igb_rx_vlan(struct igb_ring *ring,
5874			union e1000_adv_rx_desc *rx_desc,
5875			struct sk_buff *skb)
5876{
5877	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5878		u16 vid;
5879		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5880		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5881			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5882		else
5883			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5884
5885		__vlan_hwaccel_put_tag(skb, vid);
5886	}
5887}
5888
5889static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5890{
5891	/* HW will not DMA in data larger than the given buffer, even if it
5892	 * parses the (NFS, of course) header to be larger.  In that case, it
5893	 * fills the header buffer and spills the rest into the page.
5894	 */
5895	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5896	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5897	if (hlen > IGB_RX_HDR_LEN)
5898		hlen = IGB_RX_HDR_LEN;
5899	return hlen;
5900}
5901
5902static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5903{
5904	struct igb_ring *rx_ring = q_vector->rx.ring;
5905	union e1000_adv_rx_desc *rx_desc;
5906	const int current_node = numa_node_id();
5907	unsigned int total_bytes = 0, total_packets = 0;
5908	u16 cleaned_count = igb_desc_unused(rx_ring);
5909	u16 i = rx_ring->next_to_clean;
5910
5911	rx_desc = IGB_RX_DESC(rx_ring, i);
5912
5913	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5914		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5915		struct sk_buff *skb = buffer_info->skb;
5916		union e1000_adv_rx_desc *next_rxd;
5917
5918		buffer_info->skb = NULL;
5919		prefetch(skb->data);
5920
5921		i++;
5922		if (i == rx_ring->count)
5923			i = 0;
5924
5925		next_rxd = IGB_RX_DESC(rx_ring, i);
5926		prefetch(next_rxd);
5927
5928		/*
5929		 * This memory barrier is needed to keep us from reading
5930		 * any other fields out of the rx_desc until we know the
5931		 * RXD_STAT_DD bit is set
5932		 */
5933		rmb();
5934
5935		if (!skb_is_nonlinear(skb)) {
5936			__skb_put(skb, igb_get_hlen(rx_desc));
5937			dma_unmap_single(rx_ring->dev, buffer_info->dma,
5938					 IGB_RX_HDR_LEN,
5939					 DMA_FROM_DEVICE);
5940			buffer_info->dma = 0;
5941		}
5942
5943		if (rx_desc->wb.upper.length) {
5944			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5945
5946			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5947						buffer_info->page,
5948						buffer_info->page_offset,
5949						length);
5950
5951			skb->len += length;
5952			skb->data_len += length;
5953			skb->truesize += PAGE_SIZE / 2;
5954
5955			if ((page_count(buffer_info->page) != 1) ||
5956			    (page_to_nid(buffer_info->page) != current_node))
5957				buffer_info->page = NULL;
5958			else
5959				get_page(buffer_info->page);
5960
5961			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5962				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
5963			buffer_info->page_dma = 0;
5964		}
5965
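		/*
		 * Packet spans multiple descriptors: hand the partially
		 * assembled skb to the next buffer slot so the next
		 * iteration keeps appending fragments to it.
		 */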
5966		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
5967			struct igb_rx_buffer *next_buffer;
5968			next_buffer = &rx_ring->rx_buffer_info[i];
5969			buffer_info->skb = next_buffer->skb;
5970			buffer_info->dma = next_buffer->dma;
5971			next_buffer->skb = skb;
5972			next_buffer->dma = 0;
5973			goto next_desc;
5974		}
5975
5976		if (igb_test_staterr(rx_desc,
5977				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
5978			dev_kfree_skb_any(skb);
5979			goto next_desc;
5980		}
5981
5982		igb_rx_hwtstamp(q_vector, rx_desc, skb);
5983		igb_rx_hash(rx_ring, rx_desc, skb);
5984		igb_rx_checksum(rx_ring, rx_desc, skb);
5985		igb_rx_vlan(rx_ring, rx_desc, skb);
5986
5987		total_bytes += skb->len;
5988		total_packets++;
5989
5990		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5991
5992		napi_gro_receive(&q_vector->napi, skb);
5993
5994		budget--;
5995next_desc:
5996		if (!budget)
5997			break;
5998
5999		cleaned_count++;
6000		/* return some buffers to hardware, one at a time is too slow */
6001		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6002			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6003			cleaned_count = 0;
6004		}
6005
6006		/* use prefetched values */
6007		rx_desc = next_rxd;
6008	}
6009
6010	rx_ring->next_to_clean = i;
6011	u64_stats_update_begin(&rx_ring->rx_syncp);
6012	rx_ring->rx_stats.packets += total_packets;
6013	rx_ring->rx_stats.bytes += total_bytes;
6014	u64_stats_update_end(&rx_ring->rx_syncp);
6015	q_vector->rx.total_packets += total_packets;
6016	q_vector->rx.total_bytes += total_bytes;
6017
6018	if (cleaned_count)
6019		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6020
6021	return !!budget;
6022}
6023
6024static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6025				 struct igb_rx_buffer *bi)
6026{
6027	struct sk_buff *skb = bi->skb;
6028	dma_addr_t dma = bi->dma;
6029
6030	if (dma)
6031		return true;
6032
6033	if (likely(!skb)) {
6034		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6035						IGB_RX_HDR_LEN);
6036		bi->skb = skb;
6037		if (!skb) {
6038			rx_ring->rx_stats.alloc_failed++;
6039			return false;
6040		}
6041
6042		/* initialize skb for ring */
6043		skb_record_rx_queue(skb, rx_ring->queue_index);
6044	}
6045
6046	dma = dma_map_single(rx_ring->dev, skb->data,
6047			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6048
6049	if (dma_mapping_error(rx_ring->dev, dma)) {
6050		rx_ring->rx_stats.alloc_failed++;
6051		return false;
6052	}
6053
6054	bi->dma = dma;
6055	return true;
6056}
6057
6058static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6059				  struct igb_rx_buffer *bi)
6060{
6061	struct page *page = bi->page;
6062	dma_addr_t page_dma = bi->page_dma;
6063	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
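	/*
	 * Each page is split into two half-page receive buffers; the XOR
	 * above flips to the unused half when the page is reused.
	 */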
6064
6065	if (page_dma)
6066		return true;
6067
6068	if (!page) {
6069		page = netdev_alloc_page(rx_ring->netdev);
6070		bi->page = page;
6071		if (unlikely(!page)) {
6072			rx_ring->rx_stats.alloc_failed++;
6073			return false;
6074		}
6075	}
6076
6077	page_dma = dma_map_page(rx_ring->dev, page,
6078				page_offset, PAGE_SIZE / 2,
6079				DMA_FROM_DEVICE);
6080
6081	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6082		rx_ring->rx_stats.alloc_failed++;
6083		return false;
6084	}
6085
6086	bi->page_dma = page_dma;
6087	bi->page_offset = page_offset;
6088	return true;
6089}
6090
6091/**
6092 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6093 * @rx_ring: Rx descriptor ring to refill, @cleaned_count: number of buffers to replace
6094 **/
6095void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6096{
6097	union e1000_adv_rx_desc *rx_desc;
6098	struct igb_rx_buffer *bi;
6099	u16 i = rx_ring->next_to_use;
6100
6101	rx_desc = IGB_RX_DESC(rx_ring, i);
6102	bi = &rx_ring->rx_buffer_info[i];
6103	i -= rx_ring->count;
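	/* i is biased by -count so that !i below detects a ring wrap */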
6104
6105	while (cleaned_count--) {
6106		if (!igb_alloc_mapped_skb(rx_ring, bi))
6107			break;
6108
6109		/* Refresh the desc even if buffer_addrs didn't change
6110		 * because each write-back erases this info. */
6111		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6112
6113		if (!igb_alloc_mapped_page(rx_ring, bi))
6114			break;
6115
6116		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6117
6118		rx_desc++;
6119		bi++;
6120		i++;
6121		if (unlikely(!i)) {
6122			rx_desc = IGB_RX_DESC(rx_ring, 0);
6123			bi = rx_ring->rx_buffer_info;
6124			i -= rx_ring->count;
6125		}
6126
6127		/* clear the hdr_addr for the next_to_use descriptor */
6128		rx_desc->read.hdr_addr = 0;
6129	}
6130
6131	i += rx_ring->count;
6132
6133	if (rx_ring->next_to_use != i) {
6134		rx_ring->next_to_use = i;
6135
6136		/* Force memory writes to complete before letting h/w
6137		 * know there are new descriptors to fetch.  (Only
6138		 * applicable for weak-ordered memory model archs,
6139		 * such as IA-64). */
6140		wmb();
6141		writel(i, rx_ring->tail);
6142	}
6143}
6144
6145/**
6146 * igb_mii_ioctl - handle MII register ioctl requests
6147 * @netdev: network interface device structure
6148 * @ifr: pointer to the ioctl request data
6149 * @cmd: ioctl command to execute
6150 **/
6151static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6152{
6153	struct igb_adapter *adapter = netdev_priv(netdev);
6154	struct mii_ioctl_data *data = if_mii(ifr);
6155
6156	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6157		return -EOPNOTSUPP;
6158
6159	switch (cmd) {
6160	case SIOCGMIIPHY:
6161		data->phy_id = adapter->hw.phy.addr;
6162		break;
6163	case SIOCGMIIREG:
6164		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6165		                     &data->val_out))
6166			return -EIO;
6167		break;
6168	case SIOCSMIIREG:
6169	default:
6170		return -EOPNOTSUPP;
6171	}
6172	return 0;
6173}
6174
6175/**
6176 * igb_hwtstamp_ioctl - control hardware time stamping
6177 * @netdev: network interface device structure
6178 * @ifr: pointer to the ioctl request data
6179 * @cmd: ioctl command (SIOCSHWTSTAMP)
6180 *
6181 * Outgoing time stamping can be enabled and disabled. Play nice and
6182 * disable it when requested, although it shouldn't cause any overhead
6183 * when no packet needs it. At most one packet in the queue may be
6184 * marked for time stamping, otherwise it would be impossible to tell
6185 * for sure to which packet the hardware time stamp belongs.
6186 *
6187 * Incoming time stamping has to be configured via the hardware
6188 * filters. Not all combinations are supported, in particular event
6189 * type has to be specified. Matching the kind of event packet is
6190 * not supported, with the exception of "all V2 events regardless of
6191 * layer 2 or 4".
6192 *
6193 **/
6194static int igb_hwtstamp_ioctl(struct net_device *netdev,
6195			      struct ifreq *ifr, int cmd)
6196{
6197	struct igb_adapter *adapter = netdev_priv(netdev);
6198	struct e1000_hw *hw = &adapter->hw;
6199	struct hwtstamp_config config;
6200	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6201	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6202	u32 tsync_rx_cfg = 0;
6203	bool is_l4 = false;
6204	bool is_l2 = false;
6205	u32 regval;
6206
6207	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6208		return -EFAULT;
6209
6210	/* reserved for future extensions */
6211	if (config.flags)
6212		return -EINVAL;
6213
6214	switch (config.tx_type) {
6215	case HWTSTAMP_TX_OFF:
6216		tsync_tx_ctl = 0;
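		/* fall through */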
6217	case HWTSTAMP_TX_ON:
6218		break;
6219	default:
6220		return -ERANGE;
6221	}
6222
6223	switch (config.rx_filter) {
6224	case HWTSTAMP_FILTER_NONE:
6225		tsync_rx_ctl = 0;
6226		break;
6227	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6228	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6229	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6230	case HWTSTAMP_FILTER_ALL:
6231		/*
6232		 * register TSYNCRXCFG must be set, therefore it is not
6233		 * possible to time stamp both Sync and Delay_Req messages
6234		 * => fall back to time stamping all packets
6235		 */
6236		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6237		config.rx_filter = HWTSTAMP_FILTER_ALL;
6238		break;
6239	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6240		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6241		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6242		is_l4 = true;
6243		break;
6244	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6245		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6246		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6247		is_l4 = true;
6248		break;
6249	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6250	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6251		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6252		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6253		is_l2 = true;
6254		is_l4 = true;
6255		config.rx_filter = HWTSTAMP_FILTER_SOME;
6256		break;
6257	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6258	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6259		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6260		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6261		is_l2 = true;
6262		is_l4 = true;
6263		config.rx_filter = HWTSTAMP_FILTER_SOME;
6264		break;
6265	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6266	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6267	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6268		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6269		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6270		is_l2 = true;
6271		is_l4 = true;
6272		break;
6273	default:
6274		return -ERANGE;
6275	}
6276
6277	if (hw->mac.type == e1000_82575) {
6278		if (tsync_rx_ctl || tsync_tx_ctl)
6279			return -EINVAL;
6280		return 0;
6281	}
6282
6283	/*
6284	 * Per-packet timestamping only works if all packets are
6285	 * timestamped, so enable timestamping in all packets as
6286	 * long as one rx filter was configured.
6287	 */
6288	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6289		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6290		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6291	}
6292
6293	/* enable/disable TX */
6294	regval = rd32(E1000_TSYNCTXCTL);
6295	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6296	regval |= tsync_tx_ctl;
6297	wr32(E1000_TSYNCTXCTL, regval);
6298
6299	/* enable/disable RX */
6300	regval = rd32(E1000_TSYNCRXCTL);
6301	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6302	regval |= tsync_rx_ctl;
6303	wr32(E1000_TSYNCRXCTL, regval);
6304
6305	/* define which PTP packets are time stamped */
6306	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6307
6308	/* define ethertype filter for timestamped packets */
6309	if (is_l2)
6310		wr32(E1000_ETQF(3),
6311		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6312		                 E1000_ETQF_1588 | /* enable timestamping */
6313		                 ETH_P_1588));     /* 1588 eth protocol type */
6314	else
6315		wr32(E1000_ETQF(3), 0);
6316
6317#define PTP_PORT 319
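	/* UDP port 319 carries PTP event messages (Sync, Delay_Req) */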
6318	/* L4 Queue Filter[3]: filter by destination port and protocol */
6319	if (is_l4) {
6320		u32 ftqf = (IPPROTO_UDP /* UDP */
6321			| E1000_FTQF_VF_BP /* VF not compared */
6322			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6323			| E1000_FTQF_MASK); /* mask all inputs */
6324		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6325
6326		wr32(E1000_IMIR(3), htons(PTP_PORT));
6327		wr32(E1000_IMIREXT(3),
6328		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6329		if (hw->mac.type == e1000_82576) {
6330			/* enable source port check */
6331			wr32(E1000_SPQF(3), htons(PTP_PORT));
6332			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6333		}
6334		wr32(E1000_FTQF(3), ftqf);
6335	} else {
6336		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6337	}
6338	wrfl();
6339
6340	adapter->hwtstamp_config = config;
6341
6342	/* clear TX/RX time stamp registers, just to be sure */
6343	regval = rd32(E1000_TXSTMPH);
6344	regval = rd32(E1000_RXSTMPH);
6345
6346	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6347		-EFAULT : 0;
6348}
6349
6350/**
6351 * igb_ioctl - dispatch device-specific ioctl requests
6352 * @netdev: network interface device structure
6353 * @ifr: pointer to the ioctl request data
6354 * @cmd: ioctl command to execute
6355 **/
6356static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6357{
6358	switch (cmd) {
6359	case SIOCGMIIPHY:
6360	case SIOCGMIIREG:
6361	case SIOCSMIIREG:
6362		return igb_mii_ioctl(netdev, ifr, cmd);
6363	case SIOCSHWTSTAMP:
6364		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6365	default:
6366		return -EOPNOTSUPP;
6367	}
6368}
6369
6370s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6371{
6372	struct igb_adapter *adapter = hw->back;
6373	u16 cap_offset;
6374
6375	cap_offset = adapter->pdev->pcie_cap;
6376	if (!cap_offset)
6377		return -E1000_ERR_CONFIG;
6378
6379	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6380
6381	return 0;
6382}
6383
6384s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6385{
6386	struct igb_adapter *adapter = hw->back;
6387	u16 cap_offset;
6388
6389	cap_offset = adapter->pdev->pcie_cap;
6390	if (!cap_offset)
6391		return -E1000_ERR_CONFIG;
6392
6393	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6394
6395	return 0;
6396}
6397
6398static void igb_vlan_mode(struct net_device *netdev, u32 features)
6399{
6400	struct igb_adapter *adapter = netdev_priv(netdev);
6401	struct e1000_hw *hw = &adapter->hw;
6402	u32 ctrl, rctl;
6403	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6404
6405	if (enable) {
6406		/* enable VLAN tag insert/strip */
6407		ctrl = rd32(E1000_CTRL);
6408		ctrl |= E1000_CTRL_VME;
6409		wr32(E1000_CTRL, ctrl);
6410
6411		/* Disable CFI check */
6412		rctl = rd32(E1000_RCTL);
6413		rctl &= ~E1000_RCTL_CFIEN;
6414		wr32(E1000_RCTL, rctl);
6415	} else {
6416		/* disable VLAN tag insert/strip */
6417		ctrl = rd32(E1000_CTRL);
6418		ctrl &= ~E1000_CTRL_VME;
6419		wr32(E1000_CTRL, ctrl);
6420	}
6421
6422	igb_rlpml_set(adapter);
6423}
6424
6425static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6426{
6427	struct igb_adapter *adapter = netdev_priv(netdev);
6428	struct e1000_hw *hw = &adapter->hw;
6429	int pf_id = adapter->vfs_allocated_count;
6430
6431	/* attempt to add filter to vlvf array */
6432	igb_vlvf_set(adapter, vid, true, pf_id);
6433
6434	/* add the filter since PF can receive vlans w/o entry in vlvf */
6435	igb_vfta_set(hw, vid, true);
6436
6437	set_bit(vid, adapter->active_vlans);
6438}
6439
6440static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6441{
6442	struct igb_adapter *adapter = netdev_priv(netdev);
6443	struct e1000_hw *hw = &adapter->hw;
6444	int pf_id = adapter->vfs_allocated_count;
6445	s32 err;
6446
6447	/* remove vlan from VLVF table array */
6448	err = igb_vlvf_set(adapter, vid, false, pf_id);
6449
6450	/* if vid was not present in VLVF just remove it from table */
6451	if (err)
6452		igb_vfta_set(hw, vid, false);
6453
6454	clear_bit(vid, adapter->active_vlans);
6455}
6456
6457static void igb_restore_vlan(struct igb_adapter *adapter)
6458{
6459	u16 vid;
6460
6461	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6462
6463	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6464		igb_vlan_rx_add_vid(adapter->netdev, vid);
6465}
6466
6467int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6468{
6469	struct pci_dev *pdev = adapter->pdev;
6470	struct e1000_mac_info *mac = &adapter->hw.mac;
6471
6472	mac->autoneg = 0;
6473
6474	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6475	 * for the switch() below to work */
6476	if ((spd & 1) || (dplx & ~1))
6477		goto err_inval;
6478
6479	/* Fiber NICs only allow 1000 Mbps Full duplex */
6480	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6481	    (spd != SPEED_1000 ||
6482	     dplx != DUPLEX_FULL))
6483		goto err_inval;
6484
6485	switch (spd + dplx) {
6486	case SPEED_10 + DUPLEX_HALF:
6487		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6488		break;
6489	case SPEED_10 + DUPLEX_FULL:
6490		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6491		break;
6492	case SPEED_100 + DUPLEX_HALF:
6493		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6494		break;
6495	case SPEED_100 + DUPLEX_FULL:
6496		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6497		break;
6498	case SPEED_1000 + DUPLEX_FULL:
6499		mac->autoneg = 1;
6500		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6501		break;
6502	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6503	default:
6504		goto err_inval;
6505	}
6506	return 0;
6507
6508err_inval:
6509	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6510	return -EINVAL;
6511}
6512
6513static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6514{
6515	struct net_device *netdev = pci_get_drvdata(pdev);
6516	struct igb_adapter *adapter = netdev_priv(netdev);
6517	struct e1000_hw *hw = &adapter->hw;
6518	u32 ctrl, rctl, status;
6519	u32 wufc = adapter->wol;
6520#ifdef CONFIG_PM
6521	int retval = 0;
6522#endif
6523
6524	netif_device_detach(netdev);
6525
6526	if (netif_running(netdev))
6527		igb_close(netdev);
6528
6529	igb_clear_interrupt_scheme(adapter);
6530
6531#ifdef CONFIG_PM
6532	retval = pci_save_state(pdev);
6533	if (retval)
6534		return retval;
6535#endif
6536
6537	status = rd32(E1000_STATUS);
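	/* if link is already up there is no point in waking on link change */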
6538	if (status & E1000_STATUS_LU)
6539		wufc &= ~E1000_WUFC_LNKC;
6540
6541	if (wufc) {
6542		igb_setup_rctl(adapter);
6543		igb_set_rx_mode(netdev);
6544
6545		/* turn on all-multi mode if wake on multicast is enabled */
6546		if (wufc & E1000_WUFC_MC) {
6547			rctl = rd32(E1000_RCTL);
6548			rctl |= E1000_RCTL_MPE;
6549			wr32(E1000_RCTL, rctl);
6550		}
6551
6552		ctrl = rd32(E1000_CTRL);
6553		/* advertise wake from D3Cold */
6554		#define E1000_CTRL_ADVD3WUC 0x00100000
6555		/* phy power management enable */
6556		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6557		ctrl |= E1000_CTRL_ADVD3WUC;
6558		wr32(E1000_CTRL, ctrl);
6559
6560		/* Allow time for pending master requests to run */
6561		igb_disable_pcie_master(hw);
6562
6563		wr32(E1000_WUC, E1000_WUC_PME_EN);
6564		wr32(E1000_WUFC, wufc);
6565	} else {
6566		wr32(E1000_WUC, 0);
6567		wr32(E1000_WUFC, 0);
6568	}
6569
6570	*enable_wake = wufc || adapter->en_mng_pt;
6571	if (!*enable_wake)
6572		igb_power_down_link(adapter);
6573	else
6574		igb_power_up_link(adapter);
6575
6576	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6577	 * would have already happened in close and is redundant. */
6578	igb_release_hw_control(adapter);
6579
6580	pci_disable_device(pdev);
6581
6582	return 0;
6583}
6584
6585#ifdef CONFIG_PM
6586static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6587{
6588	int retval;
6589	bool wake;
6590
6591	retval = __igb_shutdown(pdev, &wake);
6592	if (retval)
6593		return retval;
6594
6595	if (wake) {
6596		pci_prepare_to_sleep(pdev);
6597	} else {
6598		pci_wake_from_d3(pdev, false);
6599		pci_set_power_state(pdev, PCI_D3hot);
6600	}
6601
6602	return 0;
6603}
6604
6605static int igb_resume(struct pci_dev *pdev)
6606{
6607	struct net_device *netdev = pci_get_drvdata(pdev);
6608	struct igb_adapter *adapter = netdev_priv(netdev);
6609	struct e1000_hw *hw = &adapter->hw;
6610	u32 err;
6611
6612	pci_set_power_state(pdev, PCI_D0);
6613	pci_restore_state(pdev);
6614	pci_save_state(pdev);
6615
6616	err = pci_enable_device_mem(pdev);
6617	if (err) {
6618		dev_err(&pdev->dev,
6619			"igb: Cannot enable PCI device from suspend\n");
6620		return err;
6621	}
6622	pci_set_master(pdev);
6623
6624	pci_enable_wake(pdev, PCI_D3hot, 0);
6625	pci_enable_wake(pdev, PCI_D3cold, 0);
6626
6627	if (igb_init_interrupt_scheme(adapter)) {
6628		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6629		return -ENOMEM;
6630	}
6631
6632	igb_reset(adapter);
6633
6634	/* let the f/w know that the h/w is now under the control of the
6635	 * driver. */
6636	igb_get_hw_control(adapter);
6637
6638	wr32(E1000_WUS, ~0);
6639
6640	if (netif_running(netdev)) {
6641		err = igb_open(netdev);
6642		if (err)
6643			return err;
6644	}
6645
6646	netif_device_attach(netdev);
6647
6648	return 0;
6649}
6650#endif
6651
6652static void igb_shutdown(struct pci_dev *pdev)
6653{
6654	bool wake;
6655
6656	__igb_shutdown(pdev, &wake);
6657
6658	if (system_state == SYSTEM_POWER_OFF) {
6659		pci_wake_from_d3(pdev, wake);
6660		pci_set_power_state(pdev, PCI_D3hot);
6661	}
6662}
6663
6664#ifdef CONFIG_NET_POLL_CONTROLLER
6665/*
6666 * Polling 'interrupt' - used by things like netconsole to send skbs
6667 * without having to re-enable interrupts. It's not called while
6668 * the interrupt routine is executing.
6669 */
6670static void igb_netpoll(struct net_device *netdev)
6671{
6672	struct igb_adapter *adapter = netdev_priv(netdev);
6673	struct e1000_hw *hw = &adapter->hw;
6674	struct igb_q_vector *q_vector;
6675	int i;
6676
6677	for (i = 0; i < adapter->num_q_vectors; i++) {
6678		q_vector = adapter->q_vector[i];
6679		if (adapter->msix_entries)
6680			wr32(E1000_EIMC, q_vector->eims_value);
6681		else
6682			igb_irq_disable(adapter);
6683		napi_schedule(&q_vector->napi);
6684	}
6685}
6686#endif /* CONFIG_NET_POLL_CONTROLLER */
6687
6688/**
6689 * igb_io_error_detected - called when PCI error is detected
6690 * @pdev: Pointer to PCI device
6691 * @state: The current pci connection state
6692 *
6693 * This function is called after a PCI bus error affecting
6694 * this device has been detected.
6695 */
6696static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6697					      pci_channel_state_t state)
6698{
6699	struct net_device *netdev = pci_get_drvdata(pdev);
6700	struct igb_adapter *adapter = netdev_priv(netdev);
6701
6702	netif_device_detach(netdev);
6703
6704	if (state == pci_channel_io_perm_failure)
6705		return PCI_ERS_RESULT_DISCONNECT;
6706
6707	if (netif_running(netdev))
6708		igb_down(adapter);
6709	pci_disable_device(pdev);
6710
6711	/* Request a slot reset. */
6712	return PCI_ERS_RESULT_NEED_RESET;
6713}
6714
6715/**
6716 * igb_io_slot_reset - called after the pci bus has been reset.
6717 * @pdev: Pointer to PCI device
6718 *
6719 * Restart the card from scratch, as if from a cold-boot. Implementation
6720 * resembles the first-half of the igb_resume routine.
6721 */
6722static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6723{
6724	struct net_device *netdev = pci_get_drvdata(pdev);
6725	struct igb_adapter *adapter = netdev_priv(netdev);
6726	struct e1000_hw *hw = &adapter->hw;
6727	pci_ers_result_t result;
6728	int err;
6729
6730	if (pci_enable_device_mem(pdev)) {
6731		dev_err(&pdev->dev,
6732			"Cannot re-enable PCI device after reset.\n");
6733		result = PCI_ERS_RESULT_DISCONNECT;
6734	} else {
6735		pci_set_master(pdev);
6736		pci_restore_state(pdev);
6737		pci_save_state(pdev);
6738
6739		pci_enable_wake(pdev, PCI_D3hot, 0);
6740		pci_enable_wake(pdev, PCI_D3cold, 0);
6741
6742		igb_reset(adapter);
6743		wr32(E1000_WUS, ~0);
6744		result = PCI_ERS_RESULT_RECOVERED;
6745	}
6746
6747	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6748	if (err) {
6749		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6750		        "failed 0x%0x\n", err);
6751		/* non-fatal, continue */
6752	}
6753
6754	return result;
6755}
6756
6757/**
6758 * igb_io_resume - called when traffic can start flowing again.
6759 * @pdev: Pointer to PCI device
6760 *
6761 * This callback is called when the error recovery driver tells us that
6762 * its OK to resume normal operation. Implementation resembles the
6763 * second-half of the igb_resume routine.
6764 */
6765static void igb_io_resume(struct pci_dev *pdev)
6766{
6767	struct net_device *netdev = pci_get_drvdata(pdev);
6768	struct igb_adapter *adapter = netdev_priv(netdev);
6769
6770	if (netif_running(netdev)) {
6771		if (igb_up(adapter)) {
6772			dev_err(&pdev->dev, "igb_up failed after reset\n");
6773			return;
6774		}
6775	}
6776
6777	netif_device_attach(netdev);
6778
6779	/* let the f/w know that the h/w is now under the control of the
6780	 * driver. */
6781	igb_get_hw_control(adapter);
6782}
6783
6784static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6785                             u8 qsel)
6786{
6787	u32 rar_low, rar_high;
6788	struct e1000_hw *hw = &adapter->hw;
6789
6790	/* HW expects these in little endian so we reverse the byte order
6791	 * from network order (big endian) to little endian
6792	 */
6793	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6794	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6795	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6796
6797	/* Indicate to hardware the Address is Valid. */
6798	rar_high |= E1000_RAH_AV;
6799
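	/*
	 * 82575 takes the queue select as a pool number in the RAH pool
	 * field; later MACs treat the field as a bitmask with one bit per
	 * pool, hence the multiply vs. the shift below.
	 */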
6800	if (hw->mac.type == e1000_82575)
6801		rar_high |= E1000_RAH_POOL_1 * qsel;
6802	else
6803		rar_high |= E1000_RAH_POOL_1 << qsel;
6804
6805	wr32(E1000_RAL(index), rar_low);
6806	wrfl();
6807	wr32(E1000_RAH(index), rar_high);
6808	wrfl();
6809}
6810
6811static int igb_set_vf_mac(struct igb_adapter *adapter,
6812                          int vf, unsigned char *mac_addr)
6813{
6814	struct e1000_hw *hw = &adapter->hw;
6815	/* VF MAC addresses start at the end of the receive address array and
6816	 * move towards the first entry; as a result a collision should not be possible */
6817	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6818
6819	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6820
6821	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6822
6823	return 0;
6824}
6825
6826static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6827{
6828	struct igb_adapter *adapter = netdev_priv(netdev);
6829	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6830		return -EINVAL;
6831	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6832	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6833	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6834				      " change effective.");
6835	if (test_bit(__IGB_DOWN, &adapter->state)) {
6836		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6837			 " but the PF device is not up.\n");
6838		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6839			 " attempting to use the VF device.\n");
6840	}
6841	return igb_set_vf_mac(adapter, vf, mac);
6842}
6843
6844static int igb_link_mbps(int internal_link_speed)
6845{
6846	switch (internal_link_speed) {
6847	case SPEED_100:
6848		return 100;
6849	case SPEED_1000:
6850		return 1000;
6851	default:
6852		return 0;
6853	}
6854}
6855
6856static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6857				  int link_speed)
6858{
6859	int rf_dec, rf_int;
6860	u32 bcnrc_val;
6861
6862	if (tx_rate != 0) {
6863		/* Calculate the rate factor values to set */
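		/*
		 * The rate factor is link_speed / tx_rate as a fixed-point
		 * value with E1000_RTTBCNRC_RF_INT_SHIFT fractional bits;
		 * e.g. a 1000 Mbps link limited to 250 Mbps yields
		 * rf_int = 4 and rf_dec = 0.
		 */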
6864		rf_int = link_speed / tx_rate;
6865		rf_dec = (link_speed - (rf_int * tx_rate));
6866		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6867
6868		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6869		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6870		               E1000_RTTBCNRC_RF_INT_MASK);
6871		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6872	} else {
6873		bcnrc_val = 0;
6874	}
6875
6876	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6877	wr32(E1000_RTTBCNRC, bcnrc_val);
6878}
6879
6880static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6881{
6882	int actual_link_speed, i;
6883	bool reset_rate = false;
6884
6885	/* VF TX rate limit was not set or not supported */
6886	if ((adapter->vf_rate_link_speed == 0) ||
6887	    (adapter->hw.mac.type != e1000_82576))
6888		return;
6889
6890	actual_link_speed = igb_link_mbps(adapter->link_speed);
6891	if (actual_link_speed != adapter->vf_rate_link_speed) {
6892		reset_rate = true;
6893		adapter->vf_rate_link_speed = 0;
6894		dev_info(&adapter->pdev->dev,
6895		         "Link speed has been changed. VF Transmit "
6896		         "rate is disabled\n");
6897	}
6898
6899	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6900		if (reset_rate)
6901			adapter->vf_data[i].tx_rate = 0;
6902
6903		igb_set_vf_rate_limit(&adapter->hw, i,
6904		                      adapter->vf_data[i].tx_rate,
6905		                      actual_link_speed);
6906	}
6907}
6908
6909static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6910{
6911	struct igb_adapter *adapter = netdev_priv(netdev);
6912	struct e1000_hw *hw = &adapter->hw;
6913	int actual_link_speed;
6914
6915	if (hw->mac.type != e1000_82576)
6916		return -EOPNOTSUPP;
6917
6918	actual_link_speed = igb_link_mbps(adapter->link_speed);
6919	if ((vf >= adapter->vfs_allocated_count) ||
6920	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6921	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6922		return -EINVAL;
6923
6924	adapter->vf_rate_link_speed = actual_link_speed;
6925	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6926	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6927
6928	return 0;
6929}
6930
6931static int igb_ndo_get_vf_config(struct net_device *netdev,
6932				 int vf, struct ifla_vf_info *ivi)
6933{
6934	struct igb_adapter *adapter = netdev_priv(netdev);
6935	if (vf >= adapter->vfs_allocated_count)
6936		return -EINVAL;
6937	ivi->vf = vf;
6938	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6939	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6940	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6941	ivi->qos = adapter->vf_data[vf].pf_qos;
6942	return 0;
6943}
6944
6945static void igb_vmm_control(struct igb_adapter *adapter)
6946{
6947	struct e1000_hw *hw = &adapter->hw;
6948	u32 reg;
6949
6950	switch (hw->mac.type) {
6951	case e1000_82575:
6952	default:
6953		/* replication is not supported for 82575 */
6954		return;
6955	case e1000_82576:
6956		/* notify HW that the MAC is adding vlan tags */
6957		reg = rd32(E1000_DTXCTL);
6958		reg |= E1000_DTXCTL_VLAN_ADDED;
6959		wr32(E1000_DTXCTL, reg);
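		/* fall through */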
6960	case e1000_82580:
6961		/* enable replication vlan tag stripping */
6962		reg = rd32(E1000_RPLOLR);
6963		reg |= E1000_RPLOLR_STRVLAN;
6964		wr32(E1000_RPLOLR, reg);
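		/* fall through */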
6965	case e1000_i350:
6966		/* none of the above registers are supported by i350 */
6967		break;
6968	}
6969
6970	if (adapter->vfs_allocated_count) {
6971		igb_vmdq_set_loopback_pf(hw, true);
6972		igb_vmdq_set_replication_pf(hw, true);
6973		igb_vmdq_set_anti_spoofing_pf(hw, true,
6974						adapter->vfs_allocated_count);
6975	} else {
6976		igb_vmdq_set_loopback_pf(hw, false);
6977		igb_vmdq_set_replication_pf(hw, false);
6978	}
6979}
6980
6981/* igb_main.c */
6982