igb_main.c revision 6d095fa8cb1bb87fe8bf956cdf6211e784b4c9e4
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2011 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/bitops.h>
32#include <linux/vmalloc.h>
33#include <linux/pagemap.h>
34#include <linux/netdevice.h>
35#include <linux/ipv6.h>
36#include <linux/slab.h>
37#include <net/checksum.h>
38#include <net/ip6_checksum.h>
39#include <linux/net_tstamp.h>
40#include <linux/mii.h>
41#include <linux/ethtool.h>
42#include <linux/if.h>
43#include <linux/if_vlan.h>
44#include <linux/pci.h>
45#include <linux/pci-aspm.h>
46#include <linux/delay.h>
47#include <linux/interrupt.h>
48#include <linux/ip.h>
49#include <linux/tcp.h>
50#include <linux/sctp.h>
51#include <linux/if_ether.h>
52#include <linux/aer.h>
53#include <linux/prefetch.h>
54#ifdef CONFIG_IGB_DCA
55#include <linux/dca.h>
56#endif
57#include "igb.h"
58
59#define MAJ 3
60#define MIN 0
61#define BUILD 6
62#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63__stringify(BUILD) "-k"
64char igb_driver_name[] = "igb";
65char igb_driver_version[] = DRV_VERSION;
66static const char igb_driver_string[] =
67				"Intel(R) Gigabit Ethernet Network Driver";
68static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70static const struct e1000_info *igb_info_tbl[] = {
71	[board_82575] = &e1000_82575_info,
72};
73
74static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100	/* required last entry */
101	{0, }
102};
103
104MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106void igb_reset(struct igb_adapter *);
107static int igb_setup_all_tx_resources(struct igb_adapter *);
108static int igb_setup_all_rx_resources(struct igb_adapter *);
109static void igb_free_all_tx_resources(struct igb_adapter *);
110static void igb_free_all_rx_resources(struct igb_adapter *);
111static void igb_setup_mrqc(struct igb_adapter *);
112static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113static void __devexit igb_remove(struct pci_dev *pdev);
114static void igb_init_hw_timer(struct igb_adapter *adapter);
115static int igb_sw_init(struct igb_adapter *);
116static int igb_open(struct net_device *);
117static int igb_close(struct net_device *);
118static void igb_configure_tx(struct igb_adapter *);
119static void igb_configure_rx(struct igb_adapter *);
120static void igb_clean_all_tx_rings(struct igb_adapter *);
121static void igb_clean_all_rx_rings(struct igb_adapter *);
122static void igb_clean_tx_ring(struct igb_ring *);
123static void igb_clean_rx_ring(struct igb_ring *);
124static void igb_set_rx_mode(struct net_device *);
125static void igb_update_phy_info(unsigned long);
126static void igb_watchdog(unsigned long);
127static void igb_watchdog_task(struct work_struct *);
128static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130						 struct rtnl_link_stats64 *stats);
131static int igb_change_mtu(struct net_device *, int);
132static int igb_set_mac(struct net_device *, void *);
133static void igb_set_uta(struct igb_adapter *adapter);
134static irqreturn_t igb_intr(int irq, void *);
135static irqreturn_t igb_intr_msi(int irq, void *);
136static irqreturn_t igb_msix_other(int irq, void *);
137static irqreturn_t igb_msix_ring(int irq, void *);
138#ifdef CONFIG_IGB_DCA
139static void igb_update_dca(struct igb_q_vector *);
140static void igb_setup_dca(struct igb_adapter *);
141#endif /* CONFIG_IGB_DCA */
142static int igb_poll(struct napi_struct *, int);
143static bool igb_clean_tx_irq(struct igb_q_vector *);
144static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146static void igb_tx_timeout(struct net_device *);
147static void igb_reset_task(struct work_struct *);
148static void igb_vlan_mode(struct net_device *netdev, u32 features);
149static void igb_vlan_rx_add_vid(struct net_device *, u16);
150static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151static void igb_restore_vlan(struct igb_adapter *);
152static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
153static void igb_ping_all_vfs(struct igb_adapter *);
154static void igb_msg_task(struct igb_adapter *);
155static void igb_vmm_control(struct igb_adapter *);
156static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160			       int vf, u16 vlan, u8 qos);
161static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163				 struct ifla_vf_info *ivi);
164static void igb_check_vf_rate_limit(struct igb_adapter *);
165
166#ifdef CONFIG_PM
167static int igb_suspend(struct pci_dev *, pm_message_t);
168static int igb_resume(struct pci_dev *);
169#endif
170static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p);

/* Registered in igb_init_module() and unregistered in igb_exit_module(). */
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *netdev);
#endif
#ifdef CONFIG_PCI_IOV
/* Module parameter: zero-initialized (static storage) unless set at load. */
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
189
190static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191		     pci_channel_state_t);
192static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193static void igb_io_resume(struct pci_dev *);
194
195static struct pci_error_handlers igb_err_handler = {
196	.error_detected = igb_io_error_detected,
197	.slot_reset = igb_io_slot_reset,
198	.resume = igb_io_resume,
199};
200
201
202static struct pci_driver igb_driver = {
203	.name     = igb_driver_name,
204	.id_table = igb_pci_tbl,
205	.probe    = igb_probe,
206	.remove   = __devexit_p(igb_remove),
207#ifdef CONFIG_PM
208	/* Power Management Hooks */
209	.suspend  = igb_suspend,
210	.resume   = igb_resume,
211#endif
212	.shutdown = igb_shutdown,
213	.err_handler = &igb_err_handler
214};
215
216MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218MODULE_LICENSE("GPL");
219MODULE_VERSION(DRV_VERSION);
220
221struct igb_reg_info {
222	u32 ofs;
223	char *name;
224};
225
226static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228	/* General Registers */
229	{E1000_CTRL, "CTRL"},
230	{E1000_STATUS, "STATUS"},
231	{E1000_CTRL_EXT, "CTRL_EXT"},
232
233	/* Interrupt Registers */
234	{E1000_ICR, "ICR"},
235
236	/* RX Registers */
237	{E1000_RCTL, "RCTL"},
238	{E1000_RDLEN(0), "RDLEN"},
239	{E1000_RDH(0), "RDH"},
240	{E1000_RDT(0), "RDT"},
241	{E1000_RXDCTL(0), "RXDCTL"},
242	{E1000_RDBAL(0), "RDBAL"},
243	{E1000_RDBAH(0), "RDBAH"},
244
245	/* TX Registers */
246	{E1000_TCTL, "TCTL"},
247	{E1000_TDBAL(0), "TDBAL"},
248	{E1000_TDBAH(0), "TDBAH"},
249	{E1000_TDLEN(0), "TDLEN"},
250	{E1000_TDH(0), "TDH"},
251	{E1000_TDT(0), "TDT"},
252	{E1000_TXDCTL(0), "TXDCTL"},
253	{E1000_TDFH, "TDFH"},
254	{E1000_TDFT, "TDFT"},
255	{E1000_TDFHS, "TDFHS"},
256	{E1000_TDFPC, "TDFPC"},
257
258	/* List Terminator */
259	{}
260};
261
262/*
263 * igb_regdump - register printout routine
264 */
265static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266{
267	int n = 0;
268	char rname[16];
269	u32 regs[8];
270
271	switch (reginfo->ofs) {
272	case E1000_RDLEN(0):
273		for (n = 0; n < 4; n++)
274			regs[n] = rd32(E1000_RDLEN(n));
275		break;
276	case E1000_RDH(0):
277		for (n = 0; n < 4; n++)
278			regs[n] = rd32(E1000_RDH(n));
279		break;
280	case E1000_RDT(0):
281		for (n = 0; n < 4; n++)
282			regs[n] = rd32(E1000_RDT(n));
283		break;
284	case E1000_RXDCTL(0):
285		for (n = 0; n < 4; n++)
286			regs[n] = rd32(E1000_RXDCTL(n));
287		break;
288	case E1000_RDBAL(0):
289		for (n = 0; n < 4; n++)
290			regs[n] = rd32(E1000_RDBAL(n));
291		break;
292	case E1000_RDBAH(0):
293		for (n = 0; n < 4; n++)
294			regs[n] = rd32(E1000_RDBAH(n));
295		break;
296	case E1000_TDBAL(0):
297		for (n = 0; n < 4; n++)
298			regs[n] = rd32(E1000_RDBAL(n));
299		break;
300	case E1000_TDBAH(0):
301		for (n = 0; n < 4; n++)
302			regs[n] = rd32(E1000_TDBAH(n));
303		break;
304	case E1000_TDLEN(0):
305		for (n = 0; n < 4; n++)
306			regs[n] = rd32(E1000_TDLEN(n));
307		break;
308	case E1000_TDH(0):
309		for (n = 0; n < 4; n++)
310			regs[n] = rd32(E1000_TDH(n));
311		break;
312	case E1000_TDT(0):
313		for (n = 0; n < 4; n++)
314			regs[n] = rd32(E1000_TDT(n));
315		break;
316	case E1000_TXDCTL(0):
317		for (n = 0; n < 4; n++)
318			regs[n] = rd32(E1000_TXDCTL(n));
319		break;
320	default:
321		printk(KERN_INFO "%-15s %08x\n",
322			reginfo->name, rd32(reginfo->ofs));
323		return;
324	}
325
326	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327	printk(KERN_INFO "%-15s ", rname);
328	for (n = 0; n < 4; n++)
329		printk(KERN_CONT "%08x ", regs[n]);
330	printk(KERN_CONT "\n");
331}
332
333/*
334 * igb_dump - Print registers, tx-rings and rx-rings
335 */
336static void igb_dump(struct igb_adapter *adapter)
337{
338	struct net_device *netdev = adapter->netdev;
339	struct e1000_hw *hw = &adapter->hw;
340	struct igb_reg_info *reginfo;
341	struct igb_ring *tx_ring;
342	union e1000_adv_tx_desc *tx_desc;
343	struct my_u0 { u64 a; u64 b; } *u0;
344	struct igb_ring *rx_ring;
345	union e1000_adv_rx_desc *rx_desc;
346	u32 staterr;
347	u16 i, n;
348
349	if (!netif_msg_hw(adapter))
350		return;
351
352	/* Print netdevice Info */
353	if (netdev) {
354		dev_info(&adapter->pdev->dev, "Net device Info\n");
355		printk(KERN_INFO "Device Name     state            "
356			"trans_start      last_rx\n");
357		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
358		netdev->name,
359		netdev->state,
360		netdev->trans_start,
361		netdev->last_rx);
362	}
363
364	/* Print Registers */
365	dev_info(&adapter->pdev->dev, "Register Dump\n");
366	printk(KERN_INFO " Register Name   Value\n");
367	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
368	     reginfo->name; reginfo++) {
369		igb_regdump(hw, reginfo);
370	}
371
372	/* Print TX Ring Summary */
373	if (!netdev || !netif_running(netdev))
374		goto exit;
375
376	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
377	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
378		" leng ntw timestamp\n");
379	for (n = 0; n < adapter->num_tx_queues; n++) {
380		struct igb_tx_buffer *buffer_info;
381		tx_ring = adapter->tx_ring[n];
382		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
383		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
384			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
385			   (u64)buffer_info->dma,
386			   buffer_info->length,
387			   buffer_info->next_to_watch,
388			   (u64)buffer_info->time_stamp);
389	}
390
391	/* Print TX Rings */
392	if (!netif_msg_tx_done(adapter))
393		goto rx_ring_summary;
394
395	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
396
397	/* Transmit Descriptor Formats
398	 *
399	 * Advanced Transmit Descriptor
400	 *   +--------------------------------------------------------------+
401	 * 0 |         Buffer Address [63:0]                                |
402	 *   +--------------------------------------------------------------+
403	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
404	 *   +--------------------------------------------------------------+
405	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
406	 */
407
408	for (n = 0; n < adapter->num_tx_queues; n++) {
409		tx_ring = adapter->tx_ring[n];
410		printk(KERN_INFO "------------------------------------\n");
411		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
412		printk(KERN_INFO "------------------------------------\n");
413		printk(KERN_INFO "T [desc]     [address 63:0  ] "
414			"[PlPOCIStDDM Ln] [bi->dma       ] "
415			"leng  ntw timestamp        bi->skb\n");
416
417		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
418			struct igb_tx_buffer *buffer_info;
419			tx_desc = IGB_TX_DESC(tx_ring, i);
420			buffer_info = &tx_ring->tx_buffer_info[i];
421			u0 = (struct my_u0 *)tx_desc;
422			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
423				" %04X  %p %016llX %p", i,
424				le64_to_cpu(u0->a),
425				le64_to_cpu(u0->b),
426				(u64)buffer_info->dma,
427				buffer_info->length,
428				buffer_info->next_to_watch,
429				(u64)buffer_info->time_stamp,
430				buffer_info->skb);
431			if (i == tx_ring->next_to_use &&
432				i == tx_ring->next_to_clean)
433				printk(KERN_CONT " NTC/U\n");
434			else if (i == tx_ring->next_to_use)
435				printk(KERN_CONT " NTU\n");
436			else if (i == tx_ring->next_to_clean)
437				printk(KERN_CONT " NTC\n");
438			else
439				printk(KERN_CONT "\n");
440
441			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
442				print_hex_dump(KERN_INFO, "",
443					DUMP_PREFIX_ADDRESS,
444					16, 1, phys_to_virt(buffer_info->dma),
445					buffer_info->length, true);
446		}
447	}
448
449	/* Print RX Rings Summary */
450rx_ring_summary:
451	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
452	printk(KERN_INFO "Queue [NTU] [NTC]\n");
453	for (n = 0; n < adapter->num_rx_queues; n++) {
454		rx_ring = adapter->rx_ring[n];
455		printk(KERN_INFO " %5d %5X %5X\n", n,
456			   rx_ring->next_to_use, rx_ring->next_to_clean);
457	}
458
459	/* Print RX Rings */
460	if (!netif_msg_rx_status(adapter))
461		goto exit;
462
463	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
464
465	/* Advanced Receive Descriptor (Read) Format
466	 *    63                                           1        0
467	 *    +-----------------------------------------------------+
468	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
469	 *    +----------------------------------------------+------+
470	 *  8 |       Header Buffer Address [63:1]           |  DD  |
471	 *    +-----------------------------------------------------+
472	 *
473	 *
474	 * Advanced Receive Descriptor (Write-Back) Format
475	 *
476	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
477	 *   +------------------------------------------------------+
478	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
479	 *   | Checksum   Ident  |   |           |    | Type | Type |
480	 *   +------------------------------------------------------+
481	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
482	 *   +------------------------------------------------------+
483	 *   63       48 47    32 31            20 19               0
484	 */
485
486	for (n = 0; n < adapter->num_rx_queues; n++) {
487		rx_ring = adapter->rx_ring[n];
488		printk(KERN_INFO "------------------------------------\n");
489		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
490		printk(KERN_INFO "------------------------------------\n");
491		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
492			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
493			"<-- Adv Rx Read format\n");
494		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
495			"[vl er S cks ln] ---------------- [bi->skb] "
496			"<-- Adv Rx Write-Back format\n");
497
498		for (i = 0; i < rx_ring->count; i++) {
499			struct igb_rx_buffer *buffer_info;
500			buffer_info = &rx_ring->rx_buffer_info[i];
501			rx_desc = IGB_RX_DESC(rx_ring, i);
502			u0 = (struct my_u0 *)rx_desc;
503			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
504			if (staterr & E1000_RXD_STAT_DD) {
505				/* Descriptor Done */
506				printk(KERN_INFO "RWB[0x%03X]     %016llX "
507					"%016llX ---------------- %p", i,
508					le64_to_cpu(u0->a),
509					le64_to_cpu(u0->b),
510					buffer_info->skb);
511			} else {
512				printk(KERN_INFO "R  [0x%03X]     %016llX "
513					"%016llX %016llX %p", i,
514					le64_to_cpu(u0->a),
515					le64_to_cpu(u0->b),
516					(u64)buffer_info->dma,
517					buffer_info->skb);
518
519				if (netif_msg_pktdata(adapter)) {
520					print_hex_dump(KERN_INFO, "",
521						DUMP_PREFIX_ADDRESS,
522						16, 1,
523						phys_to_virt(buffer_info->dma),
524						IGB_RX_HDR_LEN, true);
525					print_hex_dump(KERN_INFO, "",
526					  DUMP_PREFIX_ADDRESS,
527					  16, 1,
528					  phys_to_virt(
529					    buffer_info->page_dma +
530					    buffer_info->page_offset),
531					  PAGE_SIZE/2, true);
532				}
533			}
534
535			if (i == rx_ring->next_to_use)
536				printk(KERN_CONT " NTU\n");
537			else if (i == rx_ring->next_to_clean)
538				printk(KERN_CONT " NTC\n");
539			else
540				printk(KERN_CONT "\n");
541
542		}
543	}
544
545exit:
546	return;
547}
548
549
550/**
551 * igb_read_clock - read raw cycle counter (to be used by time counter)
552 */
553static cycle_t igb_read_clock(const struct cyclecounter *tc)
554{
555	struct igb_adapter *adapter =
556		container_of(tc, struct igb_adapter, cycles);
557	struct e1000_hw *hw = &adapter->hw;
558	u64 stamp = 0;
559	int shift = 0;
560
561	/*
562	 * The timestamp latches on lowest register read. For the 82580
563	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
564	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
565	 */
566	if (hw->mac.type == e1000_82580) {
567		stamp = rd32(E1000_SYSTIMR) >> 8;
568		shift = IGB_82580_TSYNC_SHIFT;
569	}
570
571	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
572	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
573	return stamp;
574}
575
576/**
577 * igb_get_hw_dev - return device
578 * used by hardware layer to print debugging information
579 **/
580struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
581{
582	struct igb_adapter *adapter = hw->back;
583	return adapter->netdev;
584}
585
586/**
587 * igb_init_module - Driver Registration Routine
588 *
589 * igb_init_module is the first routine called when the driver is
590 * loaded. All it does is register with the PCI subsystem.
591 **/
592static int __init igb_init_module(void)
593{
594	int ret;
595	printk(KERN_INFO "%s - version %s\n",
596	       igb_driver_string, igb_driver_version);
597
598	printk(KERN_INFO "%s\n", igb_copyright);
599
600#ifdef CONFIG_IGB_DCA
601	dca_register_notify(&dca_notifier);
602#endif
603	ret = pci_register_driver(&igb_driver);
604	return ret;
605}
606
607module_init(igb_init_module);
608
609/**
610 * igb_exit_module - Driver Exit Cleanup Routine
611 *
612 * igb_exit_module is called just before the driver is removed
613 * from memory.
614 **/
615static void __exit igb_exit_module(void)
616{
617#ifdef CONFIG_IGB_DCA
618	dca_unregister_notify(&dca_notifier);
619#endif
620	pci_unregister_driver(&igb_driver);
621}
622
623module_exit(igb_exit_module);
624
/*
 * Map a ring index to an 82576 queue index: bit 0 selects the upper bank
 * (+8), the remaining bits select the slot (0->0, 1->8, 2->1, 3->9, ...).
 * The argument is evaluated twice, so it must have no side effects.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
626/**
627 * igb_cache_ring_register - Descriptor ring to register mapping
628 * @adapter: board private structure to initialize
629 *
630 * Once we know the feature-set enabled for the device, we'll cache
631 * the register offset the descriptor ring is assigned to.
632 **/
633static void igb_cache_ring_register(struct igb_adapter *adapter)
634{
635	int i = 0, j = 0;
636	u32 rbase_offset = adapter->vfs_allocated_count;
637
638	switch (adapter->hw.mac.type) {
639	case e1000_82576:
640		/* The queues are allocated for virtualization such that VF 0
641		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
642		 * In order to avoid collision we start at the first free queue
643		 * and continue consuming queues in the same sequence
644		 */
645		if (adapter->vfs_allocated_count) {
646			for (; i < adapter->rss_queues; i++)
647				adapter->rx_ring[i]->reg_idx = rbase_offset +
648				                               Q_IDX_82576(i);
649		}
650	case e1000_82575:
651	case e1000_82580:
652	case e1000_i350:
653	default:
654		for (; i < adapter->num_rx_queues; i++)
655			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
656		for (; j < adapter->num_tx_queues; j++)
657			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
658		break;
659	}
660}
661
662static void igb_free_queues(struct igb_adapter *adapter)
663{
664	int i;
665
666	for (i = 0; i < adapter->num_tx_queues; i++) {
667		kfree(adapter->tx_ring[i]);
668		adapter->tx_ring[i] = NULL;
669	}
670	for (i = 0; i < adapter->num_rx_queues; i++) {
671		kfree(adapter->rx_ring[i]);
672		adapter->rx_ring[i] = NULL;
673	}
674	adapter->num_rx_queues = 0;
675	adapter->num_tx_queues = 0;
676}
677
678/**
679 * igb_alloc_queues - Allocate memory for all rings
680 * @adapter: board private structure to initialize
681 *
682 * We allocate one ring per queue at run-time since we don't know the
683 * number of queues at compile-time.
684 **/
685static int igb_alloc_queues(struct igb_adapter *adapter)
686{
687	struct igb_ring *ring;
688	int i;
689	int orig_node = adapter->node;
690
691	for (i = 0; i < adapter->num_tx_queues; i++) {
692		if (orig_node == -1) {
693			int cur_node = next_online_node(adapter->node);
694			if (cur_node == MAX_NUMNODES)
695				cur_node = first_online_node;
696			adapter->node = cur_node;
697		}
698		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699				    adapter->node);
700		if (!ring)
701			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702		if (!ring)
703			goto err;
704		ring->count = adapter->tx_ring_count;
705		ring->queue_index = i;
706		ring->dev = &adapter->pdev->dev;
707		ring->netdev = adapter->netdev;
708		ring->numa_node = adapter->node;
709		/* For 82575, context index must be unique per ring. */
710		if (adapter->hw.mac.type == e1000_82575)
711			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
712		adapter->tx_ring[i] = ring;
713	}
714	/* Restore the adapter's original node */
715	adapter->node = orig_node;
716
717	for (i = 0; i < adapter->num_rx_queues; i++) {
718		if (orig_node == -1) {
719			int cur_node = next_online_node(adapter->node);
720			if (cur_node == MAX_NUMNODES)
721				cur_node = first_online_node;
722			adapter->node = cur_node;
723		}
724		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725				    adapter->node);
726		if (!ring)
727			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728		if (!ring)
729			goto err;
730		ring->count = adapter->rx_ring_count;
731		ring->queue_index = i;
732		ring->dev = &adapter->pdev->dev;
733		ring->netdev = adapter->netdev;
734		ring->numa_node = adapter->node;
735		/* set flag indicating ring supports SCTP checksum offload */
736		if (adapter->hw.mac.type >= e1000_82576)
737			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
738		adapter->rx_ring[i] = ring;
739	}
740	/* Restore the adapter's original node */
741	adapter->node = orig_node;
742
743	igb_cache_ring_register(adapter);
744
745	return 0;
746
747err:
748	/* Restore the adapter's original node */
749	adapter->node = orig_node;
750	igb_free_queues(adapter);
751
752	return -ENOMEM;
753}
754
755/**
756 *  igb_write_ivar - configure ivar for given MSI-X vector
757 *  @hw: pointer to the HW structure
758 *  @msix_vector: vector number we are allocating to a given ring
759 *  @index: row index of IVAR register to write within IVAR table
760 *  @offset: column offset of in IVAR, should be multiple of 8
761 *
762 *  This function is intended to handle the writing of the IVAR register
763 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
764 *  each containing an cause allocation for an Rx and Tx ring, and a
765 *  variable number of rows depending on the number of queues supported.
766 **/
767static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
768			   int index, int offset)
769{
770	u32 ivar = array_rd32(E1000_IVAR0, index);
771
772	/* clear any bits that are currently set */
773	ivar &= ~((u32)0xFF << offset);
774
775	/* write vector and valid bit */
776	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
777
778	array_wr32(E1000_IVAR0, index, ivar);
779}
780
781#define IGB_N0_QUEUE -1
782static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
783{
784	struct igb_adapter *adapter = q_vector->adapter;
785	struct e1000_hw *hw = &adapter->hw;
786	int rx_queue = IGB_N0_QUEUE;
787	int tx_queue = IGB_N0_QUEUE;
788	u32 msixbm = 0;
789
790	if (q_vector->rx.ring)
791		rx_queue = q_vector->rx.ring->reg_idx;
792	if (q_vector->tx.ring)
793		tx_queue = q_vector->tx.ring->reg_idx;
794
795	switch (hw->mac.type) {
796	case e1000_82575:
797		/* The 82575 assigns vectors using a bitmask, which matches the
798		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
799		   or more queues to a vector, we write the appropriate bits
800		   into the MSIXBM register for that vector. */
801		if (rx_queue > IGB_N0_QUEUE)
802			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
803		if (tx_queue > IGB_N0_QUEUE)
804			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
805		if (!adapter->msix_entries && msix_vector == 0)
806			msixbm |= E1000_EIMS_OTHER;
807		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
808		q_vector->eims_value = msixbm;
809		break;
810	case e1000_82576:
811		/*
812		 * 82576 uses a table that essentially consists of 2 columns
813		 * with 8 rows.  The ordering is column-major so we use the
814		 * lower 3 bits as the row index, and the 4th bit as the
815		 * column offset.
816		 */
817		if (rx_queue > IGB_N0_QUEUE)
818			igb_write_ivar(hw, msix_vector,
819				       rx_queue & 0x7,
820				       (rx_queue & 0x8) << 1);
821		if (tx_queue > IGB_N0_QUEUE)
822			igb_write_ivar(hw, msix_vector,
823				       tx_queue & 0x7,
824				       ((tx_queue & 0x8) << 1) + 8);
825		q_vector->eims_value = 1 << msix_vector;
826		break;
827	case e1000_82580:
828	case e1000_i350:
829		/*
830		 * On 82580 and newer adapters the scheme is similar to 82576
831		 * however instead of ordering column-major we have things
832		 * ordered row-major.  So we traverse the table by using
833		 * bit 0 as the column offset, and the remaining bits as the
834		 * row index.
835		 */
836		if (rx_queue > IGB_N0_QUEUE)
837			igb_write_ivar(hw, msix_vector,
838				       rx_queue >> 1,
839				       (rx_queue & 0x1) << 4);
840		if (tx_queue > IGB_N0_QUEUE)
841			igb_write_ivar(hw, msix_vector,
842				       tx_queue >> 1,
843				       ((tx_queue & 0x1) << 4) + 8);
844		q_vector->eims_value = 1 << msix_vector;
845		break;
846	default:
847		BUG();
848		break;
849	}
850
851	/* add q_vector eims value to global eims_enable_mask */
852	adapter->eims_enable_mask |= q_vector->eims_value;
853
854	/* configure q_vector to set itr on first interrupt */
855	q_vector->set_itr = 1;
856}
857
858/**
859 * igb_configure_msix - Configure MSI-X hardware
860 *
861 * igb_configure_msix sets up the hardware to properly
862 * generate MSI-X interrupts.
863 **/
864static void igb_configure_msix(struct igb_adapter *adapter)
865{
866	u32 tmp;
867	int i, vector = 0;
868	struct e1000_hw *hw = &adapter->hw;
869
870	adapter->eims_enable_mask = 0;
871
872	/* set vector for other causes, i.e. link changes */
873	switch (hw->mac.type) {
874	case e1000_82575:
875		tmp = rd32(E1000_CTRL_EXT);
876		/* enable MSI-X PBA support*/
877		tmp |= E1000_CTRL_EXT_PBA_CLR;
878
879		/* Auto-Mask interrupts upon ICR read. */
880		tmp |= E1000_CTRL_EXT_EIAME;
881		tmp |= E1000_CTRL_EXT_IRCA;
882
883		wr32(E1000_CTRL_EXT, tmp);
884
885		/* enable msix_other interrupt */
886		array_wr32(E1000_MSIXBM(0), vector++,
887		                      E1000_EIMS_OTHER);
888		adapter->eims_other = E1000_EIMS_OTHER;
889
890		break;
891
892	case e1000_82576:
893	case e1000_82580:
894	case e1000_i350:
895		/* Turn on MSI-X capability first, or our settings
896		 * won't stick.  And it will take days to debug. */
897		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
898		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
899		                E1000_GPIE_NSICR);
900
901		/* enable msix_other interrupt */
902		adapter->eims_other = 1 << vector;
903		tmp = (vector++ | E1000_IVAR_VALID) << 8;
904
905		wr32(E1000_IVAR_MISC, tmp);
906		break;
907	default:
908		/* do nothing, since nothing else supports MSI-X */
909		break;
910	} /* switch (hw->mac.type) */
911
912	adapter->eims_enable_mask |= adapter->eims_other;
913
914	for (i = 0; i < adapter->num_q_vectors; i++)
915		igb_assign_vector(adapter->q_vector[i], vector++);
916
917	wrfl();
918}
919
920/**
921 * igb_request_msix - Initialize MSI-X interrupts
922 *
923 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
924 * kernel.
925 **/
926static int igb_request_msix(struct igb_adapter *adapter)
927{
928	struct net_device *netdev = adapter->netdev;
929	struct e1000_hw *hw = &adapter->hw;
930	int i, err = 0, vector = 0;
931
932	err = request_irq(adapter->msix_entries[vector].vector,
933	                  igb_msix_other, 0, netdev->name, adapter);
934	if (err)
935		goto out;
936	vector++;
937
938	for (i = 0; i < adapter->num_q_vectors; i++) {
939		struct igb_q_vector *q_vector = adapter->q_vector[i];
940
941		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
942
943		if (q_vector->rx.ring && q_vector->tx.ring)
944			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
945				q_vector->rx.ring->queue_index);
946		else if (q_vector->tx.ring)
947			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
948				q_vector->tx.ring->queue_index);
949		else if (q_vector->rx.ring)
950			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
951				q_vector->rx.ring->queue_index);
952		else
953			sprintf(q_vector->name, "%s-unused", netdev->name);
954
955		err = request_irq(adapter->msix_entries[vector].vector,
956		                  igb_msix_ring, 0, q_vector->name,
957		                  q_vector);
958		if (err)
959			goto out;
960		vector++;
961	}
962
963	igb_configure_msix(adapter);
964	return 0;
965out:
966	return err;
967}
968
969static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
970{
971	if (adapter->msix_entries) {
972		pci_disable_msix(adapter->pdev);
973		kfree(adapter->msix_entries);
974		adapter->msix_entries = NULL;
975	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
976		pci_disable_msi(adapter->pdev);
977	}
978}
979
980/**
981 * igb_free_q_vectors - Free memory allocated for interrupt vectors
982 * @adapter: board private structure to initialize
983 *
984 * This function frees the memory allocated to the q_vectors.  In addition if
985 * NAPI is enabled it will delete any references to the NAPI struct prior
986 * to freeing the q_vector.
987 **/
988static void igb_free_q_vectors(struct igb_adapter *adapter)
989{
990	int v_idx;
991
992	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
993		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
994		adapter->q_vector[v_idx] = NULL;
995		if (!q_vector)
996			continue;
997		netif_napi_del(&q_vector->napi);
998		kfree(q_vector);
999	}
1000	adapter->num_q_vectors = 0;
1001}
1002
/**
 * igb_clear_interrupt_scheme - tear down queues, q_vectors and MSI-X/MSI
 * @adapter: board private structure
 *
 * Releases, in this order, the queues, the q_vectors and finally the
 * MSI-X/MSI capability, leaving the adapter without any interrupt
 * resources allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first, then the vectors, then the PCI capability */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
1015
1016/**
1017 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1018 *
1019 * Attempt to configure interrupts using the best available
1020 * capabilities of the hardware and kernel.
1021 **/
1022static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1023{
1024	int err;
1025	int numvecs, i;
1026
1027	/* Number of supported queues. */
1028	adapter->num_rx_queues = adapter->rss_queues;
1029	if (adapter->vfs_allocated_count)
1030		adapter->num_tx_queues = 1;
1031	else
1032		adapter->num_tx_queues = adapter->rss_queues;
1033
1034	/* start with one vector for every rx queue */
1035	numvecs = adapter->num_rx_queues;
1036
1037	/* if tx handler is separate add 1 for every tx queue */
1038	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1039		numvecs += adapter->num_tx_queues;
1040
1041	/* store the number of vectors reserved for queues */
1042	adapter->num_q_vectors = numvecs;
1043
1044	/* add 1 vector for link status interrupts */
1045	numvecs++;
1046	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1047					GFP_KERNEL);
1048	if (!adapter->msix_entries)
1049		goto msi_only;
1050
1051	for (i = 0; i < numvecs; i++)
1052		adapter->msix_entries[i].entry = i;
1053
1054	err = pci_enable_msix(adapter->pdev,
1055			      adapter->msix_entries,
1056			      numvecs);
1057	if (err == 0)
1058		goto out;
1059
1060	igb_reset_interrupt_capability(adapter);
1061
1062	/* If we can't do MSI-X, try MSI */
1063msi_only:
1064#ifdef CONFIG_PCI_IOV
1065	/* disable SR-IOV for non MSI-X configurations */
1066	if (adapter->vf_data) {
1067		struct e1000_hw *hw = &adapter->hw;
1068		/* disable iov and allow time for transactions to clear */
1069		pci_disable_sriov(adapter->pdev);
1070		msleep(500);
1071
1072		kfree(adapter->vf_data);
1073		adapter->vf_data = NULL;
1074		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1075		wrfl();
1076		msleep(100);
1077		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1078	}
1079#endif
1080	adapter->vfs_allocated_count = 0;
1081	adapter->rss_queues = 1;
1082	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1083	adapter->num_rx_queues = 1;
1084	adapter->num_tx_queues = 1;
1085	adapter->num_q_vectors = 1;
1086	if (!pci_enable_msi(adapter->pdev))
1087		adapter->flags |= IGB_FLAG_HAS_MSI;
1088out:
1089	/* Notify the stack of the (possibly) reduced queue counts. */
1090	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1091	return netif_set_real_num_rx_queues(adapter->netdev,
1092					    adapter->num_rx_queues);
1093}
1094
1095/**
1096 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1097 * @adapter: board private structure to initialize
1098 *
1099 * We allocate one q_vector per queue interrupt.  If allocation fails we
1100 * return -ENOMEM.
1101 **/
1102static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1103{
1104	struct igb_q_vector *q_vector;
1105	struct e1000_hw *hw = &adapter->hw;
1106	int v_idx;
1107	int orig_node = adapter->node;
1108
1109	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1110		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1111						adapter->num_tx_queues)) &&
1112		    (adapter->num_rx_queues == v_idx))
1113			adapter->node = orig_node;
1114		if (orig_node == -1) {
1115			int cur_node = next_online_node(adapter->node);
1116			if (cur_node == MAX_NUMNODES)
1117				cur_node = first_online_node;
1118			adapter->node = cur_node;
1119		}
1120		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1121					adapter->node);
1122		if (!q_vector)
1123			q_vector = kzalloc(sizeof(struct igb_q_vector),
1124					   GFP_KERNEL);
1125		if (!q_vector)
1126			goto err_out;
1127		q_vector->adapter = adapter;
1128		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1129		q_vector->itr_val = IGB_START_ITR;
1130		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1131		adapter->q_vector[v_idx] = q_vector;
1132	}
1133	/* Restore the adapter's original node */
1134	adapter->node = orig_node;
1135
1136	return 0;
1137
1138err_out:
1139	/* Restore the adapter's original node */
1140	adapter->node = orig_node;
1141	igb_free_q_vectors(adapter);
1142	return -ENOMEM;
1143}
1144
1145static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1146                                      int ring_idx, int v_idx)
1147{
1148	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1149
1150	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1151	q_vector->rx.ring->q_vector = q_vector;
1152	q_vector->rx.count++;
1153	q_vector->itr_val = adapter->rx_itr_setting;
1154	if (q_vector->itr_val && q_vector->itr_val <= 3)
1155		q_vector->itr_val = IGB_START_ITR;
1156}
1157
1158static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1159                                      int ring_idx, int v_idx)
1160{
1161	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1162
1163	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1164	q_vector->tx.ring->q_vector = q_vector;
1165	q_vector->tx.count++;
1166	q_vector->itr_val = adapter->tx_itr_setting;
1167	q_vector->tx.work_limit = adapter->tx_work_limit;
1168	if (q_vector->itr_val && q_vector->itr_val <= 3)
1169		q_vector->itr_val = IGB_START_ITR;
1170}
1171
1172/**
1173 * igb_map_ring_to_vector - maps allocated queues to vectors
1174 *
1175 * This function maps the recently allocated queues to vectors.
1176 **/
1177static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1178{
1179	int i;
1180	int v_idx = 0;
1181
1182	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1183	    (adapter->num_q_vectors < adapter->num_tx_queues))
1184		return -ENOMEM;
1185
1186	if (adapter->num_q_vectors >=
1187	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1188		for (i = 0; i < adapter->num_rx_queues; i++)
1189			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1190		for (i = 0; i < adapter->num_tx_queues; i++)
1191			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1192	} else {
1193		for (i = 0; i < adapter->num_rx_queues; i++) {
1194			if (i < adapter->num_tx_queues)
1195				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1196			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1197		}
1198		for (; i < adapter->num_tx_queues; i++)
1199			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1200	}
1201	return 0;
1202}
1203
1204/**
1205 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1206 *
1207 * This function initializes the interrupts and allocates all of the queues.
1208 **/
1209static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1210{
1211	struct pci_dev *pdev = adapter->pdev;
1212	int err;
1213
1214	err = igb_set_interrupt_capability(adapter);
1215	if (err)
1216		return err;
1217
1218	err = igb_alloc_q_vectors(adapter);
1219	if (err) {
1220		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1221		goto err_alloc_q_vectors;
1222	}
1223
1224	err = igb_alloc_queues(adapter);
1225	if (err) {
1226		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1227		goto err_alloc_queues;
1228	}
1229
1230	err = igb_map_ring_to_vector(adapter);
1231	if (err) {
1232		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1233		goto err_map_queues;
1234	}
1235
1236
1237	return 0;
1238err_map_queues:
1239	igb_free_queues(adapter);
1240err_alloc_queues:
1241	igb_free_q_vectors(adapter);
1242err_alloc_q_vectors:
1243	igb_reset_interrupt_capability(adapter);
1244	return err;
1245}
1246
1247/**
1248 * igb_request_irq - initialize interrupts
1249 *
1250 * Attempts to configure interrupts using the best available
1251 * capabilities of the hardware and kernel.
1252 **/
1253static int igb_request_irq(struct igb_adapter *adapter)
1254{
1255	struct net_device *netdev = adapter->netdev;
1256	struct pci_dev *pdev = adapter->pdev;
1257	int err = 0;
1258
1259	if (adapter->msix_entries) {
1260		err = igb_request_msix(adapter);
1261		if (!err)
1262			goto request_done;
1263		/* fall back to MSI */
1264		igb_clear_interrupt_scheme(adapter);
1265		if (!pci_enable_msi(adapter->pdev))
1266			adapter->flags |= IGB_FLAG_HAS_MSI;
1267		igb_free_all_tx_resources(adapter);
1268		igb_free_all_rx_resources(adapter);
1269		adapter->num_tx_queues = 1;
1270		adapter->num_rx_queues = 1;
1271		adapter->num_q_vectors = 1;
1272		err = igb_alloc_q_vectors(adapter);
1273		if (err) {
1274			dev_err(&pdev->dev,
1275			        "Unable to allocate memory for vectors\n");
1276			goto request_done;
1277		}
1278		err = igb_alloc_queues(adapter);
1279		if (err) {
1280			dev_err(&pdev->dev,
1281			        "Unable to allocate memory for queues\n");
1282			igb_free_q_vectors(adapter);
1283			goto request_done;
1284		}
1285		igb_setup_all_tx_resources(adapter);
1286		igb_setup_all_rx_resources(adapter);
1287	} else {
1288		igb_assign_vector(adapter->q_vector[0], 0);
1289	}
1290
1291	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1292		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1293				  netdev->name, adapter);
1294		if (!err)
1295			goto request_done;
1296
1297		/* fall back to legacy interrupts */
1298		igb_reset_interrupt_capability(adapter);
1299		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1300	}
1301
1302	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1303			  netdev->name, adapter);
1304
1305	if (err)
1306		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1307			err);
1308
1309request_done:
1310	return err;
1311}
1312
1313static void igb_free_irq(struct igb_adapter *adapter)
1314{
1315	if (adapter->msix_entries) {
1316		int vector = 0, i;
1317
1318		free_irq(adapter->msix_entries[vector++].vector, adapter);
1319
1320		for (i = 0; i < adapter->num_q_vectors; i++) {
1321			struct igb_q_vector *q_vector = adapter->q_vector[i];
1322			free_irq(adapter->msix_entries[vector++].vector,
1323			         q_vector);
1324		}
1325	} else {
1326		free_irq(adapter->pdev->irq, adapter);
1327	}
1328}
1329
1330/**
1331 * igb_irq_disable - Mask off interrupt generation on the NIC
1332 * @adapter: board private structure
1333 **/
1334static void igb_irq_disable(struct igb_adapter *adapter)
1335{
1336	struct e1000_hw *hw = &adapter->hw;
1337
1338	/*
1339	 * we need to be careful when disabling interrupts.  The VFs are also
1340	 * mapped into these registers and so clearing the bits can cause
1341	 * issues on the VF drivers so we only need to clear what we set
1342	 */
1343	if (adapter->msix_entries) {
1344		u32 regval = rd32(E1000_EIAM);
1345		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1346		wr32(E1000_EIMC, adapter->eims_enable_mask);
1347		regval = rd32(E1000_EIAC);
1348		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1349	}
1350
1351	wr32(E1000_IAM, 0);
1352	wr32(E1000_IMC, ~0);
1353	wrfl();
1354	if (adapter->msix_entries) {
1355		int i;
1356		for (i = 0; i < adapter->num_q_vectors; i++)
1357			synchronize_irq(adapter->msix_entries[i].vector);
1358	} else {
1359		synchronize_irq(adapter->pdev->irq);
1360	}
1361}
1362
1363/**
1364 * igb_irq_enable - Enable default interrupt generation settings
1365 * @adapter: board private structure
1366 **/
1367static void igb_irq_enable(struct igb_adapter *adapter)
1368{
1369	struct e1000_hw *hw = &adapter->hw;
1370
1371	if (adapter->msix_entries) {
1372		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1373		u32 regval = rd32(E1000_EIAC);
1374		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1375		regval = rd32(E1000_EIAM);
1376		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1377		wr32(E1000_EIMS, adapter->eims_enable_mask);
1378		if (adapter->vfs_allocated_count) {
1379			wr32(E1000_MBVFIMR, 0xFF);
1380			ims |= E1000_IMS_VMMB;
1381		}
1382		if (adapter->hw.mac.type == e1000_82580)
1383			ims |= E1000_IMS_DRSTA;
1384
1385		wr32(E1000_IMS, ims);
1386	} else {
1387		wr32(E1000_IMS, IMS_ENABLE_MASK |
1388				E1000_IMS_DRSTA);
1389		wr32(E1000_IAM, IMS_ENABLE_MASK |
1390				E1000_IMS_DRSTA);
1391	}
1392}
1393
1394static void igb_update_mng_vlan(struct igb_adapter *adapter)
1395{
1396	struct e1000_hw *hw = &adapter->hw;
1397	u16 vid = adapter->hw.mng_cookie.vlan_id;
1398	u16 old_vid = adapter->mng_vlan_id;
1399
1400	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1401		/* add VID to filter table */
1402		igb_vfta_set(hw, vid, true);
1403		adapter->mng_vlan_id = vid;
1404	} else {
1405		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1406	}
1407
1408	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1409	    (vid != old_vid) &&
1410	    !test_bit(old_vid, adapter->active_vlans)) {
1411		/* remove VID from filter table */
1412		igb_vfta_set(hw, old_vid, false);
1413	}
1414}
1415
1416/**
1417 * igb_release_hw_control - release control of the h/w to f/w
1418 * @adapter: address of board private structure
1419 *
1420 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1421 * For ASF and Pass Through versions of f/w this means that the
1422 * driver is no longer loaded.
1423 *
1424 **/
1425static void igb_release_hw_control(struct igb_adapter *adapter)
1426{
1427	struct e1000_hw *hw = &adapter->hw;
1428	u32 ctrl_ext;
1429
1430	/* Let firmware take over control of h/w */
1431	ctrl_ext = rd32(E1000_CTRL_EXT);
1432	wr32(E1000_CTRL_EXT,
1433			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1434}
1435
1436/**
1437 * igb_get_hw_control - get control of the h/w from f/w
1438 * @adapter: address of board private structure
1439 *
1440 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1441 * For ASF and Pass Through versions of f/w this means that
1442 * the driver is loaded.
1443 *
1444 **/
1445static void igb_get_hw_control(struct igb_adapter *adapter)
1446{
1447	struct e1000_hw *hw = &adapter->hw;
1448	u32 ctrl_ext;
1449
1450	/* Let firmware know the driver has taken over */
1451	ctrl_ext = rd32(E1000_CTRL_EXT);
1452	wr32(E1000_CTRL_EXT,
1453			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1454}
1455
1456/**
1457 * igb_configure - configure the hardware for RX and TX
1458 * @adapter: private board structure
1459 **/
1460static void igb_configure(struct igb_adapter *adapter)
1461{
1462	struct net_device *netdev = adapter->netdev;
1463	int i;
1464
1465	igb_get_hw_control(adapter);
1466	igb_set_rx_mode(netdev);
1467
1468	igb_restore_vlan(adapter);
1469
1470	igb_setup_tctl(adapter);
1471	igb_setup_mrqc(adapter);
1472	igb_setup_rctl(adapter);
1473
1474	igb_configure_tx(adapter);
1475	igb_configure_rx(adapter);
1476
1477	igb_rx_fifo_flush_82575(&adapter->hw);
1478
1479	/* call igb_desc_unused which always leaves
1480	 * at least 1 descriptor unused to make sure
1481	 * next_to_use != next_to_clean */
1482	for (i = 0; i < adapter->num_rx_queues; i++) {
1483		struct igb_ring *ring = adapter->rx_ring[i];
1484		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1485	}
1486}
1487
1488/**
1489 * igb_power_up_link - Power up the phy/serdes link
1490 * @adapter: address of board private structure
1491 **/
1492void igb_power_up_link(struct igb_adapter *adapter)
1493{
1494	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1495		igb_power_up_phy_copper(&adapter->hw);
1496	else
1497		igb_power_up_serdes_link_82575(&adapter->hw);
1498}
1499
1500/**
1501 * igb_power_down_link - Power down the phy/serdes link
1502 * @adapter: address of board private structure
1503 */
1504static void igb_power_down_link(struct igb_adapter *adapter)
1505{
1506	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1507		igb_power_down_phy_copper_82575(&adapter->hw);
1508	else
1509		igb_shutdown_serdes_link_82575(&adapter->hw);
1510}
1511
1512/**
1513 * igb_up - Open the interface and prepare it to handle traffic
1514 * @adapter: board private structure
1515 **/
1516int igb_up(struct igb_adapter *adapter)
1517{
1518	struct e1000_hw *hw = &adapter->hw;
1519	int i;
1520
1521	/* hardware has been reset, we need to reload some things */
1522	igb_configure(adapter);
1523
1524	clear_bit(__IGB_DOWN, &adapter->state);
1525
1526	for (i = 0; i < adapter->num_q_vectors; i++) {
1527		struct igb_q_vector *q_vector = adapter->q_vector[i];
1528		napi_enable(&q_vector->napi);
1529	}
1530	if (adapter->msix_entries)
1531		igb_configure_msix(adapter);
1532	else
1533		igb_assign_vector(adapter->q_vector[0], 0);
1534
1535	/* Clear any pending interrupts. */
1536	rd32(E1000_ICR);
1537	igb_irq_enable(adapter);
1538
1539	/* notify VFs that reset has been completed */
1540	if (adapter->vfs_allocated_count) {
1541		u32 reg_data = rd32(E1000_CTRL_EXT);
1542		reg_data |= E1000_CTRL_EXT_PFRSTD;
1543		wr32(E1000_CTRL_EXT, reg_data);
1544	}
1545
1546	netif_tx_start_all_queues(adapter->netdev);
1547
1548	/* start the watchdog. */
1549	hw->mac.get_link_status = 1;
1550	schedule_work(&adapter->watchdog_task);
1551
1552	return 0;
1553}
1554
1555void igb_down(struct igb_adapter *adapter)
1556{
1557	struct net_device *netdev = adapter->netdev;
1558	struct e1000_hw *hw = &adapter->hw;
1559	u32 tctl, rctl;
1560	int i;
1561
1562	/* signal that we're down so the interrupt handler does not
1563	 * reschedule our watchdog timer */
1564	set_bit(__IGB_DOWN, &adapter->state);
1565
1566	/* disable receives in the hardware */
1567	rctl = rd32(E1000_RCTL);
1568	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1569	/* flush and sleep below */
1570
1571	netif_tx_stop_all_queues(netdev);
1572
1573	/* disable transmits in the hardware */
1574	tctl = rd32(E1000_TCTL);
1575	tctl &= ~E1000_TCTL_EN;
1576	wr32(E1000_TCTL, tctl);
1577	/* flush both disables and wait for them to finish */
1578	wrfl();
1579	msleep(10);
1580
1581	for (i = 0; i < adapter->num_q_vectors; i++) {
1582		struct igb_q_vector *q_vector = adapter->q_vector[i];
1583		napi_disable(&q_vector->napi);
1584	}
1585
1586	igb_irq_disable(adapter);
1587
1588	del_timer_sync(&adapter->watchdog_timer);
1589	del_timer_sync(&adapter->phy_info_timer);
1590
1591	netif_carrier_off(netdev);
1592
1593	/* record the stats before reset*/
1594	spin_lock(&adapter->stats64_lock);
1595	igb_update_stats(adapter, &adapter->stats64);
1596	spin_unlock(&adapter->stats64_lock);
1597
1598	adapter->link_speed = 0;
1599	adapter->link_duplex = 0;
1600
1601	if (!pci_channel_offline(adapter->pdev))
1602		igb_reset(adapter);
1603	igb_clean_all_tx_rings(adapter);
1604	igb_clean_all_rx_rings(adapter);
1605#ifdef CONFIG_IGB_DCA
1606
1607	/* since we reset the hardware DCA settings were cleared */
1608	igb_setup_dca(adapter);
1609#endif
1610}
1611
1612void igb_reinit_locked(struct igb_adapter *adapter)
1613{
1614	WARN_ON(in_interrupt());
1615	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1616		msleep(1);
1617	igb_down(adapter);
1618	igb_up(adapter);
1619	clear_bit(__IGB_RESETTING, &adapter->state);
1620}
1621
1622void igb_reset(struct igb_adapter *adapter)
1623{
1624	struct pci_dev *pdev = adapter->pdev;
1625	struct e1000_hw *hw = &adapter->hw;
1626	struct e1000_mac_info *mac = &hw->mac;
1627	struct e1000_fc_info *fc = &hw->fc;
1628	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1629	u16 hwm;
1630
1631	/* Repartition Pba for greater than 9k mtu
1632	 * To take effect CTRL.RST is required.
1633	 */
1634	switch (mac->type) {
1635	case e1000_i350:
1636	case e1000_82580:
1637		pba = rd32(E1000_RXPBS);
1638		pba = igb_rxpbs_adjust_82580(pba);
1639		break;
1640	case e1000_82576:
1641		pba = rd32(E1000_RXPBS);
1642		pba &= E1000_RXPBS_SIZE_MASK_82576;
1643		break;
1644	case e1000_82575:
1645	default:
1646		pba = E1000_PBA_34K;
1647		break;
1648	}
1649
1650	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1651	    (mac->type < e1000_82576)) {
1652		/* adjust PBA for jumbo frames */
1653		wr32(E1000_PBA, pba);
1654
1655		/* To maintain wire speed transmits, the Tx FIFO should be
1656		 * large enough to accommodate two full transmit packets,
1657		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1658		 * the Rx FIFO should be large enough to accommodate at least
1659		 * one full receive packet and is similarly rounded up and
1660		 * expressed in KB. */
1661		pba = rd32(E1000_PBA);
1662		/* upper 16 bits has Tx packet buffer allocation size in KB */
1663		tx_space = pba >> 16;
1664		/* lower 16 bits has Rx packet buffer allocation size in KB */
1665		pba &= 0xffff;
1666		/* the tx fifo also stores 16 bytes of information about the tx
1667		 * but don't include ethernet FCS because hardware appends it */
1668		min_tx_space = (adapter->max_frame_size +
1669				sizeof(union e1000_adv_tx_desc) -
1670				ETH_FCS_LEN) * 2;
1671		min_tx_space = ALIGN(min_tx_space, 1024);
1672		min_tx_space >>= 10;
1673		/* software strips receive CRC, so leave room for it */
1674		min_rx_space = adapter->max_frame_size;
1675		min_rx_space = ALIGN(min_rx_space, 1024);
1676		min_rx_space >>= 10;
1677
1678		/* If current Tx allocation is less than the min Tx FIFO size,
1679		 * and the min Tx FIFO size is less than the current Rx FIFO
1680		 * allocation, take space away from current Rx allocation */
1681		if (tx_space < min_tx_space &&
1682		    ((min_tx_space - tx_space) < pba)) {
1683			pba = pba - (min_tx_space - tx_space);
1684
1685			/* if short on rx space, rx wins and must trump tx
1686			 * adjustment */
1687			if (pba < min_rx_space)
1688				pba = min_rx_space;
1689		}
1690		wr32(E1000_PBA, pba);
1691	}
1692
1693	/* flow control settings */
1694	/* The high water mark must be low enough to fit one full frame
1695	 * (or the size used for early receive) above it in the Rx FIFO.
1696	 * Set it to the lower of:
1697	 * - 90% of the Rx FIFO size, or
1698	 * - the full Rx FIFO size minus one full frame */
1699	hwm = min(((pba << 10) * 9 / 10),
1700			((pba << 10) - 2 * adapter->max_frame_size));
1701
1702	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1703	fc->low_water = fc->high_water - 16;
1704	fc->pause_time = 0xFFFF;
1705	fc->send_xon = 1;
1706	fc->current_mode = fc->requested_mode;
1707
1708	/* disable receive for all VFs and wait one second */
1709	if (adapter->vfs_allocated_count) {
1710		int i;
1711		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1712			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1713
1714		/* ping all the active vfs to let them know we are going down */
1715		igb_ping_all_vfs(adapter);
1716
1717		/* disable transmits and receives */
1718		wr32(E1000_VFRE, 0);
1719		wr32(E1000_VFTE, 0);
1720	}
1721
1722	/* Allow time for pending master requests to run */
1723	hw->mac.ops.reset_hw(hw);
1724	wr32(E1000_WUC, 0);
1725
1726	if (hw->mac.ops.init_hw(hw))
1727		dev_err(&pdev->dev, "Hardware Error\n");
1728	if (hw->mac.type > e1000_82580) {
1729		if (adapter->flags & IGB_FLAG_DMAC) {
1730			u32 reg;
1731
1732			/*
1733			 * DMA Coalescing high water mark needs to be higher
1734			 * than * the * Rx threshold.  The Rx threshold is
1735			 * currently * pba - 6, so we * should use a high water
1736			 * mark of pba * - 4. */
1737			hwm = (pba - 4) << 10;
1738
1739			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1740			       & E1000_DMACR_DMACTHR_MASK);
1741
1742			/* transition to L0x or L1 if available..*/
1743			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1744
1745			/* watchdog timer= +-1000 usec in 32usec intervals */
1746			reg |= (1000 >> 5);
1747			wr32(E1000_DMACR, reg);
1748
1749			/* no lower threshold to disable coalescing(smart fifb)
1750			 * -UTRESH=0*/
1751			wr32(E1000_DMCRTRH, 0);
1752
1753			/* set hwm to PBA -  2 * max frame size */
1754			wr32(E1000_FCRTC, hwm);
1755
1756			/*
1757			 * This sets the time to wait before requesting tran-
1758			 * sition to * low power state to number of usecs needed
1759			 * to receive 1 512 * byte frame at gigabit line rate
1760			 */
1761			reg = rd32(E1000_DMCTLX);
1762			reg |= IGB_DMCTLX_DCFLUSH_DIS;
1763
1764			/* Delay 255 usec before entering Lx state. */
1765			reg |= 0xFF;
1766			wr32(E1000_DMCTLX, reg);
1767
1768			/* free space in Tx packet buffer to wake from DMAC */
1769			wr32(E1000_DMCTXTH,
1770			     (IGB_MIN_TXPBSIZE -
1771			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
1772			     >> 6);
1773
1774			/* make low power state decision controlled by DMAC */
1775			reg = rd32(E1000_PCIEMISC);
1776			reg |= E1000_PCIEMISC_LX_DECISION;
1777			wr32(E1000_PCIEMISC, reg);
1778		} /* end if IGB_FLAG_DMAC set */
1779	}
1780	if (hw->mac.type == e1000_82580) {
1781		u32 reg = rd32(E1000_PCIEMISC);
1782		wr32(E1000_PCIEMISC,
1783		                reg & ~E1000_PCIEMISC_LX_DECISION);
1784	}
1785	if (!netif_running(adapter->netdev))
1786		igb_power_down_link(adapter);
1787
1788	igb_update_mng_vlan(adapter);
1789
1790	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1791	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1792
1793	igb_get_phy_info(hw);
1794}
1795
1796static u32 igb_fix_features(struct net_device *netdev, u32 features)
1797{
1798	/*
1799	 * Since there is no support for separate rx/tx vlan accel
1800	 * enable/disable make sure tx flag is always in same state as rx.
1801	 */
1802	if (features & NETIF_F_HW_VLAN_RX)
1803		features |= NETIF_F_HW_VLAN_TX;
1804	else
1805		features &= ~NETIF_F_HW_VLAN_TX;
1806
1807	return features;
1808}
1809
1810static int igb_set_features(struct net_device *netdev, u32 features)
1811{
1812	u32 changed = netdev->features ^ features;
1813
1814	if (changed & NETIF_F_HW_VLAN_RX)
1815		igb_vlan_mode(netdev, features);
1816
1817	return 0;
1818}
1819
1820static const struct net_device_ops igb_netdev_ops = {
1821	.ndo_open		= igb_open,
1822	.ndo_stop		= igb_close,
1823	.ndo_start_xmit		= igb_xmit_frame,
1824	.ndo_get_stats64	= igb_get_stats64,
1825	.ndo_set_rx_mode	= igb_set_rx_mode,
1826	.ndo_set_mac_address	= igb_set_mac,
1827	.ndo_change_mtu		= igb_change_mtu,
1828	.ndo_do_ioctl		= igb_ioctl,
1829	.ndo_tx_timeout		= igb_tx_timeout,
1830	.ndo_validate_addr	= eth_validate_addr,
1831	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1832	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1833	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1834	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1835	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1836	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1837#ifdef CONFIG_NET_POLL_CONTROLLER
1838	.ndo_poll_controller	= igb_netpoll,
1839#endif
1840	.ndo_fix_features	= igb_fix_features,
1841	.ndo_set_features	= igb_set_features,
1842};
1843
1844/**
1845 * igb_probe - Device Initialization Routine
1846 * @pdev: PCI device information struct
1847 * @ent: entry in igb_pci_tbl
1848 *
1849 * Returns 0 on success, negative on failure
1850 *
1851 * igb_probe initializes an adapter identified by a pci_dev structure.
1852 * The OS initialization, configuring of the adapter private structure,
1853 * and a hardware reset occur.
1854 **/
1855static int __devinit igb_probe(struct pci_dev *pdev,
1856			       const struct pci_device_id *ent)
1857{
1858	struct net_device *netdev;
1859	struct igb_adapter *adapter;
1860	struct e1000_hw *hw;
1861	u16 eeprom_data = 0;
1862	s32 ret_val;
1863	static int global_quad_port_a; /* global quad port a indication */
1864	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1865	unsigned long mmio_start, mmio_len;
1866	int err, pci_using_dac;
1867	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1868	u8 part_str[E1000_PBANUM_LENGTH];
1869
1870	/* Catch broken hardware that put the wrong VF device ID in
1871	 * the PCIe SR-IOV capability.
1872	 */
1873	if (pdev->is_virtfn) {
1874		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1875		     pci_name(pdev), pdev->vendor, pdev->device);
1876		return -EINVAL;
1877	}
1878
1879	err = pci_enable_device_mem(pdev);
1880	if (err)
1881		return err;
1882
1883	pci_using_dac = 0;
1884	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1885	if (!err) {
1886		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1887		if (!err)
1888			pci_using_dac = 1;
1889	} else {
1890		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1891		if (err) {
1892			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1893			if (err) {
1894				dev_err(&pdev->dev, "No usable DMA "
1895					"configuration, aborting\n");
1896				goto err_dma;
1897			}
1898		}
1899	}
1900
1901	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1902	                                   IORESOURCE_MEM),
1903	                                   igb_driver_name);
1904	if (err)
1905		goto err_pci_reg;
1906
1907	pci_enable_pcie_error_reporting(pdev);
1908
1909	pci_set_master(pdev);
1910	pci_save_state(pdev);
1911
1912	err = -ENOMEM;
1913	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1914				   IGB_MAX_TX_QUEUES);
1915	if (!netdev)
1916		goto err_alloc_etherdev;
1917
1918	SET_NETDEV_DEV(netdev, &pdev->dev);
1919
1920	pci_set_drvdata(pdev, netdev);
1921	adapter = netdev_priv(netdev);
1922	adapter->netdev = netdev;
1923	adapter->pdev = pdev;
1924	hw = &adapter->hw;
1925	hw->back = adapter;
1926	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1927
1928	mmio_start = pci_resource_start(pdev, 0);
1929	mmio_len = pci_resource_len(pdev, 0);
1930
1931	err = -EIO;
1932	hw->hw_addr = ioremap(mmio_start, mmio_len);
1933	if (!hw->hw_addr)
1934		goto err_ioremap;
1935
1936	netdev->netdev_ops = &igb_netdev_ops;
1937	igb_set_ethtool_ops(netdev);
1938	netdev->watchdog_timeo = 5 * HZ;
1939
1940	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1941
1942	netdev->mem_start = mmio_start;
1943	netdev->mem_end = mmio_start + mmio_len;
1944
1945	/* PCI config space info */
1946	hw->vendor_id = pdev->vendor;
1947	hw->device_id = pdev->device;
1948	hw->revision_id = pdev->revision;
1949	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1950	hw->subsystem_device_id = pdev->subsystem_device;
1951
1952	/* Copy the default MAC, PHY and NVM function pointers */
1953	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1954	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1955	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1956	/* Initialize skew-specific constants */
1957	err = ei->get_invariants(hw);
1958	if (err)
1959		goto err_sw_init;
1960
1961	/* setup the private structure */
1962	err = igb_sw_init(adapter);
1963	if (err)
1964		goto err_sw_init;
1965
1966	igb_get_bus_info_pcie(hw);
1967
1968	hw->phy.autoneg_wait_to_complete = false;
1969
1970	/* Copper options */
1971	if (hw->phy.media_type == e1000_media_type_copper) {
1972		hw->phy.mdix = AUTO_ALL_MODES;
1973		hw->phy.disable_polarity_correction = false;
1974		hw->phy.ms_type = e1000_ms_hw_default;
1975	}
1976
1977	if (igb_check_reset_block(hw))
1978		dev_info(&pdev->dev,
1979			"PHY reset is blocked due to SOL/IDER session.\n");
1980
1981	netdev->hw_features = NETIF_F_SG |
1982			   NETIF_F_IP_CSUM |
1983			   NETIF_F_IPV6_CSUM |
1984			   NETIF_F_TSO |
1985			   NETIF_F_TSO6 |
1986			   NETIF_F_RXCSUM |
1987			   NETIF_F_HW_VLAN_RX;
1988
1989	netdev->features = netdev->hw_features |
1990			   NETIF_F_HW_VLAN_TX |
1991			   NETIF_F_HW_VLAN_FILTER;
1992
1993	netdev->vlan_features |= NETIF_F_TSO;
1994	netdev->vlan_features |= NETIF_F_TSO6;
1995	netdev->vlan_features |= NETIF_F_IP_CSUM;
1996	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1997	netdev->vlan_features |= NETIF_F_SG;
1998
1999	if (pci_using_dac) {
2000		netdev->features |= NETIF_F_HIGHDMA;
2001		netdev->vlan_features |= NETIF_F_HIGHDMA;
2002	}
2003
2004	if (hw->mac.type >= e1000_82576) {
2005		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2006		netdev->features |= NETIF_F_SCTP_CSUM;
2007	}
2008
2009	netdev->priv_flags |= IFF_UNICAST_FLT;
2010
2011	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2012
2013	/* before reading the NVM, reset the controller to put the device in a
2014	 * known good starting state */
2015	hw->mac.ops.reset_hw(hw);
2016
2017	/* make sure the NVM is good */
2018	if (hw->nvm.ops.validate(hw) < 0) {
2019		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2020		err = -EIO;
2021		goto err_eeprom;
2022	}
2023
2024	/* copy the MAC address out of the NVM */
2025	if (hw->mac.ops.read_mac_addr(hw))
2026		dev_err(&pdev->dev, "NVM Read Error\n");
2027
2028	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2029	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2030
2031	if (!is_valid_ether_addr(netdev->perm_addr)) {
2032		dev_err(&pdev->dev, "Invalid MAC Address\n");
2033		err = -EIO;
2034		goto err_eeprom;
2035	}
2036
2037	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2038	            (unsigned long) adapter);
2039	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2040	            (unsigned long) adapter);
2041
2042	INIT_WORK(&adapter->reset_task, igb_reset_task);
2043	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2044
2045	/* Initialize link properties that are user-changeable */
2046	adapter->fc_autoneg = true;
2047	hw->mac.autoneg = true;
2048	hw->phy.autoneg_advertised = 0x2f;
2049
2050	hw->fc.requested_mode = e1000_fc_default;
2051	hw->fc.current_mode = e1000_fc_default;
2052
2053	igb_validate_mdi_setting(hw);
2054
2055	/* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2056	 * enable the ACPI Magic Packet filter
2057	 */
2058
2059	if (hw->bus.func == 0)
2060		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2061	else if (hw->mac.type >= e1000_82580)
2062		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2063		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2064		                 &eeprom_data);
2065	else if (hw->bus.func == 1)
2066		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2067
2068	if (eeprom_data & eeprom_apme_mask)
2069		adapter->eeprom_wol |= E1000_WUFC_MAG;
2070
2071	/* now that we have the eeprom settings, apply the special cases where
2072	 * the eeprom may be wrong or the board simply won't support wake on
2073	 * lan on a particular port */
2074	switch (pdev->device) {
2075	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2076		adapter->eeprom_wol = 0;
2077		break;
2078	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2079	case E1000_DEV_ID_82576_FIBER:
2080	case E1000_DEV_ID_82576_SERDES:
2081		/* Wake events only supported on port A for dual fiber
2082		 * regardless of eeprom setting */
2083		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2084			adapter->eeprom_wol = 0;
2085		break;
2086	case E1000_DEV_ID_82576_QUAD_COPPER:
2087	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2088		/* if quad port adapter, disable WoL on all but port A */
2089		if (global_quad_port_a != 0)
2090			adapter->eeprom_wol = 0;
2091		else
2092			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2093		/* Reset for multiple quad port adapters */
2094		if (++global_quad_port_a == 4)
2095			global_quad_port_a = 0;
2096		break;
2097	}
2098
2099	/* initialize the wol settings based on the eeprom settings */
2100	adapter->wol = adapter->eeprom_wol;
2101	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2102
2103	/* reset the hardware with the new settings */
2104	igb_reset(adapter);
2105
2106	/* let the f/w know that the h/w is now under the control of the
2107	 * driver. */
2108	igb_get_hw_control(adapter);
2109
2110	strcpy(netdev->name, "eth%d");
2111	err = register_netdev(netdev);
2112	if (err)
2113		goto err_register;
2114
2115	/* carrier off reporting is important to ethtool even BEFORE open */
2116	netif_carrier_off(netdev);
2117
2118#ifdef CONFIG_IGB_DCA
2119	if (dca_add_requester(&pdev->dev) == 0) {
2120		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2121		dev_info(&pdev->dev, "DCA enabled\n");
2122		igb_setup_dca(adapter);
2123	}
2124
2125#endif
2126	/* do hw tstamp init after resetting */
2127	igb_init_hw_timer(adapter);
2128
2129	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2130	/* print bus type/speed/width info */
2131	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2132		 netdev->name,
2133		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2134		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2135		                                            "unknown"),
2136		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2137		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2138		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2139		   "unknown"),
2140		 netdev->dev_addr);
2141
2142	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2143	if (ret_val)
2144		strcpy(part_str, "Unknown");
2145	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2146	dev_info(&pdev->dev,
2147		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2148		adapter->msix_entries ? "MSI-X" :
2149		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2150		adapter->num_rx_queues, adapter->num_tx_queues);
2151	switch (hw->mac.type) {
2152	case e1000_i350:
2153		igb_set_eee_i350(hw);
2154		break;
2155	default:
2156		break;
2157	}
2158	return 0;
2159
2160err_register:
2161	igb_release_hw_control(adapter);
2162err_eeprom:
2163	if (!igb_check_reset_block(hw))
2164		igb_reset_phy(hw);
2165
2166	if (hw->flash_address)
2167		iounmap(hw->flash_address);
2168err_sw_init:
2169	igb_clear_interrupt_scheme(adapter);
2170	iounmap(hw->hw_addr);
2171err_ioremap:
2172	free_netdev(netdev);
2173err_alloc_etherdev:
2174	pci_release_selected_regions(pdev,
2175	                             pci_select_bars(pdev, IORESOURCE_MEM));
2176err_pci_reg:
2177err_dma:
2178	pci_disable_device(pdev);
2179	return err;
2180}
2181
2182/**
2183 * igb_remove - Device Removal Routine
2184 * @pdev: PCI device information struct
2185 *
2186 * igb_remove is called by the PCI subsystem to alert the driver
2187 * that it should release a PCI device.  The could be caused by a
2188 * Hot-Plug event, or because the driver is going to be removed from
2189 * memory.
2190 **/
2191static void __devexit igb_remove(struct pci_dev *pdev)
2192{
2193	struct net_device *netdev = pci_get_drvdata(pdev);
2194	struct igb_adapter *adapter = netdev_priv(netdev);
2195	struct e1000_hw *hw = &adapter->hw;
2196
2197	/*
2198	 * The watchdog timer may be rescheduled, so explicitly
2199	 * disable watchdog from being rescheduled.
2200	 */
2201	set_bit(__IGB_DOWN, &adapter->state);
2202	del_timer_sync(&adapter->watchdog_timer);
2203	del_timer_sync(&adapter->phy_info_timer);
2204
2205	cancel_work_sync(&adapter->reset_task);
2206	cancel_work_sync(&adapter->watchdog_task);
2207
2208#ifdef CONFIG_IGB_DCA
2209	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2210		dev_info(&pdev->dev, "DCA disabled\n");
2211		dca_remove_requester(&pdev->dev);
2212		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2213		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2214	}
2215#endif
2216
2217	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2218	 * would have already happened in close and is redundant. */
2219	igb_release_hw_control(adapter);
2220
2221	unregister_netdev(netdev);
2222
2223	igb_clear_interrupt_scheme(adapter);
2224
2225#ifdef CONFIG_PCI_IOV
2226	/* reclaim resources allocated to VFs */
2227	if (adapter->vf_data) {
2228		/* disable iov and allow time for transactions to clear */
2229		pci_disable_sriov(pdev);
2230		msleep(500);
2231
2232		kfree(adapter->vf_data);
2233		adapter->vf_data = NULL;
2234		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2235		wrfl();
2236		msleep(100);
2237		dev_info(&pdev->dev, "IOV Disabled\n");
2238	}
2239#endif
2240
2241	iounmap(hw->hw_addr);
2242	if (hw->flash_address)
2243		iounmap(hw->flash_address);
2244	pci_release_selected_regions(pdev,
2245	                             pci_select_bars(pdev, IORESOURCE_MEM));
2246
2247	free_netdev(netdev);
2248
2249	pci_disable_pcie_error_reporting(pdev);
2250
2251	pci_disable_device(pdev);
2252}
2253
2254/**
2255 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2256 * @adapter: board private structure to initialize
2257 *
2258 * This function initializes the vf specific data storage and then attempts to
2259 * allocate the VFs.  The reason for ordering it this way is because it is much
2260 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2261 * the memory for the VFs.
2262 **/
2263static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2264{
2265#ifdef CONFIG_PCI_IOV
2266	struct pci_dev *pdev = adapter->pdev;
2267
2268	if (adapter->vfs_allocated_count) {
2269		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2270		                           sizeof(struct vf_data_storage),
2271		                           GFP_KERNEL);
2272		/* if allocation failed then we do not support SR-IOV */
2273		if (!adapter->vf_data) {
2274			adapter->vfs_allocated_count = 0;
2275			dev_err(&pdev->dev, "Unable to allocate memory for VF "
2276			        "Data Storage\n");
2277		}
2278	}
2279
2280	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2281		kfree(adapter->vf_data);
2282		adapter->vf_data = NULL;
2283#endif /* CONFIG_PCI_IOV */
2284		adapter->vfs_allocated_count = 0;
2285#ifdef CONFIG_PCI_IOV
2286	} else {
2287		unsigned char mac_addr[ETH_ALEN];
2288		int i;
2289		dev_info(&pdev->dev, "%d vfs allocated\n",
2290		         adapter->vfs_allocated_count);
2291		for (i = 0; i < adapter->vfs_allocated_count; i++) {
2292			random_ether_addr(mac_addr);
2293			igb_set_vf_mac(adapter, i, mac_addr);
2294		}
2295		/* DMA Coalescing is not supported in IOV mode. */
2296		if (adapter->flags & IGB_FLAG_DMAC)
2297			adapter->flags &= ~IGB_FLAG_DMAC;
2298	}
2299#endif /* CONFIG_PCI_IOV */
2300}
2301
2302
2303/**
2304 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2305 * @adapter: board private structure to initialize
2306 *
2307 * igb_init_hw_timer initializes the function pointer and values for the hw
2308 * timer found in hardware.
2309 **/
2310static void igb_init_hw_timer(struct igb_adapter *adapter)
2311{
2312	struct e1000_hw *hw = &adapter->hw;
2313
2314	switch (hw->mac.type) {
2315	case e1000_i350:
2316	case e1000_82580:
2317		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2318		adapter->cycles.read = igb_read_clock;
2319		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2320		adapter->cycles.mult = 1;
2321		/*
2322		 * The 82580 timesync updates the system timer every 8ns by 8ns
2323		 * and the value cannot be shifted.  Instead we need to shift
2324		 * the registers to generate a 64bit timer value.  As a result
2325		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2326		 * 24 in order to generate a larger value for synchronization.
2327		 */
2328		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2329		/* disable system timer temporarily by setting bit 31 */
2330		wr32(E1000_TSAUXC, 0x80000000);
2331		wrfl();
2332
2333		/* Set registers so that rollover occurs soon to test this. */
2334		wr32(E1000_SYSTIMR, 0x00000000);
2335		wr32(E1000_SYSTIML, 0x80000000);
2336		wr32(E1000_SYSTIMH, 0x000000FF);
2337		wrfl();
2338
2339		/* enable system timer by clearing bit 31 */
2340		wr32(E1000_TSAUXC, 0x0);
2341		wrfl();
2342
2343		timecounter_init(&adapter->clock,
2344				 &adapter->cycles,
2345				 ktime_to_ns(ktime_get_real()));
2346		/*
2347		 * Synchronize our NIC clock against system wall clock. NIC
2348		 * time stamp reading requires ~3us per sample, each sample
2349		 * was pretty stable even under load => only require 10
2350		 * samples for each offset comparison.
2351		 */
2352		memset(&adapter->compare, 0, sizeof(adapter->compare));
2353		adapter->compare.source = &adapter->clock;
2354		adapter->compare.target = ktime_get_real;
2355		adapter->compare.num_samples = 10;
2356		timecompare_update(&adapter->compare, 0);
2357		break;
2358	case e1000_82576:
2359		/*
2360		 * Initialize hardware timer: we keep it running just in case
2361		 * that some program needs it later on.
2362		 */
2363		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2364		adapter->cycles.read = igb_read_clock;
2365		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2366		adapter->cycles.mult = 1;
2367		/**
2368		 * Scale the NIC clock cycle by a large factor so that
2369		 * relatively small clock corrections can be added or
2370		 * subtracted at each clock tick. The drawbacks of a large
2371		 * factor are a) that the clock register overflows more quickly
2372		 * (not such a big deal) and b) that the increment per tick has
2373		 * to fit into 24 bits.  As a result we need to use a shift of
2374		 * 19 so we can fit a value of 16 into the TIMINCA register.
2375		 */
2376		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2377		wr32(E1000_TIMINCA,
2378		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2379		                (16 << IGB_82576_TSYNC_SHIFT));
2380
2381		/* Set registers so that rollover occurs soon to test this. */
2382		wr32(E1000_SYSTIML, 0x00000000);
2383		wr32(E1000_SYSTIMH, 0xFF800000);
2384		wrfl();
2385
2386		timecounter_init(&adapter->clock,
2387				 &adapter->cycles,
2388				 ktime_to_ns(ktime_get_real()));
2389		/*
2390		 * Synchronize our NIC clock against system wall clock. NIC
2391		 * time stamp reading requires ~3us per sample, each sample
2392		 * was pretty stable even under load => only require 10
2393		 * samples for each offset comparison.
2394		 */
2395		memset(&adapter->compare, 0, sizeof(adapter->compare));
2396		adapter->compare.source = &adapter->clock;
2397		adapter->compare.target = ktime_get_real;
2398		adapter->compare.num_samples = 10;
2399		timecompare_update(&adapter->compare, 0);
2400		break;
2401	case e1000_82575:
2402		/* 82575 does not support timesync */
2403	default:
2404		break;
2405	}
2406
2407}
2408
2409/**
2410 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2411 * @adapter: board private structure to initialize
2412 *
2413 * igb_sw_init initializes the Adapter private data structure.
2414 * Fields are initialized based on PCI device information and
2415 * OS network device settings (MTU size).
2416 **/
2417static int __devinit igb_sw_init(struct igb_adapter *adapter)
2418{
2419	struct e1000_hw *hw = &adapter->hw;
2420	struct net_device *netdev = adapter->netdev;
2421	struct pci_dev *pdev = adapter->pdev;
2422
2423	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2424
2425	/* set default ring sizes */
2426	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2427	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2428
2429	/* set default ITR values */
2430	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2431	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2432
2433	/* set default work limits */
2434	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2435
2436	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2437				  VLAN_HLEN;
2438	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2439
2440	adapter->node = -1;
2441
2442	spin_lock_init(&adapter->stats64_lock);
2443#ifdef CONFIG_PCI_IOV
2444	switch (hw->mac.type) {
2445	case e1000_82576:
2446	case e1000_i350:
2447		if (max_vfs > 7) {
2448			dev_warn(&pdev->dev,
2449				 "Maximum of 7 VFs per PF, using max\n");
2450			adapter->vfs_allocated_count = 7;
2451		} else
2452			adapter->vfs_allocated_count = max_vfs;
2453		break;
2454	default:
2455		break;
2456	}
2457#endif /* CONFIG_PCI_IOV */
2458	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2459	/* i350 cannot do RSS and SR-IOV at the same time */
2460	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2461		adapter->rss_queues = 1;
2462
2463	/*
2464	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2465	 * then we should combine the queues into a queue pair in order to
2466	 * conserve interrupts due to limited supply
2467	 */
2468	if ((adapter->rss_queues > 4) ||
2469	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2470		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2471
2472	/* This call may decrease the number of queues */
2473	if (igb_init_interrupt_scheme(adapter)) {
2474		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2475		return -ENOMEM;
2476	}
2477
2478	igb_probe_vfs(adapter);
2479
2480	/* Explicitly disable IRQ since the NIC can be in any state. */
2481	igb_irq_disable(adapter);
2482
2483	if (hw->mac.type == e1000_i350)
2484		adapter->flags &= ~IGB_FLAG_DMAC;
2485
2486	set_bit(__IGB_DOWN, &adapter->state);
2487	return 0;
2488}
2489
2490/**
2491 * igb_open - Called when a network interface is made active
2492 * @netdev: network interface device structure
2493 *
2494 * Returns 0 on success, negative value on failure
2495 *
2496 * The open entry point is called when a network interface is made
2497 * active by the system (IFF_UP).  At this point all resources needed
2498 * for transmit and receive operations are allocated, the interrupt
2499 * handler is registered with the OS, the watchdog timer is started,
2500 * and the stack is notified that the interface is ready.
2501 **/
2502static int igb_open(struct net_device *netdev)
2503{
2504	struct igb_adapter *adapter = netdev_priv(netdev);
2505	struct e1000_hw *hw = &adapter->hw;
2506	int err;
2507	int i;
2508
2509	/* disallow open during test */
2510	if (test_bit(__IGB_TESTING, &adapter->state))
2511		return -EBUSY;
2512
2513	netif_carrier_off(netdev);
2514
2515	/* allocate transmit descriptors */
2516	err = igb_setup_all_tx_resources(adapter);
2517	if (err)
2518		goto err_setup_tx;
2519
2520	/* allocate receive descriptors */
2521	err = igb_setup_all_rx_resources(adapter);
2522	if (err)
2523		goto err_setup_rx;
2524
2525	igb_power_up_link(adapter);
2526
2527	/* before we allocate an interrupt, we must be ready to handle it.
2528	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2529	 * as soon as we call pci_request_irq, so we have to setup our
2530	 * clean_rx handler before we do so.  */
2531	igb_configure(adapter);
2532
2533	err = igb_request_irq(adapter);
2534	if (err)
2535		goto err_req_irq;
2536
2537	/* From here on the code is the same as igb_up() */
2538	clear_bit(__IGB_DOWN, &adapter->state);
2539
2540	for (i = 0; i < adapter->num_q_vectors; i++) {
2541		struct igb_q_vector *q_vector = adapter->q_vector[i];
2542		napi_enable(&q_vector->napi);
2543	}
2544
2545	/* Clear any pending interrupts. */
2546	rd32(E1000_ICR);
2547
2548	igb_irq_enable(adapter);
2549
2550	/* notify VFs that reset has been completed */
2551	if (adapter->vfs_allocated_count) {
2552		u32 reg_data = rd32(E1000_CTRL_EXT);
2553		reg_data |= E1000_CTRL_EXT_PFRSTD;
2554		wr32(E1000_CTRL_EXT, reg_data);
2555	}
2556
2557	netif_tx_start_all_queues(netdev);
2558
2559	/* start the watchdog. */
2560	hw->mac.get_link_status = 1;
2561	schedule_work(&adapter->watchdog_task);
2562
2563	return 0;
2564
2565err_req_irq:
2566	igb_release_hw_control(adapter);
2567	igb_power_down_link(adapter);
2568	igb_free_all_rx_resources(adapter);
2569err_setup_rx:
2570	igb_free_all_tx_resources(adapter);
2571err_setup_tx:
2572	igb_reset(adapter);
2573
2574	return err;
2575}
2576
2577/**
2578 * igb_close - Disables a network interface
2579 * @netdev: network interface device structure
2580 *
2581 * Returns 0, this is not allowed to fail
2582 *
2583 * The close entry point is called when an interface is de-activated
2584 * by the OS.  The hardware is still under the driver's control, but
2585 * needs to be disabled.  A global MAC reset is issued to stop the
2586 * hardware, and all transmit and receive resources are freed.
2587 **/
2588static int igb_close(struct net_device *netdev)
2589{
2590	struct igb_adapter *adapter = netdev_priv(netdev);
2591
2592	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2593	igb_down(adapter);
2594
2595	igb_free_irq(adapter);
2596
2597	igb_free_all_tx_resources(adapter);
2598	igb_free_all_rx_resources(adapter);
2599
2600	return 0;
2601}
2602
2603/**
2604 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2605 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2606 *
2607 * Return 0 on success, negative on failure
2608 **/
2609int igb_setup_tx_resources(struct igb_ring *tx_ring)
2610{
2611	struct device *dev = tx_ring->dev;
2612	int orig_node = dev_to_node(dev);
2613	int size;
2614
2615	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2616	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2617	if (!tx_ring->tx_buffer_info)
2618		tx_ring->tx_buffer_info = vzalloc(size);
2619	if (!tx_ring->tx_buffer_info)
2620		goto err;
2621
2622	/* round up to nearest 4K */
2623	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2624	tx_ring->size = ALIGN(tx_ring->size, 4096);
2625
2626	set_dev_node(dev, tx_ring->numa_node);
2627	tx_ring->desc = dma_alloc_coherent(dev,
2628					   tx_ring->size,
2629					   &tx_ring->dma,
2630					   GFP_KERNEL);
2631	set_dev_node(dev, orig_node);
2632	if (!tx_ring->desc)
2633		tx_ring->desc = dma_alloc_coherent(dev,
2634						   tx_ring->size,
2635						   &tx_ring->dma,
2636						   GFP_KERNEL);
2637
2638	if (!tx_ring->desc)
2639		goto err;
2640
2641	tx_ring->next_to_use = 0;
2642	tx_ring->next_to_clean = 0;
2643
2644	return 0;
2645
2646err:
2647	vfree(tx_ring->tx_buffer_info);
2648	dev_err(dev,
2649		"Unable to allocate memory for the transmit descriptor ring\n");
2650	return -ENOMEM;
2651}
2652
2653/**
2654 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2655 *				  (Descriptors) for all queues
2656 * @adapter: board private structure
2657 *
2658 * Return 0 on success, negative on failure
2659 **/
2660static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2661{
2662	struct pci_dev *pdev = adapter->pdev;
2663	int i, err = 0;
2664
2665	for (i = 0; i < adapter->num_tx_queues; i++) {
2666		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2667		if (err) {
2668			dev_err(&pdev->dev,
2669				"Allocation for Tx Queue %u failed\n", i);
2670			for (i--; i >= 0; i--)
2671				igb_free_tx_resources(adapter->tx_ring[i]);
2672			break;
2673		}
2674	}
2675
2676	return err;
2677}
2678
2679/**
2680 * igb_setup_tctl - configure the transmit control registers
2681 * @adapter: Board private structure
2682 **/
2683void igb_setup_tctl(struct igb_adapter *adapter)
2684{
2685	struct e1000_hw *hw = &adapter->hw;
2686	u32 tctl;
2687
2688	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2689	wr32(E1000_TXDCTL(0), 0);
2690
2691	/* Program the Transmit Control Register */
2692	tctl = rd32(E1000_TCTL);
2693	tctl &= ~E1000_TCTL_CT;
2694	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2695		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2696
2697	igb_config_collision_dist(hw);
2698
2699	/* Enable transmits */
2700	tctl |= E1000_TCTL_EN;
2701
2702	wr32(E1000_TCTL, tctl);
2703}
2704
2705/**
2706 * igb_configure_tx_ring - Configure transmit ring after Reset
2707 * @adapter: board private structure
2708 * @ring: tx ring to configure
2709 *
2710 * Configure a transmit ring after a reset.
2711 **/
2712void igb_configure_tx_ring(struct igb_adapter *adapter,
2713                           struct igb_ring *ring)
2714{
2715	struct e1000_hw *hw = &adapter->hw;
2716	u32 txdctl = 0;
2717	u64 tdba = ring->dma;
2718	int reg_idx = ring->reg_idx;
2719
2720	/* disable the queue */
2721	wr32(E1000_TXDCTL(reg_idx), 0);
2722	wrfl();
2723	mdelay(10);
2724
2725	wr32(E1000_TDLEN(reg_idx),
2726	                ring->count * sizeof(union e1000_adv_tx_desc));
2727	wr32(E1000_TDBAL(reg_idx),
2728	                tdba & 0x00000000ffffffffULL);
2729	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2730
2731	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2732	wr32(E1000_TDH(reg_idx), 0);
2733	writel(0, ring->tail);
2734
2735	txdctl |= IGB_TX_PTHRESH;
2736	txdctl |= IGB_TX_HTHRESH << 8;
2737	txdctl |= IGB_TX_WTHRESH << 16;
2738
2739	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2740	wr32(E1000_TXDCTL(reg_idx), txdctl);
2741}
2742
2743/**
2744 * igb_configure_tx - Configure transmit Unit after Reset
2745 * @adapter: board private structure
2746 *
2747 * Configure the Tx unit of the MAC after a reset.
2748 **/
2749static void igb_configure_tx(struct igb_adapter *adapter)
2750{
2751	int i;
2752
2753	for (i = 0; i < adapter->num_tx_queues; i++)
2754		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2755}
2756
2757/**
2758 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2759 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2760 *
2761 * Returns 0 on success, negative on failure
2762 **/
2763int igb_setup_rx_resources(struct igb_ring *rx_ring)
2764{
2765	struct device *dev = rx_ring->dev;
2766	int orig_node = dev_to_node(dev);
2767	int size, desc_len;
2768
2769	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2770	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2771	if (!rx_ring->rx_buffer_info)
2772		rx_ring->rx_buffer_info = vzalloc(size);
2773	if (!rx_ring->rx_buffer_info)
2774		goto err;
2775
2776	desc_len = sizeof(union e1000_adv_rx_desc);
2777
2778	/* Round up to nearest 4K */
2779	rx_ring->size = rx_ring->count * desc_len;
2780	rx_ring->size = ALIGN(rx_ring->size, 4096);
2781
2782	set_dev_node(dev, rx_ring->numa_node);
2783	rx_ring->desc = dma_alloc_coherent(dev,
2784					   rx_ring->size,
2785					   &rx_ring->dma,
2786					   GFP_KERNEL);
2787	set_dev_node(dev, orig_node);
2788	if (!rx_ring->desc)
2789		rx_ring->desc = dma_alloc_coherent(dev,
2790						   rx_ring->size,
2791						   &rx_ring->dma,
2792						   GFP_KERNEL);
2793
2794	if (!rx_ring->desc)
2795		goto err;
2796
2797	rx_ring->next_to_clean = 0;
2798	rx_ring->next_to_use = 0;
2799
2800	return 0;
2801
2802err:
2803	vfree(rx_ring->rx_buffer_info);
2804	rx_ring->rx_buffer_info = NULL;
2805	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2806		" ring\n");
2807	return -ENOMEM;
2808}
2809
2810/**
2811 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2812 *				  (Descriptors) for all queues
2813 * @adapter: board private structure
2814 *
2815 * Return 0 on success, negative on failure
2816 **/
2817static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2818{
2819	struct pci_dev *pdev = adapter->pdev;
2820	int i, err = 0;
2821
2822	for (i = 0; i < adapter->num_rx_queues; i++) {
2823		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2824		if (err) {
2825			dev_err(&pdev->dev,
2826				"Allocation for Rx Queue %u failed\n", i);
2827			for (i--; i >= 0; i--)
2828				igb_free_rx_resources(adapter->rx_ring[i]);
2829			break;
2830		}
2831	}
2832
2833	return err;
2834}
2835
2836/**
2837 * igb_setup_mrqc - configure the multiple receive queue control registers
2838 * @adapter: Board private structure
2839 **/
2840static void igb_setup_mrqc(struct igb_adapter *adapter)
2841{
2842	struct e1000_hw *hw = &adapter->hw;
2843	u32 mrqc, rxcsum;
2844	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2845	union e1000_reta {
2846		u32 dword;
2847		u8  bytes[4];
2848	} reta;
2849	static const u8 rsshash[40] = {
2850		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2851		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2852		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2853		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2854
2855	/* Fill out hash function seeds */
2856	for (j = 0; j < 10; j++) {
2857		u32 rsskey = rsshash[(j * 4)];
2858		rsskey |= rsshash[(j * 4) + 1] << 8;
2859		rsskey |= rsshash[(j * 4) + 2] << 16;
2860		rsskey |= rsshash[(j * 4) + 3] << 24;
2861		array_wr32(E1000_RSSRK(0), j, rsskey);
2862	}
2863
2864	num_rx_queues = adapter->rss_queues;
2865
2866	if (adapter->vfs_allocated_count) {
2867		/* 82575 and 82576 supports 2 RSS queues for VMDq */
2868		switch (hw->mac.type) {
2869		case e1000_i350:
2870		case e1000_82580:
2871			num_rx_queues = 1;
2872			shift = 0;
2873			break;
2874		case e1000_82576:
2875			shift = 3;
2876			num_rx_queues = 2;
2877			break;
2878		case e1000_82575:
2879			shift = 2;
2880			shift2 = 6;
2881		default:
2882			break;
2883		}
2884	} else {
2885		if (hw->mac.type == e1000_82575)
2886			shift = 6;
2887	}
2888
2889	for (j = 0; j < (32 * 4); j++) {
2890		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2891		if (shift2)
2892			reta.bytes[j & 3] |= num_rx_queues << shift2;
2893		if ((j & 3) == 3)
2894			wr32(E1000_RETA(j >> 2), reta.dword);
2895	}
2896
2897	/*
2898	 * Disable raw packet checksumming so that RSS hash is placed in
2899	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2900	 * offloads as they are enabled by default
2901	 */
2902	rxcsum = rd32(E1000_RXCSUM);
2903	rxcsum |= E1000_RXCSUM_PCSD;
2904
2905	if (adapter->hw.mac.type >= e1000_82576)
2906		/* Enable Receive Checksum Offload for SCTP */
2907		rxcsum |= E1000_RXCSUM_CRCOFL;
2908
2909	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2910	wr32(E1000_RXCSUM, rxcsum);
2911
2912	/* If VMDq is enabled then we set the appropriate mode for that, else
2913	 * we default to RSS so that an RSS hash is calculated per packet even
2914	 * if we are only using one queue */
2915	if (adapter->vfs_allocated_count) {
2916		if (hw->mac.type > e1000_82575) {
2917			/* Set the default pool for the PF's first queue */
2918			u32 vtctl = rd32(E1000_VT_CTL);
2919			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2920				   E1000_VT_CTL_DISABLE_DEF_POOL);
2921			vtctl |= adapter->vfs_allocated_count <<
2922				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2923			wr32(E1000_VT_CTL, vtctl);
2924		}
2925		if (adapter->rss_queues > 1)
2926			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2927		else
2928			mrqc = E1000_MRQC_ENABLE_VMDQ;
2929	} else {
2930		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2931	}
2932	igb_vmm_control(adapter);
2933
2934	/*
2935	 * Generate RSS hash based on TCP port numbers and/or
2936	 * IPv4/v6 src and dst addresses since UDP cannot be
2937	 * hashed reliably due to IP fragmentation
2938	 */
2939	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2940		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2941		E1000_MRQC_RSS_FIELD_IPV6 |
2942		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2943		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2944
2945	wr32(E1000_MRQC, mrqc);
2946}
2947
2948/**
2949 * igb_setup_rctl - configure the receive control registers
2950 * @adapter: Board private structure
2951 **/
2952void igb_setup_rctl(struct igb_adapter *adapter)
2953{
2954	struct e1000_hw *hw = &adapter->hw;
2955	u32 rctl;
2956
2957	rctl = rd32(E1000_RCTL);
2958
2959	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2960	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2961
2962	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2963		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2964
2965	/*
2966	 * enable stripping of CRC. It's unlikely this will break BMC
2967	 * redirection as it did with e1000. Newer features require
2968	 * that the HW strips the CRC.
2969	 */
2970	rctl |= E1000_RCTL_SECRC;
2971
2972	/* disable store bad packets and clear size bits. */
2973	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2974
2975	/* enable LPE to prevent packets larger than max_frame_size */
2976	rctl |= E1000_RCTL_LPE;
2977
2978	/* disable queue 0 to prevent tail write w/o re-config */
2979	wr32(E1000_RXDCTL(0), 0);
2980
2981	/* Attention!!!  For SR-IOV PF driver operations you must enable
2982	 * queue drop for all VF and PF queues to prevent head of line blocking
2983	 * if an un-trusted VF does not provide descriptors to hardware.
2984	 */
2985	if (adapter->vfs_allocated_count) {
2986		/* set all queue drop enable bits */
2987		wr32(E1000_QDE, ALL_QUEUES);
2988	}
2989
2990	wr32(E1000_RCTL, rctl);
2991}
2992
2993static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2994                                   int vfn)
2995{
2996	struct e1000_hw *hw = &adapter->hw;
2997	u32 vmolr;
2998
2999	/* if it isn't the PF check to see if VFs are enabled and
3000	 * increase the size to support vlan tags */
3001	if (vfn < adapter->vfs_allocated_count &&
3002	    adapter->vf_data[vfn].vlans_enabled)
3003		size += VLAN_TAG_SIZE;
3004
3005	vmolr = rd32(E1000_VMOLR(vfn));
3006	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3007	vmolr |= size | E1000_VMOLR_LPE;
3008	wr32(E1000_VMOLR(vfn), vmolr);
3009
3010	return 0;
3011}
3012
3013/**
3014 * igb_rlpml_set - set maximum receive packet size
3015 * @adapter: board private structure
3016 *
3017 * Configure maximum receivable packet size.
3018 **/
3019static void igb_rlpml_set(struct igb_adapter *adapter)
3020{
3021	u32 max_frame_size = adapter->max_frame_size;
3022	struct e1000_hw *hw = &adapter->hw;
3023	u16 pf_id = adapter->vfs_allocated_count;
3024
3025	if (pf_id) {
3026		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3027		/*
3028		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3029		 * to our max jumbo frame size, in case we need to enable
3030		 * jumbo frames on one of the rings later.
3031		 * This will not pass over-length frames into the default
3032		 * queue because it's gated by the VMOLR.RLPML.
3033		 */
3034		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3035	}
3036
3037	wr32(E1000_RLPML, max_frame_size);
3038}
3039
3040static inline void igb_set_vmolr(struct igb_adapter *adapter,
3041				 int vfn, bool aupe)
3042{
3043	struct e1000_hw *hw = &adapter->hw;
3044	u32 vmolr;
3045
3046	/*
3047	 * This register exists only on 82576 and newer so if we are older then
3048	 * we should exit and do nothing
3049	 */
3050	if (hw->mac.type < e1000_82576)
3051		return;
3052
3053	vmolr = rd32(E1000_VMOLR(vfn));
3054	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3055	if (aupe)
3056		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3057	else
3058		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3059
3060	/* clear all bits that might not be set */
3061	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3062
3063	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3064		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3065	/*
3066	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3067	 * multicast packets
3068	 */
3069	if (vfn <= adapter->vfs_allocated_count)
3070		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3071
3072	wr32(E1000_VMOLR(vfn), vmolr);
3073}
3074
3075/**
3076 * igb_configure_rx_ring - Configure a receive ring after Reset
3077 * @adapter: board private structure
3078 * @ring: receive ring to be configured
3079 *
3080 * Configure the Rx unit of the MAC after a reset.
3081 **/
3082void igb_configure_rx_ring(struct igb_adapter *adapter,
3083                           struct igb_ring *ring)
3084{
3085	struct e1000_hw *hw = &adapter->hw;
3086	u64 rdba = ring->dma;
3087	int reg_idx = ring->reg_idx;
3088	u32 srrctl = 0, rxdctl = 0;
3089
3090	/* disable the queue */
3091	wr32(E1000_RXDCTL(reg_idx), 0);
3092
3093	/* Set DMA base address registers */
3094	wr32(E1000_RDBAL(reg_idx),
3095	     rdba & 0x00000000ffffffffULL);
3096	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3097	wr32(E1000_RDLEN(reg_idx),
3098	               ring->count * sizeof(union e1000_adv_rx_desc));
3099
3100	/* initialize head and tail */
3101	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3102	wr32(E1000_RDH(reg_idx), 0);
3103	writel(0, ring->tail);
3104
3105	/* set descriptor configuration */
3106	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3107#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3108	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3109#else
3110	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3111#endif
3112	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3113	if (hw->mac.type == e1000_82580)
3114		srrctl |= E1000_SRRCTL_TIMESTAMP;
3115	/* Only set Drop Enable if we are supporting multiple queues */
3116	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3117		srrctl |= E1000_SRRCTL_DROP_EN;
3118
3119	wr32(E1000_SRRCTL(reg_idx), srrctl);
3120
3121	/* set filtering for VMDQ pools */
3122	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3123
3124	rxdctl |= IGB_RX_PTHRESH;
3125	rxdctl |= IGB_RX_HTHRESH << 8;
3126	rxdctl |= IGB_RX_WTHRESH << 16;
3127
3128	/* enable receive descriptor fetching */
3129	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3130	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3131}
3132
3133/**
3134 * igb_configure_rx - Configure receive Unit after Reset
3135 * @adapter: board private structure
3136 *
3137 * Configure the Rx unit of the MAC after a reset.
3138 **/
3139static void igb_configure_rx(struct igb_adapter *adapter)
3140{
3141	int i;
3142
3143	/* set UTA to appropriate mode */
3144	igb_set_uta(adapter);
3145
3146	/* set the correct pool for the PF default MAC address in entry 0 */
3147	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3148	                 adapter->vfs_allocated_count);
3149
3150	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3151	 * the Base and Length of the Rx Descriptor Ring */
3152	for (i = 0; i < adapter->num_rx_queues; i++)
3153		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3154}
3155
3156/**
3157 * igb_free_tx_resources - Free Tx Resources per Queue
3158 * @tx_ring: Tx descriptor ring for a specific queue
3159 *
3160 * Free all transmit software resources
3161 **/
3162void igb_free_tx_resources(struct igb_ring *tx_ring)
3163{
3164	igb_clean_tx_ring(tx_ring);
3165
3166	vfree(tx_ring->tx_buffer_info);
3167	tx_ring->tx_buffer_info = NULL;
3168
3169	/* if not set, then don't free */
3170	if (!tx_ring->desc)
3171		return;
3172
3173	dma_free_coherent(tx_ring->dev, tx_ring->size,
3174			  tx_ring->desc, tx_ring->dma);
3175
3176	tx_ring->desc = NULL;
3177}
3178
3179/**
3180 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3181 * @adapter: board private structure
3182 *
3183 * Free all transmit software resources
3184 **/
3185static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3186{
3187	int i;
3188
3189	for (i = 0; i < adapter->num_tx_queues; i++)
3190		igb_free_tx_resources(adapter->tx_ring[i]);
3191}
3192
3193void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3194				    struct igb_tx_buffer *tx_buffer)
3195{
3196	if (tx_buffer->skb) {
3197		dev_kfree_skb_any(tx_buffer->skb);
3198		if (tx_buffer->dma)
3199			dma_unmap_single(ring->dev,
3200					 tx_buffer->dma,
3201					 tx_buffer->length,
3202					 DMA_TO_DEVICE);
3203	} else if (tx_buffer->dma) {
3204		dma_unmap_page(ring->dev,
3205			       tx_buffer->dma,
3206			       tx_buffer->length,
3207			       DMA_TO_DEVICE);
3208	}
3209	tx_buffer->next_to_watch = NULL;
3210	tx_buffer->skb = NULL;
3211	tx_buffer->dma = 0;
3212	/* buffer_info must be completely set up in the transmit path */
3213}
3214
3215/**
3216 * igb_clean_tx_ring - Free Tx Buffers
3217 * @tx_ring: ring to be cleaned
3218 **/
3219static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3220{
3221	struct igb_tx_buffer *buffer_info;
3222	unsigned long size;
3223	u16 i;
3224
3225	if (!tx_ring->tx_buffer_info)
3226		return;
3227	/* Free all the Tx ring sk_buffs */
3228
3229	for (i = 0; i < tx_ring->count; i++) {
3230		buffer_info = &tx_ring->tx_buffer_info[i];
3231		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3232	}
3233
3234	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3235	memset(tx_ring->tx_buffer_info, 0, size);
3236
3237	/* Zero out the descriptor ring */
3238	memset(tx_ring->desc, 0, tx_ring->size);
3239
3240	tx_ring->next_to_use = 0;
3241	tx_ring->next_to_clean = 0;
3242}
3243
3244/**
3245 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3246 * @adapter: board private structure
3247 **/
3248static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3249{
3250	int i;
3251
3252	for (i = 0; i < adapter->num_tx_queues; i++)
3253		igb_clean_tx_ring(adapter->tx_ring[i]);
3254}
3255
3256/**
3257 * igb_free_rx_resources - Free Rx Resources
3258 * @rx_ring: ring to clean the resources from
3259 *
3260 * Free all receive software resources
3261 **/
3262void igb_free_rx_resources(struct igb_ring *rx_ring)
3263{
3264	igb_clean_rx_ring(rx_ring);
3265
3266	vfree(rx_ring->rx_buffer_info);
3267	rx_ring->rx_buffer_info = NULL;
3268
3269	/* if not set, then don't free */
3270	if (!rx_ring->desc)
3271		return;
3272
3273	dma_free_coherent(rx_ring->dev, rx_ring->size,
3274			  rx_ring->desc, rx_ring->dma);
3275
3276	rx_ring->desc = NULL;
3277}
3278
3279/**
3280 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3281 * @adapter: board private structure
3282 *
3283 * Free all receive software resources
3284 **/
3285static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3286{
3287	int i;
3288
3289	for (i = 0; i < adapter->num_rx_queues; i++)
3290		igb_free_rx_resources(adapter->rx_ring[i]);
3291}
3292
3293/**
3294 * igb_clean_rx_ring - Free Rx Buffers per Queue
3295 * @rx_ring: ring to free buffers from
3296 **/
3297static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3298{
3299	unsigned long size;
3300	u16 i;
3301
3302	if (!rx_ring->rx_buffer_info)
3303		return;
3304
3305	/* Free all the Rx ring sk_buffs */
3306	for (i = 0; i < rx_ring->count; i++) {
3307		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3308		if (buffer_info->dma) {
3309			dma_unmap_single(rx_ring->dev,
3310			                 buffer_info->dma,
3311					 IGB_RX_HDR_LEN,
3312					 DMA_FROM_DEVICE);
3313			buffer_info->dma = 0;
3314		}
3315
3316		if (buffer_info->skb) {
3317			dev_kfree_skb(buffer_info->skb);
3318			buffer_info->skb = NULL;
3319		}
3320		if (buffer_info->page_dma) {
3321			dma_unmap_page(rx_ring->dev,
3322			               buffer_info->page_dma,
3323				       PAGE_SIZE / 2,
3324				       DMA_FROM_DEVICE);
3325			buffer_info->page_dma = 0;
3326		}
3327		if (buffer_info->page) {
3328			put_page(buffer_info->page);
3329			buffer_info->page = NULL;
3330			buffer_info->page_offset = 0;
3331		}
3332	}
3333
3334	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3335	memset(rx_ring->rx_buffer_info, 0, size);
3336
3337	/* Zero out the descriptor ring */
3338	memset(rx_ring->desc, 0, rx_ring->size);
3339
3340	rx_ring->next_to_clean = 0;
3341	rx_ring->next_to_use = 0;
3342}
3343
3344/**
3345 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3346 * @adapter: board private structure
3347 **/
3348static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3349{
3350	int i;
3351
3352	for (i = 0; i < adapter->num_rx_queues; i++)
3353		igb_clean_rx_ring(adapter->rx_ring[i]);
3354}
3355
3356/**
3357 * igb_set_mac - Change the Ethernet Address of the NIC
3358 * @netdev: network interface device structure
3359 * @p: pointer to an address structure
3360 *
3361 * Returns 0 on success, negative on failure
3362 **/
3363static int igb_set_mac(struct net_device *netdev, void *p)
3364{
3365	struct igb_adapter *adapter = netdev_priv(netdev);
3366	struct e1000_hw *hw = &adapter->hw;
3367	struct sockaddr *addr = p;
3368
3369	if (!is_valid_ether_addr(addr->sa_data))
3370		return -EADDRNOTAVAIL;
3371
3372	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3373	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3374
3375	/* set the correct pool for the new PF MAC address in entry 0 */
3376	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3377	                 adapter->vfs_allocated_count);
3378
3379	return 0;
3380}
3381
3382/**
3383 * igb_write_mc_addr_list - write multicast addresses to MTA
3384 * @netdev: network interface device structure
3385 *
3386 * Writes multicast address list to the MTA hash table.
3387 * Returns: -ENOMEM on failure
3388 *                0 on no addresses written
3389 *                X on writing X addresses to MTA
3390 **/
3391static int igb_write_mc_addr_list(struct net_device *netdev)
3392{
3393	struct igb_adapter *adapter = netdev_priv(netdev);
3394	struct e1000_hw *hw = &adapter->hw;
3395	struct netdev_hw_addr *ha;
3396	u8  *mta_list;
3397	int i;
3398
3399	if (netdev_mc_empty(netdev)) {
3400		/* nothing to program, so clear mc list */
3401		igb_update_mc_addr_list(hw, NULL, 0);
3402		igb_restore_vf_multicasts(adapter);
3403		return 0;
3404	}
3405
3406	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3407	if (!mta_list)
3408		return -ENOMEM;
3409
3410	/* The shared function expects a packed array of only addresses. */
3411	i = 0;
3412	netdev_for_each_mc_addr(ha, netdev)
3413		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3414
3415	igb_update_mc_addr_list(hw, mta_list, i);
3416	kfree(mta_list);
3417
3418	return netdev_mc_count(netdev);
3419}
3420
3421/**
3422 * igb_write_uc_addr_list - write unicast addresses to RAR table
3423 * @netdev: network interface device structure
3424 *
3425 * Writes unicast address list to the RAR table.
3426 * Returns: -ENOMEM on failure/insufficient address space
3427 *                0 on no addresses written
3428 *                X on writing X addresses to the RAR table
3429 **/
3430static int igb_write_uc_addr_list(struct net_device *netdev)
3431{
3432	struct igb_adapter *adapter = netdev_priv(netdev);
3433	struct e1000_hw *hw = &adapter->hw;
3434	unsigned int vfn = adapter->vfs_allocated_count;
3435	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3436	int count = 0;
3437
3438	/* return ENOMEM indicating insufficient memory for addresses */
3439	if (netdev_uc_count(netdev) > rar_entries)
3440		return -ENOMEM;
3441
3442	if (!netdev_uc_empty(netdev) && rar_entries) {
3443		struct netdev_hw_addr *ha;
3444
3445		netdev_for_each_uc_addr(ha, netdev) {
3446			if (!rar_entries)
3447				break;
3448			igb_rar_set_qsel(adapter, ha->addr,
3449			                 rar_entries--,
3450			                 vfn);
3451			count++;
3452		}
3453	}
3454	/* write the addresses in reverse order to avoid write combining */
3455	for (; rar_entries > 0 ; rar_entries--) {
3456		wr32(E1000_RAH(rar_entries), 0);
3457		wr32(E1000_RAL(rar_entries), 0);
3458	}
3459	wrfl();
3460
3461	return count;
3462}
3463
3464/**
3465 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3466 * @netdev: network interface device structure
3467 *
3468 * The set_rx_mode entry point is called whenever the unicast or multicast
3469 * address lists or the network interface flags are updated.  This routine is
3470 * responsible for configuring the hardware for proper unicast, multicast,
3471 * promiscuous mode, and all-multi behavior.
3472 **/
3473static void igb_set_rx_mode(struct net_device *netdev)
3474{
3475	struct igb_adapter *adapter = netdev_priv(netdev);
3476	struct e1000_hw *hw = &adapter->hw;
3477	unsigned int vfn = adapter->vfs_allocated_count;
3478	u32 rctl, vmolr = 0;
3479	int count;
3480
3481	/* Check for Promiscuous and All Multicast modes */
3482	rctl = rd32(E1000_RCTL);
3483
3484	/* clear the effected bits */
3485	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3486
3487	if (netdev->flags & IFF_PROMISC) {
3488		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3489		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3490	} else {
3491		if (netdev->flags & IFF_ALLMULTI) {
3492			rctl |= E1000_RCTL_MPE;
3493			vmolr |= E1000_VMOLR_MPME;
3494		} else {
3495			/*
3496			 * Write addresses to the MTA, if the attempt fails
3497			 * then we should just turn on promiscuous mode so
3498			 * that we can at least receive multicast traffic
3499			 */
3500			count = igb_write_mc_addr_list(netdev);
3501			if (count < 0) {
3502				rctl |= E1000_RCTL_MPE;
3503				vmolr |= E1000_VMOLR_MPME;
3504			} else if (count) {
3505				vmolr |= E1000_VMOLR_ROMPE;
3506			}
3507		}
3508		/*
3509		 * Write addresses to available RAR registers, if there is not
3510		 * sufficient space to store all the addresses then enable
3511		 * unicast promiscuous mode
3512		 */
3513		count = igb_write_uc_addr_list(netdev);
3514		if (count < 0) {
3515			rctl |= E1000_RCTL_UPE;
3516			vmolr |= E1000_VMOLR_ROPE;
3517		}
3518		rctl |= E1000_RCTL_VFE;
3519	}
3520	wr32(E1000_RCTL, rctl);
3521
3522	/*
3523	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3524	 * the VMOLR to enable the appropriate modes.  Without this workaround
3525	 * we will have issues with VLAN tag stripping not being done for frames
3526	 * that are only arriving because we are the default pool
3527	 */
3528	if (hw->mac.type < e1000_82576)
3529		return;
3530
3531	vmolr |= rd32(E1000_VMOLR(vfn)) &
3532	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3533	wr32(E1000_VMOLR(vfn), vmolr);
3534	igb_restore_vf_multicasts(adapter);
3535}
3536
3537static void igb_check_wvbr(struct igb_adapter *adapter)
3538{
3539	struct e1000_hw *hw = &adapter->hw;
3540	u32 wvbr = 0;
3541
3542	switch (hw->mac.type) {
3543	case e1000_82576:
3544	case e1000_i350:
3545		if (!(wvbr = rd32(E1000_WVBR)))
3546			return;
3547		break;
3548	default:
3549		break;
3550	}
3551
3552	adapter->wvbr |= wvbr;
3553}
3554
3555#define IGB_STAGGERED_QUEUE_OFFSET 8
3556
3557static void igb_spoof_check(struct igb_adapter *adapter)
3558{
3559	int j;
3560
3561	if (!adapter->wvbr)
3562		return;
3563
3564	for(j = 0; j < adapter->vfs_allocated_count; j++) {
3565		if (adapter->wvbr & (1 << j) ||
3566		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3567			dev_warn(&adapter->pdev->dev,
3568				"Spoof event(s) detected on VF %d\n", j);
3569			adapter->wvbr &=
3570				~((1 << j) |
3571				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3572		}
3573	}
3574}
3575
3576/* Need to wait a few seconds after link up to get diagnostic information from
3577 * the phy */
3578static void igb_update_phy_info(unsigned long data)
3579{
3580	struct igb_adapter *adapter = (struct igb_adapter *) data;
3581	igb_get_phy_info(&adapter->hw);
3582}
3583
3584/**
3585 * igb_has_link - check shared code for link and determine up/down
3586 * @adapter: pointer to driver private info
3587 **/
3588bool igb_has_link(struct igb_adapter *adapter)
3589{
3590	struct e1000_hw *hw = &adapter->hw;
3591	bool link_active = false;
3592	s32 ret_val = 0;
3593
3594	/* get_link_status is set on LSC (link status) interrupt or
3595	 * rx sequence error interrupt.  get_link_status will stay
3596	 * false until the e1000_check_for_link establishes link
3597	 * for copper adapters ONLY
3598	 */
3599	switch (hw->phy.media_type) {
3600	case e1000_media_type_copper:
3601		if (hw->mac.get_link_status) {
3602			ret_val = hw->mac.ops.check_for_link(hw);
3603			link_active = !hw->mac.get_link_status;
3604		} else {
3605			link_active = true;
3606		}
3607		break;
3608	case e1000_media_type_internal_serdes:
3609		ret_val = hw->mac.ops.check_for_link(hw);
3610		link_active = hw->mac.serdes_has_link;
3611		break;
3612	default:
3613	case e1000_media_type_unknown:
3614		break;
3615	}
3616
3617	return link_active;
3618}
3619
3620static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3621{
3622	bool ret = false;
3623	u32 ctrl_ext, thstat;
3624
3625	/* check for thermal sensor event on i350, copper only */
3626	if (hw->mac.type == e1000_i350) {
3627		thstat = rd32(E1000_THSTAT);
3628		ctrl_ext = rd32(E1000_CTRL_EXT);
3629
3630		if ((hw->phy.media_type == e1000_media_type_copper) &&
3631		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3632			ret = !!(thstat & event);
3633		}
3634	}
3635
3636	return ret;
3637}
3638
3639/**
3640 * igb_watchdog - Timer Call-back
3641 * @data: pointer to adapter cast into an unsigned long
3642 **/
3643static void igb_watchdog(unsigned long data)
3644{
3645	struct igb_adapter *adapter = (struct igb_adapter *)data;
3646	/* Do the rest outside of interrupt context */
3647	schedule_work(&adapter->watchdog_task);
3648}
3649
3650static void igb_watchdog_task(struct work_struct *work)
3651{
3652	struct igb_adapter *adapter = container_of(work,
3653	                                           struct igb_adapter,
3654                                                   watchdog_task);
3655	struct e1000_hw *hw = &adapter->hw;
3656	struct net_device *netdev = adapter->netdev;
3657	u32 link;
3658	int i;
3659
3660	link = igb_has_link(adapter);
3661	if (link) {
3662		if (!netif_carrier_ok(netdev)) {
3663			u32 ctrl;
3664			hw->mac.ops.get_speed_and_duplex(hw,
3665			                                 &adapter->link_speed,
3666			                                 &adapter->link_duplex);
3667
3668			ctrl = rd32(E1000_CTRL);
3669			/* Links status message must follow this format */
3670			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3671				 "Flow Control: %s\n",
3672			       netdev->name,
3673			       adapter->link_speed,
3674			       adapter->link_duplex == FULL_DUPLEX ?
3675				 "Full Duplex" : "Half Duplex",
3676			       ((ctrl & E1000_CTRL_TFCE) &&
3677			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3678			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3679			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3680
3681			/* check for thermal sensor event */
3682			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3683				printk(KERN_INFO "igb: %s The network adapter "
3684						 "link speed was downshifted "
3685						 "because it overheated.\n",
3686						 netdev->name);
3687			}
3688
3689			/* adjust timeout factor according to speed/duplex */
3690			adapter->tx_timeout_factor = 1;
3691			switch (adapter->link_speed) {
3692			case SPEED_10:
3693				adapter->tx_timeout_factor = 14;
3694				break;
3695			case SPEED_100:
3696				/* maybe add some timeout factor ? */
3697				break;
3698			}
3699
3700			netif_carrier_on(netdev);
3701
3702			igb_ping_all_vfs(adapter);
3703			igb_check_vf_rate_limit(adapter);
3704
3705			/* link state has changed, schedule phy info update */
3706			if (!test_bit(__IGB_DOWN, &adapter->state))
3707				mod_timer(&adapter->phy_info_timer,
3708					  round_jiffies(jiffies + 2 * HZ));
3709		}
3710	} else {
3711		if (netif_carrier_ok(netdev)) {
3712			adapter->link_speed = 0;
3713			adapter->link_duplex = 0;
3714
3715			/* check for thermal sensor event */
3716			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3717				printk(KERN_ERR "igb: %s The network adapter "
3718						"was stopped because it "
3719						"overheated.\n",
3720						netdev->name);
3721			}
3722
3723			/* Links status message must follow this format */
3724			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3725			       netdev->name);
3726			netif_carrier_off(netdev);
3727
3728			igb_ping_all_vfs(adapter);
3729
3730			/* link state has changed, schedule phy info update */
3731			if (!test_bit(__IGB_DOWN, &adapter->state))
3732				mod_timer(&adapter->phy_info_timer,
3733					  round_jiffies(jiffies + 2 * HZ));
3734		}
3735	}
3736
3737	spin_lock(&adapter->stats64_lock);
3738	igb_update_stats(adapter, &adapter->stats64);
3739	spin_unlock(&adapter->stats64_lock);
3740
3741	for (i = 0; i < adapter->num_tx_queues; i++) {
3742		struct igb_ring *tx_ring = adapter->tx_ring[i];
3743		if (!netif_carrier_ok(netdev)) {
3744			/* We've lost link, so the controller stops DMA,
3745			 * but we've got queued Tx work that's never going
3746			 * to get done, so reset controller to flush Tx.
3747			 * (Do the reset outside of interrupt context). */
3748			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3749				adapter->tx_timeout_count++;
3750				schedule_work(&adapter->reset_task);
3751				/* return immediately since reset is imminent */
3752				return;
3753			}
3754		}
3755
3756		/* Force detection of hung controller every watchdog period */
3757		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3758	}
3759
3760	/* Cause software interrupt to ensure rx ring is cleaned */
3761	if (adapter->msix_entries) {
3762		u32 eics = 0;
3763		for (i = 0; i < adapter->num_q_vectors; i++) {
3764			struct igb_q_vector *q_vector = adapter->q_vector[i];
3765			eics |= q_vector->eims_value;
3766		}
3767		wr32(E1000_EICS, eics);
3768	} else {
3769		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3770	}
3771
3772	igb_spoof_check(adapter);
3773
3774	/* Reset the timer */
3775	if (!test_bit(__IGB_DOWN, &adapter->state))
3776		mod_timer(&adapter->watchdog_timer,
3777			  round_jiffies(jiffies + 2 * HZ));
3778}
3779
/*
 * Latency classes tracked per ring container by the dynamic ITR code.
 * igb_set_itr() maps them to interrupt rates: lowest_latency to
 * IGB_70K_ITR, low_latency to IGB_20K_ITR and bulk_latency to IGB_4K_ITR.
 */
enum latency_range {
	lowest_latency,		/* = 0 */
	low_latency,		/* = 1 */
	bulk_latency,		/* = 2 */
	latency_invalid = 255
};
3786
3787/**
3788 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3789 *
3790 *      Stores a new ITR value based on strictly on packet size.  This
3791 *      algorithm is less sophisticated than that used in igb_update_itr,
3792 *      due to the difficulty of synchronizing statistics across multiple
3793 *      receive rings.  The divisors and thresholds used by this function
3794 *      were determined based on theoretical maximum wire speed and testing
3795 *      data, in order to minimize response time while increasing bulk
3796 *      throughput.
3797 *      This functionality is controlled by the InterruptThrottleRate module
3798 *      parameter (see igb_param.c)
3799 *      NOTE:  This function is called only when operating in a multiqueue
3800 *             receive environment.
3801 * @q_vector: pointer to q_vector
3802 **/
3803static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3804{
3805	int new_val = q_vector->itr_val;
3806	int avg_wire_size = 0;
3807	struct igb_adapter *adapter = q_vector->adapter;
3808	unsigned int packets;
3809
3810	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3811	 * ints/sec - ITR timer value of 120 ticks.
3812	 */
3813	if (adapter->link_speed != SPEED_1000) {
3814		new_val = IGB_4K_ITR;
3815		goto set_itr_val;
3816	}
3817
3818	packets = q_vector->rx.total_packets;
3819	if (packets)
3820		avg_wire_size = q_vector->rx.total_bytes / packets;
3821
3822	packets = q_vector->tx.total_packets;
3823	if (packets)
3824		avg_wire_size = max_t(u32, avg_wire_size,
3825				      q_vector->tx.total_bytes / packets);
3826
3827	/* if avg_wire_size isn't set no work was done */
3828	if (!avg_wire_size)
3829		goto clear_counts;
3830
3831	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3832	avg_wire_size += 24;
3833
3834	/* Don't starve jumbo frames */
3835	avg_wire_size = min(avg_wire_size, 3000);
3836
3837	/* Give a little boost to mid-size frames */
3838	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3839		new_val = avg_wire_size / 3;
3840	else
3841		new_val = avg_wire_size / 2;
3842
3843	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3844	if (new_val < IGB_20K_ITR &&
3845	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3846	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3847		new_val = IGB_20K_ITR;
3848
3849set_itr_val:
3850	if (new_val != q_vector->itr_val) {
3851		q_vector->itr_val = new_val;
3852		q_vector->set_itr = 1;
3853	}
3854clear_counts:
3855	q_vector->rx.total_bytes = 0;
3856	q_vector->rx.total_packets = 0;
3857	q_vector->tx.total_bytes = 0;
3858	q_vector->tx.total_packets = 0;
3859}
3860
3861/**
3862 * igb_update_itr - update the dynamic ITR value based on statistics
3863 *      Stores a new ITR value based on packets and byte
3864 *      counts during the last interrupt.  The advantage of per interrupt
3865 *      computation is faster updates and more accurate ITR for the current
3866 *      traffic pattern.  Constants in this function were computed
3867 *      based on theoretical maximum wire speed and thresholds were set based
3868 *      on testing data as well as attempting to minimize response time
3869 *      while increasing bulk throughput.
3870 *      this functionality is controlled by the InterruptThrottleRate module
3871 *      parameter (see igb_param.c)
3872 *      NOTE:  These calculations are only valid when operating in a single-
3873 *             queue environment.
3874 * @q_vector: pointer to q_vector
3875 * @ring_container: ring info to update the itr for
3876 **/
3877static void igb_update_itr(struct igb_q_vector *q_vector,
3878			   struct igb_ring_container *ring_container)
3879{
3880	unsigned int packets = ring_container->total_packets;
3881	unsigned int bytes = ring_container->total_bytes;
3882	u8 itrval = ring_container->itr;
3883
3884	/* no packets, exit with status unchanged */
3885	if (packets == 0)
3886		return;
3887
3888	switch (itrval) {
3889	case lowest_latency:
3890		/* handle TSO and jumbo frames */
3891		if (bytes/packets > 8000)
3892			itrval = bulk_latency;
3893		else if ((packets < 5) && (bytes > 512))
3894			itrval = low_latency;
3895		break;
3896	case low_latency:  /* 50 usec aka 20000 ints/s */
3897		if (bytes > 10000) {
3898			/* this if handles the TSO accounting */
3899			if (bytes/packets > 8000) {
3900				itrval = bulk_latency;
3901			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3902				itrval = bulk_latency;
3903			} else if ((packets > 35)) {
3904				itrval = lowest_latency;
3905			}
3906		} else if (bytes/packets > 2000) {
3907			itrval = bulk_latency;
3908		} else if (packets <= 2 && bytes < 512) {
3909			itrval = lowest_latency;
3910		}
3911		break;
3912	case bulk_latency: /* 250 usec aka 4000 ints/s */
3913		if (bytes > 25000) {
3914			if (packets > 35)
3915				itrval = low_latency;
3916		} else if (bytes < 1500) {
3917			itrval = low_latency;
3918		}
3919		break;
3920	}
3921
3922	/* clear work counters since we have the values we need */
3923	ring_container->total_bytes = 0;
3924	ring_container->total_packets = 0;
3925
3926	/* write updated itr to ring container */
3927	ring_container->itr = itrval;
3928}
3929
3930static void igb_set_itr(struct igb_q_vector *q_vector)
3931{
3932	struct igb_adapter *adapter = q_vector->adapter;
3933	u32 new_itr = q_vector->itr_val;
3934	u8 current_itr = 0;
3935
3936	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3937	if (adapter->link_speed != SPEED_1000) {
3938		current_itr = 0;
3939		new_itr = IGB_4K_ITR;
3940		goto set_itr_now;
3941	}
3942
3943	igb_update_itr(q_vector, &q_vector->tx);
3944	igb_update_itr(q_vector, &q_vector->rx);
3945
3946	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3947
3948	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3949	if (current_itr == lowest_latency &&
3950	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3951	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3952		current_itr = low_latency;
3953
3954	switch (current_itr) {
3955	/* counts and packets in update_itr are dependent on these numbers */
3956	case lowest_latency:
3957		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3958		break;
3959	case low_latency:
3960		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3961		break;
3962	case bulk_latency:
3963		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3964		break;
3965	default:
3966		break;
3967	}
3968
3969set_itr_now:
3970	if (new_itr != q_vector->itr_val) {
3971		/* this attempts to bias the interrupt rate towards Bulk
3972		 * by adding intermediate steps when interrupt rate is
3973		 * increasing */
3974		new_itr = new_itr > q_vector->itr_val ?
3975		             max((new_itr * q_vector->itr_val) /
3976		                 (new_itr + (q_vector->itr_val >> 2)),
3977				 new_itr) :
3978			     new_itr;
3979		/* Don't write the value here; it resets the adapter's
3980		 * internal timer, and causes us to delay far longer than
3981		 * we should between interrupts.  Instead, we write the ITR
3982		 * value at the beginning of the next interrupt so the timing
3983		 * ends up being correct.
3984		 */
3985		q_vector->itr_val = new_itr;
3986		q_vector->set_itr = 1;
3987	}
3988}
3989
3990void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3991		     u32 type_tucmd, u32 mss_l4len_idx)
3992{
3993	struct e1000_adv_tx_context_desc *context_desc;
3994	u16 i = tx_ring->next_to_use;
3995
3996	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3997
3998	i++;
3999	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4000
4001	/* set bits to identify this as an advanced context descriptor */
4002	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4003
4004	/* For 82575, context index must be unique per ring. */
4005	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4006		mss_l4len_idx |= tx_ring->reg_idx << 4;
4007
4008	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4009	context_desc->seqnum_seed	= 0;
4010	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4011	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4012}
4013
4014static int igb_tso(struct igb_ring *tx_ring,
4015		   struct igb_tx_buffer *first,
4016		   u8 *hdr_len)
4017{
4018	struct sk_buff *skb = first->skb;
4019	u32 vlan_macip_lens, type_tucmd;
4020	u32 mss_l4len_idx, l4len;
4021
4022	if (!skb_is_gso(skb))
4023		return 0;
4024
4025	if (skb_header_cloned(skb)) {
4026		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4027		if (err)
4028			return err;
4029	}
4030
4031	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4032	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4033
4034	if (first->protocol == __constant_htons(ETH_P_IP)) {
4035		struct iphdr *iph = ip_hdr(skb);
4036		iph->tot_len = 0;
4037		iph->check = 0;
4038		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4039							 iph->daddr, 0,
4040							 IPPROTO_TCP,
4041							 0);
4042		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4043		first->tx_flags |= IGB_TX_FLAGS_TSO |
4044				   IGB_TX_FLAGS_CSUM |
4045				   IGB_TX_FLAGS_IPV4;
4046	} else if (skb_is_gso_v6(skb)) {
4047		ipv6_hdr(skb)->payload_len = 0;
4048		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4049						       &ipv6_hdr(skb)->daddr,
4050						       0, IPPROTO_TCP, 0);
4051		first->tx_flags |= IGB_TX_FLAGS_TSO |
4052				   IGB_TX_FLAGS_CSUM;
4053	}
4054
4055	/* compute header lengths */
4056	l4len = tcp_hdrlen(skb);
4057	*hdr_len = skb_transport_offset(skb) + l4len;
4058
4059	/* update gso size and bytecount with header size */
4060	first->gso_segs = skb_shinfo(skb)->gso_segs;
4061	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4062
4063	/* MSS L4LEN IDX */
4064	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4065	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4066
4067	/* VLAN MACLEN IPLEN */
4068	vlan_macip_lens = skb_network_header_len(skb);
4069	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4070	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4071
4072	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4073
4074	return 1;
4075}
4076
4077static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4078{
4079	struct sk_buff *skb = first->skb;
4080	u32 vlan_macip_lens = 0;
4081	u32 mss_l4len_idx = 0;
4082	u32 type_tucmd = 0;
4083
4084	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4085		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4086			return;
4087	} else {
4088		u8 l4_hdr = 0;
4089		switch (first->protocol) {
4090		case __constant_htons(ETH_P_IP):
4091			vlan_macip_lens |= skb_network_header_len(skb);
4092			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4093			l4_hdr = ip_hdr(skb)->protocol;
4094			break;
4095		case __constant_htons(ETH_P_IPV6):
4096			vlan_macip_lens |= skb_network_header_len(skb);
4097			l4_hdr = ipv6_hdr(skb)->nexthdr;
4098			break;
4099		default:
4100			if (unlikely(net_ratelimit())) {
4101				dev_warn(tx_ring->dev,
4102				 "partial checksum but proto=%x!\n",
4103				 first->protocol);
4104			}
4105			break;
4106		}
4107
4108		switch (l4_hdr) {
4109		case IPPROTO_TCP:
4110			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4111			mss_l4len_idx = tcp_hdrlen(skb) <<
4112					E1000_ADVTXD_L4LEN_SHIFT;
4113			break;
4114		case IPPROTO_SCTP:
4115			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4116			mss_l4len_idx = sizeof(struct sctphdr) <<
4117					E1000_ADVTXD_L4LEN_SHIFT;
4118			break;
4119		case IPPROTO_UDP:
4120			mss_l4len_idx = sizeof(struct udphdr) <<
4121					E1000_ADVTXD_L4LEN_SHIFT;
4122			break;
4123		default:
4124			if (unlikely(net_ratelimit())) {
4125				dev_warn(tx_ring->dev,
4126				 "partial checksum but l4 proto=%x!\n",
4127				 l4_hdr);
4128			}
4129			break;
4130		}
4131
4132		/* update TX checksum flag */
4133		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4134	}
4135
4136	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4137	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4138
4139	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4140}
4141
4142static __le32 igb_tx_cmd_type(u32 tx_flags)
4143{
4144	/* set type for advanced descriptor with frame checksum insertion */
4145	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4146				      E1000_ADVTXD_DCMD_IFCS |
4147				      E1000_ADVTXD_DCMD_DEXT);
4148
4149	/* set HW vlan bit if vlan is present */
4150	if (tx_flags & IGB_TX_FLAGS_VLAN)
4151		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4152
4153	/* set timestamp bit if present */
4154	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4155		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4156
4157	/* set segmentation bits for TSO */
4158	if (tx_flags & IGB_TX_FLAGS_TSO)
4159		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4160
4161	return cmd_type;
4162}
4163
4164static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4165				 union e1000_adv_tx_desc *tx_desc,
4166				 u32 tx_flags, unsigned int paylen)
4167{
4168	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4169
4170	/* 82575 requires a unique index per ring if any offload is enabled */
4171	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4172	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4173		olinfo_status |= tx_ring->reg_idx << 4;
4174
4175	/* insert L4 checksum */
4176	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4177		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4178
4179		/* insert IPv4 checksum */
4180		if (tx_flags & IGB_TX_FLAGS_IPV4)
4181			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4182	}
4183
4184	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4185}
4186
4187/*
4188 * The largest size we can write to the descriptor is 65535.  In order to
4189 * maintain a power of two alignment we have to limit ourselves to 32K.
4190 */
4191#define IGB_MAX_TXD_PWR	15
4192#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
4193
/*
 * igb_tx_map - DMA-map an skb and write its data descriptors
 * @tx_ring: ring the packet is queued on
 * @first: tx buffer entry recorded for this packet
 * @hdr_len: header length, subtracted from skb->len to form the payload
 *           length reported in the first descriptor
 *
 * Maps the linear part of the skb and then each page fragment.  A mapping
 * larger than IGB_MAX_DATA_PER_TXD is spread over several descriptors.  On
 * success next_to_use is advanced and the ring tail register is written.
 * If a mapping fails, the tx buffers from the current index back to @first
 * are released and next_to_use is set to the index of @first.
 */
static void igb_tx_map(struct igb_ring *tx_ring,
		       struct igb_tx_buffer *first,
		       const u8 hdr_len)
{
	struct sk_buff *skb = first->skb;
	struct igb_tx_buffer *tx_buffer_info;
	union e1000_adv_tx_desc *tx_desc;
	dma_addr_t dma;
	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	unsigned int paylen = skb->len - hdr_len;
	__le32 cmd_type;
	u32 tx_flags = first->tx_flags;
	u16 i = tx_ring->next_to_use;

	tx_desc = IGB_TX_DESC(tx_ring, i);

	/* offload info goes into the first data descriptor only; the
	 * command/type bits are reused for every descriptor of the packet */
	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
	cmd_type = igb_tx_cmd_type(tx_flags);

	/* map the linear (head) part of the skb */
	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
	if (dma_mapping_error(tx_ring->dev, dma))
		goto dma_error;

	/* record length, and DMA address */
	first->length = size;
	first->dma = dma;
	tx_desc->read.buffer_addr = cpu_to_le64(dma);

	/* each pass finishes the current mapping and, while fragment data
	 * remains, maps the next fragment */
	for (;;) {
		/* split an oversized mapping into IGB_MAX_DATA_PER_TXD
		 * sized descriptors */
		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
			tx_desc->read.cmd_type_len =
				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);

			i++;
			tx_desc++;
			if (i == tx_ring->count) {
				tx_desc = IGB_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += IGB_MAX_DATA_PER_TXD;
			size -= IGB_MAX_DATA_PER_TXD;

			tx_desc->read.olinfo_status = 0;
			tx_desc->read.buffer_addr = cpu_to_le64(dma);
		}

		/* no fragment data left: the current descriptor is the last
		 * one and is completed after the loop */
		if (likely(!data_len))
			break;

		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);

		/* advance to the next descriptor, wrapping at the ring end */
		i++;
		tx_desc++;
		if (i == tx_ring->count) {
			tx_desc = IGB_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = frag->size;
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
				   size, DMA_TO_DEVICE);
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* remember the fragment mapping in the buffer entry that
		 * matches the descriptor index */
		tx_buffer_info = &tx_ring->tx_buffer_info[i];
		tx_buffer_info->length = size;
		tx_buffer_info->dma = dma;

		tx_desc->read.olinfo_status = 0;
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		frag++;
	}

	/* write last descriptor with RS and EOP bits */
	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
	tx_desc->read.cmd_type_len = cmd_type;

	/* set the timestamp */
	first->time_stamp = jiffies;

	/*
	 * Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.  (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	/* next_to_use points one past the last descriptor written */
	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	/* hand the new descriptors to the hardware */
	writel(i, tx_ring->tail);

	/* we need this if more than one processor can write to our tail
	 * at a time, it synchronizes IO on IA64/Altix systems */
	mmiowb();

	return;

dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer_info map */
	/* walks backwards from the failing index, wrapping at 0, until the
	 * entry for @first has been released */
	for (;;) {
		tx_buffer_info = &tx_ring->tx_buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
		if (tx_buffer_info == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}
4323
4324static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4325{
4326	struct net_device *netdev = tx_ring->netdev;
4327
4328	netif_stop_subqueue(netdev, tx_ring->queue_index);
4329
4330	/* Herbert's original patch had:
4331	 *  smp_mb__after_netif_stop_queue();
4332	 * but since that doesn't exist yet, just open code it. */
4333	smp_mb();
4334
4335	/* We need to check again in a case another CPU has just
4336	 * made room available. */
4337	if (igb_desc_unused(tx_ring) < size)
4338		return -EBUSY;
4339
4340	/* A reprieve! */
4341	netif_wake_subqueue(netdev, tx_ring->queue_index);
4342
4343	u64_stats_update_begin(&tx_ring->tx_syncp2);
4344	tx_ring->tx_stats.restart_queue2++;
4345	u64_stats_update_end(&tx_ring->tx_syncp2);
4346
4347	return 0;
4348}
4349
4350static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4351{
4352	if (igb_desc_unused(tx_ring) >= size)
4353		return 0;
4354	return __igb_maybe_stop_tx(tx_ring, size);
4355}
4356
4357netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4358				struct igb_ring *tx_ring)
4359{
4360	struct igb_tx_buffer *first;
4361	int tso;
4362	u32 tx_flags = 0;
4363	__be16 protocol = vlan_get_protocol(skb);
4364	u8 hdr_len = 0;
4365
4366	/* need: 1 descriptor per page,
4367	 *       + 2 desc gap to keep tail from touching head,
4368	 *       + 1 desc for skb->data,
4369	 *       + 1 desc for context descriptor,
4370	 * otherwise try next time */
4371	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4372		/* this is a hard error */
4373		return NETDEV_TX_BUSY;
4374	}
4375
4376	/* record the location of the first descriptor for this packet */
4377	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4378	first->skb = skb;
4379	first->bytecount = skb->len;
4380	first->gso_segs = 1;
4381
4382	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4383		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4384		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4385	}
4386
4387	if (vlan_tx_tag_present(skb)) {
4388		tx_flags |= IGB_TX_FLAGS_VLAN;
4389		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4390	}
4391
4392	/* record initial flags and protocol */
4393	first->tx_flags = tx_flags;
4394	first->protocol = protocol;
4395
4396	tso = igb_tso(tx_ring, first, &hdr_len);
4397	if (tso < 0)
4398		goto out_drop;
4399	else if (!tso)
4400		igb_tx_csum(tx_ring, first);
4401
4402	igb_tx_map(tx_ring, first, hdr_len);
4403
4404	/* Make sure there is space in the ring for the next send. */
4405	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4406
4407	return NETDEV_TX_OK;
4408
4409out_drop:
4410	igb_unmap_and_free_tx_resource(tx_ring, first);
4411
4412	return NETDEV_TX_OK;
4413}
4414
4415static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4416						    struct sk_buff *skb)
4417{
4418	unsigned int r_idx = skb->queue_mapping;
4419
4420	if (r_idx >= adapter->num_tx_queues)
4421		r_idx = r_idx % adapter->num_tx_queues;
4422
4423	return adapter->tx_ring[r_idx];
4424}
4425
4426static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4427				  struct net_device *netdev)
4428{
4429	struct igb_adapter *adapter = netdev_priv(netdev);
4430
4431	if (test_bit(__IGB_DOWN, &adapter->state)) {
4432		dev_kfree_skb_any(skb);
4433		return NETDEV_TX_OK;
4434	}
4435
4436	if (skb->len <= 0) {
4437		dev_kfree_skb_any(skb);
4438		return NETDEV_TX_OK;
4439	}
4440
4441	/*
4442	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4443	 * in order to meet this minimum size requirement.
4444	 */
4445	if (skb->len < 17) {
4446		if (skb_padto(skb, 17))
4447			return NETDEV_TX_OK;
4448		skb->len = 17;
4449	}
4450
4451	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4452}
4453
4454/**
4455 * igb_tx_timeout - Respond to a Tx Hang
4456 * @netdev: network interface device structure
4457 **/
4458static void igb_tx_timeout(struct net_device *netdev)
4459{
4460	struct igb_adapter *adapter = netdev_priv(netdev);
4461	struct e1000_hw *hw = &adapter->hw;
4462
4463	/* Do the reset outside of interrupt context */
4464	adapter->tx_timeout_count++;
4465
4466	if (hw->mac.type == e1000_82580)
4467		hw->dev_spec._82575.global_device_reset = true;
4468
4469	schedule_work(&adapter->reset_task);
4470	wr32(E1000_EICS,
4471	     (adapter->eims_enable_mask & ~adapter->eims_other));
4472}
4473
4474static void igb_reset_task(struct work_struct *work)
4475{
4476	struct igb_adapter *adapter;
4477	adapter = container_of(work, struct igb_adapter, reset_task);
4478
4479	igb_dump(adapter);
4480	netdev_err(adapter->netdev, "Reset adapter\n");
4481	igb_reinit_locked(adapter);
4482}
4483
4484/**
4485 * igb_get_stats64 - Get System Network Statistics
4486 * @netdev: network interface device structure
4487 * @stats: rtnl_link_stats64 pointer
4488 *
4489 **/
4490static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4491						 struct rtnl_link_stats64 *stats)
4492{
4493	struct igb_adapter *adapter = netdev_priv(netdev);
4494
4495	spin_lock(&adapter->stats64_lock);
4496	igb_update_stats(adapter, &adapter->stats64);
4497	memcpy(stats, &adapter->stats64, sizeof(*stats));
4498	spin_unlock(&adapter->stats64_lock);
4499
4500	return stats;
4501}
4502
4503/**
4504 * igb_change_mtu - Change the Maximum Transfer Unit
4505 * @netdev: network interface device structure
4506 * @new_mtu: new value for maximum frame size
4507 *
4508 * Returns 0 on success, negative on failure
4509 **/
4510static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4511{
4512	struct igb_adapter *adapter = netdev_priv(netdev);
4513	struct pci_dev *pdev = adapter->pdev;
4514	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4515
4516	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4517		dev_err(&pdev->dev, "Invalid MTU setting\n");
4518		return -EINVAL;
4519	}
4520
4521#define MAX_STD_JUMBO_FRAME_SIZE 9238
4522	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4523		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4524		return -EINVAL;
4525	}
4526
4527	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4528		msleep(1);
4529
4530	/* igb_down has a dependency on max_frame_size */
4531	adapter->max_frame_size = max_frame;
4532
4533	if (netif_running(netdev))
4534		igb_down(adapter);
4535
4536	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4537		 netdev->mtu, new_mtu);
4538	netdev->mtu = new_mtu;
4539
4540	if (netif_running(netdev))
4541		igb_up(adapter);
4542	else
4543		igb_reset(adapter);
4544
4545	clear_bit(__IGB_RESETTING, &adapter->state);
4546
4547	return 0;
4548}
4549
/**
 * igb_update_stats - Update the board statistics counters
 * @adapter: board private structure
 * @net_stats: rtnl_link_stats64 structure that receives the OS-visible
 *             counters derived from the ring and hardware statistics
 *
 * Returns early, without touching anything, when link_speed is 0 or the
 * PCI channel is offline.
 **/

void igb_update_stats(struct igb_adapter *adapter,
		      struct rtnl_link_stats64 *net_stats)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u32 reg, mpc;
	u16 phy_tmp;
	int i;
	u64 bytes, packets;
	unsigned int start;
	u64 _bytes, _packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	/* sum the per-ring rx software counters */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_rx_queues; i++) {
		/* per-queue drop count read from RQDPC, masked to 12 bits */
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		struct igb_ring *ring = adapter->rx_ring[i];

		ring->rx_stats.drops += rqdpc_tmp;
		net_stats->rx_fifo_errors += rqdpc_tmp;

		/* retry until bytes/packets were read as a consistent pair */
		do {
			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
			_bytes = ring->rx_stats.bytes;
			_packets = ring->rx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}

	net_stats->rx_bytes = bytes;
	net_stats->rx_packets = packets;

	/* same for the per-ring tx software counters */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = adapter->tx_ring[i];
		do {
			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
			_bytes = ring->tx_stats.bytes;
			_packets = ring->tx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}
	net_stats->tx_bytes = bytes;
	net_stats->tx_packets = packets;

	/* read stats registers */
	/* each register value is added to the running total kept in
	 * adapter->stats */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	/* the MPC value counts both as a hardware stat and as an rx fifo
	 * error */
	mpc = rd32(E1000_MPC);
	adapter->stats.mpc += mpc;
	net_stats->rx_fifo_errors += mpc;
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	adapter->stats.rnbc += rd32(E1000_RNBC);
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	/* NOTE(review): only the TORH/TOTH registers are accumulated here,
	 * whereas gorc/gotc above accumulate the ...L register - confirm
	 * this is intended */
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	adapter->stats.tpt += rd32(E1000_TPT);
	adapter->stats.colc += rd32(E1000_COLC);

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	/* read internal phy specific stats */
	/* only done when no link-mode bit is set in CTRL_EXT */
	reg = rd32(E1000_CTRL_EXT);
	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
		adapter->stats.rxerrc += rd32(E1000_RXERRC);
		adapter->stats.tncrs += rd32(E1000_TNCRS);
	}

	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	net_stats->multicast = adapter->stats.mprc;
	net_stats->collisions = adapter->stats.colc;

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	/* NOTE(review): stats.cexterr is summed below but is not updated
	 * anywhere in this function - confirm where it is maintained */
	net_stats->rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	net_stats->rx_length_errors = adapter->stats.ruc +
				      adapter->stats.roc;
	net_stats->rx_crc_errors = adapter->stats.crcerrs;
	net_stats->rx_frame_errors = adapter->stats.algnerrc;
	net_stats->rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	net_stats->tx_errors = adapter->stats.ecol +
			       adapter->stats.latecol;
	net_stats->tx_aborted_errors = adapter->stats.ecol;
	net_stats->tx_window_errors = adapter->stats.latecol;
	net_stats->tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats */
	/* idle errors are read from the PHY only on copper media at
	 * SPEED_1000, and only when the PHY register read returns 0 */
	if (hw->phy.media_type == e1000_media_type_copper) {
		if ((adapter->link_speed == SPEED_1000) &&
		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);

	/* OS2BMC Stats */
	/* read only when the EN_BMC2OS bit is set in MANC */
	reg = rd32(E1000_MANC);
	if (reg & E1000_MANC_EN_BMC2OS) {
		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
	}
}
4741
4742static irqreturn_t igb_msix_other(int irq, void *data)
4743{
4744	struct igb_adapter *adapter = data;
4745	struct e1000_hw *hw = &adapter->hw;
4746	u32 icr = rd32(E1000_ICR);
4747	/* reading ICR causes bit 31 of EICR to be cleared */
4748
4749	if (icr & E1000_ICR_DRSTA)
4750		schedule_work(&adapter->reset_task);
4751
4752	if (icr & E1000_ICR_DOUTSYNC) {
4753		/* HW is reporting DMA is out of sync */
4754		adapter->stats.doosync++;
4755		/* The DMA Out of Sync is also indication of a spoof event
4756		 * in IOV mode. Check the Wrong VM Behavior register to
4757		 * see if it is really a spoof event. */
4758		igb_check_wvbr(adapter);
4759	}
4760
4761	/* Check for a mailbox event */
4762	if (icr & E1000_ICR_VMMB)
4763		igb_msg_task(adapter);
4764
4765	if (icr & E1000_ICR_LSC) {
4766		hw->mac.get_link_status = 1;
4767		/* guard against interrupt when we're going down */
4768		if (!test_bit(__IGB_DOWN, &adapter->state))
4769			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4770	}
4771
4772	if (adapter->vfs_allocated_count)
4773		wr32(E1000_IMS, E1000_IMS_LSC |
4774				E1000_IMS_VMMB |
4775				E1000_IMS_DOUTSYNC);
4776	else
4777		wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4778	wr32(E1000_EIMS, adapter->eims_other);
4779
4780	return IRQ_HANDLED;
4781}
4782
4783static void igb_write_itr(struct igb_q_vector *q_vector)
4784{
4785	struct igb_adapter *adapter = q_vector->adapter;
4786	u32 itr_val = q_vector->itr_val & 0x7FFC;
4787
4788	if (!q_vector->set_itr)
4789		return;
4790
4791	if (!itr_val)
4792		itr_val = 0x4;
4793
4794	if (adapter->hw.mac.type == e1000_82575)
4795		itr_val |= itr_val << 16;
4796	else
4797		itr_val |= E1000_EITR_CNT_IGNR;
4798
4799	writel(itr_val, q_vector->itr_register);
4800	q_vector->set_itr = 0;
4801}
4802
4803static irqreturn_t igb_msix_ring(int irq, void *data)
4804{
4805	struct igb_q_vector *q_vector = data;
4806
4807	/* Write the ITR value calculated from the previous interrupt. */
4808	igb_write_itr(q_vector);
4809
4810	napi_schedule(&q_vector->napi);
4811
4812	return IRQ_HANDLED;
4813}
4814
4815#ifdef CONFIG_IGB_DCA
4816static void igb_update_dca(struct igb_q_vector *q_vector)
4817{
4818	struct igb_adapter *adapter = q_vector->adapter;
4819	struct e1000_hw *hw = &adapter->hw;
4820	int cpu = get_cpu();
4821
4822	if (q_vector->cpu == cpu)
4823		goto out_no_update;
4824
4825	if (q_vector->tx.ring) {
4826		int q = q_vector->tx.ring->reg_idx;
4827		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4828		if (hw->mac.type == e1000_82575) {
4829			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4830			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4831		} else {
4832			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4833			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4834			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4835		}
4836		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4837		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4838	}
4839	if (q_vector->rx.ring) {
4840		int q = q_vector->rx.ring->reg_idx;
4841		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4842		if (hw->mac.type == e1000_82575) {
4843			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4844			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4845		} else {
4846			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4847			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4848			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4849		}
4850		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4851		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4852		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4853		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4854	}
4855	q_vector->cpu = cpu;
4856out_no_update:
4857	put_cpu();
4858}
4859
4860static void igb_setup_dca(struct igb_adapter *adapter)
4861{
4862	struct e1000_hw *hw = &adapter->hw;
4863	int i;
4864
4865	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4866		return;
4867
4868	/* Always use CB2 mode, difference is masked in the CB driver. */
4869	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4870
4871	for (i = 0; i < adapter->num_q_vectors; i++) {
4872		adapter->q_vector[i]->cpu = -1;
4873		igb_update_dca(adapter->q_vector[i]);
4874	}
4875}
4876
4877static int __igb_notify_dca(struct device *dev, void *data)
4878{
4879	struct net_device *netdev = dev_get_drvdata(dev);
4880	struct igb_adapter *adapter = netdev_priv(netdev);
4881	struct pci_dev *pdev = adapter->pdev;
4882	struct e1000_hw *hw = &adapter->hw;
4883	unsigned long event = *(unsigned long *)data;
4884
4885	switch (event) {
4886	case DCA_PROVIDER_ADD:
4887		/* if already enabled, don't do it again */
4888		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4889			break;
4890		if (dca_add_requester(dev) == 0) {
4891			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4892			dev_info(&pdev->dev, "DCA enabled\n");
4893			igb_setup_dca(adapter);
4894			break;
4895		}
4896		/* Fall Through since DCA is disabled. */
4897	case DCA_PROVIDER_REMOVE:
4898		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4899			/* without this a class_device is left
4900			 * hanging around in the sysfs model */
4901			dca_remove_requester(dev);
4902			dev_info(&pdev->dev, "DCA disabled\n");
4903			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4904			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4905		}
4906		break;
4907	}
4908
4909	return 0;
4910}
4911
4912static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4913                          void *p)
4914{
4915	int ret_val;
4916
4917	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4918	                                 __igb_notify_dca);
4919
4920	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4921}
4922#endif /* CONFIG_IGB_DCA */
4923
4924static void igb_ping_all_vfs(struct igb_adapter *adapter)
4925{
4926	struct e1000_hw *hw = &adapter->hw;
4927	u32 ping;
4928	int i;
4929
4930	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4931		ping = E1000_PF_CONTROL_MSG;
4932		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4933			ping |= E1000_VT_MSGTYPE_CTS;
4934		igb_write_mbx(hw, &ping, 1, i);
4935	}
4936}
4937
4938static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4939{
4940	struct e1000_hw *hw = &adapter->hw;
4941	u32 vmolr = rd32(E1000_VMOLR(vf));
4942	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4943
4944	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4945	                    IGB_VF_FLAG_MULTI_PROMISC);
4946	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4947
4948	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4949		vmolr |= E1000_VMOLR_MPME;
4950		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4951		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4952	} else {
4953		/*
4954		 * if we have hashes and we are clearing a multicast promisc
4955		 * flag we need to write the hashes to the MTA as this step
4956		 * was previously skipped
4957		 */
4958		if (vf_data->num_vf_mc_hashes > 30) {
4959			vmolr |= E1000_VMOLR_MPME;
4960		} else if (vf_data->num_vf_mc_hashes) {
4961			int j;
4962			vmolr |= E1000_VMOLR_ROMPE;
4963			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4964				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4965		}
4966	}
4967
4968	wr32(E1000_VMOLR(vf), vmolr);
4969
4970	/* there are flags left unprocessed, likely not supported */
4971	if (*msgbuf & E1000_VT_MSGINFO_MASK)
4972		return -EINVAL;
4973
4974	return 0;
4975
4976}
4977
4978static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4979				  u32 *msgbuf, u32 vf)
4980{
4981	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4982	u16 *hash_list = (u16 *)&msgbuf[1];
4983	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4984	int i;
4985
4986	/* salt away the number of multicast addresses assigned
4987	 * to this VF for later use to restore when the PF multi cast
4988	 * list changes
4989	 */
4990	vf_data->num_vf_mc_hashes = n;
4991
4992	/* only up to 30 hash values supported */
4993	if (n > 30)
4994		n = 30;
4995
4996	/* store the hashes for later use */
4997	for (i = 0; i < n; i++)
4998		vf_data->vf_mc_hashes[i] = hash_list[i];
4999
5000	/* Flush and reset the mta with the new values */
5001	igb_set_rx_mode(adapter->netdev);
5002
5003	return 0;
5004}
5005
5006static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5007{
5008	struct e1000_hw *hw = &adapter->hw;
5009	struct vf_data_storage *vf_data;
5010	int i, j;
5011
5012	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5013		u32 vmolr = rd32(E1000_VMOLR(i));
5014		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5015
5016		vf_data = &adapter->vf_data[i];
5017
5018		if ((vf_data->num_vf_mc_hashes > 30) ||
5019		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5020			vmolr |= E1000_VMOLR_MPME;
5021		} else if (vf_data->num_vf_mc_hashes) {
5022			vmolr |= E1000_VMOLR_ROMPE;
5023			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5024				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5025		}
5026		wr32(E1000_VMOLR(i), vmolr);
5027	}
5028}
5029
5030static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5031{
5032	struct e1000_hw *hw = &adapter->hw;
5033	u32 pool_mask, reg, vid;
5034	int i;
5035
5036	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5037
5038	/* Find the vlan filter for this id */
5039	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5040		reg = rd32(E1000_VLVF(i));
5041
5042		/* remove the vf from the pool */
5043		reg &= ~pool_mask;
5044
5045		/* if pool is empty then remove entry from vfta */
5046		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5047		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5048			reg = 0;
5049			vid = reg & E1000_VLVF_VLANID_MASK;
5050			igb_vfta_set(hw, vid, false);
5051		}
5052
5053		wr32(E1000_VLVF(i), reg);
5054	}
5055
5056	adapter->vf_data[vf].vlans_enabled = 0;
5057}
5058
/**
 * igb_vlvf_set - add or remove a pool from the VLVF entry of a VLAN id
 * @adapter: board private structure
 * @vid: VLAN id to look up
 * @add: true to add @vf to the VLAN's pool, false to remove it
 * @vf: pool index; values >= vfs_allocated_count are treated as the PF
 *      and skip the RLPML adjustment
 *
 * Returns -1 when the MAC is older than 82576 or no VFs are allocated,
 * 0 in every other case.
 *
 * NOTE(review): on add, if neither a matching nor a free entry is found
 * nothing is written and 0 is still returned.
 * NOTE(review): on remove, vlans_enabled is decremented for any matching
 * entry without checking that this VF's pool bit was set in it.
 **/
static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg, i;

	/* The vlvf table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return -1;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return -1;

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));
		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (reg & E1000_VLVF_VLANID_MASK))
			break;
	}

	/* i == E1000_VLVF_ARRAY_SIZE here means no enabled entry matched */
	if (add) {
		if (i == E1000_VLVF_ARRAY_SIZE) {
			/* Did not find a matching VLAN ID entry that was
			 * enabled.  Search for a free filter entry, i.e.
			 * one without the enable bit set
			 */
			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
				reg = rd32(E1000_VLVF(i));
				if (!(reg & E1000_VLVF_VLANID_ENABLE))
					break;
			}
		}
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* Found an enabled/available entry */
			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

			/* if !enabled we need to set this up in vfta */
			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
				/* add VID to filter table */
				igb_vfta_set(hw, vid, true);
				reg |= E1000_VLVF_VLANID_ENABLE;
			}
			/* replace whatever id the entry held with vid */
			reg &= ~E1000_VLVF_VLANID_MASK;
			reg |= vid;
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/* first VLAN for this VF: grow the RLPML field by 4 */
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size += 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}

			adapter->vf_data[vf].vlans_enabled++;
		}
	} else {
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* remove vf from the pool */
			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
			/* if pool is empty then remove entry from vfta */
			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
				reg = 0;
				igb_vfta_set(hw, vid, false);
			}
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/* last VLAN for this VF gone: shrink RLPML by 4 */
			adapter->vf_data[vf].vlans_enabled--;
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size -= 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}
		}
	}
	return 0;
}
5151
5152static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5153{
5154	struct e1000_hw *hw = &adapter->hw;
5155
5156	if (vid)
5157		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5158	else
5159		wr32(E1000_VMVIR(vf), 0);
5160}
5161
5162static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5163			       int vf, u16 vlan, u8 qos)
5164{
5165	int err = 0;
5166	struct igb_adapter *adapter = netdev_priv(netdev);
5167
5168	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5169		return -EINVAL;
5170	if (vlan || qos) {
5171		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5172		if (err)
5173			goto out;
5174		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5175		igb_set_vmolr(adapter, vf, !vlan);
5176		adapter->vf_data[vf].pf_vlan = vlan;
5177		adapter->vf_data[vf].pf_qos = qos;
5178		dev_info(&adapter->pdev->dev,
5179			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5180		if (test_bit(__IGB_DOWN, &adapter->state)) {
5181			dev_warn(&adapter->pdev->dev,
5182				 "The VF VLAN has been set,"
5183				 " but the PF device is not up.\n");
5184			dev_warn(&adapter->pdev->dev,
5185				 "Bring the PF device up before"
5186				 " attempting to use the VF device.\n");
5187		}
5188	} else {
5189		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5190				   false, vf);
5191		igb_set_vmvir(adapter, vlan, vf);
5192		igb_set_vmolr(adapter, vf, true);
5193		adapter->vf_data[vf].pf_vlan = 0;
5194		adapter->vf_data[vf].pf_qos = 0;
5195       }
5196out:
5197       return err;
5198}
5199
5200static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5201{
5202	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5203	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5204
5205	return igb_vlvf_set(adapter, vid, add, vf);
5206}
5207
5208static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5209{
5210	/* clear flags - except flag that indicates PF has set the MAC */
5211	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5212	adapter->vf_data[vf].last_nack = jiffies;
5213
5214	/* reset offloads to defaults */
5215	igb_set_vmolr(adapter, vf, true);
5216
5217	/* reset vlans for device */
5218	igb_clear_vf_vfta(adapter, vf);
5219	if (adapter->vf_data[vf].pf_vlan)
5220		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5221				    adapter->vf_data[vf].pf_vlan,
5222				    adapter->vf_data[vf].pf_qos);
5223	else
5224		igb_clear_vf_vfta(adapter, vf);
5225
5226	/* reset multicast table array for vf */
5227	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5228
5229	/* Flush and reset the mta with the new values */
5230	igb_set_rx_mode(adapter->netdev);
5231}
5232
5233static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5234{
5235	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5236
5237	/* generate a new mac address as we were hotplug removed/added */
5238	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5239		random_ether_addr(vf_mac);
5240
5241	/* process remaining reset events */
5242	igb_vf_reset(adapter, vf);
5243}
5244
5245static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5246{
5247	struct e1000_hw *hw = &adapter->hw;
5248	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5249	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5250	u32 reg, msgbuf[3];
5251	u8 *addr = (u8 *)(&msgbuf[1]);
5252
5253	/* process all the same items cleared in a function level reset */
5254	igb_vf_reset(adapter, vf);
5255
5256	/* set vf mac address */
5257	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5258
5259	/* enable transmit and receive for vf */
5260	reg = rd32(E1000_VFTE);
5261	wr32(E1000_VFTE, reg | (1 << vf));
5262	reg = rd32(E1000_VFRE);
5263	wr32(E1000_VFRE, reg | (1 << vf));
5264
5265	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5266
5267	/* reply to reset with ack and vf mac address */
5268	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5269	memcpy(addr, vf_mac, 6);
5270	igb_write_mbx(hw, msgbuf, 3, vf);
5271}
5272
5273static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5274{
5275	/*
5276	 * The VF MAC Address is stored in a packed array of bytes
5277	 * starting at the second 32 bit word of the msg array
5278	 */
5279	unsigned char *addr = (char *)&msg[1];
5280	int err = -1;
5281
5282	if (is_valid_ether_addr(addr))
5283		err = igb_set_vf_mac(adapter, vf, addr);
5284
5285	return err;
5286}
5287
5288static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5289{
5290	struct e1000_hw *hw = &adapter->hw;
5291	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5292	u32 msg = E1000_VT_MSGTYPE_NACK;
5293
5294	/* if device isn't clear to send it shouldn't be reading either */
5295	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5296	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5297		igb_write_mbx(hw, &msg, 1, vf);
5298		vf_data->last_nack = jiffies;
5299	}
5300}
5301
5302static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5303{
5304	struct pci_dev *pdev = adapter->pdev;
5305	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5306	struct e1000_hw *hw = &adapter->hw;
5307	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5308	s32 retval;
5309
5310	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5311
5312	if (retval) {
5313		/* if receive failed revoke VF CTS stats and restart init */
5314		dev_err(&pdev->dev, "Error receiving message from VF\n");
5315		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5316		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5317			return;
5318		goto out;
5319	}
5320
5321	/* this is a message we already processed, do nothing */
5322	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5323		return;
5324
5325	/*
5326	 * until the vf completes a reset it should not be
5327	 * allowed to start any configuration.
5328	 */
5329
5330	if (msgbuf[0] == E1000_VF_RESET) {
5331		igb_vf_reset_msg(adapter, vf);
5332		return;
5333	}
5334
5335	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5336		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5337			return;
5338		retval = -1;
5339		goto out;
5340	}
5341
5342	switch ((msgbuf[0] & 0xFFFF)) {
5343	case E1000_VF_SET_MAC_ADDR:
5344		retval = -EINVAL;
5345		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5346			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5347		else
5348			dev_warn(&pdev->dev,
5349				 "VF %d attempted to override administratively "
5350				 "set MAC address\nReload the VF driver to "
5351				 "resume operations\n", vf);
5352		break;
5353	case E1000_VF_SET_PROMISC:
5354		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5355		break;
5356	case E1000_VF_SET_MULTICAST:
5357		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5358		break;
5359	case E1000_VF_SET_LPE:
5360		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5361		break;
5362	case E1000_VF_SET_VLAN:
5363		retval = -1;
5364		if (vf_data->pf_vlan)
5365			dev_warn(&pdev->dev,
5366				 "VF %d attempted to override administratively "
5367				 "set VLAN tag\nReload the VF driver to "
5368				 "resume operations\n", vf);
5369		else
5370			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5371		break;
5372	default:
5373		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5374		retval = -1;
5375		break;
5376	}
5377
5378	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5379out:
5380	/* notify the VF of the results of what it sent us */
5381	if (retval)
5382		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5383	else
5384		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5385
5386	igb_write_mbx(hw, msgbuf, 1, vf);
5387}
5388
5389static void igb_msg_task(struct igb_adapter *adapter)
5390{
5391	struct e1000_hw *hw = &adapter->hw;
5392	u32 vf;
5393
5394	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5395		/* process any reset requests */
5396		if (!igb_check_for_rst(hw, vf))
5397			igb_vf_reset_event(adapter, vf);
5398
5399		/* process any messages pending */
5400		if (!igb_check_for_msg(hw, vf))
5401			igb_rcv_msg_from_vf(adapter, vf);
5402
5403		/* process any acks */
5404		if (!igb_check_for_ack(hw, vf))
5405			igb_rcv_ack_from_vf(adapter, vf);
5406	}
5407}
5408
5409/**
5410 *  igb_set_uta - Set unicast filter table address
5411 *  @adapter: board private structure
5412 *
5413 *  The unicast table address is a register array of 32-bit registers.
5414 *  The table is meant to be used in a way similar to how the MTA is used
5415 *  however due to certain limitations in the hardware it is necessary to
5416 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5417 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5418 **/
5419static void igb_set_uta(struct igb_adapter *adapter)
5420{
5421	struct e1000_hw *hw = &adapter->hw;
5422	int i;
5423
5424	/* The UTA table only exists on 82576 hardware and newer */
5425	if (hw->mac.type < e1000_82576)
5426		return;
5427
5428	/* we only need to do this if VMDq is enabled */
5429	if (!adapter->vfs_allocated_count)
5430		return;
5431
5432	for (i = 0; i < hw->mac.uta_reg_count; i++)
5433		array_wr32(E1000_UTA, i, ~0);
5434}
5435
5436/**
5437 * igb_intr_msi - Interrupt Handler
5438 * @irq: interrupt number
5439 * @data: pointer to a network interface device structure
5440 **/
5441static irqreturn_t igb_intr_msi(int irq, void *data)
5442{
5443	struct igb_adapter *adapter = data;
5444	struct igb_q_vector *q_vector = adapter->q_vector[0];
5445	struct e1000_hw *hw = &adapter->hw;
5446	/* read ICR disables interrupts using IAM */
5447	u32 icr = rd32(E1000_ICR);
5448
5449	igb_write_itr(q_vector);
5450
5451	if (icr & E1000_ICR_DRSTA)
5452		schedule_work(&adapter->reset_task);
5453
5454	if (icr & E1000_ICR_DOUTSYNC) {
5455		/* HW is reporting DMA is out of sync */
5456		adapter->stats.doosync++;
5457	}
5458
5459	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5460		hw->mac.get_link_status = 1;
5461		if (!test_bit(__IGB_DOWN, &adapter->state))
5462			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5463	}
5464
5465	napi_schedule(&q_vector->napi);
5466
5467	return IRQ_HANDLED;
5468}
5469
5470/**
5471 * igb_intr - Legacy Interrupt Handler
5472 * @irq: interrupt number
5473 * @data: pointer to a network interface device structure
5474 **/
5475static irqreturn_t igb_intr(int irq, void *data)
5476{
5477	struct igb_adapter *adapter = data;
5478	struct igb_q_vector *q_vector = adapter->q_vector[0];
5479	struct e1000_hw *hw = &adapter->hw;
5480	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5481	 * need for the IMC write */
5482	u32 icr = rd32(E1000_ICR);
5483
5484	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5485	 * not set, then the adapter didn't send an interrupt */
5486	if (!(icr & E1000_ICR_INT_ASSERTED))
5487		return IRQ_NONE;
5488
5489	igb_write_itr(q_vector);
5490
5491	if (icr & E1000_ICR_DRSTA)
5492		schedule_work(&adapter->reset_task);
5493
5494	if (icr & E1000_ICR_DOUTSYNC) {
5495		/* HW is reporting DMA is out of sync */
5496		adapter->stats.doosync++;
5497	}
5498
5499	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5500		hw->mac.get_link_status = 1;
5501		/* guard against interrupt when we're going down */
5502		if (!test_bit(__IGB_DOWN, &adapter->state))
5503			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5504	}
5505
5506	napi_schedule(&q_vector->napi);
5507
5508	return IRQ_HANDLED;
5509}
5510
5511void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5512{
5513	struct igb_adapter *adapter = q_vector->adapter;
5514	struct e1000_hw *hw = &adapter->hw;
5515
5516	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5517	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5518		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5519			igb_set_itr(q_vector);
5520		else
5521			igb_update_ring_itr(q_vector);
5522	}
5523
5524	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5525		if (adapter->msix_entries)
5526			wr32(E1000_EIMS, q_vector->eims_value);
5527		else
5528			igb_irq_enable(adapter);
5529	}
5530}
5531
5532/**
5533 * igb_poll - NAPI Rx polling callback
5534 * @napi: napi polling structure
5535 * @budget: count of how many packets we should handle
5536 **/
5537static int igb_poll(struct napi_struct *napi, int budget)
5538{
5539	struct igb_q_vector *q_vector = container_of(napi,
5540	                                             struct igb_q_vector,
5541	                                             napi);
5542	bool clean_complete = true;
5543
5544#ifdef CONFIG_IGB_DCA
5545	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5546		igb_update_dca(q_vector);
5547#endif
5548	if (q_vector->tx.ring)
5549		clean_complete = igb_clean_tx_irq(q_vector);
5550
5551	if (q_vector->rx.ring)
5552		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5553
5554	/* If all work not completed, return budget and keep polling */
5555	if (!clean_complete)
5556		return budget;
5557
5558	/* If not enough Rx work done, exit the polling mode */
5559	napi_complete(napi);
5560	igb_ring_irq_enable(q_vector);
5561
5562	return 0;
5563}
5564
5565/**
5566 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5567 * @adapter: board private structure
5568 * @shhwtstamps: timestamp structure to update
5569 * @regval: unsigned 64bit system time value.
5570 *
5571 * We need to convert the system time value stored in the RX/TXSTMP registers
5572 * into a hwtstamp which can be used by the upper level timestamping functions
5573 */
5574static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5575                                   struct skb_shared_hwtstamps *shhwtstamps,
5576                                   u64 regval)
5577{
5578	u64 ns;
5579
5580	/*
5581	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5582	 * 24 to match clock shift we setup earlier.
5583	 */
5584	if (adapter->hw.mac.type == e1000_82580)
5585		regval <<= IGB_82580_TSYNC_SHIFT;
5586
5587	ns = timecounter_cyc2time(&adapter->clock, regval);
5588	timecompare_update(&adapter->compare, ns);
5589	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5590	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5591	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5592}
5593
5594/**
5595 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5596 * @q_vector: pointer to q_vector containing needed info
5597 * @buffer: pointer to igb_tx_buffer structure
5598 *
5599 * If we were asked to do hardware stamping and such a time stamp is
5600 * available, then it must have been for this skb here because we only
5601 * allow only one such packet into the queue.
5602 */
5603static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5604			    struct igb_tx_buffer *buffer_info)
5605{
5606	struct igb_adapter *adapter = q_vector->adapter;
5607	struct e1000_hw *hw = &adapter->hw;
5608	struct skb_shared_hwtstamps shhwtstamps;
5609	u64 regval;
5610
5611	/* if skb does not support hw timestamp or TX stamp not valid exit */
5612	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5613	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5614		return;
5615
5616	regval = rd32(E1000_TXSTMPL);
5617	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5618
5619	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5620	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5621}
5622
/**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: pointer to q_vector containing needed info
 *
 * Walks the tx ring from next_to_clean, one packet per iteration, for at
 * most q_vector->tx.work_limit packets.  For each completed packet the
 * skb is freed, its buffers are DMA-unmapped and the byte/packet counters
 * are accumulated.  Afterwards a transmit hang check runs if it was
 * armed, and the subqueue is woken if it was stopped and enough
 * descriptors are free again.
 *
 * returns true if the device is down, if a hang was detected, or if the
 * loop stopped with budget left over (ring is completely cleaned);
 * false when the budget was used up.
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct igb_ring *tx_ring = q_vector->tx.ring;
	struct igb_tx_buffer *tx_buffer;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__IGB_DOWN, &adapter->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer_info[i];
	tx_desc = IGB_TX_DESC(tx_ring, i);
	/*
	 * bias the index by -count so that reaching the end of the ring
	 * shows up as i == 0 in the wrap checks below; the bias is removed
	 * again after the loop
	 */
	i -= tx_ring->count;

	for (; budget; budget--) {
		eop_desc = tx_buffer->next_to_watch;

		/* prevent any other reads prior to eop_desc */
		rmb();

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* retrieve hardware timestamp */
		igb_tx_hwtstamp(q_vector, tx_buffer);

		/* free the skb */
		dev_kfree_skb_any(tx_buffer->skb);
		tx_buffer->skb = NULL;

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 tx_buffer->dma,
				 tx_buffer->length,
				 DMA_TO_DEVICE);

		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer->dma = 0;

			tx_buffer++;
			tx_desc++;
			i++;
			/* wrapped past the last descriptor: restart at 0 */
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (tx_buffer->dma) {
				dma_unmap_page(tx_ring->dev,
					       tx_buffer->dma,
					       tx_buffer->length,
					       DMA_TO_DEVICE);
			}
		}

		/* clear last DMA location */
		tx_buffer->dma = 0;

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}
	}

	/* undo the index bias and publish the new clean position */
	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
		struct e1000_hw *hw = &adapter->hw;

		/* tx_buffer now points at the first buffer not cleaned */
		eop_desc = tx_buffer->next_to_watch;

		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
		if (eop_desc &&
		    time_after(jiffies, tx_buffer->time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				"  Tx Queue             <%d>\n"
				"  TDH                  <%x>\n"
				"  TDT                  <%x>\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
				"  next_to_watch        <%p>\n"
				"  jiffies              <%lx>\n"
				"  desc.status          <%x>\n",
				tx_ring->queue_index,
				rd32(E1000_TDH(tx_ring->reg_idx)),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_buffer->time_stamp,
				eop_desc,
				jiffies,
				eop_desc->wb.status);
			netif_stop_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			/* we are about to reset, no point in enabling stuff */
			return true;
		}
	}

	/*
	 * wake the subqueue only if something was cleaned, the carrier is
	 * up and at least IGB_TX_QUEUE_WAKE descriptors are unused
	 */
	if (unlikely(total_packets &&
		     netif_carrier_ok(tx_ring->netdev) &&
		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);
		}
	}

	/* budget left over means the loop stopped for lack of work */
	return !!budget;
}
5788
5789static inline void igb_rx_checksum(struct igb_ring *ring,
5790				   union e1000_adv_rx_desc *rx_desc,
5791				   struct sk_buff *skb)
5792{
5793	skb_checksum_none_assert(skb);
5794
5795	/* Ignore Checksum bit is set */
5796	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5797		return;
5798
5799	/* Rx checksum disabled via ethtool */
5800	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5801		return;
5802
5803	/* TCP/UDP checksum error bit is set */
5804	if (igb_test_staterr(rx_desc,
5805			     E1000_RXDEXT_STATERR_TCPE |
5806			     E1000_RXDEXT_STATERR_IPE)) {
5807		/*
5808		 * work around errata with sctp packets where the TCPE aka
5809		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5810		 * packets, (aka let the stack check the crc32c)
5811		 */
5812		if (!((skb->len == 60) &&
5813		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5814			u64_stats_update_begin(&ring->rx_syncp);
5815			ring->rx_stats.csum_err++;
5816			u64_stats_update_end(&ring->rx_syncp);
5817		}
5818		/* let the stack verify checksum errors */
5819		return;
5820	}
5821	/* It must be a TCP or UDP packet with a valid checksum */
5822	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5823				      E1000_RXD_STAT_UDPCS))
5824		skb->ip_summed = CHECKSUM_UNNECESSARY;
5825
5826	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5827		le32_to_cpu(rx_desc->wb.upper.status_error));
5828}
5829
5830static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5831			    union e1000_adv_rx_desc *rx_desc,
5832			    struct sk_buff *skb)
5833{
5834	struct igb_adapter *adapter = q_vector->adapter;
5835	struct e1000_hw *hw = &adapter->hw;
5836	u64 regval;
5837
5838	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5839				       E1000_RXDADV_STAT_TS))
5840		return;
5841
5842	/*
5843	 * If this bit is set, then the RX registers contain the time stamp. No
5844	 * other packet will be time stamped until we read these registers, so
5845	 * read the registers to make them available again. Because only one
5846	 * packet can be time stamped at a time, we know that the register
5847	 * values must belong to this one here and therefore we don't need to
5848	 * compare any of the additional attributes stored for it.
5849	 *
5850	 * If nothing went wrong, then it should have a shared tx_flags that we
5851	 * can turn into a skb_shared_hwtstamps.
5852	 */
5853	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5854		u32 *stamp = (u32 *)skb->data;
5855		regval = le32_to_cpu(*(stamp + 2));
5856		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5857		skb_pull(skb, IGB_TS_HDR_LEN);
5858	} else {
5859		if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5860			return;
5861
5862		regval = rd32(E1000_RXSTMPL);
5863		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5864	}
5865
5866	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5867}
5868static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5869{
5870	/* HW will not DMA in data larger than the given buffer, even if it
5871	 * parses the (NFS, of course) header to be larger.  In that case, it
5872	 * fills the header buffer and spills the rest into the page.
5873	 */
5874	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5875	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5876	if (hlen > IGB_RX_HDR_LEN)
5877		hlen = IGB_RX_HDR_LEN;
5878	return hlen;
5879}
5880
/**
 * igb_clean_rx_irq - process completed receive descriptors
 * @q_vector: pointer to q_vector containing needed info
 * @budget: maximum number of packets to hand to the stack
 *
 * Walks the rx ring from next_to_clean while descriptors have the DD bit
 * set.  Each descriptor's header buffer and page buffer are attached to
 * the skb; multi-descriptor packets are chained by passing the skb on to
 * the next buffer.  Complete packets are time stamped, checksum checked,
 * VLAN tagged and passed to napi_gro_receive().  Buffers are given back
 * to the hardware in batches of IGB_RX_BUFFER_WRITE.
 *
 * Returns true if budget remains when the loop ends, false if it was
 * used up.
 **/
static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
{
	struct igb_ring *rx_ring = q_vector->rx.ring;
	union e1000_adv_rx_desc *rx_desc;
	const int current_node = numa_node_id();
	unsigned int total_bytes = 0, total_packets = 0;
	/* start from the descriptors that are already unused */
	u16 cleaned_count = igb_desc_unused(rx_ring);
	u16 i = rx_ring->next_to_clean;

	rx_desc = IGB_RX_DESC(rx_ring, i);

	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
		struct sk_buff *skb = buffer_info->skb;
		union e1000_adv_rx_desc *next_rxd;

		buffer_info->skb = NULL;
		prefetch(skb->data);

		/* advance i to the following descriptor, wrapping at count */
		i++;
		if (i == rx_ring->count)
			i = 0;

		next_rxd = IGB_RX_DESC(rx_ring, i);
		prefetch(next_rxd);

		/*
		 * This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * RXD_STAT_DD bit is set
		 */
		rmb();

		/*
		 * skb has no page data attached yet: account for the
		 * header bytes and release the header buffer mapping
		 */
		if (!skb_is_nonlinear(skb)) {
			__skb_put(skb, igb_get_hlen(rx_desc));
			dma_unmap_single(rx_ring->dev, buffer_info->dma,
					 IGB_RX_HDR_LEN,
					 DMA_FROM_DEVICE);
			buffer_info->dma = 0;
		}

		/* non-zero length: data was placed in the page buffer */
		if (rx_desc->wb.upper.length) {
			u16 length = le16_to_cpu(rx_desc->wb.upper.length);

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
						buffer_info->page,
						buffer_info->page_offset,
						length);

			skb->len += length;
			skb->data_len += length;
			skb->truesize += length;

			/*
			 * keep the page for reuse (taking an extra
			 * reference) only if nobody else holds it and it
			 * is on the current NUMA node; otherwise forget it
			 */
			if ((page_count(buffer_info->page) != 1) ||
			    (page_to_nid(buffer_info->page) != current_node))
				buffer_info->page = NULL;
			else
				get_page(buffer_info->page);

			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;
		}

		/*
		 * packet continues in the next descriptor: swap skbs so the
		 * next buffer carries this partial skb and this buffer
		 * takes over the next buffer's unused skb and mapping
		 */
		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
			struct igb_rx_buffer *next_buffer;
			next_buffer = &rx_ring->rx_buffer_info[i];
			buffer_info->skb = next_buffer->skb;
			buffer_info->dma = next_buffer->dma;
			next_buffer->skb = skb;
			next_buffer->dma = 0;
			goto next_desc;
		}

		/* drop frames the hardware flagged with an error */
		if (igb_test_staterr(rx_desc,
				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
			dev_kfree_skb_any(skb);
			goto next_desc;
		}

		igb_rx_hwtstamp(q_vector, rx_desc, skb);
		igb_rx_checksum(rx_ring, rx_desc, skb);

		if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);

			__vlan_hwaccel_put_tag(skb, vid);
		}

		total_bytes += skb->len;
		total_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		napi_gro_receive(&q_vector->napi, skb);

		/* only packets handed to the stack consume budget */
		budget--;
next_desc:
		if (!budget)
			break;

		cleaned_count++;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* use prefetched values */
		rx_desc = next_rxd;
	}

	rx_ring->next_to_clean = i;
	u64_stats_update_begin(&rx_ring->rx_syncp);
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->rx_syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

	/* hand back whatever is left over from the last batch */
	if (cleaned_count)
		igb_alloc_rx_buffers(rx_ring, cleaned_count);

	return !!budget;
}
6006
6007static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6008				 struct igb_rx_buffer *bi)
6009{
6010	struct sk_buff *skb = bi->skb;
6011	dma_addr_t dma = bi->dma;
6012
6013	if (dma)
6014		return true;
6015
6016	if (likely(!skb)) {
6017		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6018						IGB_RX_HDR_LEN);
6019		bi->skb = skb;
6020		if (!skb) {
6021			rx_ring->rx_stats.alloc_failed++;
6022			return false;
6023		}
6024
6025		/* initialize skb for ring */
6026		skb_record_rx_queue(skb, rx_ring->queue_index);
6027	}
6028
6029	dma = dma_map_single(rx_ring->dev, skb->data,
6030			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6031
6032	if (dma_mapping_error(rx_ring->dev, dma)) {
6033		rx_ring->rx_stats.alloc_failed++;
6034		return false;
6035	}
6036
6037	bi->dma = dma;
6038	return true;
6039}
6040
6041static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6042				  struct igb_rx_buffer *bi)
6043{
6044	struct page *page = bi->page;
6045	dma_addr_t page_dma = bi->page_dma;
6046	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6047
6048	if (page_dma)
6049		return true;
6050
6051	if (!page) {
6052		page = netdev_alloc_page(rx_ring->netdev);
6053		bi->page = page;
6054		if (unlikely(!page)) {
6055			rx_ring->rx_stats.alloc_failed++;
6056			return false;
6057		}
6058	}
6059
6060	page_dma = dma_map_page(rx_ring->dev, page,
6061				page_offset, PAGE_SIZE / 2,
6062				DMA_FROM_DEVICE);
6063
6064	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6065		rx_ring->rx_stats.alloc_failed++;
6066		return false;
6067	}
6068
6069	bi->page_dma = page_dma;
6070	bi->page_offset = page_offset;
6071	return true;
6072}
6073
6074/**
6075 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6076 * @adapter: address of board private structure
6077 **/
6078void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6079{
6080	union e1000_adv_rx_desc *rx_desc;
6081	struct igb_rx_buffer *bi;
6082	u16 i = rx_ring->next_to_use;
6083
6084	rx_desc = IGB_RX_DESC(rx_ring, i);
6085	bi = &rx_ring->rx_buffer_info[i];
6086	i -= rx_ring->count;
6087
6088	while (cleaned_count--) {
6089		if (!igb_alloc_mapped_skb(rx_ring, bi))
6090			break;
6091
6092		/* Refresh the desc even if buffer_addrs didn't change
6093		 * because each write-back erases this info. */
6094		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6095
6096		if (!igb_alloc_mapped_page(rx_ring, bi))
6097			break;
6098
6099		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6100
6101		rx_desc++;
6102		bi++;
6103		i++;
6104		if (unlikely(!i)) {
6105			rx_desc = IGB_RX_DESC(rx_ring, 0);
6106			bi = rx_ring->rx_buffer_info;
6107			i -= rx_ring->count;
6108		}
6109
6110		/* clear the hdr_addr for the next_to_use descriptor */
6111		rx_desc->read.hdr_addr = 0;
6112	}
6113
6114	i += rx_ring->count;
6115
6116	if (rx_ring->next_to_use != i) {
6117		rx_ring->next_to_use = i;
6118
6119		/* Force memory writes to complete before letting h/w
6120		 * know there are new descriptors to fetch.  (Only
6121		 * applicable for weak-ordered memory model archs,
6122		 * such as IA-64). */
6123		wmb();
6124		writel(i, rx_ring->tail);
6125	}
6126}
6127
6128/**
6129 * igb_mii_ioctl -
6130 * @netdev:
6131 * @ifreq:
6132 * @cmd:
6133 **/
6134static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6135{
6136	struct igb_adapter *adapter = netdev_priv(netdev);
6137	struct mii_ioctl_data *data = if_mii(ifr);
6138
6139	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6140		return -EOPNOTSUPP;
6141
6142	switch (cmd) {
6143	case SIOCGMIIPHY:
6144		data->phy_id = adapter->hw.phy.addr;
6145		break;
6146	case SIOCGMIIREG:
6147		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6148		                     &data->val_out))
6149			return -EIO;
6150		break;
6151	case SIOCSMIIREG:
6152	default:
6153		return -EOPNOTSUPP;
6154	}
6155	return 0;
6156}
6157
6158/**
6159 * igb_hwtstamp_ioctl - control hardware time stamping
6160 * @netdev:
6161 * @ifreq:
6162 * @cmd:
6163 *
6164 * Outgoing time stamping can be enabled and disabled. Play nice and
6165 * disable it when requested, although it shouldn't case any overhead
6166 * when no packet needs it. At most one packet in the queue may be
6167 * marked for time stamping, otherwise it would be impossible to tell
6168 * for sure to which packet the hardware time stamp belongs.
6169 *
6170 * Incoming time stamping has to be configured via the hardware
6171 * filters. Not all combinations are supported, in particular event
6172 * type has to be specified. Matching the kind of event packet is
6173 * not supported, with the exception of "all V2 events regardless of
6174 * level 2 or 4".
6175 *
6176 **/
6177static int igb_hwtstamp_ioctl(struct net_device *netdev,
6178			      struct ifreq *ifr, int cmd)
6179{
6180	struct igb_adapter *adapter = netdev_priv(netdev);
6181	struct e1000_hw *hw = &adapter->hw;
6182	struct hwtstamp_config config;
6183	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6184	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6185	u32 tsync_rx_cfg = 0;
6186	bool is_l4 = false;
6187	bool is_l2 = false;
6188	u32 regval;
6189
6190	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6191		return -EFAULT;
6192
6193	/* reserved for future extensions */
6194	if (config.flags)
6195		return -EINVAL;
6196
6197	switch (config.tx_type) {
6198	case HWTSTAMP_TX_OFF:
6199		tsync_tx_ctl = 0;
6200	case HWTSTAMP_TX_ON:
6201		break;
6202	default:
6203		return -ERANGE;
6204	}
6205
6206	switch (config.rx_filter) {
6207	case HWTSTAMP_FILTER_NONE:
6208		tsync_rx_ctl = 0;
6209		break;
6210	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6211	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6212	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6213	case HWTSTAMP_FILTER_ALL:
6214		/*
6215		 * register TSYNCRXCFG must be set, therefore it is not
6216		 * possible to time stamp both Sync and Delay_Req messages
6217		 * => fall back to time stamping all packets
6218		 */
6219		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6220		config.rx_filter = HWTSTAMP_FILTER_ALL;
6221		break;
6222	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6223		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6224		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6225		is_l4 = true;
6226		break;
6227	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6228		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6229		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6230		is_l4 = true;
6231		break;
6232	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6233	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6234		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6235		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6236		is_l2 = true;
6237		is_l4 = true;
6238		config.rx_filter = HWTSTAMP_FILTER_SOME;
6239		break;
6240	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6241	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6242		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6243		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6244		is_l2 = true;
6245		is_l4 = true;
6246		config.rx_filter = HWTSTAMP_FILTER_SOME;
6247		break;
6248	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6249	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6250	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6251		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6252		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6253		is_l2 = true;
6254		break;
6255	default:
6256		return -ERANGE;
6257	}
6258
6259	if (hw->mac.type == e1000_82575) {
6260		if (tsync_rx_ctl | tsync_tx_ctl)
6261			return -EINVAL;
6262		return 0;
6263	}
6264
6265	/*
6266	 * Per-packet timestamping only works if all packets are
6267	 * timestamped, so enable timestamping in all packets as
6268	 * long as one rx filter was configured.
6269	 */
6270	if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6271		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6272		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6273	}
6274
6275	/* enable/disable TX */
6276	regval = rd32(E1000_TSYNCTXCTL);
6277	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6278	regval |= tsync_tx_ctl;
6279	wr32(E1000_TSYNCTXCTL, regval);
6280
6281	/* enable/disable RX */
6282	regval = rd32(E1000_TSYNCRXCTL);
6283	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6284	regval |= tsync_rx_ctl;
6285	wr32(E1000_TSYNCRXCTL, regval);
6286
6287	/* define which PTP packets are time stamped */
6288	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6289
6290	/* define ethertype filter for timestamped packets */
6291	if (is_l2)
6292		wr32(E1000_ETQF(3),
6293		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6294		                 E1000_ETQF_1588 | /* enable timestamping */
6295		                 ETH_P_1588));     /* 1588 eth protocol type */
6296	else
6297		wr32(E1000_ETQF(3), 0);
6298
6299#define PTP_PORT 319
6300	/* L4 Queue Filter[3]: filter by destination port and protocol */
6301	if (is_l4) {
6302		u32 ftqf = (IPPROTO_UDP /* UDP */
6303			| E1000_FTQF_VF_BP /* VF not compared */
6304			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6305			| E1000_FTQF_MASK); /* mask all inputs */
6306		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6307
6308		wr32(E1000_IMIR(3), htons(PTP_PORT));
6309		wr32(E1000_IMIREXT(3),
6310		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6311		if (hw->mac.type == e1000_82576) {
6312			/* enable source port check */
6313			wr32(E1000_SPQF(3), htons(PTP_PORT));
6314			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6315		}
6316		wr32(E1000_FTQF(3), ftqf);
6317	} else {
6318		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6319	}
6320	wrfl();
6321
6322	adapter->hwtstamp_config = config;
6323
6324	/* clear TX/RX time stamp registers, just to be sure */
6325	regval = rd32(E1000_TXSTMPH);
6326	regval = rd32(E1000_RXSTMPH);
6327
6328	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6329		-EFAULT : 0;
6330}
6331
6332/**
6333 * igb_ioctl -
6334 * @netdev:
6335 * @ifreq:
6336 * @cmd:
6337 **/
6338static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6339{
6340	switch (cmd) {
6341	case SIOCGMIIPHY:
6342	case SIOCGMIIREG:
6343	case SIOCSMIIREG:
6344		return igb_mii_ioctl(netdev, ifr, cmd);
6345	case SIOCSHWTSTAMP:
6346		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6347	default:
6348		return -EOPNOTSUPP;
6349	}
6350}
6351
6352s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6353{
6354	struct igb_adapter *adapter = hw->back;
6355	u16 cap_offset;
6356
6357	cap_offset = adapter->pdev->pcie_cap;
6358	if (!cap_offset)
6359		return -E1000_ERR_CONFIG;
6360
6361	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6362
6363	return 0;
6364}
6365
6366s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6367{
6368	struct igb_adapter *adapter = hw->back;
6369	u16 cap_offset;
6370
6371	cap_offset = adapter->pdev->pcie_cap;
6372	if (!cap_offset)
6373		return -E1000_ERR_CONFIG;
6374
6375	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6376
6377	return 0;
6378}
6379
6380static void igb_vlan_mode(struct net_device *netdev, u32 features)
6381{
6382	struct igb_adapter *adapter = netdev_priv(netdev);
6383	struct e1000_hw *hw = &adapter->hw;
6384	u32 ctrl, rctl;
6385	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6386
6387	if (enable) {
6388		/* enable VLAN tag insert/strip */
6389		ctrl = rd32(E1000_CTRL);
6390		ctrl |= E1000_CTRL_VME;
6391		wr32(E1000_CTRL, ctrl);
6392
6393		/* Disable CFI check */
6394		rctl = rd32(E1000_RCTL);
6395		rctl &= ~E1000_RCTL_CFIEN;
6396		wr32(E1000_RCTL, rctl);
6397	} else {
6398		/* disable VLAN tag insert/strip */
6399		ctrl = rd32(E1000_CTRL);
6400		ctrl &= ~E1000_CTRL_VME;
6401		wr32(E1000_CTRL, ctrl);
6402	}
6403
6404	igb_rlpml_set(adapter);
6405}
6406
6407static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6408{
6409	struct igb_adapter *adapter = netdev_priv(netdev);
6410	struct e1000_hw *hw = &adapter->hw;
6411	int pf_id = adapter->vfs_allocated_count;
6412
6413	/* attempt to add filter to vlvf array */
6414	igb_vlvf_set(adapter, vid, true, pf_id);
6415
6416	/* add the filter since PF can receive vlans w/o entry in vlvf */
6417	igb_vfta_set(hw, vid, true);
6418
6419	set_bit(vid, adapter->active_vlans);
6420}
6421
6422static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6423{
6424	struct igb_adapter *adapter = netdev_priv(netdev);
6425	struct e1000_hw *hw = &adapter->hw;
6426	int pf_id = adapter->vfs_allocated_count;
6427	s32 err;
6428
6429	/* remove vlan from VLVF table array */
6430	err = igb_vlvf_set(adapter, vid, false, pf_id);
6431
6432	/* if vid was not present in VLVF just remove it from table */
6433	if (err)
6434		igb_vfta_set(hw, vid, false);
6435
6436	clear_bit(vid, adapter->active_vlans);
6437}
6438
6439static void igb_restore_vlan(struct igb_adapter *adapter)
6440{
6441	u16 vid;
6442
6443	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6444
6445	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6446		igb_vlan_rx_add_vid(adapter->netdev, vid);
6447}
6448
6449int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6450{
6451	struct pci_dev *pdev = adapter->pdev;
6452	struct e1000_mac_info *mac = &adapter->hw.mac;
6453
6454	mac->autoneg = 0;
6455
6456	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6457	 * for the switch() below to work */
6458	if ((spd & 1) || (dplx & ~1))
6459		goto err_inval;
6460
6461	/* Fiber NIC's only allow 1000 Gbps Full duplex */
6462	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6463	    spd != SPEED_1000 &&
6464	    dplx != DUPLEX_FULL)
6465		goto err_inval;
6466
6467	switch (spd + dplx) {
6468	case SPEED_10 + DUPLEX_HALF:
6469		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6470		break;
6471	case SPEED_10 + DUPLEX_FULL:
6472		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6473		break;
6474	case SPEED_100 + DUPLEX_HALF:
6475		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6476		break;
6477	case SPEED_100 + DUPLEX_FULL:
6478		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6479		break;
6480	case SPEED_1000 + DUPLEX_FULL:
6481		mac->autoneg = 1;
6482		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6483		break;
6484	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6485	default:
6486		goto err_inval;
6487	}
6488	return 0;
6489
6490err_inval:
6491	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6492	return -EINVAL;
6493}
6494
6495static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6496{
6497	struct net_device *netdev = pci_get_drvdata(pdev);
6498	struct igb_adapter *adapter = netdev_priv(netdev);
6499	struct e1000_hw *hw = &adapter->hw;
6500	u32 ctrl, rctl, status;
6501	u32 wufc = adapter->wol;
6502#ifdef CONFIG_PM
6503	int retval = 0;
6504#endif
6505
6506	netif_device_detach(netdev);
6507
6508	if (netif_running(netdev))
6509		igb_close(netdev);
6510
6511	igb_clear_interrupt_scheme(adapter);
6512
6513#ifdef CONFIG_PM
6514	retval = pci_save_state(pdev);
6515	if (retval)
6516		return retval;
6517#endif
6518
6519	status = rd32(E1000_STATUS);
6520	if (status & E1000_STATUS_LU)
6521		wufc &= ~E1000_WUFC_LNKC;
6522
6523	if (wufc) {
6524		igb_setup_rctl(adapter);
6525		igb_set_rx_mode(netdev);
6526
6527		/* turn on all-multi mode if wake on multicast is enabled */
6528		if (wufc & E1000_WUFC_MC) {
6529			rctl = rd32(E1000_RCTL);
6530			rctl |= E1000_RCTL_MPE;
6531			wr32(E1000_RCTL, rctl);
6532		}
6533
6534		ctrl = rd32(E1000_CTRL);
6535		/* advertise wake from D3Cold */
6536		#define E1000_CTRL_ADVD3WUC 0x00100000
6537		/* phy power management enable */
6538		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6539		ctrl |= E1000_CTRL_ADVD3WUC;
6540		wr32(E1000_CTRL, ctrl);
6541
6542		/* Allow time for pending master requests to run */
6543		igb_disable_pcie_master(hw);
6544
6545		wr32(E1000_WUC, E1000_WUC_PME_EN);
6546		wr32(E1000_WUFC, wufc);
6547	} else {
6548		wr32(E1000_WUC, 0);
6549		wr32(E1000_WUFC, 0);
6550	}
6551
6552	*enable_wake = wufc || adapter->en_mng_pt;
6553	if (!*enable_wake)
6554		igb_power_down_link(adapter);
6555	else
6556		igb_power_up_link(adapter);
6557
6558	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6559	 * would have already happened in close and is redundant. */
6560	igb_release_hw_control(adapter);
6561
6562	pci_disable_device(pdev);
6563
6564	return 0;
6565}
6566
6567#ifdef CONFIG_PM
6568static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6569{
6570	int retval;
6571	bool wake;
6572
6573	retval = __igb_shutdown(pdev, &wake);
6574	if (retval)
6575		return retval;
6576
6577	if (wake) {
6578		pci_prepare_to_sleep(pdev);
6579	} else {
6580		pci_wake_from_d3(pdev, false);
6581		pci_set_power_state(pdev, PCI_D3hot);
6582	}
6583
6584	return 0;
6585}
6586
6587static int igb_resume(struct pci_dev *pdev)
6588{
6589	struct net_device *netdev = pci_get_drvdata(pdev);
6590	struct igb_adapter *adapter = netdev_priv(netdev);
6591	struct e1000_hw *hw = &adapter->hw;
6592	u32 err;
6593
6594	pci_set_power_state(pdev, PCI_D0);
6595	pci_restore_state(pdev);
6596	pci_save_state(pdev);
6597
6598	err = pci_enable_device_mem(pdev);
6599	if (err) {
6600		dev_err(&pdev->dev,
6601			"igb: Cannot enable PCI device from suspend\n");
6602		return err;
6603	}
6604	pci_set_master(pdev);
6605
6606	pci_enable_wake(pdev, PCI_D3hot, 0);
6607	pci_enable_wake(pdev, PCI_D3cold, 0);
6608
6609	if (igb_init_interrupt_scheme(adapter)) {
6610		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6611		return -ENOMEM;
6612	}
6613
6614	igb_reset(adapter);
6615
6616	/* let the f/w know that the h/w is now under the control of the
6617	 * driver. */
6618	igb_get_hw_control(adapter);
6619
6620	wr32(E1000_WUS, ~0);
6621
6622	if (netif_running(netdev)) {
6623		err = igb_open(netdev);
6624		if (err)
6625			return err;
6626	}
6627
6628	netif_device_attach(netdev);
6629
6630	return 0;
6631}
6632#endif
6633
6634static void igb_shutdown(struct pci_dev *pdev)
6635{
6636	bool wake;
6637
6638	__igb_shutdown(pdev, &wake);
6639
6640	if (system_state == SYSTEM_POWER_OFF) {
6641		pci_wake_from_d3(pdev, wake);
6642		pci_set_power_state(pdev, PCI_D3hot);
6643	}
6644}
6645
6646#ifdef CONFIG_NET_POLL_CONTROLLER
6647/*
6648 * Polling 'interrupt' - used by things like netconsole to send skbs
6649 * without having to re-enable interrupts. It's not called while
6650 * the interrupt routine is executing.
6651 */
6652static void igb_netpoll(struct net_device *netdev)
6653{
6654	struct igb_adapter *adapter = netdev_priv(netdev);
6655	struct e1000_hw *hw = &adapter->hw;
6656	int i;
6657
6658	if (!adapter->msix_entries) {
6659		struct igb_q_vector *q_vector = adapter->q_vector[0];
6660		igb_irq_disable(adapter);
6661		napi_schedule(&q_vector->napi);
6662		return;
6663	}
6664
6665	for (i = 0; i < adapter->num_q_vectors; i++) {
6666		struct igb_q_vector *q_vector = adapter->q_vector[i];
6667		wr32(E1000_EIMC, q_vector->eims_value);
6668		napi_schedule(&q_vector->napi);
6669	}
6670}
6671#endif /* CONFIG_NET_POLL_CONTROLLER */
6672
6673/**
6674 * igb_io_error_detected - called when PCI error is detected
6675 * @pdev: Pointer to PCI device
6676 * @state: The current pci connection state
6677 *
6678 * This function is called after a PCI bus error affecting
6679 * this device has been detected.
6680 */
6681static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6682					      pci_channel_state_t state)
6683{
6684	struct net_device *netdev = pci_get_drvdata(pdev);
6685	struct igb_adapter *adapter = netdev_priv(netdev);
6686
6687	netif_device_detach(netdev);
6688
6689	if (state == pci_channel_io_perm_failure)
6690		return PCI_ERS_RESULT_DISCONNECT;
6691
6692	if (netif_running(netdev))
6693		igb_down(adapter);
6694	pci_disable_device(pdev);
6695
6696	/* Request a slot slot reset. */
6697	return PCI_ERS_RESULT_NEED_RESET;
6698}
6699
6700/**
6701 * igb_io_slot_reset - called after the pci bus has been reset.
6702 * @pdev: Pointer to PCI device
6703 *
6704 * Restart the card from scratch, as if from a cold-boot. Implementation
6705 * resembles the first-half of the igb_resume routine.
6706 */
6707static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6708{
6709	struct net_device *netdev = pci_get_drvdata(pdev);
6710	struct igb_adapter *adapter = netdev_priv(netdev);
6711	struct e1000_hw *hw = &adapter->hw;
6712	pci_ers_result_t result;
6713	int err;
6714
6715	if (pci_enable_device_mem(pdev)) {
6716		dev_err(&pdev->dev,
6717			"Cannot re-enable PCI device after reset.\n");
6718		result = PCI_ERS_RESULT_DISCONNECT;
6719	} else {
6720		pci_set_master(pdev);
6721		pci_restore_state(pdev);
6722		pci_save_state(pdev);
6723
6724		pci_enable_wake(pdev, PCI_D3hot, 0);
6725		pci_enable_wake(pdev, PCI_D3cold, 0);
6726
6727		igb_reset(adapter);
6728		wr32(E1000_WUS, ~0);
6729		result = PCI_ERS_RESULT_RECOVERED;
6730	}
6731
6732	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6733	if (err) {
6734		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6735		        "failed 0x%0x\n", err);
6736		/* non-fatal, continue */
6737	}
6738
6739	return result;
6740}
6741
6742/**
6743 * igb_io_resume - called when traffic can start flowing again.
6744 * @pdev: Pointer to PCI device
6745 *
6746 * This callback is called when the error recovery driver tells us that
6747 * its OK to resume normal operation. Implementation resembles the
6748 * second-half of the igb_resume routine.
6749 */
6750static void igb_io_resume(struct pci_dev *pdev)
6751{
6752	struct net_device *netdev = pci_get_drvdata(pdev);
6753	struct igb_adapter *adapter = netdev_priv(netdev);
6754
6755	if (netif_running(netdev)) {
6756		if (igb_up(adapter)) {
6757			dev_err(&pdev->dev, "igb_up failed after reset\n");
6758			return;
6759		}
6760	}
6761
6762	netif_device_attach(netdev);
6763
6764	/* let the f/w know that the h/w is now under the control of the
6765	 * driver. */
6766	igb_get_hw_control(adapter);
6767}
6768
6769static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6770                             u8 qsel)
6771{
6772	u32 rar_low, rar_high;
6773	struct e1000_hw *hw = &adapter->hw;
6774
6775	/* HW expects these in little endian so we reverse the byte order
6776	 * from network order (big endian) to little endian
6777	 */
6778	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6779	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6780	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6781
6782	/* Indicate to hardware the Address is Valid. */
6783	rar_high |= E1000_RAH_AV;
6784
6785	if (hw->mac.type == e1000_82575)
6786		rar_high |= E1000_RAH_POOL_1 * qsel;
6787	else
6788		rar_high |= E1000_RAH_POOL_1 << qsel;
6789
6790	wr32(E1000_RAL(index), rar_low);
6791	wrfl();
6792	wr32(E1000_RAH(index), rar_high);
6793	wrfl();
6794}
6795
6796static int igb_set_vf_mac(struct igb_adapter *adapter,
6797                          int vf, unsigned char *mac_addr)
6798{
6799	struct e1000_hw *hw = &adapter->hw;
6800	/* VF MAC addresses start at end of receive addresses and moves
6801	 * torwards the first, as a result a collision should not be possible */
6802	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6803
6804	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6805
6806	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6807
6808	return 0;
6809}
6810
6811static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6812{
6813	struct igb_adapter *adapter = netdev_priv(netdev);
6814	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6815		return -EINVAL;
6816	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6817	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6818	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6819				      " change effective.");
6820	if (test_bit(__IGB_DOWN, &adapter->state)) {
6821		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6822			 " but the PF device is not up.\n");
6823		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6824			 " attempting to use the VF device.\n");
6825	}
6826	return igb_set_vf_mac(adapter, vf, mac);
6827}
6828
6829static int igb_link_mbps(int internal_link_speed)
6830{
6831	switch (internal_link_speed) {
6832	case SPEED_100:
6833		return 100;
6834	case SPEED_1000:
6835		return 1000;
6836	default:
6837		return 0;
6838	}
6839}
6840
6841static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6842				  int link_speed)
6843{
6844	int rf_dec, rf_int;
6845	u32 bcnrc_val;
6846
6847	if (tx_rate != 0) {
6848		/* Calculate the rate factor values to set */
6849		rf_int = link_speed / tx_rate;
6850		rf_dec = (link_speed - (rf_int * tx_rate));
6851		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6852
6853		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6854		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6855		               E1000_RTTBCNRC_RF_INT_MASK);
6856		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6857	} else {
6858		bcnrc_val = 0;
6859	}
6860
6861	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6862	wr32(E1000_RTTBCNRC, bcnrc_val);
6863}
6864
6865static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6866{
6867	int actual_link_speed, i;
6868	bool reset_rate = false;
6869
6870	/* VF TX rate limit was not set or not supported */
6871	if ((adapter->vf_rate_link_speed == 0) ||
6872	    (adapter->hw.mac.type != e1000_82576))
6873		return;
6874
6875	actual_link_speed = igb_link_mbps(adapter->link_speed);
6876	if (actual_link_speed != adapter->vf_rate_link_speed) {
6877		reset_rate = true;
6878		adapter->vf_rate_link_speed = 0;
6879		dev_info(&adapter->pdev->dev,
6880		         "Link speed has been changed. VF Transmit "
6881		         "rate is disabled\n");
6882	}
6883
6884	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6885		if (reset_rate)
6886			adapter->vf_data[i].tx_rate = 0;
6887
6888		igb_set_vf_rate_limit(&adapter->hw, i,
6889		                      adapter->vf_data[i].tx_rate,
6890		                      actual_link_speed);
6891	}
6892}
6893
6894static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6895{
6896	struct igb_adapter *adapter = netdev_priv(netdev);
6897	struct e1000_hw *hw = &adapter->hw;
6898	int actual_link_speed;
6899
6900	if (hw->mac.type != e1000_82576)
6901		return -EOPNOTSUPP;
6902
6903	actual_link_speed = igb_link_mbps(adapter->link_speed);
6904	if ((vf >= adapter->vfs_allocated_count) ||
6905	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6906	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6907		return -EINVAL;
6908
6909	adapter->vf_rate_link_speed = actual_link_speed;
6910	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6911	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6912
6913	return 0;
6914}
6915
6916static int igb_ndo_get_vf_config(struct net_device *netdev,
6917				 int vf, struct ifla_vf_info *ivi)
6918{
6919	struct igb_adapter *adapter = netdev_priv(netdev);
6920	if (vf >= adapter->vfs_allocated_count)
6921		return -EINVAL;
6922	ivi->vf = vf;
6923	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6924	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6925	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6926	ivi->qos = adapter->vf_data[vf].pf_qos;
6927	return 0;
6928}
6929
6930static void igb_vmm_control(struct igb_adapter *adapter)
6931{
6932	struct e1000_hw *hw = &adapter->hw;
6933	u32 reg;
6934
6935	switch (hw->mac.type) {
6936	case e1000_82575:
6937	default:
6938		/* replication is not supported for 82575 */
6939		return;
6940	case e1000_82576:
6941		/* notify HW that the MAC is adding vlan tags */
6942		reg = rd32(E1000_DTXCTL);
6943		reg |= E1000_DTXCTL_VLAN_ADDED;
6944		wr32(E1000_DTXCTL, reg);
6945	case e1000_82580:
6946		/* enable replication vlan tag stripping */
6947		reg = rd32(E1000_RPLOLR);
6948		reg |= E1000_RPLOLR_STRVLAN;
6949		wr32(E1000_RPLOLR, reg);
6950	case e1000_i350:
6951		/* none of the above registers are supported by i350 */
6952		break;
6953	}
6954
6955	if (adapter->vfs_allocated_count) {
6956		igb_vmdq_set_loopback_pf(hw, true);
6957		igb_vmdq_set_replication_pf(hw, true);
6958		igb_vmdq_set_anti_spoofing_pf(hw, true,
6959						adapter->vfs_allocated_count);
6960	} else {
6961		igb_vmdq_set_loopback_pf(hw, false);
6962		igb_vmdq_set_replication_pf(hw, false);
6963	}
6964}
6965
6966/* igb_main.c */
6967