igb_main.c revision 01789349ee52e4a3faf376f1485303d9723c4f1f
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2011 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/bitops.h>
32#include <linux/vmalloc.h>
33#include <linux/pagemap.h>
34#include <linux/netdevice.h>
35#include <linux/ipv6.h>
36#include <linux/slab.h>
37#include <net/checksum.h>
38#include <net/ip6_checksum.h>
39#include <linux/net_tstamp.h>
40#include <linux/mii.h>
41#include <linux/ethtool.h>
42#include <linux/if.h>
43#include <linux/if_vlan.h>
44#include <linux/pci.h>
45#include <linux/pci-aspm.h>
46#include <linux/delay.h>
47#include <linux/interrupt.h>
48#include <linux/if_ether.h>
49#include <linux/aer.h>
50#include <linux/prefetch.h>
51#ifdef CONFIG_IGB_DCA
52#include <linux/dca.h>
53#endif
54#include "igb.h"
55
56#define MAJ 3
57#define MIN 0
58#define BUILD 6
59#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
60__stringify(BUILD) "-k"
61char igb_driver_name[] = "igb";
62char igb_driver_version[] = DRV_VERSION;
63static const char igb_driver_string[] =
64				"Intel(R) Gigabit Ethernet Network Driver";
65static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66
67static const struct e1000_info *igb_info_tbl[] = {
68	[board_82575] = &e1000_82575_info,
69};
70
71static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
72	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
73	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
74	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
75	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
76	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
77	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
97	/* required last entry */
98	{0, }
99};
100
101MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102
103void igb_reset(struct igb_adapter *);
104static int igb_setup_all_tx_resources(struct igb_adapter *);
105static int igb_setup_all_rx_resources(struct igb_adapter *);
106static void igb_free_all_tx_resources(struct igb_adapter *);
107static void igb_free_all_rx_resources(struct igb_adapter *);
108static void igb_setup_mrqc(struct igb_adapter *);
109static int igb_probe(struct pci_dev *, const struct pci_device_id *);
110static void __devexit igb_remove(struct pci_dev *pdev);
111static void igb_init_hw_timer(struct igb_adapter *adapter);
112static int igb_sw_init(struct igb_adapter *);
113static int igb_open(struct net_device *);
114static int igb_close(struct net_device *);
115static void igb_configure_tx(struct igb_adapter *);
116static void igb_configure_rx(struct igb_adapter *);
117static void igb_clean_all_tx_rings(struct igb_adapter *);
118static void igb_clean_all_rx_rings(struct igb_adapter *);
119static void igb_clean_tx_ring(struct igb_ring *);
120static void igb_clean_rx_ring(struct igb_ring *);
121static void igb_set_rx_mode(struct net_device *);
122static void igb_update_phy_info(unsigned long);
123static void igb_watchdog(unsigned long);
124static void igb_watchdog_task(struct work_struct *);
125static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
126static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
127						 struct rtnl_link_stats64 *stats);
128static int igb_change_mtu(struct net_device *, int);
129static int igb_set_mac(struct net_device *, void *);
130static void igb_set_uta(struct igb_adapter *adapter);
131static irqreturn_t igb_intr(int irq, void *);
132static irqreturn_t igb_intr_msi(int irq, void *);
133static irqreturn_t igb_msix_other(int irq, void *);
134static irqreturn_t igb_msix_ring(int irq, void *);
135#ifdef CONFIG_IGB_DCA
136static void igb_update_dca(struct igb_q_vector *);
137static void igb_setup_dca(struct igb_adapter *);
138#endif /* CONFIG_IGB_DCA */
139static bool igb_clean_tx_irq(struct igb_q_vector *);
140static int igb_poll(struct napi_struct *, int);
141static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
142static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
143static void igb_tx_timeout(struct net_device *);
144static void igb_reset_task(struct work_struct *);
145static void igb_vlan_mode(struct net_device *netdev, u32 features);
146static void igb_vlan_rx_add_vid(struct net_device *, u16);
147static void igb_vlan_rx_kill_vid(struct net_device *, u16);
148static void igb_restore_vlan(struct igb_adapter *);
149static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
150static void igb_ping_all_vfs(struct igb_adapter *);
151static void igb_msg_task(struct igb_adapter *);
152static void igb_vmm_control(struct igb_adapter *);
153static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
154static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
155static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
156static int igb_ndo_set_vf_vlan(struct net_device *netdev,
157			       int vf, u16 vlan, u8 qos);
158static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
159static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
160				 struct ifla_vf_info *ivi);
161static void igb_check_vf_rate_limit(struct igb_adapter *);
162
163#ifdef CONFIG_PM
164static int igb_suspend(struct pci_dev *, pm_message_t);
165static int igb_resume(struct pci_dev *);
166#endif
167static void igb_shutdown(struct pci_dev *);
168#ifdef CONFIG_IGB_DCA
169static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
170static struct notifier_block dca_notifier = {
171	.notifier_call	= igb_notify_dca,
172	.next		= NULL,
173	.priority	= 0
174};
175#endif
176#ifdef CONFIG_NET_POLL_CONTROLLER
177/* for netdump / net console */
178static void igb_netpoll(struct net_device *);
179#endif
180#ifdef CONFIG_PCI_IOV
181static unsigned int max_vfs = 0;
182module_param(max_vfs, uint, 0);
183MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
184                 "per physical function");
185#endif /* CONFIG_PCI_IOV */
186
187static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
188		     pci_channel_state_t);
189static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
190static void igb_io_resume(struct pci_dev *);
191
192static struct pci_error_handlers igb_err_handler = {
193	.error_detected = igb_io_error_detected,
194	.slot_reset = igb_io_slot_reset,
195	.resume = igb_io_resume,
196};
197
198
199static struct pci_driver igb_driver = {
200	.name     = igb_driver_name,
201	.id_table = igb_pci_tbl,
202	.probe    = igb_probe,
203	.remove   = __devexit_p(igb_remove),
204#ifdef CONFIG_PM
205	/* Power Management Hooks */
206	.suspend  = igb_suspend,
207	.resume   = igb_resume,
208#endif
209	.shutdown = igb_shutdown,
210	.err_handler = &igb_err_handler
211};
212
213MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
214MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
215MODULE_LICENSE("GPL");
216MODULE_VERSION(DRV_VERSION);
217
218struct igb_reg_info {
219	u32 ofs;
220	char *name;
221};
222
223static const struct igb_reg_info igb_reg_info_tbl[] = {
224
225	/* General Registers */
226	{E1000_CTRL, "CTRL"},
227	{E1000_STATUS, "STATUS"},
228	{E1000_CTRL_EXT, "CTRL_EXT"},
229
230	/* Interrupt Registers */
231	{E1000_ICR, "ICR"},
232
233	/* RX Registers */
234	{E1000_RCTL, "RCTL"},
235	{E1000_RDLEN(0), "RDLEN"},
236	{E1000_RDH(0), "RDH"},
237	{E1000_RDT(0), "RDT"},
238	{E1000_RXDCTL(0), "RXDCTL"},
239	{E1000_RDBAL(0), "RDBAL"},
240	{E1000_RDBAH(0), "RDBAH"},
241
242	/* TX Registers */
243	{E1000_TCTL, "TCTL"},
244	{E1000_TDBAL(0), "TDBAL"},
245	{E1000_TDBAH(0), "TDBAH"},
246	{E1000_TDLEN(0), "TDLEN"},
247	{E1000_TDH(0), "TDH"},
248	{E1000_TDT(0), "TDT"},
249	{E1000_TXDCTL(0), "TXDCTL"},
250	{E1000_TDFH, "TDFH"},
251	{E1000_TDFT, "TDFT"},
252	{E1000_TDFHS, "TDFHS"},
253	{E1000_TDFPC, "TDFPC"},
254
255	/* List Terminator */
256	{}
257};
258
259/*
260 * igb_regdump - register printout routine
261 */
262static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263{
264	int n = 0;
265	char rname[16];
266	u32 regs[8];
267
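	/* queue-indexed registers are dumped for the first four queues only */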
268	switch (reginfo->ofs) {
269	case E1000_RDLEN(0):
270		for (n = 0; n < 4; n++)
271			regs[n] = rd32(E1000_RDLEN(n));
272		break;
273	case E1000_RDH(0):
274		for (n = 0; n < 4; n++)
275			regs[n] = rd32(E1000_RDH(n));
276		break;
277	case E1000_RDT(0):
278		for (n = 0; n < 4; n++)
279			regs[n] = rd32(E1000_RDT(n));
280		break;
281	case E1000_RXDCTL(0):
282		for (n = 0; n < 4; n++)
283			regs[n] = rd32(E1000_RXDCTL(n));
284		break;
285	case E1000_RDBAL(0):
286		for (n = 0; n < 4; n++)
287			regs[n] = rd32(E1000_RDBAL(n));
288		break;
289	case E1000_RDBAH(0):
290		for (n = 0; n < 4; n++)
291			regs[n] = rd32(E1000_RDBAH(n));
292		break;
293	case E1000_TDBAL(0):
294		for (n = 0; n < 4; n++)
295			regs[n] = rd32(E1000_TDBAL(n));
296		break;
297	case E1000_TDBAH(0):
298		for (n = 0; n < 4; n++)
299			regs[n] = rd32(E1000_TDBAH(n));
300		break;
301	case E1000_TDLEN(0):
302		for (n = 0; n < 4; n++)
303			regs[n] = rd32(E1000_TDLEN(n));
304		break;
305	case E1000_TDH(0):
306		for (n = 0; n < 4; n++)
307			regs[n] = rd32(E1000_TDH(n));
308		break;
309	case E1000_TDT(0):
310		for (n = 0; n < 4; n++)
311			regs[n] = rd32(E1000_TDT(n));
312		break;
313	case E1000_TXDCTL(0):
314		for (n = 0; n < 4; n++)
315			regs[n] = rd32(E1000_TXDCTL(n));
316		break;
317	default:
318		printk(KERN_INFO "%-15s %08x\n",
319			reginfo->name, rd32(reginfo->ofs));
320		return;
321	}
322
323	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
324	printk(KERN_INFO "%-15s ", rname);
325	for (n = 0; n < 4; n++)
326		printk(KERN_CONT "%08x ", regs[n]);
327	printk(KERN_CONT "\n");
328}
329
330/*
331 * igb_dump - Print registers, tx-rings and rx-rings
332 */
333static void igb_dump(struct igb_adapter *adapter)
334{
335	struct net_device *netdev = adapter->netdev;
336	struct e1000_hw *hw = &adapter->hw;
337	struct igb_reg_info *reginfo;
338	int n = 0;
339	struct igb_ring *tx_ring;
340	union e1000_adv_tx_desc *tx_desc;
341	struct my_u0 { u64 a; u64 b; } *u0;
342	struct igb_buffer *buffer_info;
343	struct igb_ring *rx_ring;
344	union e1000_adv_rx_desc *rx_desc;
345	u32 staterr;
346	int i = 0;
347
348	if (!netif_msg_hw(adapter))
349		return;
350
351	/* Print netdevice Info */
352	if (netdev) {
353		dev_info(&adapter->pdev->dev, "Net device Info\n");
354		printk(KERN_INFO "Device Name     state            "
355			"trans_start      last_rx\n");
356		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
357		netdev->name,
358		netdev->state,
359		netdev->trans_start,
360		netdev->last_rx);
361	}
362
363	/* Print Registers */
364	dev_info(&adapter->pdev->dev, "Register Dump\n");
365	printk(KERN_INFO " Register Name   Value\n");
366	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
367	     reginfo->name; reginfo++) {
368		igb_regdump(hw, reginfo);
369	}
370
371	/* Print TX Ring Summary */
372	if (!netdev || !netif_running(netdev))
373		goto exit;
374
375	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
376	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
377		" leng ntw timestamp\n");
378	for (n = 0; n < adapter->num_tx_queues; n++) {
379		tx_ring = adapter->tx_ring[n];
380		buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
381		printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
382			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
383			   (u64)buffer_info->dma,
384			   buffer_info->length,
385			   buffer_info->next_to_watch,
386			   (u64)buffer_info->time_stamp);
387	}
388
389	/* Print TX Rings */
390	if (!netif_msg_tx_done(adapter))
391		goto rx_ring_summary;
392
393	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394
395	/* Transmit Descriptor Formats
396	 *
397	 * Advanced Transmit Descriptor
398	 *   +--------------------------------------------------------------+
399	 * 0 |         Buffer Address [63:0]                                |
400	 *   +--------------------------------------------------------------+
401	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
402	 *   +--------------------------------------------------------------+
403	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
404	 */
405
406	for (n = 0; n < adapter->num_tx_queues; n++) {
407		tx_ring = adapter->tx_ring[n];
408		printk(KERN_INFO "------------------------------------\n");
409		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
410		printk(KERN_INFO "------------------------------------\n");
411		printk(KERN_INFO "T [desc]     [address 63:0  ] "
412			"[PlPOCIStDDM Ln] [bi->dma       ] "
413			"leng  ntw timestamp        bi->skb\n");
414
415		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
416			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
417			buffer_info = &tx_ring->buffer_info[i];
418			u0 = (struct my_u0 *)tx_desc;
419			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
420				" %04X  %3X %016llX %p", i,
421				le64_to_cpu(u0->a),
422				le64_to_cpu(u0->b),
423				(u64)buffer_info->dma,
424				buffer_info->length,
425				buffer_info->next_to_watch,
426				(u64)buffer_info->time_stamp,
427				buffer_info->skb);
428			if (i == tx_ring->next_to_use &&
429				i == tx_ring->next_to_clean)
430				printk(KERN_CONT " NTC/U\n");
431			else if (i == tx_ring->next_to_use)
432				printk(KERN_CONT " NTU\n");
433			else if (i == tx_ring->next_to_clean)
434				printk(KERN_CONT " NTC\n");
435			else
436				printk(KERN_CONT "\n");
437
438			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
439				print_hex_dump(KERN_INFO, "",
440					DUMP_PREFIX_ADDRESS,
441					16, 1, phys_to_virt(buffer_info->dma),
442					buffer_info->length, true);
443		}
444	}
445
446	/* Print RX Rings Summary */
447rx_ring_summary:
448	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
449	printk(KERN_INFO "Queue [NTU] [NTC]\n");
450	for (n = 0; n < adapter->num_rx_queues; n++) {
451		rx_ring = adapter->rx_ring[n];
452		printk(KERN_INFO " %5d %5X %5X\n", n,
453			   rx_ring->next_to_use, rx_ring->next_to_clean);
454	}
455
456	/* Print RX Rings */
457	if (!netif_msg_rx_status(adapter))
458		goto exit;
459
460	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461
462	/* Advanced Receive Descriptor (Read) Format
463	 *    63                                           1        0
464	 *    +-----------------------------------------------------+
465	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
466	 *    +----------------------------------------------+------+
467	 *  8 |       Header Buffer Address [63:1]           |  DD  |
468	 *    +-----------------------------------------------------+
469	 *
470	 *
471	 * Advanced Receive Descriptor (Write-Back) Format
472	 *
473	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
474	 *   +------------------------------------------------------+
475	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
476	 *   | Checksum   Ident  |   |           |    | Type | Type |
477	 *   +------------------------------------------------------+
478	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
479	 *   +------------------------------------------------------+
480	 *   63       48 47    32 31            20 19               0
481	 */
482
483	for (n = 0; n < adapter->num_rx_queues; n++) {
484		rx_ring = adapter->rx_ring[n];
485		printk(KERN_INFO "------------------------------------\n");
486		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
487		printk(KERN_INFO "------------------------------------\n");
488		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
489			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
490			"<-- Adv Rx Read format\n");
491		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
492			"[vl er S cks ln] ---------------- [bi->skb] "
493			"<-- Adv Rx Write-Back format\n");
494
495		for (i = 0; i < rx_ring->count; i++) {
496			buffer_info = &rx_ring->buffer_info[i];
497			rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
498			u0 = (struct my_u0 *)rx_desc;
499			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
500			if (staterr & E1000_RXD_STAT_DD) {
501				/* Descriptor Done */
502				printk(KERN_INFO "RWB[0x%03X]     %016llX "
503					"%016llX ---------------- %p", i,
504					le64_to_cpu(u0->a),
505					le64_to_cpu(u0->b),
506					buffer_info->skb);
507			} else {
508				printk(KERN_INFO "R  [0x%03X]     %016llX "
509					"%016llX %016llX %p", i,
510					le64_to_cpu(u0->a),
511					le64_to_cpu(u0->b),
512					(u64)buffer_info->dma,
513					buffer_info->skb);
514
515				if (netif_msg_pktdata(adapter)) {
516					print_hex_dump(KERN_INFO, "",
517						DUMP_PREFIX_ADDRESS,
518						16, 1,
519						phys_to_virt(buffer_info->dma),
520						rx_ring->rx_buffer_len, true);
521					if (rx_ring->rx_buffer_len
522						< IGB_RXBUFFER_1024)
523						print_hex_dump(KERN_INFO, "",
524						  DUMP_PREFIX_ADDRESS,
525						  16, 1,
526						  phys_to_virt(
527						    buffer_info->page_dma +
528						    buffer_info->page_offset),
529						  PAGE_SIZE/2, true);
530				}
531			}
532
533			if (i == rx_ring->next_to_use)
534				printk(KERN_CONT " NTU\n");
535			else if (i == rx_ring->next_to_clean)
536				printk(KERN_CONT " NTC\n");
537			else
538				printk(KERN_CONT "\n");
539
540		}
541	}
542
543exit:
544	return;
545}
546
547
548/**
549 * igb_read_clock - read raw cycle counter (to be used by time counter)
550 */
551static cycle_t igb_read_clock(const struct cyclecounter *tc)
552{
553	struct igb_adapter *adapter =
554		container_of(tc, struct igb_adapter, cycles);
555	struct e1000_hw *hw = &adapter->hw;
556	u64 stamp = 0;
557	int shift = 0;
558
559	/*
560	 * The timestamp latches on lowest register read. For the 82580
561	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
562	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
563	 * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
564	if (hw->mac.type == e1000_82580) {
565		stamp = rd32(E1000_SYSTIMR) >> 8;
566		shift = IGB_82580_TSYNC_SHIFT;
567	}
568
569	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
570	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
571	return stamp;
572}
573
574/**
575 * igb_get_hw_dev - return device
576 * used by hardware layer to print debugging information
577 **/
578struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
579{
580	struct igb_adapter *adapter = hw->back;
581	return adapter->netdev;
582}
583
584/**
585 * igb_init_module - Driver Registration Routine
586 *
587 * igb_init_module is the first routine called when the driver is
588 * loaded. All it does is register with the PCI subsystem.
589 **/
590static int __init igb_init_module(void)
591{
592	int ret;
593	printk(KERN_INFO "%s - version %s\n",
594	       igb_driver_string, igb_driver_version);
595
596	printk(KERN_INFO "%s\n", igb_copyright);
597
598#ifdef CONFIG_IGB_DCA
599	dca_register_notify(&dca_notifier);
600#endif
601	ret = pci_register_driver(&igb_driver);
602	return ret;
603}
604
605module_init(igb_init_module);
606
607/**
608 * igb_exit_module - Driver Exit Cleanup Routine
609 *
610 * igb_exit_module is called just before the driver is removed
611 * from memory.
612 **/
613static void __exit igb_exit_module(void)
614{
615#ifdef CONFIG_IGB_DCA
616	dca_unregister_notify(&dca_notifier);
617#endif
618	pci_unregister_driver(&igb_driver);
619}
620
621module_exit(igb_exit_module);
622
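/*
 * 82576 queue-to-register interleaving: even software queue indices map to
 * hardware entries 0-7 and odd indices to entries 8-15, e.g. 0->0, 1->8,
 * 2->1, 3->9.
 */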
623#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
624/**
625 * igb_cache_ring_register - Descriptor ring to register mapping
626 * @adapter: board private structure to initialize
627 *
628 * Once we know the feature-set enabled for the device, we'll cache
629 * the register offset the descriptor ring is assigned to.
630 **/
631static void igb_cache_ring_register(struct igb_adapter *adapter)
632{
633	int i = 0, j = 0;
634	u32 rbase_offset = adapter->vfs_allocated_count;
635
636	switch (adapter->hw.mac.type) {
637	case e1000_82576:
638		/* The queues are allocated for virtualization such that VF 0
639		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
640		 * In order to avoid collision we start at the first free queue
641		 * and continue consuming queues in the same sequence
642		 */
643		if (adapter->vfs_allocated_count) {
644			for (; i < adapter->rss_queues; i++)
645				adapter->rx_ring[i]->reg_idx = rbase_offset +
646				                               Q_IDX_82576(i);
647		}
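		/* Fall through - remaining queues use the default linear mapping */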
648	case e1000_82575:
649	case e1000_82580:
650	case e1000_i350:
651	default:
652		for (; i < adapter->num_rx_queues; i++)
653			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654		for (; j < adapter->num_tx_queues; j++)
655			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
656		break;
657	}
658}
659
660static void igb_free_queues(struct igb_adapter *adapter)
661{
662	int i;
663
664	for (i = 0; i < adapter->num_tx_queues; i++) {
665		kfree(adapter->tx_ring[i]);
666		adapter->tx_ring[i] = NULL;
667	}
668	for (i = 0; i < adapter->num_rx_queues; i++) {
669		kfree(adapter->rx_ring[i]);
670		adapter->rx_ring[i] = NULL;
671	}
672	adapter->num_rx_queues = 0;
673	adapter->num_tx_queues = 0;
674}
675
676/**
677 * igb_alloc_queues - Allocate memory for all rings
678 * @adapter: board private structure to initialize
679 *
680 * We allocate one ring per queue at run-time since we don't know the
681 * number of queues at compile-time.
682 **/
683static int igb_alloc_queues(struct igb_adapter *adapter)
684{
685	struct igb_ring *ring;
686	int i;
687
688	for (i = 0; i < adapter->num_tx_queues; i++) {
689		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
690		if (!ring)
691			goto err;
692		ring->count = adapter->tx_ring_count;
693		ring->queue_index = i;
694		ring->dev = &adapter->pdev->dev;
695		ring->netdev = adapter->netdev;
696		/* For 82575, context index must be unique per ring. */
697		if (adapter->hw.mac.type == e1000_82575)
698			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
699		adapter->tx_ring[i] = ring;
700	}
701
702	for (i = 0; i < adapter->num_rx_queues; i++) {
703		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
704		if (!ring)
705			goto err;
706		ring->count = adapter->rx_ring_count;
707		ring->queue_index = i;
708		ring->dev = &adapter->pdev->dev;
709		ring->netdev = adapter->netdev;
710		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
711		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
712		/* set flag indicating ring supports SCTP checksum offload */
713		if (adapter->hw.mac.type >= e1000_82576)
714			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
715		adapter->rx_ring[i] = ring;
716	}
717
718	igb_cache_ring_register(adapter);
719
720	return 0;
721
722err:
723	igb_free_queues(adapter);
724
725	return -ENOMEM;
726}
727
728#define IGB_N0_QUEUE -1
729static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
730{
731	u32 msixbm = 0;
732	struct igb_adapter *adapter = q_vector->adapter;
733	struct e1000_hw *hw = &adapter->hw;
734	u32 ivar, index;
735	int rx_queue = IGB_N0_QUEUE;
736	int tx_queue = IGB_N0_QUEUE;
737
738	if (q_vector->rx_ring)
739		rx_queue = q_vector->rx_ring->reg_idx;
740	if (q_vector->tx_ring)
741		tx_queue = q_vector->tx_ring->reg_idx;
742
743	switch (hw->mac.type) {
744	case e1000_82575:
745		/* The 82575 assigns vectors using a bitmask, which matches the
746		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
747		   or more queues to a vector, we write the appropriate bits
748		   into the MSIXBM register for that vector. */
749		if (rx_queue > IGB_N0_QUEUE)
750			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
751		if (tx_queue > IGB_N0_QUEUE)
752			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
753		if (!adapter->msix_entries && msix_vector == 0)
754			msixbm |= E1000_EIMS_OTHER;
755		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
756		q_vector->eims_value = msixbm;
757		break;
758	case e1000_82576:
759		/* 82576 uses a table-based method for assigning vectors.
760		   Each queue has a single entry in the table to which we write
761		   a vector number along with a "valid" bit.  Sadly, the layout
762		   of the table is somewhat counterintuitive. */
763		if (rx_queue > IGB_N0_QUEUE) {
764			index = (rx_queue & 0x7);
765			ivar = array_rd32(E1000_IVAR0, index);
766			if (rx_queue < 8) {
767				/* vector goes into low byte of register */
768				ivar = ivar & 0xFFFFFF00;
769				ivar |= msix_vector | E1000_IVAR_VALID;
770			} else {
771				/* vector goes into third byte of register */
772				ivar = ivar & 0xFF00FFFF;
773				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
774			}
775			array_wr32(E1000_IVAR0, index, ivar);
776		}
777		if (tx_queue > IGB_N0_QUEUE) {
778			index = (tx_queue & 0x7);
779			ivar = array_rd32(E1000_IVAR0, index);
780			if (tx_queue < 8) {
781				/* vector goes into second byte of register */
782				ivar = ivar & 0xFFFF00FF;
783				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
784			} else {
785				/* vector goes into high byte of register */
786				ivar = ivar & 0x00FFFFFF;
787				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
788			}
789			array_wr32(E1000_IVAR0, index, ivar);
790		}
791		q_vector->eims_value = 1 << msix_vector;
792		break;
793	case e1000_82580:
794	case e1000_i350:
795		/* The 82580 and i350 use the same table-based approach as the 82576
796		   but have fewer IVAR entries; each entry maps a pair of queues. */
797		if (rx_queue > IGB_N0_QUEUE) {
798			index = (rx_queue >> 1);
799			ivar = array_rd32(E1000_IVAR0, index);
800			if (rx_queue & 0x1) {
801				/* vector goes into third byte of register */
802				ivar = ivar & 0xFF00FFFF;
803				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
804			} else {
805				/* vector goes into low byte of register */
806				ivar = ivar & 0xFFFFFF00;
807				ivar |= msix_vector | E1000_IVAR_VALID;
808			}
809			array_wr32(E1000_IVAR0, index, ivar);
810		}
811		if (tx_queue > IGB_N0_QUEUE) {
812			index = (tx_queue >> 1);
813			ivar = array_rd32(E1000_IVAR0, index);
814			if (tx_queue & 0x1) {
815				/* vector goes into high byte of register */
816				ivar = ivar & 0x00FFFFFF;
817				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
818			} else {
819				/* vector goes into second byte of register */
820				ivar = ivar & 0xFFFF00FF;
821				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
822			}
823			array_wr32(E1000_IVAR0, index, ivar);
824		}
825		q_vector->eims_value = 1 << msix_vector;
826		break;
827	default:
828		BUG();
829		break;
830	}
831
832	/* add q_vector eims value to global eims_enable_mask */
833	adapter->eims_enable_mask |= q_vector->eims_value;
834
835	/* configure q_vector to set itr on first interrupt */
836	q_vector->set_itr = 1;
837}
838
839/**
840 * igb_configure_msix - Configure MSI-X hardware
841 *
842 * igb_configure_msix sets up the hardware to properly
843 * generate MSI-X interrupts.
844 **/
845static void igb_configure_msix(struct igb_adapter *adapter)
846{
847	u32 tmp;
848	int i, vector = 0;
849	struct e1000_hw *hw = &adapter->hw;
850
851	adapter->eims_enable_mask = 0;
852
853	/* set vector for other causes, i.e. link changes */
854	switch (hw->mac.type) {
855	case e1000_82575:
856		tmp = rd32(E1000_CTRL_EXT);
857		/* enable MSI-X PBA support*/
858		tmp |= E1000_CTRL_EXT_PBA_CLR;
859
860		/* Auto-Mask interrupts upon ICR read. */
861		tmp |= E1000_CTRL_EXT_EIAME;
862		tmp |= E1000_CTRL_EXT_IRCA;
863
864		wr32(E1000_CTRL_EXT, tmp);
865
866		/* enable msix_other interrupt */
867		array_wr32(E1000_MSIXBM(0), vector++,
868		                      E1000_EIMS_OTHER);
869		adapter->eims_other = E1000_EIMS_OTHER;
870
871		break;
872
873	case e1000_82576:
874	case e1000_82580:
875	case e1000_i350:
876		/* Turn on MSI-X capability first, or our settings
877		 * won't stick.  And it will take days to debug. */
878		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
879		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
880		                E1000_GPIE_NSICR);
881
882		/* enable msix_other interrupt */
883		adapter->eims_other = 1 << vector;
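		/* the other-causes vector goes in the second byte of IVAR_MISC */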
884		tmp = (vector++ | E1000_IVAR_VALID) << 8;
885
886		wr32(E1000_IVAR_MISC, tmp);
887		break;
888	default:
889		/* do nothing, since nothing else supports MSI-X */
890		break;
891	} /* switch (hw->mac.type) */
892
893	adapter->eims_enable_mask |= adapter->eims_other;
894
895	for (i = 0; i < adapter->num_q_vectors; i++)
896		igb_assign_vector(adapter->q_vector[i], vector++);
897
898	wrfl();
899}
900
901/**
902 * igb_request_msix - Initialize MSI-X interrupts
903 *
904 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
905 * kernel.
906 **/
907static int igb_request_msix(struct igb_adapter *adapter)
908{
909	struct net_device *netdev = adapter->netdev;
910	struct e1000_hw *hw = &adapter->hw;
911	int i, err = 0, vector = 0;
912
913	err = request_irq(adapter->msix_entries[vector].vector,
914	                  igb_msix_other, 0, netdev->name, adapter);
915	if (err)
916		goto out;
917	vector++;
918
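	/* the remaining MSI-X vectors service the Tx/Rx queue q_vectors */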
919	for (i = 0; i < adapter->num_q_vectors; i++) {
920		struct igb_q_vector *q_vector = adapter->q_vector[i];
921
922		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
923
924		if (q_vector->rx_ring && q_vector->tx_ring)
925			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
926			        q_vector->rx_ring->queue_index);
927		else if (q_vector->tx_ring)
928			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
929			        q_vector->tx_ring->queue_index);
930		else if (q_vector->rx_ring)
931			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
932			        q_vector->rx_ring->queue_index);
933		else
934			sprintf(q_vector->name, "%s-unused", netdev->name);
935
936		err = request_irq(adapter->msix_entries[vector].vector,
937		                  igb_msix_ring, 0, q_vector->name,
938		                  q_vector);
939		if (err)
940			goto out;
941		vector++;
942	}
943
944	igb_configure_msix(adapter);
945	return 0;
946out:
947	return err;
948}
949
950static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
951{
952	if (adapter->msix_entries) {
953		pci_disable_msix(adapter->pdev);
954		kfree(adapter->msix_entries);
955		adapter->msix_entries = NULL;
956	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
957		pci_disable_msi(adapter->pdev);
958	}
959}
960
961/**
962 * igb_free_q_vectors - Free memory allocated for interrupt vectors
963 * @adapter: board private structure to initialize
964 *
965 * This function frees the memory allocated to the q_vectors.  In addition if
966 * NAPI is enabled it will delete any references to the NAPI struct prior
967 * to freeing the q_vector.
968 **/
969static void igb_free_q_vectors(struct igb_adapter *adapter)
970{
971	int v_idx;
972
973	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
974		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
975		adapter->q_vector[v_idx] = NULL;
976		if (!q_vector)
977			continue;
978		netif_napi_del(&q_vector->napi);
979		kfree(q_vector);
980	}
981	adapter->num_q_vectors = 0;
982}
983
984/**
985 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
986 *
987 * This function resets the device so that it has 0 rx queues, tx queues, and
988 * MSI-X interrupts allocated.
989 */
990static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
991{
992	igb_free_queues(adapter);
993	igb_free_q_vectors(adapter);
994	igb_reset_interrupt_capability(adapter);
995}
996
997/**
998 * igb_set_interrupt_capability - set MSI or MSI-X if supported
999 *
1000 * Attempt to configure interrupts using the best available
1001 * capabilities of the hardware and kernel.
1002 **/
1003static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1004{
1005	int err;
1006	int numvecs, i;
1007
1008	/* Number of supported queues. */
1009	adapter->num_rx_queues = adapter->rss_queues;
1010	if (adapter->vfs_allocated_count)
1011		adapter->num_tx_queues = 1;
1012	else
1013		adapter->num_tx_queues = adapter->rss_queues;
1014
1015	/* start with one vector for every rx queue */
1016	numvecs = adapter->num_rx_queues;
1017
1018	/* if tx handler is separate add 1 for every tx queue */
1019	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1020		numvecs += adapter->num_tx_queues;
1021
1022	/* store the number of vectors reserved for queues */
1023	adapter->num_q_vectors = numvecs;
1024
1025	/* add 1 vector for link status interrupts */
1026	numvecs++;
1027	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1028					GFP_KERNEL);
1029	if (!adapter->msix_entries)
1030		goto msi_only;
1031
1032	for (i = 0; i < numvecs; i++)
1033		adapter->msix_entries[i].entry = i;
1034
1035	err = pci_enable_msix(adapter->pdev,
1036			      adapter->msix_entries,
1037			      numvecs);
1038	if (err == 0)
1039		goto out;
1040
1041	igb_reset_interrupt_capability(adapter);
1042
1043	/* If we can't do MSI-X, try MSI */
1044msi_only:
1045#ifdef CONFIG_PCI_IOV
1046	/* disable SR-IOV for non MSI-X configurations */
1047	if (adapter->vf_data) {
1048		struct e1000_hw *hw = &adapter->hw;
1049		/* disable iov and allow time for transactions to clear */
1050		pci_disable_sriov(adapter->pdev);
1051		msleep(500);
1052
1053		kfree(adapter->vf_data);
1054		adapter->vf_data = NULL;
1055		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1056		wrfl();
1057		msleep(100);
1058		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1059	}
1060#endif
1061	adapter->vfs_allocated_count = 0;
1062	adapter->rss_queues = 1;
1063	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1064	adapter->num_rx_queues = 1;
1065	adapter->num_tx_queues = 1;
1066	adapter->num_q_vectors = 1;
1067	if (!pci_enable_msi(adapter->pdev))
1068		adapter->flags |= IGB_FLAG_HAS_MSI;
1069out:
1070	/* Notify the stack of the (possibly) reduced queue counts. */
1071	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1072	return netif_set_real_num_rx_queues(adapter->netdev,
1073					    adapter->num_rx_queues);
1074}
1075
1076/**
1077 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1078 * @adapter: board private structure to initialize
1079 *
1080 * We allocate one q_vector per queue interrupt.  If allocation fails we
1081 * return -ENOMEM.
1082 **/
1083static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1084{
1085	struct igb_q_vector *q_vector;
1086	struct e1000_hw *hw = &adapter->hw;
1087	int v_idx;
1088
1089	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1090		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1091		if (!q_vector)
1092			goto err_out;
1093		q_vector->adapter = adapter;
1094		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1095		q_vector->itr_val = IGB_START_ITR;
1096		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1097		adapter->q_vector[v_idx] = q_vector;
1098	}
1099	return 0;
1100
1101err_out:
1102	igb_free_q_vectors(adapter);
1103	return -ENOMEM;
1104}
1105
1106static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1107                                      int ring_idx, int v_idx)
1108{
1109	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1110
1111	q_vector->rx_ring = adapter->rx_ring[ring_idx];
1112	q_vector->rx_ring->q_vector = q_vector;
1113	q_vector->itr_val = adapter->rx_itr_setting;
1114	if (q_vector->itr_val && q_vector->itr_val <= 3)
1115		q_vector->itr_val = IGB_START_ITR;
1116}
1117
1118static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1119                                      int ring_idx, int v_idx)
1120{
1121	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1122
1123	q_vector->tx_ring = adapter->tx_ring[ring_idx];
1124	q_vector->tx_ring->q_vector = q_vector;
1125	q_vector->itr_val = adapter->tx_itr_setting;
1126	if (q_vector->itr_val && q_vector->itr_val <= 3)
1127		q_vector->itr_val = IGB_START_ITR;
1128}
1129
1130/**
1131 * igb_map_ring_to_vector - maps allocated queues to vectors
1132 *
1133 * This function maps the recently allocated queues to vectors.
1134 **/
1135static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1136{
1137	int i;
1138	int v_idx = 0;
1139
1140	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1141	    (adapter->num_q_vectors < adapter->num_tx_queues))
1142		return -ENOMEM;
1143
1144	if (adapter->num_q_vectors >=
1145	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1146		for (i = 0; i < adapter->num_rx_queues; i++)
1147			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1148		for (i = 0; i < adapter->num_tx_queues; i++)
1149			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1150	} else {
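		/*
		 * Fewer vectors than rx + tx queues: pair each Tx queue with
		 * the Rx queue of the same index on a shared vector, then give
		 * any remaining Tx queues their own vectors.
		 */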
1151		for (i = 0; i < adapter->num_rx_queues; i++) {
1152			if (i < adapter->num_tx_queues)
1153				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1154			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1155		}
1156		for (; i < adapter->num_tx_queues; i++)
1157			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1158	}
1159	return 0;
1160}
1161
1162/**
1163 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1164 *
1165 * This function initializes the interrupts and allocates all of the queues.
1166 **/
1167static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1168{
1169	struct pci_dev *pdev = adapter->pdev;
1170	int err;
1171
1172	err = igb_set_interrupt_capability(adapter);
1173	if (err)
1174		return err;
1175
1176	err = igb_alloc_q_vectors(adapter);
1177	if (err) {
1178		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1179		goto err_alloc_q_vectors;
1180	}
1181
1182	err = igb_alloc_queues(adapter);
1183	if (err) {
1184		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1185		goto err_alloc_queues;
1186	}
1187
1188	err = igb_map_ring_to_vector(adapter);
1189	if (err) {
1190		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1191		goto err_map_queues;
1192	}
1193
1194
1195	return 0;
1196err_map_queues:
1197	igb_free_queues(adapter);
1198err_alloc_queues:
1199	igb_free_q_vectors(adapter);
1200err_alloc_q_vectors:
1201	igb_reset_interrupt_capability(adapter);
1202	return err;
1203}
1204
1205/**
1206 * igb_request_irq - initialize interrupts
1207 *
1208 * Attempts to configure interrupts using the best available
1209 * capabilities of the hardware and kernel.
1210 **/
1211static int igb_request_irq(struct igb_adapter *adapter)
1212{
1213	struct net_device *netdev = adapter->netdev;
1214	struct pci_dev *pdev = adapter->pdev;
1215	int err = 0;
1216
1217	if (adapter->msix_entries) {
1218		err = igb_request_msix(adapter);
1219		if (!err)
1220			goto request_done;
1221		/* fall back to MSI */
1222		igb_clear_interrupt_scheme(adapter);
1223		if (!pci_enable_msi(adapter->pdev))
1224			adapter->flags |= IGB_FLAG_HAS_MSI;
1225		igb_free_all_tx_resources(adapter);
1226		igb_free_all_rx_resources(adapter);
1227		adapter->num_tx_queues = 1;
1228		adapter->num_rx_queues = 1;
1229		adapter->num_q_vectors = 1;
1230		err = igb_alloc_q_vectors(adapter);
1231		if (err) {
1232			dev_err(&pdev->dev,
1233			        "Unable to allocate memory for vectors\n");
1234			goto request_done;
1235		}
1236		err = igb_alloc_queues(adapter);
1237		if (err) {
1238			dev_err(&pdev->dev,
1239			        "Unable to allocate memory for queues\n");
1240			igb_free_q_vectors(adapter);
1241			goto request_done;
1242		}
1243		igb_setup_all_tx_resources(adapter);
1244		igb_setup_all_rx_resources(adapter);
1245	} else {
1246		igb_assign_vector(adapter->q_vector[0], 0);
1247	}
1248
1249	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1250		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1251				  netdev->name, adapter);
1252		if (!err)
1253			goto request_done;
1254
1255		/* fall back to legacy interrupts */
1256		igb_reset_interrupt_capability(adapter);
1257		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1258	}
1259
1260	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1261			  netdev->name, adapter);
1262
1263	if (err)
1264		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1265			err);
1266
1267request_done:
1268	return err;
1269}
1270
1271static void igb_free_irq(struct igb_adapter *adapter)
1272{
1273	if (adapter->msix_entries) {
1274		int vector = 0, i;
1275
1276		free_irq(adapter->msix_entries[vector++].vector, adapter);
1277
1278		for (i = 0; i < adapter->num_q_vectors; i++) {
1279			struct igb_q_vector *q_vector = adapter->q_vector[i];
1280			free_irq(adapter->msix_entries[vector++].vector,
1281			         q_vector);
1282		}
1283	} else {
1284		free_irq(adapter->pdev->irq, adapter);
1285	}
1286}
1287
1288/**
1289 * igb_irq_disable - Mask off interrupt generation on the NIC
1290 * @adapter: board private structure
1291 **/
1292static void igb_irq_disable(struct igb_adapter *adapter)
1293{
1294	struct e1000_hw *hw = &adapter->hw;
1295
1296	/*
1297	 * we need to be careful when disabling interrupts.  The VFs are also
1298	 * mapped into these registers, so clearing the bits can cause
1299	 * issues for the VF drivers; we only clear the bits we set.
1300	 */
1301	if (adapter->msix_entries) {
1302		u32 regval = rd32(E1000_EIAM);
1303		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1304		wr32(E1000_EIMC, adapter->eims_enable_mask);
1305		regval = rd32(E1000_EIAC);
1306		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1307	}
1308
1309	wr32(E1000_IAM, 0);
1310	wr32(E1000_IMC, ~0);
1311	wrfl();
1312	if (adapter->msix_entries) {
1313		int i;
1314		for (i = 0; i < adapter->num_q_vectors; i++)
1315			synchronize_irq(adapter->msix_entries[i].vector);
1316	} else {
1317		synchronize_irq(adapter->pdev->irq);
1318	}
1319}
1320
1321/**
1322 * igb_irq_enable - Enable default interrupt generation settings
1323 * @adapter: board private structure
1324 **/
1325static void igb_irq_enable(struct igb_adapter *adapter)
1326{
1327	struct e1000_hw *hw = &adapter->hw;
1328
1329	if (adapter->msix_entries) {
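		/* EIAC auto-clears and EIAM auto-masks the queue-vector bits */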
1330		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1331		u32 regval = rd32(E1000_EIAC);
1332		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1333		regval = rd32(E1000_EIAM);
1334		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1335		wr32(E1000_EIMS, adapter->eims_enable_mask);
1336		if (adapter->vfs_allocated_count) {
1337			wr32(E1000_MBVFIMR, 0xFF);
1338			ims |= E1000_IMS_VMMB;
1339		}
1340		if (adapter->hw.mac.type == e1000_82580)
1341			ims |= E1000_IMS_DRSTA;
1342
1343		wr32(E1000_IMS, ims);
1344	} else {
1345		wr32(E1000_IMS, IMS_ENABLE_MASK |
1346				E1000_IMS_DRSTA);
1347		wr32(E1000_IAM, IMS_ENABLE_MASK |
1348				E1000_IMS_DRSTA);
1349	}
1350}
1351
1352static void igb_update_mng_vlan(struct igb_adapter *adapter)
1353{
1354	struct e1000_hw *hw = &adapter->hw;
1355	u16 vid = adapter->hw.mng_cookie.vlan_id;
1356	u16 old_vid = adapter->mng_vlan_id;
1357
1358	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1359		/* add VID to filter table */
1360		igb_vfta_set(hw, vid, true);
1361		adapter->mng_vlan_id = vid;
1362	} else {
1363		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1364	}
1365
1366	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1367	    (vid != old_vid) &&
1368	    !test_bit(old_vid, adapter->active_vlans)) {
1369		/* remove VID from filter table */
1370		igb_vfta_set(hw, old_vid, false);
1371	}
1372}
1373
1374/**
1375 * igb_release_hw_control - release control of the h/w to f/w
1376 * @adapter: address of board private structure
1377 *
1378 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1379 * For ASF and Pass Through versions of f/w this means that the
1380 * driver is no longer loaded.
1381 *
1382 **/
1383static void igb_release_hw_control(struct igb_adapter *adapter)
1384{
1385	struct e1000_hw *hw = &adapter->hw;
1386	u32 ctrl_ext;
1387
1388	/* Let firmware take over control of h/w */
1389	ctrl_ext = rd32(E1000_CTRL_EXT);
1390	wr32(E1000_CTRL_EXT,
1391			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1392}
1393
1394/**
1395 * igb_get_hw_control - get control of the h/w from f/w
1396 * @adapter: address of board private structure
1397 *
1398 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1399 * For ASF and Pass Through versions of f/w this means that
1400 * the driver is loaded.
1401 *
1402 **/
1403static void igb_get_hw_control(struct igb_adapter *adapter)
1404{
1405	struct e1000_hw *hw = &adapter->hw;
1406	u32 ctrl_ext;
1407
1408	/* Let firmware know the driver has taken over */
1409	ctrl_ext = rd32(E1000_CTRL_EXT);
1410	wr32(E1000_CTRL_EXT,
1411			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1412}
1413
1414/**
1415 * igb_configure - configure the hardware for RX and TX
1416 * @adapter: private board structure
1417 **/
1418static void igb_configure(struct igb_adapter *adapter)
1419{
1420	struct net_device *netdev = adapter->netdev;
1421	int i;
1422
1423	igb_get_hw_control(adapter);
1424	igb_set_rx_mode(netdev);
1425
1426	igb_restore_vlan(adapter);
1427
1428	igb_setup_tctl(adapter);
1429	igb_setup_mrqc(adapter);
1430	igb_setup_rctl(adapter);
1431
1432	igb_configure_tx(adapter);
1433	igb_configure_rx(adapter);
1434
1435	igb_rx_fifo_flush_82575(&adapter->hw);
1436
1437	/* call igb_desc_unused which always leaves
1438	 * at least 1 descriptor unused to make sure
1439	 * next_to_use != next_to_clean */
1440	for (i = 0; i < adapter->num_rx_queues; i++) {
1441		struct igb_ring *ring = adapter->rx_ring[i];
1442		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1443	}
1444}
1445
1446/**
1447 * igb_power_up_link - Power up the phy/serdes link
1448 * @adapter: address of board private structure
1449 **/
1450void igb_power_up_link(struct igb_adapter *adapter)
1451{
1452	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1453		igb_power_up_phy_copper(&adapter->hw);
1454	else
1455		igb_power_up_serdes_link_82575(&adapter->hw);
1456}
1457
1458/**
1459 * igb_power_down_link - Power down the phy/serdes link
1460 * @adapter: address of board private structure
1461 */
1462static void igb_power_down_link(struct igb_adapter *adapter)
1463{
1464	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1465		igb_power_down_phy_copper_82575(&adapter->hw);
1466	else
1467		igb_shutdown_serdes_link_82575(&adapter->hw);
1468}
1469
1470/**
1471 * igb_up - Open the interface and prepare it to handle traffic
1472 * @adapter: board private structure
1473 **/
1474int igb_up(struct igb_adapter *adapter)
1475{
1476	struct e1000_hw *hw = &adapter->hw;
1477	int i;
1478
1479	/* hardware has been reset, we need to reload some things */
1480	igb_configure(adapter);
1481
1482	clear_bit(__IGB_DOWN, &adapter->state);
1483
1484	for (i = 0; i < adapter->num_q_vectors; i++) {
1485		struct igb_q_vector *q_vector = adapter->q_vector[i];
1486		napi_enable(&q_vector->napi);
1487	}
1488	if (adapter->msix_entries)
1489		igb_configure_msix(adapter);
1490	else
1491		igb_assign_vector(adapter->q_vector[0], 0);
1492
1493	/* Clear any pending interrupts. */
1494	rd32(E1000_ICR);
1495	igb_irq_enable(adapter);
1496
1497	/* notify VFs that reset has been completed */
1498	if (adapter->vfs_allocated_count) {
1499		u32 reg_data = rd32(E1000_CTRL_EXT);
1500		reg_data |= E1000_CTRL_EXT_PFRSTD;
1501		wr32(E1000_CTRL_EXT, reg_data);
1502	}
1503
1504	netif_tx_start_all_queues(adapter->netdev);
1505
1506	/* start the watchdog. */
1507	hw->mac.get_link_status = 1;
1508	schedule_work(&adapter->watchdog_task);
1509
1510	return 0;
1511}
1512
1513void igb_down(struct igb_adapter *adapter)
1514{
1515	struct net_device *netdev = adapter->netdev;
1516	struct e1000_hw *hw = &adapter->hw;
1517	u32 tctl, rctl;
1518	int i;
1519
1520	/* signal that we're down so the interrupt handler does not
1521	 * reschedule our watchdog timer */
1522	set_bit(__IGB_DOWN, &adapter->state);
1523
1524	/* disable receives in the hardware */
1525	rctl = rd32(E1000_RCTL);
1526	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1527	/* flush and sleep below */
1528
1529	netif_tx_stop_all_queues(netdev);
1530
1531	/* disable transmits in the hardware */
1532	tctl = rd32(E1000_TCTL);
1533	tctl &= ~E1000_TCTL_EN;
1534	wr32(E1000_TCTL, tctl);
1535	/* flush both disables and wait for them to finish */
1536	wrfl();
1537	msleep(10);
1538
1539	for (i = 0; i < adapter->num_q_vectors; i++) {
1540		struct igb_q_vector *q_vector = adapter->q_vector[i];
1541		napi_disable(&q_vector->napi);
1542	}
1543
1544	igb_irq_disable(adapter);
1545
1546	del_timer_sync(&adapter->watchdog_timer);
1547	del_timer_sync(&adapter->phy_info_timer);
1548
1549	netif_carrier_off(netdev);
1550
1551	/* record the stats before reset*/
1552	spin_lock(&adapter->stats64_lock);
1553	igb_update_stats(adapter, &adapter->stats64);
1554	spin_unlock(&adapter->stats64_lock);
1555
1556	adapter->link_speed = 0;
1557	adapter->link_duplex = 0;
1558
1559	if (!pci_channel_offline(adapter->pdev))
1560		igb_reset(adapter);
1561	igb_clean_all_tx_rings(adapter);
1562	igb_clean_all_rx_rings(adapter);
1563#ifdef CONFIG_IGB_DCA
1564
1565	/* since we reset the hardware, DCA settings were cleared */
1566	igb_setup_dca(adapter);
1567#endif
1568}
1569
1570void igb_reinit_locked(struct igb_adapter *adapter)
1571{
1572	WARN_ON(in_interrupt());
1573	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1574		msleep(1);
1575	igb_down(adapter);
1576	igb_up(adapter);
1577	clear_bit(__IGB_RESETTING, &adapter->state);
1578}
1579
1580void igb_reset(struct igb_adapter *adapter)
1581{
1582	struct pci_dev *pdev = adapter->pdev;
1583	struct e1000_hw *hw = &adapter->hw;
1584	struct e1000_mac_info *mac = &hw->mac;
1585	struct e1000_fc_info *fc = &hw->fc;
1586	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1587	u16 hwm;
1588
1589	/* Repartition PBA for jumbo frames (MTU greater than 9k).
1590	 * CTRL.RST is required for the change to take effect.
1591	 */
1592	switch (mac->type) {
1593	case e1000_i350:
1594	case e1000_82580:
1595		pba = rd32(E1000_RXPBS);
1596		pba = igb_rxpbs_adjust_82580(pba);
1597		break;
1598	case e1000_82576:
1599		pba = rd32(E1000_RXPBS);
1600		pba &= E1000_RXPBS_SIZE_MASK_82576;
1601		break;
1602	case e1000_82575:
1603	default:
1604		pba = E1000_PBA_34K;
1605		break;
1606	}
1607
1608	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1609	    (mac->type < e1000_82576)) {
1610		/* adjust PBA for jumbo frames */
1611		wr32(E1000_PBA, pba);
1612
1613		/* To maintain wire speed transmits, the Tx FIFO should be
1614		 * large enough to accommodate two full transmit packets,
1615		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1616		 * the Rx FIFO should be large enough to accommodate at least
1617		 * one full receive packet and is similarly rounded up and
1618		 * expressed in KB. */
1619		pba = rd32(E1000_PBA);
1620		/* upper 16 bits has Tx packet buffer allocation size in KB */
1621		tx_space = pba >> 16;
1622		/* lower 16 bits has Rx packet buffer allocation size in KB */
1623		pba &= 0xffff;
1624		/* the Tx FIFO also stores 16 bytes of information about the Tx
1625		 * packet, but not the Ethernet FCS, which hardware appends */
1626		min_tx_space = (adapter->max_frame_size +
1627				sizeof(union e1000_adv_tx_desc) -
1628				ETH_FCS_LEN) * 2;
1629		min_tx_space = ALIGN(min_tx_space, 1024);
1630		min_tx_space >>= 10;
1631		/* software strips receive CRC, so leave room for it */
1632		min_rx_space = adapter->max_frame_size;
1633		min_rx_space = ALIGN(min_rx_space, 1024);
1634		min_rx_space >>= 10;
1635
1636		/* If current Tx allocation is less than the min Tx FIFO size,
1637		 * and the min Tx FIFO size is less than the current Rx FIFO
1638		 * allocation, take space away from current Rx allocation */
1639		if (tx_space < min_tx_space &&
1640		    ((min_tx_space - tx_space) < pba)) {
1641			pba = pba - (min_tx_space - tx_space);
1642
1643			/* if short on rx space, rx wins and must trump tx
1644			 * adjustment */
1645			if (pba < min_rx_space)
1646				pba = min_rx_space;
1647		}
1648		wr32(E1000_PBA, pba);
1649	}
1650
1651	/* flow control settings */
1652	/* The high water mark must be low enough to fit one full frame
1653	 * (or the size used for early receive) above it in the Rx FIFO.
1654	 * Set it to the lower of:
1655	 * - 90% of the Rx FIFO size, or
1656	 * - the full Rx FIFO size minus one full frame */
1657	hwm = min(((pba << 10) * 9 / 10),
1658			((pba << 10) - 2 * adapter->max_frame_size));
1659
1660	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1661	fc->low_water = fc->high_water - 16;
1662	fc->pause_time = 0xFFFF;
1663	fc->send_xon = 1;
1664	fc->current_mode = fc->requested_mode;
1665
1666	/* disable receive for all VFs and wait one second */
1667	if (adapter->vfs_allocated_count) {
1668		int i;
1669		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1670			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1671
1672		/* ping all the active vfs to let them know we are going down */
1673		igb_ping_all_vfs(adapter);
1674
1675		/* disable transmits and receives */
1676		wr32(E1000_VFRE, 0);
1677		wr32(E1000_VFTE, 0);
1678	}
1679
1680	/* Allow time for pending master requests to run */
1681	hw->mac.ops.reset_hw(hw);
1682	wr32(E1000_WUC, 0);
1683
1684	if (hw->mac.ops.init_hw(hw))
1685		dev_err(&pdev->dev, "Hardware Error\n");
1686	if (hw->mac.type > e1000_82580) {
1687		if (adapter->flags & IGB_FLAG_DMAC) {
1688			u32 reg;
1689
1690			/*
1691			 * DMA Coalescing high water mark needs to be higher
1692			 * than the Rx threshold.  The Rx threshold is currently
1693			 * pba - 6, so we should use a high water mark of
1694			 * pba - 4. */
1695			hwm = (pba - 4) << 10;
1696
1697			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1698			       & E1000_DMACR_DMACTHR_MASK);
1699
1700			/* transition to L0s or L1 if available */
1701			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1702
1703			/* watchdog timer= +-1000 usec in 32usec intervals */
1704			reg |= (1000 >> 5);
1705			wr32(E1000_DMACR, reg);
1706
1707			/* no lower threshold to disable coalescing (smart FIFO);
1708			 * UTRESH = 0 */
1709			wr32(E1000_DMCRTRH, 0);
1710
1711			/* set hwm to PBA -  2 * max frame size */
1712			wr32(E1000_FCRTC, hwm);
1713
1714			/*
1715			 * This sets the time to wait before requesting a
1716			 * transition to a low power state to the number of usecs
1717			 * needed to receive one 512-byte frame at gigabit line rate.
1718			 */
1719			reg = rd32(E1000_DMCTLX);
1720			reg |= IGB_DMCTLX_DCFLUSH_DIS;
1721
1722			/* Delay 255 usec before entering Lx state. */
1723			reg |= 0xFF;
1724			wr32(E1000_DMCTLX, reg);
1725
1726			/* free space in Tx packet buffer to wake from DMAC */
1727			wr32(E1000_DMCTXTH,
1728			     (IGB_MIN_TXPBSIZE -
1729			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
1730			     >> 6);
1731
1732			/* make low power state decision controlled by DMAC */
1733			reg = rd32(E1000_PCIEMISC);
1734			reg |= E1000_PCIEMISC_LX_DECISION;
1735			wr32(E1000_PCIEMISC, reg);
1736		} /* end if IGB_FLAG_DMAC set */
1737	}
1738	if (hw->mac.type == e1000_82580) {
1739		u32 reg = rd32(E1000_PCIEMISC);
1740		wr32(E1000_PCIEMISC,
1741		                reg & ~E1000_PCIEMISC_LX_DECISION);
1742	}
1743	if (!netif_running(adapter->netdev))
1744		igb_power_down_link(adapter);
1745
1746	igb_update_mng_vlan(adapter);
1747
1748	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1749	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1750
1751	igb_get_phy_info(hw);
1752}
1753
1754static u32 igb_fix_features(struct net_device *netdev, u32 features)
1755{
1756	/*
1757	 * Since there is no support for separate rx/tx vlan accel
1758	 * enable/disable, make sure the tx flag is always in the same state as rx.
1759	 */
1760	if (features & NETIF_F_HW_VLAN_RX)
1761		features |= NETIF_F_HW_VLAN_TX;
1762	else
1763		features &= ~NETIF_F_HW_VLAN_TX;
1764
1765	return features;
1766}
1767
1768static int igb_set_features(struct net_device *netdev, u32 features)
1769{
1770	struct igb_adapter *adapter = netdev_priv(netdev);
1771	int i;
1772	u32 changed = netdev->features ^ features;
1773
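	/* propagate the RXCSUM feature setting to every Rx ring's flags */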
1774	for (i = 0; i < adapter->num_rx_queues; i++) {
1775		if (features & NETIF_F_RXCSUM)
1776			adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1777		else
1778			adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1779	}
1780
1781	if (changed & NETIF_F_HW_VLAN_RX)
1782		igb_vlan_mode(netdev, features);
1783
1784	return 0;
1785}
1786
1787static const struct net_device_ops igb_netdev_ops = {
1788	.ndo_open		= igb_open,
1789	.ndo_stop		= igb_close,
1790	.ndo_start_xmit		= igb_xmit_frame_adv,
1791	.ndo_get_stats64	= igb_get_stats64,
1792	.ndo_set_rx_mode	= igb_set_rx_mode,
1793	.ndo_set_multicast_list	= igb_set_rx_mode,
1794	.ndo_set_mac_address	= igb_set_mac,
1795	.ndo_change_mtu		= igb_change_mtu,
1796	.ndo_do_ioctl		= igb_ioctl,
1797	.ndo_tx_timeout		= igb_tx_timeout,
1798	.ndo_validate_addr	= eth_validate_addr,
1799	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1800	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1801	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1802	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1803	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1804	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1805#ifdef CONFIG_NET_POLL_CONTROLLER
1806	.ndo_poll_controller	= igb_netpoll,
1807#endif
1808	.ndo_fix_features	= igb_fix_features,
1809	.ndo_set_features	= igb_set_features,
1810};
1811
1812/**
1813 * igb_probe - Device Initialization Routine
1814 * @pdev: PCI device information struct
1815 * @ent: entry in igb_pci_tbl
1816 *
1817 * Returns 0 on success, negative on failure
1818 *
1819 * igb_probe initializes an adapter identified by a pci_dev structure.
1820 * The OS initialization, configuring of the adapter private structure,
1821 * and a hardware reset occur.
1822 **/
1823static int __devinit igb_probe(struct pci_dev *pdev,
1824			       const struct pci_device_id *ent)
1825{
1826	struct net_device *netdev;
1827	struct igb_adapter *adapter;
1828	struct e1000_hw *hw;
1829	u16 eeprom_data = 0;
1830	s32 ret_val;
1831	static int global_quad_port_a; /* global quad port a indication */
1832	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1833	unsigned long mmio_start, mmio_len;
1834	int err, pci_using_dac;
1835	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1836	u8 part_str[E1000_PBANUM_LENGTH];
1837
1838	/* Catch broken hardware that put the wrong VF device ID in
1839	 * the PCIe SR-IOV capability.
1840	 */
1841	if (pdev->is_virtfn) {
1842		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1843		     pci_name(pdev), pdev->vendor, pdev->device);
1844		return -EINVAL;
1845	}
1846
1847	err = pci_enable_device_mem(pdev);
1848	if (err)
1849		return err;
1850
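	/* try 64-bit DMA first and fall back to a 32-bit mask if the
	 * platform cannot provide it; pci_using_dac records whether the
	 * HIGHDMA feature can be advertised later */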
1851	pci_using_dac = 0;
1852	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1853	if (!err) {
1854		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1855		if (!err)
1856			pci_using_dac = 1;
1857	} else {
1858		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1859		if (err) {
1860			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1861			if (err) {
1862				dev_err(&pdev->dev, "No usable DMA "
1863					"configuration, aborting\n");
1864				goto err_dma;
1865			}
1866		}
1867	}
1868
1869	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1870	                                   IORESOURCE_MEM),
1871	                                   igb_driver_name);
1872	if (err)
1873		goto err_pci_reg;
1874
1875	pci_enable_pcie_error_reporting(pdev);
1876
1877	pci_set_master(pdev);
1878	pci_save_state(pdev);
1879
1880	err = -ENOMEM;
1881	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1882	                           IGB_ABS_MAX_TX_QUEUES);
1883	if (!netdev)
1884		goto err_alloc_etherdev;
1885
1886	SET_NETDEV_DEV(netdev, &pdev->dev);
1887
1888	pci_set_drvdata(pdev, netdev);
1889	adapter = netdev_priv(netdev);
1890	adapter->netdev = netdev;
1891	adapter->pdev = pdev;
1892	hw = &adapter->hw;
1893	hw->back = adapter;
1894	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1895
1896	mmio_start = pci_resource_start(pdev, 0);
1897	mmio_len = pci_resource_len(pdev, 0);
1898
1899	err = -EIO;
1900	hw->hw_addr = ioremap(mmio_start, mmio_len);
1901	if (!hw->hw_addr)
1902		goto err_ioremap;
1903
1904	netdev->netdev_ops = &igb_netdev_ops;
1905	igb_set_ethtool_ops(netdev);
1906	netdev->watchdog_timeo = 5 * HZ;
1907
1908	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1909
1910	netdev->mem_start = mmio_start;
1911	netdev->mem_end = mmio_start + mmio_len;
1912
1913	/* PCI config space info */
1914	hw->vendor_id = pdev->vendor;
1915	hw->device_id = pdev->device;
1916	hw->revision_id = pdev->revision;
1917	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1918	hw->subsystem_device_id = pdev->subsystem_device;
1919
1920	/* Copy the default MAC, PHY and NVM function pointers */
1921	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1922	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1923	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1924	/* Initialize skew-specific constants */
1925	err = ei->get_invariants(hw);
1926	if (err)
1927		goto err_sw_init;
1928
1929	/* setup the private structure */
1930	err = igb_sw_init(adapter);
1931	if (err)
1932		goto err_sw_init;
1933
1934	igb_get_bus_info_pcie(hw);
1935
1936	hw->phy.autoneg_wait_to_complete = false;
1937
1938	/* Copper options */
1939	if (hw->phy.media_type == e1000_media_type_copper) {
1940		hw->phy.mdix = AUTO_ALL_MODES;
1941		hw->phy.disable_polarity_correction = false;
1942		hw->phy.ms_type = e1000_ms_hw_default;
1943	}
1944
1945	if (igb_check_reset_block(hw))
1946		dev_info(&pdev->dev,
1947			"PHY reset is blocked due to SOL/IDER session.\n");
1948
1949	netdev->hw_features = NETIF_F_SG |
1950			   NETIF_F_IP_CSUM |
1951			   NETIF_F_IPV6_CSUM |
1952			   NETIF_F_TSO |
1953			   NETIF_F_TSO6 |
1954			   NETIF_F_RXCSUM |
1955			   NETIF_F_HW_VLAN_RX;
1956
1957	netdev->features = netdev->hw_features |
1958			   NETIF_F_HW_VLAN_TX |
1959			   NETIF_F_HW_VLAN_FILTER;
1960
1961	netdev->vlan_features |= NETIF_F_TSO;
1962	netdev->vlan_features |= NETIF_F_TSO6;
1963	netdev->vlan_features |= NETIF_F_IP_CSUM;
1964	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1965	netdev->vlan_features |= NETIF_F_SG;
1966
1967	if (pci_using_dac) {
1968		netdev->features |= NETIF_F_HIGHDMA;
1969		netdev->vlan_features |= NETIF_F_HIGHDMA;
1970	}
1971
1972	if (hw->mac.type >= e1000_82576) {
1973		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1974		netdev->features |= NETIF_F_SCTP_CSUM;
1975	}
1976
1977	netdev->priv_flags |= IFF_UNICAST_FLT;
1978
1979	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1980
1981	/* before reading the NVM, reset the controller to put the device in a
1982	 * known good starting state */
1983	hw->mac.ops.reset_hw(hw);
1984
1985	/* make sure the NVM is good */
1986	if (hw->nvm.ops.validate(hw) < 0) {
1987		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1988		err = -EIO;
1989		goto err_eeprom;
1990	}
1991
1992	/* copy the MAC address out of the NVM */
1993	if (hw->mac.ops.read_mac_addr(hw))
1994		dev_err(&pdev->dev, "NVM Read Error\n");
1995
1996	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1997	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1998
1999	if (!is_valid_ether_addr(netdev->perm_addr)) {
2000		dev_err(&pdev->dev, "Invalid MAC Address\n");
2001		err = -EIO;
2002		goto err_eeprom;
2003	}
2004
2005	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2006	            (unsigned long) adapter);
2007	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2008	            (unsigned long) adapter);
2009
2010	INIT_WORK(&adapter->reset_task, igb_reset_task);
2011	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2012
2013	/* Initialize link properties that are user-changeable */
2014	adapter->fc_autoneg = true;
2015	hw->mac.autoneg = true;
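	/* 0x2f advertises 10/100 half and full duplex plus 1000 full duplex */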
2016	hw->phy.autoneg_advertised = 0x2f;
2017
2018	hw->fc.requested_mode = e1000_fc_default;
2019	hw->fc.current_mode = e1000_fc_default;
2020
2021	igb_validate_mdi_setting(hw);
2022
2023	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2024	 * enable the ACPI Magic Packet filter
2025	 */
2026
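	/* the NVM word holding the APM/WoL bits depends on the LAN function:
	 * function 0 uses INIT_CONTROL3_PORT_A, 82580 and newer index the
	 * word per function, and function 1 on older parts uses PORT_B */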
2027	if (hw->bus.func == 0)
2028		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2029	else if (hw->mac.type >= e1000_82580)
2030		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2031		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2032		                 &eeprom_data);
2033	else if (hw->bus.func == 1)
2034		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2035
2036	if (eeprom_data & eeprom_apme_mask)
2037		adapter->eeprom_wol |= E1000_WUFC_MAG;
2038
2039	/* now that we have the eeprom settings, apply the special cases where
2040	 * the eeprom may be wrong or the board simply won't support wake on
2041	 * lan on a particular port */
2042	switch (pdev->device) {
2043	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2044		adapter->eeprom_wol = 0;
2045		break;
2046	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2047	case E1000_DEV_ID_82576_FIBER:
2048	case E1000_DEV_ID_82576_SERDES:
2049		/* Wake events only supported on port A for dual fiber
2050		 * regardless of eeprom setting */
2051		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2052			adapter->eeprom_wol = 0;
2053		break;
2054	case E1000_DEV_ID_82576_QUAD_COPPER:
2055	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2056		/* if quad port adapter, disable WoL on all but port A */
2057		if (global_quad_port_a != 0)
2058			adapter->eeprom_wol = 0;
2059		else
2060			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2061		/* Reset for multiple quad port adapters */
2062		if (++global_quad_port_a == 4)
2063			global_quad_port_a = 0;
2064		break;
2065	}
2066
2067	/* initialize the wol settings based on the eeprom settings */
2068	adapter->wol = adapter->eeprom_wol;
2069	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2070
2071	/* reset the hardware with the new settings */
2072	igb_reset(adapter);
2073
2074	/* let the f/w know that the h/w is now under the control of the
2075	 * driver. */
2076	igb_get_hw_control(adapter);
2077
2078	strcpy(netdev->name, "eth%d");
2079	err = register_netdev(netdev);
2080	if (err)
2081		goto err_register;
2082
2083	igb_vlan_mode(netdev, netdev->features);
2084
2085	/* carrier off reporting is important to ethtool even BEFORE open */
2086	netif_carrier_off(netdev);
2087
2088#ifdef CONFIG_IGB_DCA
2089	if (dca_add_requester(&pdev->dev) == 0) {
2090		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2091		dev_info(&pdev->dev, "DCA enabled\n");
2092		igb_setup_dca(adapter);
2093	}
2094
2095#endif
2096	/* do hw tstamp init after resetting */
2097	igb_init_hw_timer(adapter);
2098
2099	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2100	/* print bus type/speed/width info */
2101	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2102		 netdev->name,
2103		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2104		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2105		                                            "unknown"),
2106		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2107		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2108		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2109		   "unknown"),
2110		 netdev->dev_addr);
2111
2112	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2113	if (ret_val)
2114		strcpy(part_str, "Unknown");
2115	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2116	dev_info(&pdev->dev,
2117		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2118		adapter->msix_entries ? "MSI-X" :
2119		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2120		adapter->num_rx_queues, adapter->num_tx_queues);
2121	switch (hw->mac.type) {
2122	case e1000_i350:
2123		igb_set_eee_i350(hw);
2124		break;
2125	default:
2126		break;
2127	}
2128	return 0;
2129
2130err_register:
2131	igb_release_hw_control(adapter);
2132err_eeprom:
2133	if (!igb_check_reset_block(hw))
2134		igb_reset_phy(hw);
2135
2136	if (hw->flash_address)
2137		iounmap(hw->flash_address);
2138err_sw_init:
2139	igb_clear_interrupt_scheme(adapter);
2140	iounmap(hw->hw_addr);
2141err_ioremap:
2142	free_netdev(netdev);
2143err_alloc_etherdev:
2144	pci_release_selected_regions(pdev,
2145	                             pci_select_bars(pdev, IORESOURCE_MEM));
2146err_pci_reg:
2147err_dma:
2148	pci_disable_device(pdev);
2149	return err;
2150}
2151
2152/**
2153 * igb_remove - Device Removal Routine
2154 * @pdev: PCI device information struct
2155 *
2156 * igb_remove is called by the PCI subsystem to alert the driver
2157 * that it should release a PCI device.  This could be caused by a
2158 * Hot-Plug event, or because the driver is going to be removed from
2159 * memory.
2160 **/
2161static void __devexit igb_remove(struct pci_dev *pdev)
2162{
2163	struct net_device *netdev = pci_get_drvdata(pdev);
2164	struct igb_adapter *adapter = netdev_priv(netdev);
2165	struct e1000_hw *hw = &adapter->hw;
2166
2167	/*
2168	 * The watchdog timer may be rescheduled, so explicitly
2169	 * disable watchdog from being rescheduled.
2170	 */
2171	set_bit(__IGB_DOWN, &adapter->state);
2172	del_timer_sync(&adapter->watchdog_timer);
2173	del_timer_sync(&adapter->phy_info_timer);
2174
2175	cancel_work_sync(&adapter->reset_task);
2176	cancel_work_sync(&adapter->watchdog_task);
2177
2178#ifdef CONFIG_IGB_DCA
2179	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2180		dev_info(&pdev->dev, "DCA disabled\n");
2181		dca_remove_requester(&pdev->dev);
2182		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2183		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2184	}
2185#endif
2186
2187	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2188	 * would have already happened in close and is redundant. */
2189	igb_release_hw_control(adapter);
2190
2191	unregister_netdev(netdev);
2192
2193	igb_clear_interrupt_scheme(adapter);
2194
2195#ifdef CONFIG_PCI_IOV
2196	/* reclaim resources allocated to VFs */
2197	if (adapter->vf_data) {
2198		/* disable iov and allow time for transactions to clear */
2199		pci_disable_sriov(pdev);
2200		msleep(500);
2201
2202		kfree(adapter->vf_data);
2203		adapter->vf_data = NULL;
2204		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2205		wrfl();
2206		msleep(100);
2207		dev_info(&pdev->dev, "IOV Disabled\n");
2208	}
2209#endif
2210
2211	iounmap(hw->hw_addr);
2212	if (hw->flash_address)
2213		iounmap(hw->flash_address);
2214	pci_release_selected_regions(pdev,
2215	                             pci_select_bars(pdev, IORESOURCE_MEM));
2216
2217	free_netdev(netdev);
2218
2219	pci_disable_pcie_error_reporting(pdev);
2220
2221	pci_disable_device(pdev);
2222}
2223
2224/**
2225 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2226 * @adapter: board private structure to initialize
2227 *
2228 * This function initializes the vf specific data storage and then attempts to
2229 * allocate the VFs.  The reason for ordering it this way is because it is much
2230 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2231 * the memory for the VFs.
2232 **/
2233static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2234{
2235#ifdef CONFIG_PCI_IOV
2236	struct pci_dev *pdev = adapter->pdev;
2237
2238	if (adapter->vfs_allocated_count) {
2239		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2240		                           sizeof(struct vf_data_storage),
2241		                           GFP_KERNEL);
2242		/* if allocation failed then we do not support SR-IOV */
2243		if (!adapter->vf_data) {
2244			adapter->vfs_allocated_count = 0;
2245			dev_err(&pdev->dev, "Unable to allocate memory for VF "
2246			        "Data Storage\n");
2247		}
2248	}
2249
2250	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2251		kfree(adapter->vf_data);
2252		adapter->vf_data = NULL;
2253#endif /* CONFIG_PCI_IOV */
2254		adapter->vfs_allocated_count = 0;
2255#ifdef CONFIG_PCI_IOV
2256	} else {
2257		unsigned char mac_addr[ETH_ALEN];
2258		int i;
2259		dev_info(&pdev->dev, "%d vfs allocated\n",
2260		         adapter->vfs_allocated_count);
2261		for (i = 0; i < adapter->vfs_allocated_count; i++) {
2262			random_ether_addr(mac_addr);
2263			igb_set_vf_mac(adapter, i, mac_addr);
2264		}
2265		/* DMA Coalescing is not supported in IOV mode. */
2266		if (adapter->flags & IGB_FLAG_DMAC)
2267			adapter->flags &= ~IGB_FLAG_DMAC;
2268	}
2269#endif /* CONFIG_PCI_IOV */
2270}
2271
2272
2273/**
2274 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2275 * @adapter: board private structure to initialize
2276 *
2277 * igb_init_hw_timer initializes the function pointer and values for the hw
2278 * timer found in hardware.
2279 **/
2280static void igb_init_hw_timer(struct igb_adapter *adapter)
2281{
2282	struct e1000_hw *hw = &adapter->hw;
2283
2284	switch (hw->mac.type) {
2285	case e1000_i350:
2286	case e1000_82580:
2287		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2288		adapter->cycles.read = igb_read_clock;
2289		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2290		adapter->cycles.mult = 1;
2291		/*
2292		 * The 82580 timesync advances the system timer in 8 ns increments
2293		 * and the value cannot be shifted.  Instead we need to shift
2294		 * the registers to generate a 64bit timer value.  As a result
2295		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2296		 * 24 in order to generate a larger value for synchronization.
2297		 */
2298		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2299		/* disable system timer temporarily by setting bit 31 */
2300		wr32(E1000_TSAUXC, 0x80000000);
2301		wrfl();
2302
2303		/* Set registers so that rollover occurs soon to test this. */
2304		wr32(E1000_SYSTIMR, 0x00000000);
2305		wr32(E1000_SYSTIML, 0x80000000);
2306		wr32(E1000_SYSTIMH, 0x000000FF);
2307		wrfl();
2308
2309		/* enable system timer by clearing bit 31 */
2310		wr32(E1000_TSAUXC, 0x0);
2311		wrfl();
2312
2313		timecounter_init(&adapter->clock,
2314				 &adapter->cycles,
2315				 ktime_to_ns(ktime_get_real()));
2316		/*
2317		 * Synchronize our NIC clock against system wall clock. NIC
2318		 * time stamp reading requires ~3us per sample, each sample
2319		 * was pretty stable even under load => only require 10
2320		 * samples for each offset comparison.
2321		 */
2322		memset(&adapter->compare, 0, sizeof(adapter->compare));
2323		adapter->compare.source = &adapter->clock;
2324		adapter->compare.target = ktime_get_real;
2325		adapter->compare.num_samples = 10;
2326		timecompare_update(&adapter->compare, 0);
2327		break;
2328	case e1000_82576:
2329		/*
2330		 * Initialize hardware timer: we keep it running just in case
2331		 * that some program needs it later on.
2332		 */
2333		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2334		adapter->cycles.read = igb_read_clock;
2335		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2336		adapter->cycles.mult = 1;
2337		/*
2338		 * Scale the NIC clock cycle by a large factor so that
2339		 * relatively small clock corrections can be added or
2340		 * subtracted at each clock tick. The drawbacks of a large
2341		 * factor are a) that the clock register overflows more quickly
2342		 * (not such a big deal) and b) that the increment per tick has
2343		 * to fit into 24 bits.  As a result we need to use a shift of
2344		 * 19 so we can fit a value of 16 into the TIMINCA register.
2345		 */
2346		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2347		wr32(E1000_TIMINCA,
2348		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2349		                (16 << IGB_82576_TSYNC_SHIFT));
2350
2351		/* Set registers so that rollover occurs soon to test this. */
2352		wr32(E1000_SYSTIML, 0x00000000);
2353		wr32(E1000_SYSTIMH, 0xFF800000);
2354		wrfl();
2355
2356		timecounter_init(&adapter->clock,
2357				 &adapter->cycles,
2358				 ktime_to_ns(ktime_get_real()));
2359		/*
2360		 * Synchronize our NIC clock against system wall clock. NIC
2361		 * time stamp reading requires ~3us per sample, each sample
2362		 * was pretty stable even under load => only require 10
2363		 * samples for each offset comparison.
2364		 */
2365		memset(&adapter->compare, 0, sizeof(adapter->compare));
2366		adapter->compare.source = &adapter->clock;
2367		adapter->compare.target = ktime_get_real;
2368		adapter->compare.num_samples = 10;
2369		timecompare_update(&adapter->compare, 0);
2370		break;
2371	case e1000_82575:
2372		/* 82575 does not support timesync */
2373	default:
2374		break;
2375	}
2376
2377}
2378
2379/**
2380 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2381 * @adapter: board private structure to initialize
2382 *
2383 * igb_sw_init initializes the Adapter private data structure.
2384 * Fields are initialized based on PCI device information and
2385 * OS network device settings (MTU size).
2386 **/
2387static int __devinit igb_sw_init(struct igb_adapter *adapter)
2388{
2389	struct e1000_hw *hw = &adapter->hw;
2390	struct net_device *netdev = adapter->netdev;
2391	struct pci_dev *pdev = adapter->pdev;
2392
2393	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2394
2395	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2396	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2397	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2398	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2399
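	/* max_frame_size covers the MTU plus the Ethernet header and CRC;
	 * min_frame_size is the minimum legal Ethernet frame length */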
2400	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2401	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2402
2403	spin_lock_init(&adapter->stats64_lock);
2404#ifdef CONFIG_PCI_IOV
2405	switch (hw->mac.type) {
2406	case e1000_82576:
2407	case e1000_i350:
2408		if (max_vfs > 7) {
2409			dev_warn(&pdev->dev,
2410				 "Maximum of 7 VFs per PF, using max\n");
2411			adapter->vfs_allocated_count = 7;
2412		} else
2413			adapter->vfs_allocated_count = max_vfs;
2414		break;
2415	default:
2416		break;
2417	}
2418#endif /* CONFIG_PCI_IOV */
2419	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2420	/* i350 cannot do RSS and SR-IOV at the same time */
2421	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2422		adapter->rss_queues = 1;
2423
2424	/*
2425	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2426	 * then we should combine the queues into a queue pair in order to
2427	 * conserve interrupts due to limited supply
2428	 */
2429	if ((adapter->rss_queues > 4) ||
2430	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2431		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2432
2433	/* This call may decrease the number of queues */
2434	if (igb_init_interrupt_scheme(adapter)) {
2435		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2436		return -ENOMEM;
2437	}
2438
2439	igb_probe_vfs(adapter);
2440
2441	/* Explicitly disable IRQ since the NIC can be in any state. */
2442	igb_irq_disable(adapter);
2443
2444	if (hw->mac.type == e1000_i350)
2445		adapter->flags &= ~IGB_FLAG_DMAC;
2446
2447	set_bit(__IGB_DOWN, &adapter->state);
2448	return 0;
2449}
2450
2451/**
2452 * igb_open - Called when a network interface is made active
2453 * @netdev: network interface device structure
2454 *
2455 * Returns 0 on success, negative value on failure
2456 *
2457 * The open entry point is called when a network interface is made
2458 * active by the system (IFF_UP).  At this point all resources needed
2459 * for transmit and receive operations are allocated, the interrupt
2460 * handler is registered with the OS, the watchdog timer is started,
2461 * and the stack is notified that the interface is ready.
2462 **/
2463static int igb_open(struct net_device *netdev)
2464{
2465	struct igb_adapter *adapter = netdev_priv(netdev);
2466	struct e1000_hw *hw = &adapter->hw;
2467	int err;
2468	int i;
2469
2470	/* disallow open during test */
2471	if (test_bit(__IGB_TESTING, &adapter->state))
2472		return -EBUSY;
2473
2474	netif_carrier_off(netdev);
2475
2476	/* allocate transmit descriptors */
2477	err = igb_setup_all_tx_resources(adapter);
2478	if (err)
2479		goto err_setup_tx;
2480
2481	/* allocate receive descriptors */
2482	err = igb_setup_all_rx_resources(adapter);
2483	if (err)
2484		goto err_setup_rx;
2485
2486	igb_power_up_link(adapter);
2487
2488	/* before we allocate an interrupt, we must be ready to handle it.
2489	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2490	 * as soon as we call pci_request_irq, so we have to setup our
2491	 * clean_rx handler before we do so.  */
2492	igb_configure(adapter);
2493
2494	err = igb_request_irq(adapter);
2495	if (err)
2496		goto err_req_irq;
2497
2498	/* From here on the code is the same as igb_up() */
2499	clear_bit(__IGB_DOWN, &adapter->state);
2500
2501	for (i = 0; i < adapter->num_q_vectors; i++) {
2502		struct igb_q_vector *q_vector = adapter->q_vector[i];
2503		napi_enable(&q_vector->napi);
2504	}
2505
2506	/* Clear any pending interrupts. */
2507	rd32(E1000_ICR);
2508
2509	igb_irq_enable(adapter);
2510
2511	/* notify VFs that reset has been completed */
2512	if (adapter->vfs_allocated_count) {
2513		u32 reg_data = rd32(E1000_CTRL_EXT);
2514		reg_data |= E1000_CTRL_EXT_PFRSTD;
2515		wr32(E1000_CTRL_EXT, reg_data);
2516	}
2517
2518	netif_tx_start_all_queues(netdev);
2519
2520	/* start the watchdog. */
2521	hw->mac.get_link_status = 1;
2522	schedule_work(&adapter->watchdog_task);
2523
2524	return 0;
2525
2526err_req_irq:
2527	igb_release_hw_control(adapter);
2528	igb_power_down_link(adapter);
2529	igb_free_all_rx_resources(adapter);
2530err_setup_rx:
2531	igb_free_all_tx_resources(adapter);
2532err_setup_tx:
2533	igb_reset(adapter);
2534
2535	return err;
2536}
2537
2538/**
2539 * igb_close - Disables a network interface
2540 * @netdev: network interface device structure
2541 *
2542 * Returns 0, this is not allowed to fail
2543 *
2544 * The close entry point is called when an interface is de-activated
2545 * by the OS.  The hardware is still under the driver's control, but
2546 * needs to be disabled.  A global MAC reset is issued to stop the
2547 * hardware, and all transmit and receive resources are freed.
2548 **/
2549static int igb_close(struct net_device *netdev)
2550{
2551	struct igb_adapter *adapter = netdev_priv(netdev);
2552
2553	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2554	igb_down(adapter);
2555
2556	igb_free_irq(adapter);
2557
2558	igb_free_all_tx_resources(adapter);
2559	igb_free_all_rx_resources(adapter);
2560
2561	return 0;
2562}
2563
2564/**
2565 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2566 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2567 *
2568 * Return 0 on success, negative on failure
2569 **/
2570int igb_setup_tx_resources(struct igb_ring *tx_ring)
2571{
2572	struct device *dev = tx_ring->dev;
2573	int size;
2574
2575	size = sizeof(struct igb_buffer) * tx_ring->count;
2576	tx_ring->buffer_info = vzalloc(size);
2577	if (!tx_ring->buffer_info)
2578		goto err;
2579
2580	/* round up to nearest 4K */
2581	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2582	tx_ring->size = ALIGN(tx_ring->size, 4096);
2583
2584	tx_ring->desc = dma_alloc_coherent(dev,
2585					   tx_ring->size,
2586					   &tx_ring->dma,
2587					   GFP_KERNEL);
2588
2589	if (!tx_ring->desc)
2590		goto err;
2591
2592	tx_ring->next_to_use = 0;
2593	tx_ring->next_to_clean = 0;
2594	return 0;
2595
2596err:
2597	vfree(tx_ring->buffer_info);
2598	dev_err(dev,
2599		"Unable to allocate memory for the transmit descriptor ring\n");
2600	return -ENOMEM;
2601}
2602
2603/**
2604 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2605 *				  (Descriptors) for all queues
2606 * @adapter: board private structure
2607 *
2608 * Return 0 on success, negative on failure
2609 **/
2610static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2611{
2612	struct pci_dev *pdev = adapter->pdev;
2613	int i, err = 0;
2614
2615	for (i = 0; i < adapter->num_tx_queues; i++) {
2616		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2617		if (err) {
2618			dev_err(&pdev->dev,
2619				"Allocation for Tx Queue %u failed\n", i);
2620			for (i--; i >= 0; i--)
2621				igb_free_tx_resources(adapter->tx_ring[i]);
2622			break;
2623		}
2624	}
2625
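	/* map every possible Tx queue index onto an allocated ring, wrapping
	 * round-robin when fewer rings than IGB_ABS_MAX_TX_QUEUES were set up */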
2626	for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2627		int r_idx = i % adapter->num_tx_queues;
2628		adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2629	}
2630	return err;
2631}
2632
2633/**
2634 * igb_setup_tctl - configure the transmit control registers
2635 * @adapter: Board private structure
2636 **/
2637void igb_setup_tctl(struct igb_adapter *adapter)
2638{
2639	struct e1000_hw *hw = &adapter->hw;
2640	u32 tctl;
2641
2642	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2643	wr32(E1000_TXDCTL(0), 0);
2644
2645	/* Program the Transmit Control Register */
2646	tctl = rd32(E1000_TCTL);
2647	tctl &= ~E1000_TCTL_CT;
2648	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2649		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2650
2651	igb_config_collision_dist(hw);
2652
2653	/* Enable transmits */
2654	tctl |= E1000_TCTL_EN;
2655
2656	wr32(E1000_TCTL, tctl);
2657}
2658
2659/**
2660 * igb_configure_tx_ring - Configure transmit ring after Reset
2661 * @adapter: board private structure
2662 * @ring: tx ring to configure
2663 *
2664 * Configure a transmit ring after a reset.
2665 **/
2666void igb_configure_tx_ring(struct igb_adapter *adapter,
2667                           struct igb_ring *ring)
2668{
2669	struct e1000_hw *hw = &adapter->hw;
2670	u32 txdctl;
2671	u64 tdba = ring->dma;
2672	int reg_idx = ring->reg_idx;
2673
2674	/* disable the queue */
2675	txdctl = rd32(E1000_TXDCTL(reg_idx));
2676	wr32(E1000_TXDCTL(reg_idx),
2677	                txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2678	wrfl();
2679	mdelay(10);
2680
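	/* with the queue quiesced, program the ring length and base address */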
2681	wr32(E1000_TDLEN(reg_idx),
2682	                ring->count * sizeof(union e1000_adv_tx_desc));
2683	wr32(E1000_TDBAL(reg_idx),
2684	                tdba & 0x00000000ffffffffULL);
2685	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2686
2687	ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2688	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2689	writel(0, ring->head);
2690	writel(0, ring->tail);
2691
2692	txdctl |= IGB_TX_PTHRESH;
2693	txdctl |= IGB_TX_HTHRESH << 8;
2694	txdctl |= IGB_TX_WTHRESH << 16;
2695
2696	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2697	wr32(E1000_TXDCTL(reg_idx), txdctl);
2698}
2699
2700/**
2701 * igb_configure_tx - Configure transmit Unit after Reset
2702 * @adapter: board private structure
2703 *
2704 * Configure the Tx unit of the MAC after a reset.
2705 **/
2706static void igb_configure_tx(struct igb_adapter *adapter)
2707{
2708	int i;
2709
2710	for (i = 0; i < adapter->num_tx_queues; i++)
2711		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2712}
2713
2714/**
2715 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2716 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2717 *
2718 * Returns 0 on success, negative on failure
2719 **/
2720int igb_setup_rx_resources(struct igb_ring *rx_ring)
2721{
2722	struct device *dev = rx_ring->dev;
2723	int size, desc_len;
2724
2725	size = sizeof(struct igb_buffer) * rx_ring->count;
2726	rx_ring->buffer_info = vzalloc(size);
2727	if (!rx_ring->buffer_info)
2728		goto err;
2729
2730	desc_len = sizeof(union e1000_adv_rx_desc);
2731
2732	/* Round up to nearest 4K */
2733	rx_ring->size = rx_ring->count * desc_len;
2734	rx_ring->size = ALIGN(rx_ring->size, 4096);
2735
2736	rx_ring->desc = dma_alloc_coherent(dev,
2737					   rx_ring->size,
2738					   &rx_ring->dma,
2739					   GFP_KERNEL);
2740
2741	if (!rx_ring->desc)
2742		goto err;
2743
2744	rx_ring->next_to_clean = 0;
2745	rx_ring->next_to_use = 0;
2746
2747	return 0;
2748
2749err:
2750	vfree(rx_ring->buffer_info);
2751	rx_ring->buffer_info = NULL;
2752	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2753		" ring\n");
2754	return -ENOMEM;
2755}
2756
2757/**
2758 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2759 *				  (Descriptors) for all queues
2760 * @adapter: board private structure
2761 *
2762 * Return 0 on success, negative on failure
2763 **/
2764static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2765{
2766	struct pci_dev *pdev = adapter->pdev;
2767	int i, err = 0;
2768
2769	for (i = 0; i < adapter->num_rx_queues; i++) {
2770		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2771		if (err) {
2772			dev_err(&pdev->dev,
2773				"Allocation for Rx Queue %u failed\n", i);
2774			for (i--; i >= 0; i--)
2775				igb_free_rx_resources(adapter->rx_ring[i]);
2776			break;
2777		}
2778	}
2779
2780	return err;
2781}
2782
2783/**
2784 * igb_setup_mrqc - configure the multiple receive queue control registers
2785 * @adapter: Board private structure
2786 **/
2787static void igb_setup_mrqc(struct igb_adapter *adapter)
2788{
2789	struct e1000_hw *hw = &adapter->hw;
2790	u32 mrqc, rxcsum;
2791	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2792	union e1000_reta {
2793		u32 dword;
2794		u8  bytes[4];
2795	} reta;
2796	static const u8 rsshash[40] = {
2797		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2798		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2799		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2800		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2801
2802	/* Fill out hash function seeds */
2803	for (j = 0; j < 10; j++) {
2804		u32 rsskey = rsshash[(j * 4)];
2805		rsskey |= rsshash[(j * 4) + 1] << 8;
2806		rsskey |= rsshash[(j * 4) + 2] << 16;
2807		rsskey |= rsshash[(j * 4) + 3] << 24;
2808		array_wr32(E1000_RSSRK(0), j, rsskey);
2809	}
2810
2811	num_rx_queues = adapter->rss_queues;
2812
2813	if (adapter->vfs_allocated_count) {
2814		/* 82575 and 82576 support 2 RSS queues for VMDq */
2815		switch (hw->mac.type) {
2816		case e1000_i350:
2817		case e1000_82580:
2818			num_rx_queues = 1;
2819			shift = 0;
2820			break;
2821		case e1000_82576:
2822			shift = 3;
2823			num_rx_queues = 2;
2824			break;
2825		case e1000_82575:
2826			shift = 2;
2827			shift2 = 6;
2828		default:
2829			break;
2830		}
2831	} else {
2832		if (hw->mac.type == e1000_82575)
2833			shift = 6;
2834	}
2835
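	/* fill the 128-entry redirection table (RETA), one dword (four
	 * one-byte entries) at a time, cycling through the Rx queues */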
2836	for (j = 0; j < (32 * 4); j++) {
2837		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2838		if (shift2)
2839			reta.bytes[j & 3] |= num_rx_queues << shift2;
2840		if ((j & 3) == 3)
2841			wr32(E1000_RETA(j >> 2), reta.dword);
2842	}
2843
2844	/*
2845	 * Disable raw packet checksumming so that RSS hash is placed in
2846	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2847	 * offloads as they are enabled by default
2848	 */
2849	rxcsum = rd32(E1000_RXCSUM);
2850	rxcsum |= E1000_RXCSUM_PCSD;
2851
2852	if (adapter->hw.mac.type >= e1000_82576)
2853		/* Enable Receive Checksum Offload for SCTP */
2854		rxcsum |= E1000_RXCSUM_CRCOFL;
2855
2856	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2857	wr32(E1000_RXCSUM, rxcsum);
2858
2859	/* If VMDq is enabled then we set the appropriate mode for that, else
2860	 * we default to RSS so that an RSS hash is calculated per packet even
2861	 * if we are only using one queue */
2862	if (adapter->vfs_allocated_count) {
2863		if (hw->mac.type > e1000_82575) {
2864			/* Set the default pool for the PF's first queue */
2865			u32 vtctl = rd32(E1000_VT_CTL);
2866			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2867				   E1000_VT_CTL_DISABLE_DEF_POOL);
2868			vtctl |= adapter->vfs_allocated_count <<
2869				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2870			wr32(E1000_VT_CTL, vtctl);
2871		}
2872		if (adapter->rss_queues > 1)
2873			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2874		else
2875			mrqc = E1000_MRQC_ENABLE_VMDQ;
2876	} else {
2877		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2878	}
2879	igb_vmm_control(adapter);
2880
2881	/*
2882	 * Generate RSS hash based on TCP port numbers and/or
2883	 * IPv4/v6 src and dst addresses since UDP cannot be
2884	 * hashed reliably due to IP fragmentation
2885	 */
2886	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2887		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2888		E1000_MRQC_RSS_FIELD_IPV6 |
2889		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2890		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2891
2892	wr32(E1000_MRQC, mrqc);
2893}
2894
2895/**
2896 * igb_setup_rctl - configure the receive control registers
2897 * @adapter: Board private structure
2898 **/
2899void igb_setup_rctl(struct igb_adapter *adapter)
2900{
2901	struct e1000_hw *hw = &adapter->hw;
2902	u32 rctl;
2903
2904	rctl = rd32(E1000_RCTL);
2905
2906	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2907	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2908
2909	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2910		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2911
2912	/*
2913	 * enable stripping of CRC. It's unlikely this will break BMC
2914	 * redirection as it did with e1000. Newer features require
2915	 * that the HW strips the CRC.
2916	 */
2917	rctl |= E1000_RCTL_SECRC;
2918
2919	/* disable store bad packets and clear size bits. */
2920	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2921
2922	/* enable LPE to prevent packets larger than max_frame_size */
2923	rctl |= E1000_RCTL_LPE;
2924
2925	/* disable queue 0 to prevent tail write w/o re-config */
2926	wr32(E1000_RXDCTL(0), 0);
2927
2928	/* Attention!!!  For SR-IOV PF driver operations you must enable
2929	 * queue drop for all VF and PF queues to prevent head of line blocking
2930	 * if an un-trusted VF does not provide descriptors to hardware.
2931	 */
2932	if (adapter->vfs_allocated_count) {
2933		/* set all queue drop enable bits */
2934		wr32(E1000_QDE, ALL_QUEUES);
2935	}
2936
2937	wr32(E1000_RCTL, rctl);
2938}
2939
2940static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2941                                   int vfn)
2942{
2943	struct e1000_hw *hw = &adapter->hw;
2944	u32 vmolr;
2945
2946	/* if it isn't the PF, check to see if VFs are enabled and
2947	 * increase the size to support vlan tags */
2948	if (vfn < adapter->vfs_allocated_count &&
2949	    adapter->vf_data[vfn].vlans_enabled)
2950		size += VLAN_TAG_SIZE;
2951
2952	vmolr = rd32(E1000_VMOLR(vfn));
2953	vmolr &= ~E1000_VMOLR_RLPML_MASK;
2954	vmolr |= size | E1000_VMOLR_LPE;
2955	wr32(E1000_VMOLR(vfn), vmolr);
2956
2957	return 0;
2958}
2959
2960/**
2961 * igb_rlpml_set - set maximum receive packet size
2962 * @adapter: board private structure
2963 *
2964 * Configure maximum receivable packet size.
2965 **/
2966static void igb_rlpml_set(struct igb_adapter *adapter)
2967{
2968	u32 max_frame_size;
2969	struct e1000_hw *hw = &adapter->hw;
2970	u16 pf_id = adapter->vfs_allocated_count;
2971
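	/* leave room for a VLAN tag on top of the configured max frame size */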
2972	max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
2973
2974	/* if vfs are enabled, we set RLPML to the largest possible request
2975	 * size and set the VMOLR RLPML to the size we need */
2976	if (pf_id) {
2977		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2978		max_frame_size = MAX_JUMBO_FRAME_SIZE;
2979	}
2980
2981	wr32(E1000_RLPML, max_frame_size);
2982}
2983
2984static inline void igb_set_vmolr(struct igb_adapter *adapter,
2985				 int vfn, bool aupe)
2986{
2987	struct e1000_hw *hw = &adapter->hw;
2988	u32 vmolr;
2989
2990	/*
2991	 * This register exists only on 82576 and newer so if we are older then
2992	 * we should exit and do nothing
2993	 */
2994	if (hw->mac.type < e1000_82576)
2995		return;
2996
2997	vmolr = rd32(E1000_VMOLR(vfn));
2998	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2999	if (aupe)
3000		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3001	else
3002		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3003
3004	/* clear all bits that might not be set */
3005	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3006
3007	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3008		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3009	/*
3010	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3011	 * multicast packets
3012	 */
3013	if (vfn <= adapter->vfs_allocated_count)
3014		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3015
3016	wr32(E1000_VMOLR(vfn), vmolr);
3017}
3018
3019/**
3020 * igb_configure_rx_ring - Configure a receive ring after Reset
3021 * @adapter: board private structure
3022 * @ring: receive ring to be configured
3023 *
3024 * Configure the Rx unit of the MAC after a reset.
3025 **/
3026void igb_configure_rx_ring(struct igb_adapter *adapter,
3027                           struct igb_ring *ring)
3028{
3029	struct e1000_hw *hw = &adapter->hw;
3030	u64 rdba = ring->dma;
3031	int reg_idx = ring->reg_idx;
3032	u32 srrctl, rxdctl;
3033
3034	/* disable the queue */
3035	rxdctl = rd32(E1000_RXDCTL(reg_idx));
3036	wr32(E1000_RXDCTL(reg_idx),
3037	                rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3038
3039	/* Set DMA base address registers */
3040	wr32(E1000_RDBAL(reg_idx),
3041	     rdba & 0x00000000ffffffffULL);
3042	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3043	wr32(E1000_RDLEN(reg_idx),
3044	               ring->count * sizeof(union e1000_adv_rx_desc));
3045
3046	/* initialize head and tail */
3047	ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3048	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3049	writel(0, ring->head);
3050	writel(0, ring->tail);
3051
3052	/* set descriptor configuration */
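	/* small buffers use header-split descriptors; larger buffers use the
	 * advanced one-buffer descriptor format */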
3053	if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3054		srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3055		         E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3056#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3057		srrctl |= IGB_RXBUFFER_16384 >>
3058		          E1000_SRRCTL_BSIZEPKT_SHIFT;
3059#else
3060		srrctl |= (PAGE_SIZE / 2) >>
3061		          E1000_SRRCTL_BSIZEPKT_SHIFT;
3062#endif
3063		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3064	} else {
3065		srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3066		         E1000_SRRCTL_BSIZEPKT_SHIFT;
3067		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3068	}
3069	if (hw->mac.type == e1000_82580)
3070		srrctl |= E1000_SRRCTL_TIMESTAMP;
3071	/* Only set Drop Enable if we are supporting multiple queues */
3072	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3073		srrctl |= E1000_SRRCTL_DROP_EN;
3074
3075	wr32(E1000_SRRCTL(reg_idx), srrctl);
3076
3077	/* set filtering for VMDQ pools */
3078	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3079
3080	/* enable receive descriptor fetching */
3081	rxdctl = rd32(E1000_RXDCTL(reg_idx));
3082	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3083	rxdctl &= 0xFFF00000;
3084	rxdctl |= IGB_RX_PTHRESH;
3085	rxdctl |= IGB_RX_HTHRESH << 8;
3086	rxdctl |= IGB_RX_WTHRESH << 16;
3087	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3088}
3089
3090/**
3091 * igb_configure_rx - Configure receive Unit after Reset
3092 * @adapter: board private structure
3093 *
3094 * Configure the Rx unit of the MAC after a reset.
3095 **/
3096static void igb_configure_rx(struct igb_adapter *adapter)
3097{
3098	int i;
3099
3100	/* set UTA to appropriate mode */
3101	igb_set_uta(adapter);
3102
3103	/* set the correct pool for the PF default MAC address in entry 0 */
3104	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3105	                 adapter->vfs_allocated_count);
3106
3107	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3108	 * the Base and Length of the Rx Descriptor Ring */
3109	for (i = 0; i < adapter->num_rx_queues; i++)
3110		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3111}
3112
3113/**
3114 * igb_free_tx_resources - Free Tx Resources per Queue
3115 * @tx_ring: Tx descriptor ring for a specific queue
3116 *
3117 * Free all transmit software resources
3118 **/
3119void igb_free_tx_resources(struct igb_ring *tx_ring)
3120{
3121	igb_clean_tx_ring(tx_ring);
3122
3123	vfree(tx_ring->buffer_info);
3124	tx_ring->buffer_info = NULL;
3125
3126	/* if not set, then don't free */
3127	if (!tx_ring->desc)
3128		return;
3129
3130	dma_free_coherent(tx_ring->dev, tx_ring->size,
3131			  tx_ring->desc, tx_ring->dma);
3132
3133	tx_ring->desc = NULL;
3134}
3135
3136/**
3137 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3138 * @adapter: board private structure
3139 *
3140 * Free all transmit software resources
3141 **/
3142static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3143{
3144	int i;
3145
3146	for (i = 0; i < adapter->num_tx_queues; i++)
3147		igb_free_tx_resources(adapter->tx_ring[i]);
3148}
3149
3150void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3151				    struct igb_buffer *buffer_info)
3152{
3153	if (buffer_info->dma) {
3154		if (buffer_info->mapped_as_page)
3155			dma_unmap_page(tx_ring->dev,
3156					buffer_info->dma,
3157					buffer_info->length,
3158					DMA_TO_DEVICE);
3159		else
3160			dma_unmap_single(tx_ring->dev,
3161					buffer_info->dma,
3162					buffer_info->length,
3163					DMA_TO_DEVICE);
3164		buffer_info->dma = 0;
3165	}
3166	if (buffer_info->skb) {
3167		dev_kfree_skb_any(buffer_info->skb);
3168		buffer_info->skb = NULL;
3169	}
3170	buffer_info->time_stamp = 0;
3171	buffer_info->length = 0;
3172	buffer_info->next_to_watch = 0;
3173	buffer_info->mapped_as_page = false;
3174}
3175
3176/**
3177 * igb_clean_tx_ring - Free Tx Buffers
3178 * @tx_ring: ring to be cleaned
3179 **/
3180static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3181{
3182	struct igb_buffer *buffer_info;
3183	unsigned long size;
3184	unsigned int i;
3185
3186	if (!tx_ring->buffer_info)
3187		return;
3188	/* Free all the Tx ring sk_buffs */
3189
3190	for (i = 0; i < tx_ring->count; i++) {
3191		buffer_info = &tx_ring->buffer_info[i];
3192		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3193	}
3194
3195	size = sizeof(struct igb_buffer) * tx_ring->count;
3196	memset(tx_ring->buffer_info, 0, size);
3197
3198	/* Zero out the descriptor ring */
3199	memset(tx_ring->desc, 0, tx_ring->size);
3200
3201	tx_ring->next_to_use = 0;
3202	tx_ring->next_to_clean = 0;
3203}
3204
3205/**
3206 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3207 * @adapter: board private structure
3208 **/
3209static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3210{
3211	int i;
3212
3213	for (i = 0; i < adapter->num_tx_queues; i++)
3214		igb_clean_tx_ring(adapter->tx_ring[i]);
3215}
3216
3217/**
3218 * igb_free_rx_resources - Free Rx Resources
3219 * @rx_ring: ring to clean the resources from
3220 *
3221 * Free all receive software resources
3222 **/
3223void igb_free_rx_resources(struct igb_ring *rx_ring)
3224{
3225	igb_clean_rx_ring(rx_ring);
3226
3227	vfree(rx_ring->buffer_info);
3228	rx_ring->buffer_info = NULL;
3229
3230	/* if not set, then don't free */
3231	if (!rx_ring->desc)
3232		return;
3233
3234	dma_free_coherent(rx_ring->dev, rx_ring->size,
3235			  rx_ring->desc, rx_ring->dma);
3236
3237	rx_ring->desc = NULL;
3238}
3239
3240/**
3241 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3242 * @adapter: board private structure
3243 *
3244 * Free all receive software resources
3245 **/
3246static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3247{
3248	int i;
3249
3250	for (i = 0; i < adapter->num_rx_queues; i++)
3251		igb_free_rx_resources(adapter->rx_ring[i]);
3252}
3253
3254/**
3255 * igb_clean_rx_ring - Free Rx Buffers per Queue
3256 * @rx_ring: ring to free buffers from
3257 **/
3258static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3259{
3260	struct igb_buffer *buffer_info;
3261	unsigned long size;
3262	unsigned int i;
3263
3264	if (!rx_ring->buffer_info)
3265		return;
3266
3267	/* Free all the Rx ring sk_buffs */
3268	for (i = 0; i < rx_ring->count; i++) {
3269		buffer_info = &rx_ring->buffer_info[i];
3270		if (buffer_info->dma) {
3271			dma_unmap_single(rx_ring->dev,
3272			                 buffer_info->dma,
3273					 rx_ring->rx_buffer_len,
3274					 DMA_FROM_DEVICE);
3275			buffer_info->dma = 0;
3276		}
3277
3278		if (buffer_info->skb) {
3279			dev_kfree_skb(buffer_info->skb);
3280			buffer_info->skb = NULL;
3281		}
3282		if (buffer_info->page_dma) {
3283			dma_unmap_page(rx_ring->dev,
3284			               buffer_info->page_dma,
3285				       PAGE_SIZE / 2,
3286				       DMA_FROM_DEVICE);
3287			buffer_info->page_dma = 0;
3288		}
3289		if (buffer_info->page) {
3290			put_page(buffer_info->page);
3291			buffer_info->page = NULL;
3292			buffer_info->page_offset = 0;
3293		}
3294	}
3295
3296	size = sizeof(struct igb_buffer) * rx_ring->count;
3297	memset(rx_ring->buffer_info, 0, size);
3298
3299	/* Zero out the descriptor ring */
3300	memset(rx_ring->desc, 0, rx_ring->size);
3301
3302	rx_ring->next_to_clean = 0;
3303	rx_ring->next_to_use = 0;
3304}
3305
3306/**
3307 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3308 * @adapter: board private structure
3309 **/
3310static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3311{
3312	int i;
3313
3314	for (i = 0; i < adapter->num_rx_queues; i++)
3315		igb_clean_rx_ring(adapter->rx_ring[i]);
3316}
3317
3318/**
3319 * igb_set_mac - Change the Ethernet Address of the NIC
3320 * @netdev: network interface device structure
3321 * @p: pointer to an address structure
3322 *
3323 * Returns 0 on success, negative on failure
3324 **/
3325static int igb_set_mac(struct net_device *netdev, void *p)
3326{
3327	struct igb_adapter *adapter = netdev_priv(netdev);
3328	struct e1000_hw *hw = &adapter->hw;
3329	struct sockaddr *addr = p;
3330
3331	if (!is_valid_ether_addr(addr->sa_data))
3332		return -EADDRNOTAVAIL;
3333
3334	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3335	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3336
3337	/* set the correct pool for the new PF MAC address in entry 0 */
3338	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3339	                 adapter->vfs_allocated_count);
3340
3341	return 0;
3342}
3343
3344/**
3345 * igb_write_mc_addr_list - write multicast addresses to MTA
3346 * @netdev: network interface device structure
3347 *
3348 * Writes multicast address list to the MTA hash table.
3349 * Returns: -ENOMEM on failure
3350 *                0 on no addresses written
3351 *                X on writing X addresses to MTA
3352 **/
3353static int igb_write_mc_addr_list(struct net_device *netdev)
3354{
3355	struct igb_adapter *adapter = netdev_priv(netdev);
3356	struct e1000_hw *hw = &adapter->hw;
3357	struct netdev_hw_addr *ha;
3358	u8  *mta_list;
3359	int i;
3360
3361	if (netdev_mc_empty(netdev)) {
3362		/* nothing to program, so clear mc list */
3363		igb_update_mc_addr_list(hw, NULL, 0);
3364		igb_restore_vf_multicasts(adapter);
3365		return 0;
3366	}
3367
3368	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3369	if (!mta_list)
3370		return -ENOMEM;
3371
3372	/* The shared function expects a packed array of only addresses. */
3373	i = 0;
3374	netdev_for_each_mc_addr(ha, netdev)
3375		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3376
3377	igb_update_mc_addr_list(hw, mta_list, i);
3378	kfree(mta_list);
3379
3380	return netdev_mc_count(netdev);
3381}
3382
3383/**
3384 * igb_write_uc_addr_list - write unicast addresses to RAR table
3385 * @netdev: network interface device structure
3386 *
3387 * Writes unicast address list to the RAR table.
3388 * Returns: -ENOMEM on failure/insufficient address space
3389 *                0 on no addresses written
3390 *                X on writing X addresses to the RAR table
3391 **/
3392static int igb_write_uc_addr_list(struct net_device *netdev)
3393{
3394	struct igb_adapter *adapter = netdev_priv(netdev);
3395	struct e1000_hw *hw = &adapter->hw;
3396	unsigned int vfn = adapter->vfs_allocated_count;
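	/* RAR entry 0 holds the PF default MAC and the top entries hold the
	 * VF MAC addresses, leaving rar_entries slots for unicast filters */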
3397	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3398	int count = 0;
3399
3400	/* return ENOMEM indicating insufficient memory for addresses */
3401	if (netdev_uc_count(netdev) > rar_entries)
3402		return -ENOMEM;
3403
3404	if (!netdev_uc_empty(netdev) && rar_entries) {
3405		struct netdev_hw_addr *ha;
3406
3407		netdev_for_each_uc_addr(ha, netdev) {
3408			if (!rar_entries)
3409				break;
3410			igb_rar_set_qsel(adapter, ha->addr,
3411			                 rar_entries--,
3412			                 vfn);
3413			count++;
3414		}
3415	}
3416	/* write the addresses in reverse order to avoid write combining */
3417	for (; rar_entries > 0 ; rar_entries--) {
3418		wr32(E1000_RAH(rar_entries), 0);
3419		wr32(E1000_RAL(rar_entries), 0);
3420	}
3421	wrfl();
3422
3423	return count;
3424}
3425
3426/**
3427 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3428 * @netdev: network interface device structure
3429 *
3430 * The set_rx_mode entry point is called whenever the unicast or multicast
3431 * address lists or the network interface flags are updated.  This routine is
3432 * responsible for configuring the hardware for proper unicast, multicast,
3433 * promiscuous mode, and all-multi behavior.
3434 **/
3435static void igb_set_rx_mode(struct net_device *netdev)
3436{
3437	struct igb_adapter *adapter = netdev_priv(netdev);
3438	struct e1000_hw *hw = &adapter->hw;
3439	unsigned int vfn = adapter->vfs_allocated_count;
3440	u32 rctl, vmolr = 0;
3441	int count;
3442
3443	/* Check for Promiscuous and All Multicast modes */
3444	rctl = rd32(E1000_RCTL);
3445
3446	/* clear the affected bits */
3447	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3448
3449	if (netdev->flags & IFF_PROMISC) {
3450		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3451		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3452	} else {
3453		if (netdev->flags & IFF_ALLMULTI) {
3454			rctl |= E1000_RCTL_MPE;
3455			vmolr |= E1000_VMOLR_MPME;
3456		} else {
3457			/*
3458			 * Write addresses to the MTA, if the attempt fails
3459			 * then we should just turn on promiscuous mode so
3460			 * that we can at least receive multicast traffic
3461			 */
3462			count = igb_write_mc_addr_list(netdev);
3463			if (count < 0) {
3464				rctl |= E1000_RCTL_MPE;
3465				vmolr |= E1000_VMOLR_MPME;
3466			} else if (count) {
3467				vmolr |= E1000_VMOLR_ROMPE;
3468			}
3469		}
3470		/*
3471		 * Write addresses to available RAR registers, if there is not
3472		 * sufficient space to store all the addresses then enable
3473		 * unicast promiscuous mode
3474		 */
3475		count = igb_write_uc_addr_list(netdev);
3476		if (count < 0) {
3477			rctl |= E1000_RCTL_UPE;
3478			vmolr |= E1000_VMOLR_ROPE;
3479		}
3480		rctl |= E1000_RCTL_VFE;
3481	}
3482	wr32(E1000_RCTL, rctl);
3483
3484	/*
3485	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3486	 * the VMOLR to enable the appropriate modes.  Without this workaround
3487	 * we will have issues with VLAN tag stripping not being done for frames
3488	 * that are only arriving because we are the default pool
3489	 */
3490	if (hw->mac.type < e1000_82576)
3491		return;
3492
3493	vmolr |= rd32(E1000_VMOLR(vfn)) &
3494	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3495	wr32(E1000_VMOLR(vfn), vmolr);
3496	igb_restore_vf_multicasts(adapter);
3497}
3498
3499static void igb_check_wvbr(struct igb_adapter *adapter)
3500{
3501	struct e1000_hw *hw = &adapter->hw;
3502	u32 wvbr = 0;
3503
3504	switch (hw->mac.type) {
3505	case e1000_82576:
3506	case e1000_i350:
3507		if (!(wvbr = rd32(E1000_WVBR)))
3508			return;
3509		break;
3510	default:
3511		break;
3512	}
3513
3514	adapter->wvbr |= wvbr;
3515}
3516
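/* each VF has two spoof-event bits in WVBR, reported 8 bits apart,
 * hence the staggered queue offset below */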
3517#define IGB_STAGGERED_QUEUE_OFFSET 8
3518
3519static void igb_spoof_check(struct igb_adapter *adapter)
3520{
3521	int j;
3522
3523	if (!adapter->wvbr)
3524		return;
3525
3526	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3527		if (adapter->wvbr & (1 << j) ||
3528		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3529			dev_warn(&adapter->pdev->dev,
3530				"Spoof event(s) detected on VF %d\n", j);
3531			adapter->wvbr &=
3532				~((1 << j) |
3533				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3534		}
3535	}
3536}
3537
3538/* Need to wait a few seconds after link up to get diagnostic information from
3539 * the phy */
3540static void igb_update_phy_info(unsigned long data)
3541{
3542	struct igb_adapter *adapter = (struct igb_adapter *) data;
3543	igb_get_phy_info(&adapter->hw);
3544}
3545
3546/**
3547 * igb_has_link - check shared code for link and determine up/down
3548 * @adapter: pointer to driver private info
3549 **/
3550bool igb_has_link(struct igb_adapter *adapter)
3551{
3552	struct e1000_hw *hw = &adapter->hw;
3553	bool link_active = false;
3554	s32 ret_val = 0;
3555
3556	/* get_link_status is set on LSC (link status) interrupt or
3557	 * rx sequence error interrupt.  It stays set until
3558	 * check_for_link establishes link, so link_active remains
3559	 * false until then (copper adapters ONLY).
3560	 */
3561	switch (hw->phy.media_type) {
3562	case e1000_media_type_copper:
3563		if (hw->mac.get_link_status) {
3564			ret_val = hw->mac.ops.check_for_link(hw);
3565			link_active = !hw->mac.get_link_status;
3566		} else {
3567			link_active = true;
3568		}
3569		break;
3570	case e1000_media_type_internal_serdes:
3571		ret_val = hw->mac.ops.check_for_link(hw);
3572		link_active = hw->mac.serdes_has_link;
3573		break;
3574	default:
3575	case e1000_media_type_unknown:
3576		break;
3577	}
3578
3579	return link_active;
3580}
3581
3582static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3583{
3584	bool ret = false;
3585	u32 ctrl_ext, thstat;
3586
3587	/* check for thermal sensor event on i350, copper only */
3588	if (hw->mac.type == e1000_i350) {
3589		thstat = rd32(E1000_THSTAT);
3590		ctrl_ext = rd32(E1000_CTRL_EXT);
3591
3592		if ((hw->phy.media_type == e1000_media_type_copper) &&
3593		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3594			ret = !!(thstat & event);
3595		}
3596	}
3597
3598	return ret;
3599}
3600
3601/**
3602 * igb_watchdog - Timer Call-back
3603 * @data: pointer to adapter cast into an unsigned long
3604 **/
3605static void igb_watchdog(unsigned long data)
3606{
3607	struct igb_adapter *adapter = (struct igb_adapter *)data;
3608	/* Do the rest outside of interrupt context */
3609	schedule_work(&adapter->watchdog_task);
3610}
3611
3612static void igb_watchdog_task(struct work_struct *work)
3613{
3614	struct igb_adapter *adapter = container_of(work,
3615	                                           struct igb_adapter,
3616                                                   watchdog_task);
3617	struct e1000_hw *hw = &adapter->hw;
3618	struct net_device *netdev = adapter->netdev;
3619	u32 link;
3620	int i;
3621
3622	link = igb_has_link(adapter);
3623	if (link) {
3624		if (!netif_carrier_ok(netdev)) {
3625			u32 ctrl;
3626			hw->mac.ops.get_speed_and_duplex(hw,
3627			                                 &adapter->link_speed,
3628			                                 &adapter->link_duplex);
3629
3630			ctrl = rd32(E1000_CTRL);
3631			/* Link status message must follow this format */
3632			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3633				 "Flow Control: %s\n",
3634			       netdev->name,
3635			       adapter->link_speed,
3636			       adapter->link_duplex == FULL_DUPLEX ?
3637				 "Full Duplex" : "Half Duplex",
3638			       ((ctrl & E1000_CTRL_TFCE) &&
3639			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3640			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3641			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3642
3643			/* check for thermal sensor event */
3644			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3645				printk(KERN_INFO "igb: %s The network adapter "
3646						 "link speed was downshifted "
3647						 "because it overheated.\n",
3648						 netdev->name);
3649			}
3650
3651			/* adjust timeout factor according to speed/duplex */
3652			adapter->tx_timeout_factor = 1;
3653			switch (adapter->link_speed) {
3654			case SPEED_10:
3655				adapter->tx_timeout_factor = 14;
3656				break;
3657			case SPEED_100:
3658				/* maybe add some timeout factor ? */
3659				break;
3660			}
3661
3662			netif_carrier_on(netdev);
3663
3664			igb_ping_all_vfs(adapter);
3665			igb_check_vf_rate_limit(adapter);
3666
3667			/* link state has changed, schedule phy info update */
3668			if (!test_bit(__IGB_DOWN, &adapter->state))
3669				mod_timer(&adapter->phy_info_timer,
3670					  round_jiffies(jiffies + 2 * HZ));
3671		}
3672	} else {
3673		if (netif_carrier_ok(netdev)) {
3674			adapter->link_speed = 0;
3675			adapter->link_duplex = 0;
3676
3677			/* check for thermal sensor event */
3678			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3679				printk(KERN_ERR "igb: %s The network adapter "
3680						"was stopped because it "
3681						"overheated.\n",
3682						netdev->name);
3683			}
3684
3685			/* Link status message must follow this format */
3686			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3687			       netdev->name);
3688			netif_carrier_off(netdev);
3689
3690			igb_ping_all_vfs(adapter);
3691
3692			/* link state has changed, schedule phy info update */
3693			if (!test_bit(__IGB_DOWN, &adapter->state))
3694				mod_timer(&adapter->phy_info_timer,
3695					  round_jiffies(jiffies + 2 * HZ));
3696		}
3697	}
3698
3699	spin_lock(&adapter->stats64_lock);
3700	igb_update_stats(adapter, &adapter->stats64);
3701	spin_unlock(&adapter->stats64_lock);
3702
3703	for (i = 0; i < adapter->num_tx_queues; i++) {
3704		struct igb_ring *tx_ring = adapter->tx_ring[i];
3705		if (!netif_carrier_ok(netdev)) {
3706			/* We've lost link, so the controller stops DMA,
3707			 * but we've got queued Tx work that's never going
3708			 * to get done, so reset controller to flush Tx.
3709			 * (Do the reset outside of interrupt context). */
3710			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3711				adapter->tx_timeout_count++;
3712				schedule_work(&adapter->reset_task);
3713				/* return immediately since reset is imminent */
3714				return;
3715			}
3716		}
3717
3718		/* Force detection of hung controller every watchdog period */
3719		tx_ring->detect_tx_hung = true;
3720	}
3721
3722	/* Cause software interrupt to ensure rx ring is cleaned */
3723	if (adapter->msix_entries) {
3724		u32 eics = 0;
3725		for (i = 0; i < adapter->num_q_vectors; i++) {
3726			struct igb_q_vector *q_vector = adapter->q_vector[i];
3727			eics |= q_vector->eims_value;
3728		}
3729		wr32(E1000_EICS, eics);
3730	} else {
3731		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3732	}
3733
3734	igb_spoof_check(adapter);
3735
3736	/* Reset the timer */
3737	if (!test_bit(__IGB_DOWN, &adapter->state))
3738		mod_timer(&adapter->watchdog_timer,
3739			  round_jiffies(jiffies + 2 * HZ));
3740}
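
/*
 * The software-interrupt kick at the end of the watchdog simply ORs every
 * q_vector's eims_value into one mask: assuming the usual one-bit-per-vector
 * layout, four MSI-X queue vectors give eics = 0xf, and the single
 * wr32(E1000_EICS, eics) raises an interrupt on all of them so their NAPI
 * handlers run even if a hardware interrupt was missed.
 */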
3741
3742enum latency_range {
3743	lowest_latency = 0,
3744	low_latency = 1,
3745	bulk_latency = 2,
3746	latency_invalid = 255
3747};
3748
3749/**
3750 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3751 * @q_vector: pointer to q_vector
3752 *
3753 *      Stores a new ITR value based strictly on packet size.  This
3754 *      algorithm is less sophisticated than that used in igb_update_itr,
3755 *      due to the difficulty of synchronizing statistics across multiple
3756 *      receive rings.  The divisors and thresholds used by this function
3757 *      were determined based on theoretical maximum wire speed and testing
3758 *      data, in order to minimize response time while increasing bulk
3759 *      throughput.
3760 *      This functionality is controlled by the InterruptThrottleRate module
3761 *      parameter (see igb_param.c)
3762 *      NOTE:  This function is called only when operating in a multiqueue
3763 *             receive environment.
3764 **/
3765static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3766{
3767	int new_val = q_vector->itr_val;
3768	int avg_wire_size = 0;
3769	struct igb_adapter *adapter = q_vector->adapter;
3770	struct igb_ring *ring;
3771	unsigned int packets;
3772
3773	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3774	 * ints/sec - an itr_val of 976, close to the 980 used for bulk_latency.
3775	 */
3776	if (adapter->link_speed != SPEED_1000) {
3777		new_val = 976;
3778		goto set_itr_val;
3779	}
3780
3781	ring = q_vector->rx_ring;
3782	if (ring) {
3783		packets = ACCESS_ONCE(ring->total_packets);
3784
3785		if (packets)
3786			avg_wire_size = ring->total_bytes / packets;
3787	}
3788
3789	ring = q_vector->tx_ring;
3790	if (ring) {
3791		packets = ACCESS_ONCE(ring->total_packets);
3792
3793		if (packets)
3794			avg_wire_size = max_t(u32, avg_wire_size,
3795			                      ring->total_bytes / packets);
3796	}
3797
3798	/* if avg_wire_size isn't set no work was done */
3799	if (!avg_wire_size)
3800		goto clear_counts;
3801
3802	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3803	avg_wire_size += 24;
3804
3805	/* Don't starve jumbo frames */
3806	avg_wire_size = min(avg_wire_size, 3000);
3807
3808	/* Give a little boost to mid-size frames */
3809	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3810		new_val = avg_wire_size / 3;
3811	else
3812		new_val = avg_wire_size / 2;
3813
3814	/* when in itr mode 3 do not exceed 20K ints/sec */
3815	if (adapter->rx_itr_setting == 3 && new_val < 196)
3816		new_val = 196;
3817
3818set_itr_val:
3819	if (new_val != q_vector->itr_val) {
3820		q_vector->itr_val = new_val;
3821		q_vector->set_itr = 1;
3822	}
3823clear_counts:
3824	if (q_vector->rx_ring) {
3825		q_vector->rx_ring->total_bytes = 0;
3826		q_vector->rx_ring->total_packets = 0;
3827	}
3828	if (q_vector->tx_ring) {
3829		q_vector->tx_ring->total_bytes = 0;
3830		q_vector->tx_ring->total_packets = 0;
3831	}
3832}
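
/*
 * Example of the sizing heuristic above: an average received packet of
 * 300 bytes becomes avg_wire_size = 324 after the 24-byte CRC/preamble/gap
 * adjustment, lands in the mid-size band and yields itr_val 324 / 3 = 108
 * (raised to 196 if rx_itr_setting is the conservative mode 3).  Full
 * 1500-byte frames give 1524 / 2 = 762, i.e. a lower interrupt rate for
 * bulk traffic.
 */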
3833
3834/**
3835 * igb_update_itr - update the dynamic ITR value based on statistics
3836 * @adapter: pointer to adapter
3837 * @itr_setting: current q_vector->itr_val
3838 * @packets: the number of packets during this measurement interval
3839 * @bytes: the number of bytes during this measurement interval
3840 *
3841 *      Stores a new ITR value based on packet and byte counts during the
3842 *      last interrupt.  The advantage of per-interrupt computation is
3843 *      faster updates and a more accurate ITR for the current traffic
3844 *      pattern.  Constants in this function were computed based on
3845 *      theoretical maximum wire speed; thresholds were set based on testing
3846 *      data, minimizing response time while increasing bulk throughput.
3847 *      This functionality is controlled by the InterruptThrottleRate module
3848 *      parameter (see igb_param.c)
3849 *      NOTE:  These calculations are only valid when operating in a single-
3850 *             queue environment.
3851 **/
3852static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3853				   int packets, int bytes)
3854{
3855	unsigned int retval = itr_setting;
3856
3857	if (packets == 0)
3858		goto update_itr_done;
3859
3860	switch (itr_setting) {
3861	case lowest_latency:
3862		/* handle TSO and jumbo frames */
3863		if (bytes/packets > 8000)
3864			retval = bulk_latency;
3865		else if ((packets < 5) && (bytes > 512))
3866			retval = low_latency;
3867		break;
3868	case low_latency:  /* 50 usec aka 20000 ints/s */
3869		if (bytes > 10000) {
3870			/* this if handles the TSO accounting */
3871			if (bytes/packets > 8000) {
3872				retval = bulk_latency;
3873			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3874				retval = bulk_latency;
3875			} else if ((packets > 35)) {
3876				retval = lowest_latency;
3877			}
3878		} else if (bytes/packets > 2000) {
3879			retval = bulk_latency;
3880		} else if (packets <= 2 && bytes < 512) {
3881			retval = lowest_latency;
3882		}
3883		break;
3884	case bulk_latency: /* 250 usec aka 4000 ints/s */
3885		if (bytes > 25000) {
3886			if (packets > 35)
3887				retval = low_latency;
3888		} else if (bytes < 1500) {
3889			retval = low_latency;
3890		}
3891		break;
3892	}
3893
3894update_itr_done:
3895	return retval;
3896}
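
/*
 * Example of the state machine above: a queue at low_latency that sees
 * 40 packets / 12000 bytes in one interval (bytes > 10000, packets > 35)
 * moves to lowest_latency, while the same queue seeing 4 TSO-sized sends
 * totalling 40000 bytes (bytes/packets > 8000) moves to bulk_latency.
 * igb_set_itr() below then maps those states to itr_val 56, 196 or 980.
 */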
3897
3898static void igb_set_itr(struct igb_adapter *adapter)
3899{
3900	struct igb_q_vector *q_vector = adapter->q_vector[0];
3901	u16 current_itr;
3902	u32 new_itr = q_vector->itr_val;
3903
3904	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3905	if (adapter->link_speed != SPEED_1000) {
3906		current_itr = 0;
3907		new_itr = 4000;
3908		goto set_itr_now;
3909	}
3910
3911	adapter->rx_itr = igb_update_itr(adapter,
3912				    adapter->rx_itr,
3913				    q_vector->rx_ring->total_packets,
3914				    q_vector->rx_ring->total_bytes);
3915
3916	adapter->tx_itr = igb_update_itr(adapter,
3917				    adapter->tx_itr,
3918				    q_vector->tx_ring->total_packets,
3919				    q_vector->tx_ring->total_bytes);
3920	current_itr = max(adapter->rx_itr, adapter->tx_itr);
3921
3922	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3923	if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3924		current_itr = low_latency;
3925
3926	switch (current_itr) {
3927	/* counts and packets in update_itr are dependent on these numbers */
3928	case lowest_latency:
3929		new_itr = 56;  /* aka 70,000 ints/sec */
3930		break;
3931	case low_latency:
3932		new_itr = 196; /* aka 20,000 ints/sec */
3933		break;
3934	case bulk_latency:
3935		new_itr = 980; /* aka 4,000 ints/sec */
3936		break;
3937	default:
3938		break;
3939	}
3940
3941set_itr_now:
3942	q_vector->rx_ring->total_bytes = 0;
3943	q_vector->rx_ring->total_packets = 0;
3944	q_vector->tx_ring->total_bytes = 0;
3945	q_vector->tx_ring->total_packets = 0;
3946
3947	if (new_itr != q_vector->itr_val) {
3948		/* this attempts to bias the interrupt rate towards Bulk
3949		 * by adding intermediate steps when interrupt rate is
3950		 * increasing */
3951		new_itr = new_itr > q_vector->itr_val ?
3952		             max((new_itr * q_vector->itr_val) /
3953		                 (new_itr + (q_vector->itr_val >> 2)),
3954		                 new_itr) :
3955			     new_itr;
3956		/* Don't write the value here; it resets the adapter's
3957		 * internal timer, and causes us to delay far longer than
3958		 * we should between interrupts.  Instead, we write the ITR
3959		 * value at the beginning of the next interrupt so the timing
3960		 * ends up being correct.
3961		 */
3962		q_vector->itr_val = new_itr;
3963		q_vector->set_itr = 1;
3964	}
3965}
3966
3967#define IGB_TX_FLAGS_CSUM		0x00000001
3968#define IGB_TX_FLAGS_VLAN		0x00000002
3969#define IGB_TX_FLAGS_TSO		0x00000004
3970#define IGB_TX_FLAGS_IPV4		0x00000008
3971#define IGB_TX_FLAGS_TSTAMP		0x00000010
3972#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
3973#define IGB_TX_FLAGS_VLAN_SHIFT		        16
3974
3975static inline int igb_tso_adv(struct igb_ring *tx_ring,
3976			      struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3977{
3978	struct e1000_adv_tx_context_desc *context_desc;
3979	unsigned int i;
3980	int err;
3981	struct igb_buffer *buffer_info;
3982	u32 info = 0, tu_cmd = 0;
3983	u32 mss_l4len_idx;
3984	u8 l4len;
3985
3986	if (skb_header_cloned(skb)) {
3987		err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3988		if (err)
3989			return err;
3990	}
3991
3992	l4len = tcp_hdrlen(skb);
3993	*hdr_len += l4len;
3994
3995	if (skb->protocol == htons(ETH_P_IP)) {
3996		struct iphdr *iph = ip_hdr(skb);
3997		iph->tot_len = 0;
3998		iph->check = 0;
3999		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4000							 iph->daddr, 0,
4001							 IPPROTO_TCP,
4002							 0);
4003	} else if (skb_is_gso_v6(skb)) {
4004		ipv6_hdr(skb)->payload_len = 0;
4005		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4006						       &ipv6_hdr(skb)->daddr,
4007						       0, IPPROTO_TCP, 0);
4008	}
4009
4010	i = tx_ring->next_to_use;
4011
4012	buffer_info = &tx_ring->buffer_info[i];
4013	context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4014	/* VLAN MACLEN IPLEN */
4015	if (tx_flags & IGB_TX_FLAGS_VLAN)
4016		info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4017	info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4018	*hdr_len += skb_network_offset(skb);
4019	info |= skb_network_header_len(skb);
4020	*hdr_len += skb_network_header_len(skb);
4021	context_desc->vlan_macip_lens = cpu_to_le32(info);
4022
4023	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4024	tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4025
4026	if (skb->protocol == htons(ETH_P_IP))
4027		tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4028	tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4029
4030	context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4031
4032	/* MSS L4LEN IDX */
4033	mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4034	mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4035
4036	/* For 82575, context index must be unique per ring. */
4037	if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4038		mss_l4len_idx |= tx_ring->reg_idx << 4;
4039
4040	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4041	context_desc->seqnum_seed = 0;
4042
4043	buffer_info->time_stamp = jiffies;
4044	buffer_info->next_to_watch = i;
4045	buffer_info->dma = 0;
4046	i++;
4047	if (i == tx_ring->count)
4048		i = 0;
4049
4050	tx_ring->next_to_use = i;
4051
4052	return true;
4053}
4054
4055static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4056				   struct sk_buff *skb, u32 tx_flags)
4057{
4058	struct e1000_adv_tx_context_desc *context_desc;
4059	struct device *dev = tx_ring->dev;
4060	struct igb_buffer *buffer_info;
4061	u32 info = 0, tu_cmd = 0;
4062	unsigned int i;
4063
4064	if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4065	    (tx_flags & IGB_TX_FLAGS_VLAN)) {
4066		i = tx_ring->next_to_use;
4067		buffer_info = &tx_ring->buffer_info[i];
4068		context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4069
4070		if (tx_flags & IGB_TX_FLAGS_VLAN)
4071			info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4072
4073		info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4074		if (skb->ip_summed == CHECKSUM_PARTIAL)
4075			info |= skb_network_header_len(skb);
4076
4077		context_desc->vlan_macip_lens = cpu_to_le32(info);
4078
4079		tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4080
4081		if (skb->ip_summed == CHECKSUM_PARTIAL) {
4082			__be16 protocol;
4083
4084			if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4085				const struct vlan_ethhdr *vhdr =
4086				          (const struct vlan_ethhdr*)skb->data;
4087
4088				protocol = vhdr->h_vlan_encapsulated_proto;
4089			} else {
4090				protocol = skb->protocol;
4091			}
4092
4093			switch (protocol) {
4094			case cpu_to_be16(ETH_P_IP):
4095				tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4096				if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4097					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4098				else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4099					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4100				break;
4101			case cpu_to_be16(ETH_P_IPV6):
4102				/* XXX what about other V6 headers?? */
4103				if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4104					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4105				else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4106					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4107				break;
4108			default:
4109				if (unlikely(net_ratelimit()))
4110					dev_warn(dev,
4111					    "partial checksum but proto=%x!\n",
4112					    skb->protocol);
4113				break;
4114			}
4115		}
4116
4117		context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4118		context_desc->seqnum_seed = 0;
4119		if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4120			context_desc->mss_l4len_idx =
4121				cpu_to_le32(tx_ring->reg_idx << 4);
4122
4123		buffer_info->time_stamp = jiffies;
4124		buffer_info->next_to_watch = i;
4125		buffer_info->dma = 0;
4126
4127		i++;
4128		if (i == tx_ring->count)
4129			i = 0;
4130		tx_ring->next_to_use = i;
4131
4132		return true;
4133	}
4134	return false;
4135}
4136
4137#define IGB_MAX_TXD_PWR	16
4138#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
4139
4140static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4141				 unsigned int first)
4142{
4143	struct igb_buffer *buffer_info;
4144	struct device *dev = tx_ring->dev;
4145	unsigned int hlen = skb_headlen(skb);
4146	unsigned int count = 0, i;
4147	unsigned int f;
4148	u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4149
4150	i = tx_ring->next_to_use;
4151
4152	buffer_info = &tx_ring->buffer_info[i];
4153	BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4154	buffer_info->length = hlen;
4155	/* set time_stamp *before* dma to help avoid a possible race */
4156	buffer_info->time_stamp = jiffies;
4157	buffer_info->next_to_watch = i;
4158	buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4159					  DMA_TO_DEVICE);
4160	if (dma_mapping_error(dev, buffer_info->dma))
4161		goto dma_error;
4162
4163	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4164		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4165		unsigned int len = frag->size;
4166
4167		count++;
4168		i++;
4169		if (i == tx_ring->count)
4170			i = 0;
4171
4172		buffer_info = &tx_ring->buffer_info[i];
4173		BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4174		buffer_info->length = len;
4175		buffer_info->time_stamp = jiffies;
4176		buffer_info->next_to_watch = i;
4177		buffer_info->mapped_as_page = true;
4178		buffer_info->dma = dma_map_page(dev,
4179						frag->page,
4180						frag->page_offset,
4181						len,
4182						DMA_TO_DEVICE);
4183		if (dma_mapping_error(dev, buffer_info->dma))
4184			goto dma_error;
4185
4186	}
4187
4188	tx_ring->buffer_info[i].skb = skb;
4189	tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4190	/* multiply data chunks by size of headers */
4191	tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4192	tx_ring->buffer_info[i].gso_segs = gso_segs;
4193	tx_ring->buffer_info[first].next_to_watch = i;
4194
4195	return ++count;
4196
4197dma_error:
4198	dev_err(dev, "TX DMA map failed\n");
4199
4200	/* clear timestamp and dma mappings for failed buffer_info mapping */
4201	buffer_info->dma = 0;
4202	buffer_info->time_stamp = 0;
4203	buffer_info->length = 0;
4204	buffer_info->next_to_watch = 0;
4205	buffer_info->mapped_as_page = false;
4206
4207	/* clear timestamp and dma mappings for remaining portion of packet */
4208	while (count--) {
4209		if (i == 0)
4210			i = tx_ring->count;
4211		i--;
4212		buffer_info = &tx_ring->buffer_info[i];
4213		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4214	}
4215
4216	return 0;
4217}
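
/*
 * Bookkeeping example for the mapping above: a TSO skb with a 66-byte
 * header (hlen), two page fragments and gso_segs = 4 consumes three data
 * descriptors (head + 2 frags, so the function returns count = 3), and the
 * last buffer_info records bytecount = skb->len + (4 - 1) * 66 so the byte
 * statistics include the replicated headers of every segment the hardware
 * will emit.
 */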
4218
4219static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4220				    u32 tx_flags, int count, u32 paylen,
4221				    u8 hdr_len)
4222{
4223	union e1000_adv_tx_desc *tx_desc;
4224	struct igb_buffer *buffer_info;
4225	u32 olinfo_status = 0, cmd_type_len;
4226	unsigned int i = tx_ring->next_to_use;
4227
4228	cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4229			E1000_ADVTXD_DCMD_DEXT);
4230
4231	if (tx_flags & IGB_TX_FLAGS_VLAN)
4232		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4233
4234	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4235		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4236
4237	if (tx_flags & IGB_TX_FLAGS_TSO) {
4238		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4239
4240		/* insert tcp checksum */
4241		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4242
4243		/* insert ip checksum */
4244		if (tx_flags & IGB_TX_FLAGS_IPV4)
4245			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4246
4247	} else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4248		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4249	}
4250
4251	if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4252	    (tx_flags & (IGB_TX_FLAGS_CSUM |
4253	                 IGB_TX_FLAGS_TSO |
4254			 IGB_TX_FLAGS_VLAN)))
4255		olinfo_status |= tx_ring->reg_idx << 4;
4256
4257	olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4258
4259	do {
4260		buffer_info = &tx_ring->buffer_info[i];
4261		tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4262		tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4263		tx_desc->read.cmd_type_len =
4264			cpu_to_le32(cmd_type_len | buffer_info->length);
4265		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4266		count--;
4267		i++;
4268		if (i == tx_ring->count)
4269			i = 0;
4270	} while (count > 0);
4271
4272	tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4273	/* Force memory writes to complete before letting h/w
4274	 * know there are new descriptors to fetch.  (Only
4275	 * applicable for weak-ordered memory model archs,
4276	 * such as IA-64). */
4277	wmb();
4278
4279	tx_ring->next_to_use = i;
4280	writel(i, tx_ring->tail);
4281	/* we need this if more than one processor can write to our tail
4282	 * at a time; it synchronizes IO on IA64/Altix systems */
4283	mmiowb();
4284}
4285
4286static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4287{
4288	struct net_device *netdev = tx_ring->netdev;
4289
4290	netif_stop_subqueue(netdev, tx_ring->queue_index);
4291
4292	/* Herbert's original patch had:
4293	 *  smp_mb__after_netif_stop_queue();
4294	 * but since that doesn't exist yet, just open code it. */
4295	smp_mb();
4296
4297	/* We need to check again in case another CPU has just
4298	 * made room available. */
4299	if (igb_desc_unused(tx_ring) < size)
4300		return -EBUSY;
4301
4302	/* A reprieve! */
4303	netif_wake_subqueue(netdev, tx_ring->queue_index);
4304
4305	u64_stats_update_begin(&tx_ring->tx_syncp2);
4306	tx_ring->tx_stats.restart_queue2++;
4307	u64_stats_update_end(&tx_ring->tx_syncp2);
4308
4309	return 0;
4310}
4311
4312static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4313{
4314	if (igb_desc_unused(tx_ring) >= size)
4315		return 0;
4316	return __igb_maybe_stop_tx(tx_ring, size);
4317}
4318
4319netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4320				    struct igb_ring *tx_ring)
4321{
4322	int tso = 0, count;
4323	u32 tx_flags = 0;
4324	u16 first;
4325	u8 hdr_len = 0;
4326
4327	/* need: 1 descriptor per page,
4328	 *       + 2 desc gap to keep tail from touching head,
4329	 *       + 1 desc for skb->data,
4330	 *       + 1 desc for context descriptor,
4331	 * otherwise try next time */
4332	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4333		/* this is a hard error */
4334		return NETDEV_TX_BUSY;
4335	}
4336
4337	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4338		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4339		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4340	}
4341
4342	if (vlan_tx_tag_present(skb)) {
4343		tx_flags |= IGB_TX_FLAGS_VLAN;
4344		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4345	}
4346
4347	if (skb->protocol == htons(ETH_P_IP))
4348		tx_flags |= IGB_TX_FLAGS_IPV4;
4349
4350	first = tx_ring->next_to_use;
4351	if (skb_is_gso(skb)) {
4352		tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4353
4354		if (tso < 0) {
4355			dev_kfree_skb_any(skb);
4356			return NETDEV_TX_OK;
4357		}
4358	}
4359
4360	if (tso)
4361		tx_flags |= IGB_TX_FLAGS_TSO;
4362	else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4363	         (skb->ip_summed == CHECKSUM_PARTIAL))
4364		tx_flags |= IGB_TX_FLAGS_CSUM;
4365
4366	/*
4367	 * count reflects descriptors mapped, if 0 or less then mapping error
4368	 * has occurred and we need to rewind the descriptor queue
4369	 */
4370	count = igb_tx_map_adv(tx_ring, skb, first);
4371	if (!count) {
4372		dev_kfree_skb_any(skb);
4373		tx_ring->buffer_info[first].time_stamp = 0;
4374		tx_ring->next_to_use = first;
4375		return NETDEV_TX_OK;
4376	}
4377
4378	igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4379
4380	/* Make sure there is space in the ring for the next send. */
4381	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4382
4383	return NETDEV_TX_OK;
4384}
4385
4386static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4387				      struct net_device *netdev)
4388{
4389	struct igb_adapter *adapter = netdev_priv(netdev);
4390	struct igb_ring *tx_ring;
4391	int r_idx = 0;
4392
4393	if (test_bit(__IGB_DOWN, &adapter->state)) {
4394		dev_kfree_skb_any(skb);
4395		return NETDEV_TX_OK;
4396	}
4397
4398	if (skb->len <= 0) {
4399		dev_kfree_skb_any(skb);
4400		return NETDEV_TX_OK;
4401	}
4402
4403	r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4404	tx_ring = adapter->multi_tx_table[r_idx];
4405
4406	/* This goes back to the question of how to logically map a tx queue
4407	 * to a flow.  Right now, performance is impacted slightly negatively
4408	 * if using multiple tx queues.  If the stack breaks away from a
4409	 * single qdisc implementation, we can look at this again. */
4410	return igb_xmit_frame_ring_adv(skb, tx_ring);
4411}
4412
4413/**
4414 * igb_tx_timeout - Respond to a Tx Hang
4415 * @netdev: network interface device structure
4416 **/
4417static void igb_tx_timeout(struct net_device *netdev)
4418{
4419	struct igb_adapter *adapter = netdev_priv(netdev);
4420	struct e1000_hw *hw = &adapter->hw;
4421
4422	/* Do the reset outside of interrupt context */
4423	adapter->tx_timeout_count++;
4424
4425	if (hw->mac.type == e1000_82580)
4426		hw->dev_spec._82575.global_device_reset = true;
4427
4428	schedule_work(&adapter->reset_task);
4429	wr32(E1000_EICS,
4430	     (adapter->eims_enable_mask & ~adapter->eims_other));
4431}
4432
4433static void igb_reset_task(struct work_struct *work)
4434{
4435	struct igb_adapter *adapter;
4436	adapter = container_of(work, struct igb_adapter, reset_task);
4437
4438	igb_dump(adapter);
4439	netdev_err(adapter->netdev, "Reset adapter\n");
4440	igb_reinit_locked(adapter);
4441}
4442
4443/**
4444 * igb_get_stats64 - Get System Network Statistics
4445 * @netdev: network interface device structure
4446 * @stats: rtnl_link_stats64 pointer
4447 *
4448 **/
4449static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4450						 struct rtnl_link_stats64 *stats)
4451{
4452	struct igb_adapter *adapter = netdev_priv(netdev);
4453
4454	spin_lock(&adapter->stats64_lock);
4455	igb_update_stats(adapter, &adapter->stats64);
4456	memcpy(stats, &adapter->stats64, sizeof(*stats));
4457	spin_unlock(&adapter->stats64_lock);
4458
4459	return stats;
4460}
4461
4462/**
4463 * igb_change_mtu - Change the Maximum Transfer Unit
4464 * @netdev: network interface device structure
4465 * @new_mtu: new value for maximum frame size
4466 *
4467 * Returns 0 on success, negative on failure
4468 **/
4469static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4470{
4471	struct igb_adapter *adapter = netdev_priv(netdev);
4472	struct pci_dev *pdev = adapter->pdev;
4473	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4474	u32 rx_buffer_len, i;
4475
4476	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4477		dev_err(&pdev->dev, "Invalid MTU setting\n");
4478		return -EINVAL;
4479	}
4480
4481	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4482		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4483		return -EINVAL;
4484	}
4485
4486	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4487		msleep(1);
4488
4489	/* igb_down has a dependency on max_frame_size */
4490	adapter->max_frame_size = max_frame;
4491
4492	/* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4493	 * means we reserve 2 more, this pushes us to allocate from the next
4494	 * larger slab size.
4495	 * i.e. RXBUFFER_2048 --> size-4096 slab
4496	 */
4497
4498	if (adapter->hw.mac.type == e1000_82580)
4499		max_frame += IGB_TS_HDR_LEN;
4500
4501	if (max_frame <= IGB_RXBUFFER_1024)
4502		rx_buffer_len = IGB_RXBUFFER_1024;
4503	else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4504		rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4505	else
4506		rx_buffer_len = IGB_RXBUFFER_128;
4507
4508	if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4509	     (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4510		rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4511
4512	if ((adapter->hw.mac.type == e1000_82580) &&
4513	    (rx_buffer_len == IGB_RXBUFFER_128))
4514		rx_buffer_len += IGB_RXBUFFER_64;
4515
4516	if (netif_running(netdev))
4517		igb_down(adapter);
4518
4519	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4520		 netdev->mtu, new_mtu);
4521	netdev->mtu = new_mtu;
4522
4523	for (i = 0; i < adapter->num_rx_queues; i++)
4524		adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4525
4526	if (netif_running(netdev))
4527		igb_up(adapter);
4528	else
4529		igb_reset(adapter);
4530
4531	clear_bit(__IGB_RESETTING, &adapter->state);
4532
4533	return 0;
4534}
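
/*
 * Buffer sizing example for the logic above, assuming the usual values of
 * IGB_RXBUFFER_1024 = 1024 and MAXIMUM_ETHERNET_VLAN_SIZE = 1522 and
 * ignoring the 82580 timestamp-header adjustment: an MTU of 1500 gives
 * max_frame = 1518 and therefore a 1522-byte receive buffer, while a
 * 9000-byte jumbo MTU overflows both thresholds and drops rx_buffer_len to
 * IGB_RXBUFFER_128, where only packet headers land in the buffer and the
 * rest of the frame is expected to be placed in page fragments by the
 * packet-split receive path.
 */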
4535
4536/**
4537 * igb_update_stats - Update the board statistics counters
4538 * @adapter: board private structure
4539 * @net_stats: rtnl_link_stats64 structure to fill in
4540 **/
4541void igb_update_stats(struct igb_adapter *adapter,
4542		      struct rtnl_link_stats64 *net_stats)
4543{
4544	struct e1000_hw *hw = &adapter->hw;
4545	struct pci_dev *pdev = adapter->pdev;
4546	u32 reg, mpc;
4547	u16 phy_tmp;
4548	int i;
4549	u64 bytes, packets;
4550	unsigned int start;
4551	u64 _bytes, _packets;
4552
4553#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4554
4555	/*
4556	 * Prevent stats update while adapter is being reset, or if the pci
4557	 * connection is down.
4558	 */
4559	if (adapter->link_speed == 0)
4560		return;
4561	if (pci_channel_offline(pdev))
4562		return;
4563
4564	bytes = 0;
4565	packets = 0;
4566	for (i = 0; i < adapter->num_rx_queues; i++) {
4567		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4568		struct igb_ring *ring = adapter->rx_ring[i];
4569
4570		ring->rx_stats.drops += rqdpc_tmp;
4571		net_stats->rx_fifo_errors += rqdpc_tmp;
4572
4573		do {
4574			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4575			_bytes = ring->rx_stats.bytes;
4576			_packets = ring->rx_stats.packets;
4577		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4578		bytes += _bytes;
4579		packets += _packets;
4580	}
4581
4582	net_stats->rx_bytes = bytes;
4583	net_stats->rx_packets = packets;
4584
4585	bytes = 0;
4586	packets = 0;
4587	for (i = 0; i < adapter->num_tx_queues; i++) {
4588		struct igb_ring *ring = adapter->tx_ring[i];
4589		do {
4590			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4591			_bytes = ring->tx_stats.bytes;
4592			_packets = ring->tx_stats.packets;
4593		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4594		bytes += _bytes;
4595		packets += _packets;
4596	}
4597	net_stats->tx_bytes = bytes;
4598	net_stats->tx_packets = packets;
4599
4600	/* read stats registers */
4601	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4602	adapter->stats.gprc += rd32(E1000_GPRC);
4603	adapter->stats.gorc += rd32(E1000_GORCL);
4604	rd32(E1000_GORCH); /* clear GORCL */
4605	adapter->stats.bprc += rd32(E1000_BPRC);
4606	adapter->stats.mprc += rd32(E1000_MPRC);
4607	adapter->stats.roc += rd32(E1000_ROC);
4608
4609	adapter->stats.prc64 += rd32(E1000_PRC64);
4610	adapter->stats.prc127 += rd32(E1000_PRC127);
4611	adapter->stats.prc255 += rd32(E1000_PRC255);
4612	adapter->stats.prc511 += rd32(E1000_PRC511);
4613	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4614	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4615	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4616	adapter->stats.sec += rd32(E1000_SEC);
4617
4618	mpc = rd32(E1000_MPC);
4619	adapter->stats.mpc += mpc;
4620	net_stats->rx_fifo_errors += mpc;
4621	adapter->stats.scc += rd32(E1000_SCC);
4622	adapter->stats.ecol += rd32(E1000_ECOL);
4623	adapter->stats.mcc += rd32(E1000_MCC);
4624	adapter->stats.latecol += rd32(E1000_LATECOL);
4625	adapter->stats.dc += rd32(E1000_DC);
4626	adapter->stats.rlec += rd32(E1000_RLEC);
4627	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4628	adapter->stats.xontxc += rd32(E1000_XONTXC);
4629	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4630	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4631	adapter->stats.fcruc += rd32(E1000_FCRUC);
4632	adapter->stats.gptc += rd32(E1000_GPTC);
4633	adapter->stats.gotc += rd32(E1000_GOTCL);
4634	rd32(E1000_GOTCH); /* clear GOTCL */
4635	adapter->stats.rnbc += rd32(E1000_RNBC);
4636	adapter->stats.ruc += rd32(E1000_RUC);
4637	adapter->stats.rfc += rd32(E1000_RFC);
4638	adapter->stats.rjc += rd32(E1000_RJC);
4639	adapter->stats.tor += rd32(E1000_TORH);
4640	adapter->stats.tot += rd32(E1000_TOTH);
4641	adapter->stats.tpr += rd32(E1000_TPR);
4642
4643	adapter->stats.ptc64 += rd32(E1000_PTC64);
4644	adapter->stats.ptc127 += rd32(E1000_PTC127);
4645	adapter->stats.ptc255 += rd32(E1000_PTC255);
4646	adapter->stats.ptc511 += rd32(E1000_PTC511);
4647	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4648	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4649
4650	adapter->stats.mptc += rd32(E1000_MPTC);
4651	adapter->stats.bptc += rd32(E1000_BPTC);
4652
4653	adapter->stats.tpt += rd32(E1000_TPT);
4654	adapter->stats.colc += rd32(E1000_COLC);
4655
4656	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4657	/* read internal phy specific stats */
4658	reg = rd32(E1000_CTRL_EXT);
4659	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4660		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4661		adapter->stats.tncrs += rd32(E1000_TNCRS);
4662	}
4663
4664	adapter->stats.tsctc += rd32(E1000_TSCTC);
4665	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4666
4667	adapter->stats.iac += rd32(E1000_IAC);
4668	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4669	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4670	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4671	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4672	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4673	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4674	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4675	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4676
4677	/* Fill out the OS statistics structure */
4678	net_stats->multicast = adapter->stats.mprc;
4679	net_stats->collisions = adapter->stats.colc;
4680
4681	/* Rx Errors */
4682
4683	/* RLEC on some newer hardware can be incorrect so build
4684	 * our own version based on RUC and ROC */
4685	net_stats->rx_errors = adapter->stats.rxerrc +
4686		adapter->stats.crcerrs + adapter->stats.algnerrc +
4687		adapter->stats.ruc + adapter->stats.roc +
4688		adapter->stats.cexterr;
4689	net_stats->rx_length_errors = adapter->stats.ruc +
4690				      adapter->stats.roc;
4691	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4692	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4693	net_stats->rx_missed_errors = adapter->stats.mpc;
4694
4695	/* Tx Errors */
4696	net_stats->tx_errors = adapter->stats.ecol +
4697			       adapter->stats.latecol;
4698	net_stats->tx_aborted_errors = adapter->stats.ecol;
4699	net_stats->tx_window_errors = adapter->stats.latecol;
4700	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4701
4702	/* Tx Dropped needs to be maintained elsewhere */
4703
4704	/* Phy Stats */
4705	if (hw->phy.media_type == e1000_media_type_copper) {
4706		if ((adapter->link_speed == SPEED_1000) &&
4707		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4708			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4709			adapter->phy_stats.idle_errors += phy_tmp;
4710		}
4711	}
4712
4713	/* Management Stats */
4714	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4715	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4716	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4717
4718	/* OS2BMC Stats */
4719	reg = rd32(E1000_MANC);
4720	if (reg & E1000_MANC_EN_BMC2OS) {
4721		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4722		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4723		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4724		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4725	}
4726}
4727
4728static irqreturn_t igb_msix_other(int irq, void *data)
4729{
4730	struct igb_adapter *adapter = data;
4731	struct e1000_hw *hw = &adapter->hw;
4732	u32 icr = rd32(E1000_ICR);
4733	/* reading ICR causes bit 31 of EICR to be cleared */
4734
4735	if (icr & E1000_ICR_DRSTA)
4736		schedule_work(&adapter->reset_task);
4737
4738	if (icr & E1000_ICR_DOUTSYNC) {
4739		/* HW is reporting DMA is out of sync */
4740		adapter->stats.doosync++;
4741		/* The DMA Out of Sync is also indication of a spoof event
4742		 * in IOV mode. Check the Wrong VM Behavior register to
4743		 * see if it is really a spoof event. */
4744		igb_check_wvbr(adapter);
4745	}
4746
4747	/* Check for a mailbox event */
4748	if (icr & E1000_ICR_VMMB)
4749		igb_msg_task(adapter);
4750
4751	if (icr & E1000_ICR_LSC) {
4752		hw->mac.get_link_status = 1;
4753		/* guard against interrupt when we're going down */
4754		if (!test_bit(__IGB_DOWN, &adapter->state))
4755			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4756	}
4757
4758	if (adapter->vfs_allocated_count)
4759		wr32(E1000_IMS, E1000_IMS_LSC |
4760				E1000_IMS_VMMB |
4761				E1000_IMS_DOUTSYNC);
4762	else
4763		wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4764	wr32(E1000_EIMS, adapter->eims_other);
4765
4766	return IRQ_HANDLED;
4767}
4768
4769static void igb_write_itr(struct igb_q_vector *q_vector)
4770{
4771	struct igb_adapter *adapter = q_vector->adapter;
4772	u32 itr_val = q_vector->itr_val & 0x7FFC;
4773
4774	if (!q_vector->set_itr)
4775		return;
4776
4777	if (!itr_val)
4778		itr_val = 0x4;
4779
4780	if (adapter->hw.mac.type == e1000_82575)
4781		itr_val |= itr_val << 16;
4782	else
4783		itr_val |= 0x8000000;
4784
4785	writel(itr_val, q_vector->itr_register);
4786	q_vector->set_itr = 0;
4787}
4788
4789static irqreturn_t igb_msix_ring(int irq, void *data)
4790{
4791	struct igb_q_vector *q_vector = data;
4792
4793	/* Write the ITR value calculated from the previous interrupt. */
4794	igb_write_itr(q_vector);
4795
4796	napi_schedule(&q_vector->napi);
4797
4798	return IRQ_HANDLED;
4799}
4800
4801#ifdef CONFIG_IGB_DCA
4802static void igb_update_dca(struct igb_q_vector *q_vector)
4803{
4804	struct igb_adapter *adapter = q_vector->adapter;
4805	struct e1000_hw *hw = &adapter->hw;
4806	int cpu = get_cpu();
4807
4808	if (q_vector->cpu == cpu)
4809		goto out_no_update;
4810
4811	if (q_vector->tx_ring) {
4812		int q = q_vector->tx_ring->reg_idx;
4813		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4814		if (hw->mac.type == e1000_82575) {
4815			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4816			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4817		} else {
4818			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4819			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4820			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4821		}
4822		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4823		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4824	}
4825	if (q_vector->rx_ring) {
4826		int q = q_vector->rx_ring->reg_idx;
4827		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4828		if (hw->mac.type == e1000_82575) {
4829			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4830			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4831		} else {
4832			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4833			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4834			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4835		}
4836		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4837		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4838		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4839		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4840	}
4841	q_vector->cpu = cpu;
4842out_no_update:
4843	put_cpu();
4844}
4845
4846static void igb_setup_dca(struct igb_adapter *adapter)
4847{
4848	struct e1000_hw *hw = &adapter->hw;
4849	int i;
4850
4851	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4852		return;
4853
4854	/* Always use CB2 mode, difference is masked in the CB driver. */
4855	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4856
4857	for (i = 0; i < adapter->num_q_vectors; i++) {
4858		adapter->q_vector[i]->cpu = -1;
4859		igb_update_dca(adapter->q_vector[i]);
4860	}
4861}
4862
4863static int __igb_notify_dca(struct device *dev, void *data)
4864{
4865	struct net_device *netdev = dev_get_drvdata(dev);
4866	struct igb_adapter *adapter = netdev_priv(netdev);
4867	struct pci_dev *pdev = adapter->pdev;
4868	struct e1000_hw *hw = &adapter->hw;
4869	unsigned long event = *(unsigned long *)data;
4870
4871	switch (event) {
4872	case DCA_PROVIDER_ADD:
4873		/* if already enabled, don't do it again */
4874		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4875			break;
4876		if (dca_add_requester(dev) == 0) {
4877			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4878			dev_info(&pdev->dev, "DCA enabled\n");
4879			igb_setup_dca(adapter);
4880			break;
4881		}
4882		/* Fall Through since DCA is disabled. */
4883	case DCA_PROVIDER_REMOVE:
4884		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4885			/* without this a class_device is left
4886			 * hanging around in the sysfs model */
4887			dca_remove_requester(dev);
4888			dev_info(&pdev->dev, "DCA disabled\n");
4889			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4890			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4891		}
4892		break;
4893	}
4894
4895	return 0;
4896}
4897
4898static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4899                          void *p)
4900{
4901	int ret_val;
4902
4903	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4904	                                 __igb_notify_dca);
4905
4906	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4907}
4908#endif /* CONFIG_IGB_DCA */
4909
4910static void igb_ping_all_vfs(struct igb_adapter *adapter)
4911{
4912	struct e1000_hw *hw = &adapter->hw;
4913	u32 ping;
4914	int i;
4915
4916	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4917		ping = E1000_PF_CONTROL_MSG;
4918		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4919			ping |= E1000_VT_MSGTYPE_CTS;
4920		igb_write_mbx(hw, &ping, 1, i);
4921	}
4922}
4923
4924static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4925{
4926	struct e1000_hw *hw = &adapter->hw;
4927	u32 vmolr = rd32(E1000_VMOLR(vf));
4928	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4929
4930	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4931	                    IGB_VF_FLAG_MULTI_PROMISC);
4932	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4933
4934	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4935		vmolr |= E1000_VMOLR_MPME;
4936		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4937		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4938	} else {
4939		/*
4940		 * if we have hashes and we are clearing a multicast promisc
4941		 * flag we need to write the hashes to the MTA as this step
4942		 * was previously skipped
4943		 */
4944		if (vf_data->num_vf_mc_hashes > 30) {
4945			vmolr |= E1000_VMOLR_MPME;
4946		} else if (vf_data->num_vf_mc_hashes) {
4947			int j;
4948			vmolr |= E1000_VMOLR_ROMPE;
4949			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4950				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4951		}
4952	}
4953
4954	wr32(E1000_VMOLR(vf), vmolr);
4955
4956	/* there are flags left unprocessed, likely not supported */
4957	if (*msgbuf & E1000_VT_MSGINFO_MASK)
4958		return -EINVAL;
4959
4960	return 0;
4961
4962}
4963
4964static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4965				  u32 *msgbuf, u32 vf)
4966{
4967	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4968	u16 *hash_list = (u16 *)&msgbuf[1];
4969	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4970	int i;
4971
4972	/* salt away the number of multicast addresses assigned
4973	 * to this VF for later use to restore when the PF multicast
4974	 * list changes
4975	 */
4976	vf_data->num_vf_mc_hashes = n;
4977
4978	/* only up to 30 hash values supported */
4979	if (n > 30)
4980		n = 30;
4981
4982	/* store the hashes for later use */
4983	for (i = 0; i < n; i++)
4984		vf_data->vf_mc_hashes[i] = hash_list[i];
4985
4986	/* Flush and reset the mta with the new values */
4987	igb_set_rx_mode(adapter->netdev);
4988
4989	return 0;
4990}
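
/*
 * Mailbox layout assumed by the handlers above: for E1000_VF_SET_MULTICAST
 * the VF places the entry count in the E1000_VT_MSGINFO field of msgbuf[0]
 * and packs the multicast hash values as u16s starting at msgbuf[1], so a
 * request with three hashes uses msgbuf[1] for the first two and the low
 * half of msgbuf[2] for the third (on a little-endian host).  Anything
 * beyond 30 hashes makes the restore path fall back to multicast
 * promiscuous (VMOLR.MPME) instead of programming the MTA.
 */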
4991
4992static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4993{
4994	struct e1000_hw *hw = &adapter->hw;
4995	struct vf_data_storage *vf_data;
4996	int i, j;
4997
4998	for (i = 0; i < adapter->vfs_allocated_count; i++) {
4999		u32 vmolr = rd32(E1000_VMOLR(i));
5000		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5001
5002		vf_data = &adapter->vf_data[i];
5003
5004		if ((vf_data->num_vf_mc_hashes > 30) ||
5005		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5006			vmolr |= E1000_VMOLR_MPME;
5007		} else if (vf_data->num_vf_mc_hashes) {
5008			vmolr |= E1000_VMOLR_ROMPE;
5009			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5010				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5011		}
5012		wr32(E1000_VMOLR(i), vmolr);
5013	}
5014}
5015
5016static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5017{
5018	struct e1000_hw *hw = &adapter->hw;
5019	u32 pool_mask, reg, vid;
5020	int i;
5021
5022	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5023
5024	/* Find the vlan filter for this id */
5025	/* Walk every vlan filter entry and drop this VF from its pool */
5026		reg = rd32(E1000_VLVF(i));
5027
5028		/* remove the vf from the pool */
5029		reg &= ~pool_mask;
5030
5031		/* if pool is empty then remove entry from vfta */
5032		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5033		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5034			reg = 0;
5035			vid = reg & E1000_VLVF_VLANID_MASK;
5036			igb_vfta_set(hw, vid, false);
5037			reg = 0;
5038
5039		wr32(E1000_VLVF(i), reg);
5040	}
5041
5042	adapter->vf_data[vf].vlans_enabled = 0;
5043}
5044
5045static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5046{
5047	struct e1000_hw *hw = &adapter->hw;
5048	u32 reg, i;
5049
5050	/* The vlvf table only exists on 82576 hardware and newer */
5051	if (hw->mac.type < e1000_82576)
5052		return -1;
5053
5054	/* we only need to do this if VMDq is enabled */
5055	if (!adapter->vfs_allocated_count)
5056		return -1;
5057
5058	/* Find the vlan filter for this id */
5059	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5060		reg = rd32(E1000_VLVF(i));
5061		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5062		    vid == (reg & E1000_VLVF_VLANID_MASK))
5063			break;
5064	}
5065
5066	if (add) {
5067		if (i == E1000_VLVF_ARRAY_SIZE) {
5068			/* Did not find a matching VLAN ID entry that was
5069			 * enabled.  Search for a free filter entry, i.e.
5070			 * one without the enable bit set
5071			 */
5072			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5073				reg = rd32(E1000_VLVF(i));
5074				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5075					break;
5076			}
5077		}
5078		if (i < E1000_VLVF_ARRAY_SIZE) {
5079			/* Found an enabled/available entry */
5080			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5081
5082			/* if !enabled we need to set this up in vfta */
5083			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5084				/* add VID to filter table */
5085				igb_vfta_set(hw, vid, true);
5086				reg |= E1000_VLVF_VLANID_ENABLE;
5087			}
5088			reg &= ~E1000_VLVF_VLANID_MASK;
5089			reg |= vid;
5090			wr32(E1000_VLVF(i), reg);
5091
5092			/* do not modify RLPML for PF devices */
5093			if (vf >= adapter->vfs_allocated_count)
5094				return 0;
5095
5096			if (!adapter->vf_data[vf].vlans_enabled) {
5097				u32 size;
5098				reg = rd32(E1000_VMOLR(vf));
5099				size = reg & E1000_VMOLR_RLPML_MASK;
5100				size += 4;
5101				reg &= ~E1000_VMOLR_RLPML_MASK;
5102				reg |= size;
5103				wr32(E1000_VMOLR(vf), reg);
5104			}
5105
5106			adapter->vf_data[vf].vlans_enabled++;
5107			return 0;
5108		}
5109	} else {
5110		if (i < E1000_VLVF_ARRAY_SIZE) {
5111			/* remove vf from the pool */
5112			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5113			/* if pool is empty then remove entry from vfta */
5114			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5115				reg = 0;
5116				igb_vfta_set(hw, vid, false);
5117			}
5118			wr32(E1000_VLVF(i), reg);
5119
5120			/* do not modify RLPML for PF devices */
5121			if (vf >= adapter->vfs_allocated_count)
5122				return 0;
5123
5124			adapter->vf_data[vf].vlans_enabled--;
5125			if (!adapter->vf_data[vf].vlans_enabled) {
5126				u32 size;
5127				reg = rd32(E1000_VMOLR(vf));
5128				size = reg & E1000_VMOLR_RLPML_MASK;
5129				size -= 4;
5130				reg &= ~E1000_VMOLR_RLPML_MASK;
5131				reg |= size;
5132				wr32(E1000_VMOLR(vf), reg);
5133			}
5134		}
5135	}
5136	return 0;
5137}
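
/*
 * Example of the shared VLAN filter handling above: adding VLAN 100 for
 * VF 2 either finds the VLVF entry already carrying VID 100 or claims a
 * free one, sets pool bit (1 << (E1000_VLVF_POOLSEL_SHIFT + 2)), programs
 * the VID and, if this is the VF's first VLAN, grows the VF's VMOLR.RLPML
 * limit by 4 bytes to leave room for the tag.  Removing the last user of
 * a VID clears the VLVF entry and the corresponding vfta bit again.
 */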
5138
5139static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5140{
5141	struct e1000_hw *hw = &adapter->hw;
5142
5143	if (vid)
5144		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5145	else
5146		wr32(E1000_VMVIR(vf), 0);
5147}
5148
5149static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5150			       int vf, u16 vlan, u8 qos)
5151{
5152	int err = 0;
5153	struct igb_adapter *adapter = netdev_priv(netdev);
5154
5155	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5156		return -EINVAL;
5157	if (vlan || qos) {
5158		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5159		if (err)
5160			goto out;
5161		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5162		igb_set_vmolr(adapter, vf, !vlan);
5163		adapter->vf_data[vf].pf_vlan = vlan;
5164		adapter->vf_data[vf].pf_qos = qos;
5165		dev_info(&adapter->pdev->dev,
5166			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5167		if (test_bit(__IGB_DOWN, &adapter->state)) {
5168			dev_warn(&adapter->pdev->dev,
5169				 "The VF VLAN has been set,"
5170				 " but the PF device is not up.\n");
5171			dev_warn(&adapter->pdev->dev,
5172				 "Bring the PF device up before"
5173				 " attempting to use the VF device.\n");
5174		}
5175	} else {
5176		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5177				   false, vf);
5178		igb_set_vmvir(adapter, vlan, vf);
5179		igb_set_vmolr(adapter, vf, true);
5180		adapter->vf_data[vf].pf_vlan = 0;
5181		adapter->vf_data[vf].pf_qos = 0;
5182	}
5183out:
5184	return err;
5185}
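
/*
 * igb_ndo_set_vf_vlan() is wired up as the driver's ndo_set_vf_vlan
 * callback, so an administrator typically reaches it with something like
 *
 *   ip link set <pf-interface> vf 2 vlan 100 qos 3
 *
 * which calls this function with vf = 2, vlan = 100, qos = 3 and results
 * in the VLVF/VMVIR programming shown above; vlan = 0 clears the
 * administrative VLAN again.
 */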
5186
5187static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5188{
5189	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5190	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5191
5192	return igb_vlvf_set(adapter, vid, add, vf);
5193}
5194
5195static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5196{
5197	/* clear flags - except flag that indicates PF has set the MAC */
5198	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5199	adapter->vf_data[vf].last_nack = jiffies;
5200
5201	/* reset offloads to defaults */
5202	igb_set_vmolr(adapter, vf, true);
5203
5204	/* reset vlans for device */
5205	igb_clear_vf_vfta(adapter, vf);
5206	if (adapter->vf_data[vf].pf_vlan)
5207		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5208				    adapter->vf_data[vf].pf_vlan,
5209				    adapter->vf_data[vf].pf_qos);
5210	else
5211		igb_clear_vf_vfta(adapter, vf);
5212
5213	/* reset multicast table array for vf */
5214	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5215
5216	/* Flush and reset the mta with the new values */
5217	igb_set_rx_mode(adapter->netdev);
5218}
5219
5220static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5221{
5222	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5223
5224	/* generate a new mac address as we were hotplug removed/added */
5225	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5226		random_ether_addr(vf_mac);
5227
5228	/* process remaining reset events */
5229	igb_vf_reset(adapter, vf);
5230}
5231
5232static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5233{
5234	struct e1000_hw *hw = &adapter->hw;
5235	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5236	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5237	u32 reg, msgbuf[3];
5238	u8 *addr = (u8 *)(&msgbuf[1]);
5239
5240	/* process all the same items cleared in a function level reset */
5241	igb_vf_reset(adapter, vf);
5242
5243	/* set vf mac address */
5244	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5245
5246	/* enable transmit and receive for vf */
5247	reg = rd32(E1000_VFTE);
5248	wr32(E1000_VFTE, reg | (1 << vf));
5249	reg = rd32(E1000_VFRE);
5250	wr32(E1000_VFRE, reg | (1 << vf));
5251
5252	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5253
5254	/* reply to reset with ack and vf mac address */
5255	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5256	memcpy(addr, vf_mac, 6);
5257	igb_write_mbx(hw, msgbuf, 3, vf);
5258}
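
/*
 * The reset reply built above is three 32-bit mailbox words:
 *   msgbuf[0]    = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK
 *   msgbuf[1..2] = the six bytes of the VF MAC address
 * The same word-1 placement is used in the other direction by
 * igb_set_vf_mac_addr() below when a VF requests a MAC change.
 */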
5259
5260static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5261{
5262	/*
5263	 * The VF MAC Address is stored in a packed array of bytes
5264	 * starting at the second 32 bit word of the msg array
5265	 */
5266	unsigned char *addr = (unsigned char *)&msg[1];
5267	int err = -1;
5268
5269	if (is_valid_ether_addr(addr))
5270		err = igb_set_vf_mac(adapter, vf, addr);
5271
5272	return err;
5273}
5274
5275static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5276{
5277	struct e1000_hw *hw = &adapter->hw;
5278	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5279	u32 msg = E1000_VT_MSGTYPE_NACK;
5280
5281	/* if device isn't clear to send it shouldn't be reading either */
5282	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5283	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5284		igb_write_mbx(hw, &msg, 1, vf);
5285		vf_data->last_nack = jiffies;
5286	}
5287}
5288
5289static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5290{
5291	struct pci_dev *pdev = adapter->pdev;
5292	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5293	struct e1000_hw *hw = &adapter->hw;
5294	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5295	s32 retval;
5296
5297	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5298
5299	if (retval) {
5300		/* if receive failed revoke VF CTS stats and restart init */
5301		dev_err(&pdev->dev, "Error receiving message from VF\n");
5302		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5303		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5304			return;
5305		goto out;
5306	}
5307
5308	/* this is a message we already processed, do nothing */
5309	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5310		return;
5311
5312	/*
5313	 * until the vf completes a reset it should not be
5314	 * allowed to start any configuration.
5315	 */
5316
5317	if (msgbuf[0] == E1000_VF_RESET) {
5318		igb_vf_reset_msg(adapter, vf);
5319		return;
5320	}
5321
5322	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5323		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5324			return;
5325		retval = -1;
5326		goto out;
5327	}
5328
5329	switch ((msgbuf[0] & 0xFFFF)) {
5330	case E1000_VF_SET_MAC_ADDR:
5331		retval = -EINVAL;
5332		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5333			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5334		else
5335			dev_warn(&pdev->dev,
5336				 "VF %d attempted to override administratively "
5337				 "set MAC address\nReload the VF driver to "
5338				 "resume operations\n", vf);
5339		break;
5340	case E1000_VF_SET_PROMISC:
5341		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5342		break;
5343	case E1000_VF_SET_MULTICAST:
5344		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5345		break;
5346	case E1000_VF_SET_LPE:
5347		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5348		break;
5349	case E1000_VF_SET_VLAN:
5350		retval = -1;
5351		if (vf_data->pf_vlan)
5352			dev_warn(&pdev->dev,
5353				 "VF %d attempted to override administratively "
5354				 "set VLAN tag\nReload the VF driver to "
5355				 "resume operations\n", vf);
5356		else
5357			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5358		break;
5359	default:
5360		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5361		retval = -1;
5362		break;
5363	}
5364
5365	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5366out:
5367	/* notify the VF of the results of what it sent us */
5368	if (retval)
5369		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5370	else
5371		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5372
5373	igb_write_mbx(hw, msgbuf, 1, vf);
5374}
5375
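/*
 * Mailbox protocol note: igb_msg_task() below polls every VF mailbox for
 * reset, message and ack events.  The low 16 bits of msgbuf[0] carry the
 * VF request (E1000_VF_*), and igb_rcv_msg_from_vf() above replies with the
 * same word OR-ed with E1000_VT_MSGTYPE_ACK or _NACK, plus _CTS on replies
 * to VFs that have completed their reset handshake.
 */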
5376static void igb_msg_task(struct igb_adapter *adapter)
5377{
5378	struct e1000_hw *hw = &adapter->hw;
5379	u32 vf;
5380
5381	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5382		/* process any reset requests */
5383		if (!igb_check_for_rst(hw, vf))
5384			igb_vf_reset_event(adapter, vf);
5385
5386		/* process any messages pending */
5387		if (!igb_check_for_msg(hw, vf))
5388			igb_rcv_msg_from_vf(adapter, vf);
5389
5390		/* process any acks */
5391		if (!igb_check_for_ack(hw, vf))
5392			igb_rcv_ack_from_vf(adapter, vf);
5393	}
5394}
5395
5396/**
5397 *  igb_set_uta - Set unicast filter table address
5398 *  @adapter: board private structure
5399 *
5400 *  The unicast table address is a register array of 32-bit registers.
5401 *  The table is meant to be used in a way similar to how the MTA is used;
5402 *  however, due to certain limitations in the hardware it is necessary to
5403 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5404 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
5405 **/
5406static void igb_set_uta(struct igb_adapter *adapter)
5407{
5408	struct e1000_hw *hw = &adapter->hw;
5409	int i;
5410
5411	/* The UTA table only exists on 82576 hardware and newer */
5412	if (hw->mac.type < e1000_82576)
5413		return;
5414
5415	/* we only need to do this if VMDq is enabled */
5416	if (!adapter->vfs_allocated_count)
5417		return;
5418
5419	for (i = 0; i < hw->mac.uta_reg_count; i++)
5420		array_wr32(E1000_UTA, i, ~0);
5421}
5422
5423/**
5424 * igb_intr_msi - Interrupt Handler
5425 * @irq: interrupt number
5426 * @data: pointer to a network interface device structure
5427 **/
5428static irqreturn_t igb_intr_msi(int irq, void *data)
5429{
5430	struct igb_adapter *adapter = data;
5431	struct igb_q_vector *q_vector = adapter->q_vector[0];
5432	struct e1000_hw *hw = &adapter->hw;
5433	/* read ICR disables interrupts using IAM */
5434	u32 icr = rd32(E1000_ICR);
5435
5436	igb_write_itr(q_vector);
5437
5438	if (icr & E1000_ICR_DRSTA)
5439		schedule_work(&adapter->reset_task);
5440
5441	if (icr & E1000_ICR_DOUTSYNC) {
5442		/* HW is reporting DMA is out of sync */
5443		adapter->stats.doosync++;
5444	}
5445
5446	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5447		hw->mac.get_link_status = 1;
5448		if (!test_bit(__IGB_DOWN, &adapter->state))
5449			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5450	}
5451
5452	napi_schedule(&q_vector->napi);
5453
5454	return IRQ_HANDLED;
5455}
5456
5457/**
5458 * igb_intr - Legacy Interrupt Handler
5459 * @irq: interrupt number
5460 * @data: pointer to a network interface device structure
5461 **/
5462static irqreturn_t igb_intr(int irq, void *data)
5463{
5464	struct igb_adapter *adapter = data;
5465	struct igb_q_vector *q_vector = adapter->q_vector[0];
5466	struct e1000_hw *hw = &adapter->hw;
5467	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5468	 * need for the IMC write */
5469	u32 icr = rd32(E1000_ICR);
5470	if (!icr)
5471		return IRQ_NONE;  /* Not our interrupt */
5472
5473	igb_write_itr(q_vector);
5474
5475	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5476	 * not set, then the adapter didn't send an interrupt */
5477	if (!(icr & E1000_ICR_INT_ASSERTED))
5478		return IRQ_NONE;
5479
5480	if (icr & E1000_ICR_DRSTA)
5481		schedule_work(&adapter->reset_task);
5482
5483	if (icr & E1000_ICR_DOUTSYNC) {
5484		/* HW is reporting DMA is out of sync */
5485		adapter->stats.doosync++;
5486	}
5487
5488	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5489		hw->mac.get_link_status = 1;
5490		/* guard against interrupt when we're going down */
5491		if (!test_bit(__IGB_DOWN, &adapter->state))
5492			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5493	}
5494
5495	napi_schedule(&q_vector->napi);
5496
5497	return IRQ_HANDLED;
5498}
5499
5500static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5501{
5502	struct igb_adapter *adapter = q_vector->adapter;
5503	struct e1000_hw *hw = &adapter->hw;
5504
5505	if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5506	    (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5507		if (!adapter->msix_entries)
5508			igb_set_itr(adapter);
5509		else
5510			igb_update_ring_itr(q_vector);
5511	}
5512
5513	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5514		if (adapter->msix_entries)
5515			wr32(E1000_EIMS, q_vector->eims_value);
5516		else
5517			igb_irq_enable(adapter);
5518	}
5519}
5520
5521/**
5522 * igb_poll - NAPI Rx polling callback
5523 * @napi: napi polling structure
5524 * @budget: count of how many packets we should handle
5525 **/
5526static int igb_poll(struct napi_struct *napi, int budget)
5527{
5528	struct igb_q_vector *q_vector = container_of(napi,
5529	                                             struct igb_q_vector,
5530	                                             napi);
5531	int tx_clean_complete = 1, work_done = 0;
5532
5533#ifdef CONFIG_IGB_DCA
5534	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5535		igb_update_dca(q_vector);
5536#endif
5537	if (q_vector->tx_ring)
5538		tx_clean_complete = igb_clean_tx_irq(q_vector);
5539
5540	if (q_vector->rx_ring)
5541		igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5542
5543	if (!tx_clean_complete)
5544		work_done = budget;
5545
5546	/* If not enough Rx work done, exit the polling mode */
5547	if (work_done < budget) {
5548		napi_complete(napi);
5549		igb_ring_irq_enable(q_vector);
5550	}
5551
5552	return work_done;
5553}
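/*
 * NAPI contract note: returning a value equal to the budget keeps this
 * q_vector in polling mode, which is why an unfinished Tx clean forces
 * work_done = budget above; only when less than the budget was consumed is
 * napi_complete() called and the ring interrupt re-enabled.
 */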
5554
5555/**
5556 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5557 * @adapter: board private structure
5558 * @shhwtstamps: timestamp structure to update
5559 * @regval: unsigned 64bit system time value.
5560 *
5561 * We need to convert the system time value stored in the RX/TXSTMP registers
5562 * into a hwtstamp which can be used by the upper level timestamping functions
5563 */
5564static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5565                                   struct skb_shared_hwtstamps *shhwtstamps,
5566                                   u64 regval)
5567{
5568	u64 ns;
5569
5570	/*
5571	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, so shift it up by
5572	 * 24 bits to match the clock shift we set up earlier.
5573	 */
5574	if (adapter->hw.mac.type == e1000_82580)
5575		regval <<= IGB_82580_TSYNC_SHIFT;
5576
5577	ns = timecounter_cyc2time(&adapter->clock, regval);
5578	timecompare_update(&adapter->compare, ns);
5579	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5580	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5581	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5582}
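/*
 * As the comment above notes, the shift relies on IGB_82580_TSYNC_SHIFT
 * matching the shift the timecounter was registered with during init, so
 * that timecounter_cyc2time() returns nanoseconds for every supported MAC.
 */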
5583
5584/**
5585 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5586 * @q_vector: pointer to q_vector containing needed info
5587 * @buffer_info: pointer to igb_buffer structure
5588 *
5589 * If we were asked to do hardware stamping and such a time stamp is
5590 * available, then it must have been for this skb here because we
5591 * allow only one such packet into the queue.
5592 */
5593static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5594{
5595	struct igb_adapter *adapter = q_vector->adapter;
5596	struct e1000_hw *hw = &adapter->hw;
5597	struct skb_shared_hwtstamps shhwtstamps;
5598	u64 regval;
5599
5600	/* if skb does not support hw timestamp or TX stamp not valid exit */
5601	if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5602	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5603		return;
5604
5605	regval = rd32(E1000_TXSTMPL);
5606	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5607
5608	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5609	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5610}
5611
5612/**
5613 * igb_clean_tx_irq - Reclaim resources after transmit completes
5614 * @q_vector: pointer to q_vector containing needed info
5615 * returns true if ring is completely cleaned
5616 **/
5617static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5618{
5619	struct igb_adapter *adapter = q_vector->adapter;
5620	struct igb_ring *tx_ring = q_vector->tx_ring;
5621	struct net_device *netdev = tx_ring->netdev;
5622	struct e1000_hw *hw = &adapter->hw;
5623	struct igb_buffer *buffer_info;
5624	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5625	unsigned int total_bytes = 0, total_packets = 0;
5626	unsigned int i, eop, count = 0;
5627	bool cleaned = false;
5628
5629	i = tx_ring->next_to_clean;
5630	eop = tx_ring->buffer_info[i].next_to_watch;
5631	eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5632
5633	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5634	       (count < tx_ring->count)) {
5635		rmb();	/* read buffer_info after eop_desc status */
5636		for (cleaned = false; !cleaned; count++) {
5637			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5638			buffer_info = &tx_ring->buffer_info[i];
5639			cleaned = (i == eop);
5640
5641			if (buffer_info->skb) {
5642				total_bytes += buffer_info->bytecount;
5643				/* gso_segs is currently only valid for tcp */
5644				total_packets += buffer_info->gso_segs;
5645				igb_tx_hwtstamp(q_vector, buffer_info);
5646			}
5647
5648			igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5649			tx_desc->wb.status = 0;
5650
5651			i++;
5652			if (i == tx_ring->count)
5653				i = 0;
5654		}
5655		eop = tx_ring->buffer_info[i].next_to_watch;
5656		eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5657	}
5658
5659	tx_ring->next_to_clean = i;
5660
5661	if (unlikely(count &&
5662		     netif_carrier_ok(netdev) &&
5663		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5664		/* Make sure that anybody stopping the queue after this
5665		 * sees the new next_to_clean.
5666		 */
5667		smp_mb();
5668		if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5669		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5670			netif_wake_subqueue(netdev, tx_ring->queue_index);
5671
5672			u64_stats_update_begin(&tx_ring->tx_syncp);
5673			tx_ring->tx_stats.restart_queue++;
5674			u64_stats_update_end(&tx_ring->tx_syncp);
5675		}
5676	}
5677
5678	if (tx_ring->detect_tx_hung) {
5679		/* Detect a transmit hang in hardware; this serializes the
5680		 * check with the clearing of time_stamp and movement of i */
5681		tx_ring->detect_tx_hung = false;
5682		if (tx_ring->buffer_info[i].time_stamp &&
5683		    time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5684			       (adapter->tx_timeout_factor * HZ)) &&
5685		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5686
5687			/* detected Tx unit hang */
5688			dev_err(tx_ring->dev,
5689				"Detected Tx Unit Hang\n"
5690				"  Tx Queue             <%d>\n"
5691				"  TDH                  <%x>\n"
5692				"  TDT                  <%x>\n"
5693				"  next_to_use          <%x>\n"
5694				"  next_to_clean        <%x>\n"
5695				"buffer_info[next_to_clean]\n"
5696				"  time_stamp           <%lx>\n"
5697				"  next_to_watch        <%x>\n"
5698				"  jiffies              <%lx>\n"
5699				"  desc.status          <%x>\n",
5700				tx_ring->queue_index,
5701				readl(tx_ring->head),
5702				readl(tx_ring->tail),
5703				tx_ring->next_to_use,
5704				tx_ring->next_to_clean,
5705				tx_ring->buffer_info[eop].time_stamp,
5706				eop,
5707				jiffies,
5708				eop_desc->wb.status);
5709			netif_stop_subqueue(netdev, tx_ring->queue_index);
5710		}
5711	}
5712	tx_ring->total_bytes += total_bytes;
5713	tx_ring->total_packets += total_packets;
5714	u64_stats_update_begin(&tx_ring->tx_syncp);
5715	tx_ring->tx_stats.bytes += total_bytes;
5716	tx_ring->tx_stats.packets += total_packets;
5717	u64_stats_update_end(&tx_ring->tx_syncp);
5718	return count < tx_ring->count;
5719}
5720
5721static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5722				       u32 status_err, struct sk_buff *skb)
5723{
5724	skb_checksum_none_assert(skb);
5725
5726	/* skip if Rx checksum is disabled or the Ignore Checksum bit is set */
5727	if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5728	     (status_err & E1000_RXD_STAT_IXSM))
5729		return;
5730
5731	/* TCP/UDP checksum error bit is set */
5732	if (status_err &
5733	    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5734		/*
5735		 * work around errata with sctp packets where the TCPE aka
5736		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5737		 * packets, (aka let the stack check the crc32c)
5738		 */
5739		if ((skb->len == 60) &&
5740		    (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5741			u64_stats_update_begin(&ring->rx_syncp);
5742			ring->rx_stats.csum_err++;
5743			u64_stats_update_end(&ring->rx_syncp);
5744		}
5745		/* let the stack verify checksum errors */
5746		return;
5747	}
5748	/* It must be a TCP or UDP packet with a valid checksum */
5749	if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5750		skb->ip_summed = CHECKSUM_UNNECESSARY;
5751
5752	dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5753}
5754
5755static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5756                                   struct sk_buff *skb)
5757{
5758	struct igb_adapter *adapter = q_vector->adapter;
5759	struct e1000_hw *hw = &adapter->hw;
5760	u64 regval;
5761
5762	/*
5763	 * If this bit is set, then the RX registers contain the time stamp. No
5764	 * other packet will be time stamped until we read these registers, so
5765	 * read the registers to make them available again. Because only one
5766	 * packet can be time stamped at a time, we know that the register
5767	 * values must belong to this one here and therefore we don't need to
5768	 * compare any of the additional attributes stored for it.
5769	 *
5770	 * If nothing went wrong, then it should have a shared tx_flags that we
5771	 * can turn into a skb_shared_hwtstamps.
5772	 */
5773	if (staterr & E1000_RXDADV_STAT_TSIP) {
5774		u32 *stamp = (u32 *)skb->data;
5775		regval = le32_to_cpu(*(stamp + 2));
5776		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5777		skb_pull(skb, IGB_TS_HDR_LEN);
5778	} else {
5779		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5780			return;
5781
5782		regval = rd32(E1000_RXSTMPL);
5783		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5784	}
5785
5786	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5787}

5788static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5789                               union e1000_adv_rx_desc *rx_desc)
5790{
5791	/* HW will not DMA in data larger than the given buffer, even if it
5792	 * parses the (NFS, of course) header to be larger.  In that case, it
5793	 * fills the header buffer and spills the rest into the page.
5794	 */
5795	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5796	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5797	if (hlen > rx_ring->rx_buffer_len)
5798		hlen = rx_ring->rx_buffer_len;
5799	return hlen;
5800}
5801
5802static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5803                                 int *work_done, int budget)
5804{
5805	struct igb_ring *rx_ring = q_vector->rx_ring;
5806	struct net_device *netdev = rx_ring->netdev;
5807	struct device *dev = rx_ring->dev;
5808	union e1000_adv_rx_desc *rx_desc, *next_rxd;
5809	struct igb_buffer *buffer_info, *next_buffer;
5810	struct sk_buff *skb;
5811	bool cleaned = false;
5812	int cleaned_count = 0;
5813	int current_node = numa_node_id();
5814	unsigned int total_bytes = 0, total_packets = 0;
5815	unsigned int i;
5816	u32 staterr;
5817	u16 length;
5818
5819	i = rx_ring->next_to_clean;
5820	buffer_info = &rx_ring->buffer_info[i];
5821	rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5822	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5823
5824	while (staterr & E1000_RXD_STAT_DD) {
5825		if (*work_done >= budget)
5826			break;
5827		(*work_done)++;
5828		rmb(); /* read descriptor and rx_buffer_info after status DD */
5829
5830		skb = buffer_info->skb;
5831		prefetch(skb->data - NET_IP_ALIGN);
5832		buffer_info->skb = NULL;
5833
5834		i++;
5835		if (i == rx_ring->count)
5836			i = 0;
5837
5838		next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5839		prefetch(next_rxd);
5840		next_buffer = &rx_ring->buffer_info[i];
5841
5842		length = le16_to_cpu(rx_desc->wb.upper.length);
5843		cleaned = true;
5844		cleaned_count++;
5845
5846		if (buffer_info->dma) {
5847			dma_unmap_single(dev, buffer_info->dma,
5848					 rx_ring->rx_buffer_len,
5849					 DMA_FROM_DEVICE);
5850			buffer_info->dma = 0;
5851			if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5852				skb_put(skb, length);
5853				goto send_up;
5854			}
5855			skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5856		}
5857
5858		if (length) {
5859			dma_unmap_page(dev, buffer_info->page_dma,
5860				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
5861			buffer_info->page_dma = 0;
5862
5863			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5864						buffer_info->page,
5865						buffer_info->page_offset,
5866						length);
5867
5868			if ((page_count(buffer_info->page) != 1) ||
5869			    (page_to_nid(buffer_info->page) != current_node))
5870				buffer_info->page = NULL;
5871			else
5872				get_page(buffer_info->page);
5873
5874			skb->len += length;
5875			skb->data_len += length;
5876			skb->truesize += length;
5877		}
5878
5879		if (!(staterr & E1000_RXD_STAT_EOP)) {
5880			buffer_info->skb = next_buffer->skb;
5881			buffer_info->dma = next_buffer->dma;
5882			next_buffer->skb = skb;
5883			next_buffer->dma = 0;
5884			goto next_desc;
5885		}
5886send_up:
5887		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5888			dev_kfree_skb_irq(skb);
5889			goto next_desc;
5890		}
5891
5892		if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5893			igb_rx_hwtstamp(q_vector, staterr, skb);
5894		total_bytes += skb->len;
5895		total_packets++;
5896
5897		igb_rx_checksum_adv(rx_ring, staterr, skb);
5898
5899		skb->protocol = eth_type_trans(skb, netdev);
5900		skb_record_rx_queue(skb, rx_ring->queue_index);
5901
5902		if (staterr & E1000_RXD_STAT_VP) {
5903			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5904
5905			__vlan_hwaccel_put_tag(skb, vid);
5906		}
5907		napi_gro_receive(&q_vector->napi, skb);
5908
5909next_desc:
5910		rx_desc->wb.upper.status_error = 0;
5911
5912		/* return some buffers to hardware, one at a time is too slow */
5913		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5914			igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5915			cleaned_count = 0;
5916		}
5917
5918		/* use prefetched values */
5919		rx_desc = next_rxd;
5920		buffer_info = next_buffer;
5921		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5922	}
5923
5924	rx_ring->next_to_clean = i;
5925	cleaned_count = igb_desc_unused(rx_ring);
5926
5927	if (cleaned_count)
5928		igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5929
5930	rx_ring->total_packets += total_packets;
5931	rx_ring->total_bytes += total_bytes;
5932	u64_stats_update_begin(&rx_ring->rx_syncp);
5933	rx_ring->rx_stats.packets += total_packets;
5934	rx_ring->rx_stats.bytes += total_bytes;
5935	u64_stats_update_end(&rx_ring->rx_syncp);
5936	return cleaned;
5937}
5938
5939/**
5940 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5941 * @rx_ring: pointer to the Rx descriptor ring to refill
 * @cleaned_count: number of buffers to replace
5942 **/
5943void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5944{
5945	struct net_device *netdev = rx_ring->netdev;
5946	union e1000_adv_rx_desc *rx_desc;
5947	struct igb_buffer *buffer_info;
5948	struct sk_buff *skb;
5949	unsigned int i;
5950	int bufsz;
5951
5952	i = rx_ring->next_to_use;
5953	buffer_info = &rx_ring->buffer_info[i];
5954
5955	bufsz = rx_ring->rx_buffer_len;
5956
5957	while (cleaned_count--) {
5958		rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5959
5960		if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5961			if (!buffer_info->page) {
5962				buffer_info->page = netdev_alloc_page(netdev);
5963				if (unlikely(!buffer_info->page)) {
5964					u64_stats_update_begin(&rx_ring->rx_syncp);
5965					rx_ring->rx_stats.alloc_failed++;
5966					u64_stats_update_end(&rx_ring->rx_syncp);
5967					goto no_buffers;
5968				}
5969				buffer_info->page_offset = 0;
5970			} else {
5971				buffer_info->page_offset ^= PAGE_SIZE / 2;
5972			}
5973			buffer_info->page_dma =
5974				dma_map_page(rx_ring->dev, buffer_info->page,
5975					     buffer_info->page_offset,
5976					     PAGE_SIZE / 2,
5977					     DMA_FROM_DEVICE);
5978			if (dma_mapping_error(rx_ring->dev,
5979					      buffer_info->page_dma)) {
5980				buffer_info->page_dma = 0;
5981				u64_stats_update_begin(&rx_ring->rx_syncp);
5982				rx_ring->rx_stats.alloc_failed++;
5983				u64_stats_update_end(&rx_ring->rx_syncp);
5984				goto no_buffers;
5985			}
5986		}
5987
5988		skb = buffer_info->skb;
5989		if (!skb) {
5990			skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5991			if (unlikely(!skb)) {
5992				u64_stats_update_begin(&rx_ring->rx_syncp);
5993				rx_ring->rx_stats.alloc_failed++;
5994				u64_stats_update_end(&rx_ring->rx_syncp);
5995				goto no_buffers;
5996			}
5997
5998			buffer_info->skb = skb;
5999		}
6000		if (!buffer_info->dma) {
6001			buffer_info->dma = dma_map_single(rx_ring->dev,
6002			                                  skb->data,
6003							  bufsz,
6004							  DMA_FROM_DEVICE);
6005			if (dma_mapping_error(rx_ring->dev,
6006					      buffer_info->dma)) {
6007				buffer_info->dma = 0;
6008				u64_stats_update_begin(&rx_ring->rx_syncp);
6009				rx_ring->rx_stats.alloc_failed++;
6010				u64_stats_update_end(&rx_ring->rx_syncp);
6011				goto no_buffers;
6012			}
6013		}
6014		/* Refresh the desc even if buffer_addrs didn't change because
6015		 * each write-back erases this info. */
6016		if (bufsz < IGB_RXBUFFER_1024) {
6017			rx_desc->read.pkt_addr =
6018			     cpu_to_le64(buffer_info->page_dma);
6019			rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6020		} else {
6021			rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6022			rx_desc->read.hdr_addr = 0;
6023		}
6024
6025		i++;
6026		if (i == rx_ring->count)
6027			i = 0;
6028		buffer_info = &rx_ring->buffer_info[i];
6029	}
6030
6031no_buffers:
6032	if (rx_ring->next_to_use != i) {
6033		rx_ring->next_to_use = i;
6034		if (i == 0)
6035			i = (rx_ring->count - 1);
6036		else
6037			i--;
6038
6039		/* Force memory writes to complete before letting h/w
6040		 * know there are new descriptors to fetch.  (Only
6041		 * applicable for weak-ordered memory model archs,
6042		 * such as IA-64). */
6043		wmb();
6044		writel(i, rx_ring->tail);
6045	}
6046}
6047
6048/**
6049 * igb_mii_ioctl - handle MII register access ioctls
6050 * @netdev: network interface device structure
6051 * @ifr: interface request containing the MII data
6052 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6053 **/
6054static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6055{
6056	struct igb_adapter *adapter = netdev_priv(netdev);
6057	struct mii_ioctl_data *data = if_mii(ifr);
6058
6059	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6060		return -EOPNOTSUPP;
6061
6062	switch (cmd) {
6063	case SIOCGMIIPHY:
6064		data->phy_id = adapter->hw.phy.addr;
6065		break;
6066	case SIOCGMIIREG:
6067		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6068		                     &data->val_out))
6069			return -EIO;
6070		break;
6071	case SIOCSMIIREG:
6072	default:
6073		return -EOPNOTSUPP;
6074	}
6075	return 0;
6076}
6077
6078/**
6079 * igb_hwtstamp_ioctl - control hardware time stamping
6080 * @netdev: network interface device structure
6081 * @ifr: interface request containing the hwtstamp_config
6082 * @cmd: ioctl command (SIOCSHWTSTAMP)
6083 *
6084 * Outgoing time stamping can be enabled and disabled. Play nice and
6085 * disable it when requested, although it shouldn't cause any overhead
6086 * when no packet needs it. At most one packet in the queue may be
6087 * marked for time stamping, otherwise it would be impossible to tell
6088 * for sure to which packet the hardware time stamp belongs.
6089 *
6090 * Incoming time stamping has to be configured via the hardware
6091 * filters. Not all combinations are supported, in particular event
6092 * type has to be specified. Matching the kind of event packet is
6093 * not supported, with the exception of "all V2 events regardless of
6094 * level 2 or 4".
6095 *
6096 **/
6097static int igb_hwtstamp_ioctl(struct net_device *netdev,
6098			      struct ifreq *ifr, int cmd)
6099{
6100	struct igb_adapter *adapter = netdev_priv(netdev);
6101	struct e1000_hw *hw = &adapter->hw;
6102	struct hwtstamp_config config;
6103	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6104	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6105	u32 tsync_rx_cfg = 0;
6106	bool is_l4 = false;
6107	bool is_l2 = false;
6108	u32 regval;
6109
6110	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6111		return -EFAULT;
6112
6113	/* reserved for future extensions */
6114	if (config.flags)
6115		return -EINVAL;
6116
6117	switch (config.tx_type) {
6118	case HWTSTAMP_TX_OFF:
6119		tsync_tx_ctl = 0;
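		/* fall through */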
6120	case HWTSTAMP_TX_ON:
6121		break;
6122	default:
6123		return -ERANGE;
6124	}
6125
6126	switch (config.rx_filter) {
6127	case HWTSTAMP_FILTER_NONE:
6128		tsync_rx_ctl = 0;
6129		break;
6130	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6131	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6132	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6133	case HWTSTAMP_FILTER_ALL:
6134		/*
6135		 * register TSYNCRXCFG must be set, therefore it is not
6136		 * possible to time stamp both Sync and Delay_Req messages
6137		 * => fall back to time stamping all packets
6138		 */
6139		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6140		config.rx_filter = HWTSTAMP_FILTER_ALL;
6141		break;
6142	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6143		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6144		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6145		is_l4 = true;
6146		break;
6147	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6148		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6149		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6150		is_l4 = true;
6151		break;
6152	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6153	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6154		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6155		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6156		is_l2 = true;
6157		is_l4 = true;
6158		config.rx_filter = HWTSTAMP_FILTER_SOME;
6159		break;
6160	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6161	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6162		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6163		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6164		is_l2 = true;
6165		is_l4 = true;
6166		config.rx_filter = HWTSTAMP_FILTER_SOME;
6167		break;
6168	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6169	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6170	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6171		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6172		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6173		is_l2 = true;
6174		break;
6175	default:
6176		return -ERANGE;
6177	}
6178
6179	if (hw->mac.type == e1000_82575) {
6180		if (tsync_rx_ctl || tsync_tx_ctl)
6181			return -EINVAL;
6182		return 0;
6183	}
6184
6185	/*
6186	 * Per-packet timestamping only works if all packets are
6187	 * timestamped, so enable timestamping in all packets as
6188	 * long as one rx filter was configured.
6189	 */
6190	if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6191		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6192		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6193	}
6194
6195	/* enable/disable TX */
6196	regval = rd32(E1000_TSYNCTXCTL);
6197	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6198	regval |= tsync_tx_ctl;
6199	wr32(E1000_TSYNCTXCTL, regval);
6200
6201	/* enable/disable RX */
6202	regval = rd32(E1000_TSYNCRXCTL);
6203	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6204	regval |= tsync_rx_ctl;
6205	wr32(E1000_TSYNCRXCTL, regval);
6206
6207	/* define which PTP packets are time stamped */
6208	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6209
6210	/* define ethertype filter for timestamped packets */
6211	if (is_l2)
6212		wr32(E1000_ETQF(3),
6213		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6214		                 E1000_ETQF_1588 | /* enable timestamping */
6215		                 ETH_P_1588));     /* 1588 eth protocol type */
6216	else
6217		wr32(E1000_ETQF(3), 0);
6218
6219#define PTP_PORT 319
6220	/* L4 Queue Filter[3]: filter by destination port and protocol */
6221	if (is_l4) {
6222		u32 ftqf = (IPPROTO_UDP /* UDP */
6223			| E1000_FTQF_VF_BP /* VF not compared */
6224			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6225			| E1000_FTQF_MASK); /* mask all inputs */
6226		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6227
6228		wr32(E1000_IMIR(3), htons(PTP_PORT));
6229		wr32(E1000_IMIREXT(3),
6230		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6231		if (hw->mac.type == e1000_82576) {
6232			/* enable source port check */
6233			wr32(E1000_SPQF(3), htons(PTP_PORT));
6234			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6235		}
6236		wr32(E1000_FTQF(3), ftqf);
6237	} else {
6238		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6239	}
6240	wrfl();
6241
6242	adapter->hwtstamp_config = config;
6243
6244	/* clear TX/RX time stamp registers, just to be sure */
6245	regval = rd32(E1000_TXSTMPH);
6246	regval = rd32(E1000_RXSTMPH);
6247
6248	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6249		-EFAULT : 0;
6250}
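/*
 * For illustration only: a minimal userspace request exercising this ioctl
 * could look roughly like the sketch below ("eth0" and the chosen filter
 * are arbitrary examples):
 *
 *	struct ifreq ifr;
 *	struct hwtstamp_config cfg;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	memset(&cfg, 0, sizeof(cfg));
 *	cfg.tx_type = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter may have been adjusted (e.g. to
 * HWTSTAMP_FILTER_ALL above) to report what the hardware actually enabled.
 */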
6251
6252/**
6253 * igb_ioctl - dispatch device-specific ioctl requests
6254 * @netdev: network interface device structure
6255 * @ifr: interface request
6256 * @cmd: ioctl command
6257 **/
6258static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6259{
6260	switch (cmd) {
6261	case SIOCGMIIPHY:
6262	case SIOCGMIIREG:
6263	case SIOCSMIIREG:
6264		return igb_mii_ioctl(netdev, ifr, cmd);
6265	case SIOCSHWTSTAMP:
6266		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6267	default:
6268		return -EOPNOTSUPP;
6269	}
6270}
6271
6272s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6273{
6274	struct igb_adapter *adapter = hw->back;
6275	u16 cap_offset;
6276
6277	cap_offset = adapter->pdev->pcie_cap;
6278	if (!cap_offset)
6279		return -E1000_ERR_CONFIG;
6280
6281	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6282
6283	return 0;
6284}
6285
6286s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6287{
6288	struct igb_adapter *adapter = hw->back;
6289	u16 cap_offset;
6290
6291	cap_offset = adapter->pdev->pcie_cap;
6292	if (!cap_offset)
6293		return -E1000_ERR_CONFIG;
6294
6295	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6296
6297	return 0;
6298}
6299
6300static void igb_vlan_mode(struct net_device *netdev, u32 features)
6301{
6302	struct igb_adapter *adapter = netdev_priv(netdev);
6303	struct e1000_hw *hw = &adapter->hw;
6304	u32 ctrl, rctl;
6305
6306	igb_irq_disable(adapter);
6307
6308	if (features & NETIF_F_HW_VLAN_RX) {
6309		/* enable VLAN tag insert/strip */
6310		ctrl = rd32(E1000_CTRL);
6311		ctrl |= E1000_CTRL_VME;
6312		wr32(E1000_CTRL, ctrl);
6313
6314		/* Disable CFI check */
6315		rctl = rd32(E1000_RCTL);
6316		rctl &= ~E1000_RCTL_CFIEN;
6317		wr32(E1000_RCTL, rctl);
6318	} else {
6319		/* disable VLAN tag insert/strip */
6320		ctrl = rd32(E1000_CTRL);
6321		ctrl &= ~E1000_CTRL_VME;
6322		wr32(E1000_CTRL, ctrl);
6323	}
6324
6325	igb_rlpml_set(adapter);
6326
6327	if (!test_bit(__IGB_DOWN, &adapter->state))
6328		igb_irq_enable(adapter);
6329}
6330
6331static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6332{
6333	struct igb_adapter *adapter = netdev_priv(netdev);
6334	struct e1000_hw *hw = &adapter->hw;
6335	int pf_id = adapter->vfs_allocated_count;
6336
6337	/* attempt to add filter to vlvf array */
6338	igb_vlvf_set(adapter, vid, true, pf_id);
6339
6340	/* add the filter since PF can receive vlans w/o entry in vlvf */
6341	igb_vfta_set(hw, vid, true);
6342
6343	set_bit(vid, adapter->active_vlans);
6344}
6345
6346static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6347{
6348	struct igb_adapter *adapter = netdev_priv(netdev);
6349	struct e1000_hw *hw = &adapter->hw;
6350	int pf_id = adapter->vfs_allocated_count;
6351	s32 err;
6352
6353	igb_irq_disable(adapter);
6354
6355	if (!test_bit(__IGB_DOWN, &adapter->state))
6356		igb_irq_enable(adapter);
6357
6358	/* remove vlan from VLVF table array */
6359	err = igb_vlvf_set(adapter, vid, false, pf_id);
6360
6361	/* if vid was not present in VLVF just remove it from table */
6362	if (err)
6363		igb_vfta_set(hw, vid, false);
6364
6365	clear_bit(vid, adapter->active_vlans);
6366}
6367
6368static void igb_restore_vlan(struct igb_adapter *adapter)
6369{
6370	u16 vid;
6371
6372	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6373		igb_vlan_rx_add_vid(adapter->netdev, vid);
6374}
6375
6376int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6377{
6378	struct pci_dev *pdev = adapter->pdev;
6379	struct e1000_mac_info *mac = &adapter->hw.mac;
6380
6381	mac->autoneg = 0;
6382
6383	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6384	 * for the switch() below to work */
6385	if ((spd & 1) || (dplx & ~1))
6386		goto err_inval;
6387
6388	/* Fiber NICs only allow 1000 Mbps full duplex */
6389	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6390	    (spd != SPEED_1000 ||
6391	     dplx != DUPLEX_FULL))
6392		goto err_inval;
6393
6394	switch (spd + dplx) {
6395	case SPEED_10 + DUPLEX_HALF:
6396		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6397		break;
6398	case SPEED_10 + DUPLEX_FULL:
6399		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6400		break;
6401	case SPEED_100 + DUPLEX_HALF:
6402		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6403		break;
6404	case SPEED_100 + DUPLEX_FULL:
6405		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6406		break;
6407	case SPEED_1000 + DUPLEX_FULL:
6408		mac->autoneg = 1;
6409		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6410		break;
6411	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6412	default:
6413		goto err_inval;
6414	}
6415	return 0;
6416
6417err_inval:
6418	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6419	return -EINVAL;
6420}
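/*
 * This helper is reached from the ethtool set_settings path when
 * autonegotiation is disabled; e.g. (interface name illustrative)
 * "ethtool -s eth0 speed 100 duplex full autoneg off" arrives here with
 * spd == SPEED_100 and dplx == DUPLEX_FULL and selects ADVERTISE_100_FULL.
 */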
6421
6422static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6423{
6424	struct net_device *netdev = pci_get_drvdata(pdev);
6425	struct igb_adapter *adapter = netdev_priv(netdev);
6426	struct e1000_hw *hw = &adapter->hw;
6427	u32 ctrl, rctl, status;
6428	u32 wufc = adapter->wol;
6429#ifdef CONFIG_PM
6430	int retval = 0;
6431#endif
6432
6433	netif_device_detach(netdev);
6434
6435	if (netif_running(netdev))
6436		igb_close(netdev);
6437
6438	igb_clear_interrupt_scheme(adapter);
6439
6440#ifdef CONFIG_PM
6441	retval = pci_save_state(pdev);
6442	if (retval)
6443		return retval;
6444#endif
6445
6446	status = rd32(E1000_STATUS);
6447	if (status & E1000_STATUS_LU)
6448		wufc &= ~E1000_WUFC_LNKC;
6449
6450	if (wufc) {
6451		igb_setup_rctl(adapter);
6452		igb_set_rx_mode(netdev);
6453
6454		/* turn on all-multi mode if wake on multicast is enabled */
6455		if (wufc & E1000_WUFC_MC) {
6456			rctl = rd32(E1000_RCTL);
6457			rctl |= E1000_RCTL_MPE;
6458			wr32(E1000_RCTL, rctl);
6459		}
6460
6461		ctrl = rd32(E1000_CTRL);
6462		/* advertise wake from D3Cold */
6463		#define E1000_CTRL_ADVD3WUC 0x00100000
6464		/* phy power management enable */
6465		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6466		ctrl |= E1000_CTRL_ADVD3WUC;
6467		wr32(E1000_CTRL, ctrl);
6468
6469		/* Allow time for pending master requests to run */
6470		igb_disable_pcie_master(hw);
6471
6472		wr32(E1000_WUC, E1000_WUC_PME_EN);
6473		wr32(E1000_WUFC, wufc);
6474	} else {
6475		wr32(E1000_WUC, 0);
6476		wr32(E1000_WUFC, 0);
6477	}
6478
6479	*enable_wake = wufc || adapter->en_mng_pt;
6480	if (!*enable_wake)
6481		igb_power_down_link(adapter);
6482	else
6483		igb_power_up_link(adapter);
6484
6485	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6486	 * would have already happened in close and is redundant. */
6487	igb_release_hw_control(adapter);
6488
6489	pci_disable_device(pdev);
6490
6491	return 0;
6492}
6493
6494#ifdef CONFIG_PM
6495static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6496{
6497	int retval;
6498	bool wake;
6499
6500	retval = __igb_shutdown(pdev, &wake);
6501	if (retval)
6502		return retval;
6503
6504	if (wake) {
6505		pci_prepare_to_sleep(pdev);
6506	} else {
6507		pci_wake_from_d3(pdev, false);
6508		pci_set_power_state(pdev, PCI_D3hot);
6509	}
6510
6511	return 0;
6512}
6513
6514static int igb_resume(struct pci_dev *pdev)
6515{
6516	struct net_device *netdev = pci_get_drvdata(pdev);
6517	struct igb_adapter *adapter = netdev_priv(netdev);
6518	struct e1000_hw *hw = &adapter->hw;
6519	u32 err;
6520
6521	pci_set_power_state(pdev, PCI_D0);
6522	pci_restore_state(pdev);
6523	pci_save_state(pdev);
6524
6525	err = pci_enable_device_mem(pdev);
6526	if (err) {
6527		dev_err(&pdev->dev,
6528			"igb: Cannot enable PCI device from suspend\n");
6529		return err;
6530	}
6531	pci_set_master(pdev);
6532
6533	pci_enable_wake(pdev, PCI_D3hot, 0);
6534	pci_enable_wake(pdev, PCI_D3cold, 0);
6535
6536	if (igb_init_interrupt_scheme(adapter)) {
6537		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6538		return -ENOMEM;
6539	}
6540
6541	igb_reset(adapter);
6542
6543	/* let the f/w know that the h/w is now under the control of the
6544	 * driver. */
6545	igb_get_hw_control(adapter);
6546
6547	wr32(E1000_WUS, ~0);
6548
6549	if (netif_running(netdev)) {
6550		err = igb_open(netdev);
6551		if (err)
6552			return err;
6553	}
6554
6555	netif_device_attach(netdev);
6556
6557	return 0;
6558}
6559#endif
6560
6561static void igb_shutdown(struct pci_dev *pdev)
6562{
6563	bool wake;
6564
6565	__igb_shutdown(pdev, &wake);
6566
6567	if (system_state == SYSTEM_POWER_OFF) {
6568		pci_wake_from_d3(pdev, wake);
6569		pci_set_power_state(pdev, PCI_D3hot);
6570	}
6571}
6572
6573#ifdef CONFIG_NET_POLL_CONTROLLER
6574/*
6575 * Polling 'interrupt' - used by things like netconsole to send skbs
6576 * without having to re-enable interrupts. It's not called while
6577 * the interrupt routine is executing.
6578 */
6579static void igb_netpoll(struct net_device *netdev)
6580{
6581	struct igb_adapter *adapter = netdev_priv(netdev);
6582	struct e1000_hw *hw = &adapter->hw;
6583	int i;
6584
6585	if (!adapter->msix_entries) {
6586		struct igb_q_vector *q_vector = adapter->q_vector[0];
6587		igb_irq_disable(adapter);
6588		napi_schedule(&q_vector->napi);
6589		return;
6590	}
6591
6592	for (i = 0; i < adapter->num_q_vectors; i++) {
6593		struct igb_q_vector *q_vector = adapter->q_vector[i];
6594		wr32(E1000_EIMC, q_vector->eims_value);
6595		napi_schedule(&q_vector->napi);
6596	}
6597}
6598#endif /* CONFIG_NET_POLL_CONTROLLER */
6599
6600/**
6601 * igb_io_error_detected - called when PCI error is detected
6602 * @pdev: Pointer to PCI device
6603 * @state: The current pci connection state
6604 *
6605 * This function is called after a PCI bus error affecting
6606 * this device has been detected.
6607 */
6608static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6609					      pci_channel_state_t state)
6610{
6611	struct net_device *netdev = pci_get_drvdata(pdev);
6612	struct igb_adapter *adapter = netdev_priv(netdev);
6613
6614	netif_device_detach(netdev);
6615
6616	if (state == pci_channel_io_perm_failure)
6617		return PCI_ERS_RESULT_DISCONNECT;
6618
6619	if (netif_running(netdev))
6620		igb_down(adapter);
6621	pci_disable_device(pdev);
6622
6623	/* Request a slot reset. */
6624	return PCI_ERS_RESULT_NEED_RESET;
6625}
6626
6627/**
6628 * igb_io_slot_reset - called after the pci bus has been reset.
6629 * @pdev: Pointer to PCI device
6630 *
6631 * Restart the card from scratch, as if from a cold-boot. Implementation
6632 * resembles the first-half of the igb_resume routine.
6633 */
6634static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6635{
6636	struct net_device *netdev = pci_get_drvdata(pdev);
6637	struct igb_adapter *adapter = netdev_priv(netdev);
6638	struct e1000_hw *hw = &adapter->hw;
6639	pci_ers_result_t result;
6640	int err;
6641
6642	if (pci_enable_device_mem(pdev)) {
6643		dev_err(&pdev->dev,
6644			"Cannot re-enable PCI device after reset.\n");
6645		result = PCI_ERS_RESULT_DISCONNECT;
6646	} else {
6647		pci_set_master(pdev);
6648		pci_restore_state(pdev);
6649		pci_save_state(pdev);
6650
6651		pci_enable_wake(pdev, PCI_D3hot, 0);
6652		pci_enable_wake(pdev, PCI_D3cold, 0);
6653
6654		igb_reset(adapter);
6655		wr32(E1000_WUS, ~0);
6656		result = PCI_ERS_RESULT_RECOVERED;
6657	}
6658
6659	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6660	if (err) {
6661		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6662		        "failed 0x%0x\n", err);
6663		/* non-fatal, continue */
6664	}
6665
6666	return result;
6667}
6668
6669/**
6670 * igb_io_resume - called when traffic can start flowing again.
6671 * @pdev: Pointer to PCI device
6672 *
6673 * This callback is called when the error recovery driver tells us that
6674 * its OK to resume normal operation. Implementation resembles the
6675 * second-half of the igb_resume routine.
6676 */
6677static void igb_io_resume(struct pci_dev *pdev)
6678{
6679	struct net_device *netdev = pci_get_drvdata(pdev);
6680	struct igb_adapter *adapter = netdev_priv(netdev);
6681
6682	if (netif_running(netdev)) {
6683		if (igb_up(adapter)) {
6684			dev_err(&pdev->dev, "igb_up failed after reset\n");
6685			return;
6686		}
6687	}
6688
6689	netif_device_attach(netdev);
6690
6691	/* let the f/w know that the h/w is now under the control of the
6692	 * driver. */
6693	igb_get_hw_control(adapter);
6694}
6695
6696static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6697                             u8 qsel)
6698{
6699	u32 rar_low, rar_high;
6700	struct e1000_hw *hw = &adapter->hw;
6701
6702	/* HW expects these in little endian so we reverse the byte order
6703	 * from network order (big endian) to little endian
6704	 */
6705	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6706	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6707	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6708
6709	/* Indicate to hardware the Address is Valid. */
6710	rar_high |= E1000_RAH_AV;
6711
6712	if (hw->mac.type == e1000_82575)
6713		rar_high |= E1000_RAH_POOL_1 * qsel;
6714	else
6715		rar_high |= E1000_RAH_POOL_1 << qsel;
6716
6717	wr32(E1000_RAL(index), rar_low);
6718	wrfl();
6719	wr32(E1000_RAH(index), rar_high);
6720	wrfl();
6721}
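/*
 * Packing example (address chosen arbitrarily): 00:1b:21:aa:bb:cc is
 * written as rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the
 * AV and pool bits are OR-ed into rar_high.
 */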
6722
6723static int igb_set_vf_mac(struct igb_adapter *adapter,
6724                          int vf, unsigned char *mac_addr)
6725{
6726	struct e1000_hw *hw = &adapter->hw;
6727	/* VF MAC addresses start at the end of the receive address registers
6728	 * and move towards the first, so a collision should not be possible */
6729	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6730
6731	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6732
6733	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6734
6735	return 0;
6736}
6737
6738static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6739{
6740	struct igb_adapter *adapter = netdev_priv(netdev);
6741	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6742		return -EINVAL;
6743	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6744	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6745	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6746				      " change effective.\n");
6747	if (test_bit(__IGB_DOWN, &adapter->state)) {
6748		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6749			 " but the PF device is not up.\n");
6750		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6751			 " attempting to use the VF device.\n");
6752	}
6753	return igb_set_vf_mac(adapter, vf, mac);
6754}
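/*
 * igb_ndo_set_vf_mac() backs the ndo_set_vf_mac hook, i.e. configuration
 * along the lines of "ip link set eth0 vf 0 mac 52:54:00:12:34:56"
 * (interface name and address illustrative).
 */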
6755
6756static int igb_link_mbps(int internal_link_speed)
6757{
6758	switch (internal_link_speed) {
6759	case SPEED_100:
6760		return 100;
6761	case SPEED_1000:
6762		return 1000;
6763	default:
6764		return 0;
6765	}
6766}
6767
6768static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6769				  int link_speed)
6770{
6771	int rf_dec, rf_int;
6772	u32 bcnrc_val;
6773
6774	if (tx_rate != 0) {
6775		/* Calculate the rate factor values to set */
6776		rf_int = link_speed / tx_rate;
6777		rf_dec = (link_speed - (rf_int * tx_rate));
6778		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6779
6780		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6781		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6782		               E1000_RTTBCNRC_RF_INT_MASK);
6783		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6784	} else {
6785		bcnrc_val = 0;
6786	}
6787
6788	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6789	wr32(E1000_RTTBCNRC, bcnrc_val);
6790}
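/*
 * Rate-factor math above with illustrative numbers: on a 1000 Mbps link
 * with a 300 Mbps cap, rf_int = 1000 / 300 = 3 and rf_dec encodes the
 * remaining 100/300 (~0.33) in units of 2^-E1000_RTTBCNRC_RF_INT_SHIFT,
 * so the programmed factor is roughly 3.33 = link_speed / tx_rate.
 */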
6791
6792static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6793{
6794	int actual_link_speed, i;
6795	bool reset_rate = false;
6796
6797	/* VF TX rate limit was not set or not supported */
6798	if ((adapter->vf_rate_link_speed == 0) ||
6799	    (adapter->hw.mac.type != e1000_82576))
6800		return;
6801
6802	actual_link_speed = igb_link_mbps(adapter->link_speed);
6803	if (actual_link_speed != adapter->vf_rate_link_speed) {
6804		reset_rate = true;
6805		adapter->vf_rate_link_speed = 0;
6806		dev_info(&adapter->pdev->dev,
6807		         "Link speed has been changed. VF Transmit "
6808		         "rate is disabled\n");
6809	}
6810
6811	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6812		if (reset_rate)
6813			adapter->vf_data[i].tx_rate = 0;
6814
6815		igb_set_vf_rate_limit(&adapter->hw, i,
6816		                      adapter->vf_data[i].tx_rate,
6817		                      actual_link_speed);
6818	}
6819}
6820
6821static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6822{
6823	struct igb_adapter *adapter = netdev_priv(netdev);
6824	struct e1000_hw *hw = &adapter->hw;
6825	int actual_link_speed;
6826
6827	if (hw->mac.type != e1000_82576)
6828		return -EOPNOTSUPP;
6829
6830	actual_link_speed = igb_link_mbps(adapter->link_speed);
6831	if ((vf >= adapter->vfs_allocated_count) ||
6832	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6833	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6834		return -EINVAL;
6835
6836	adapter->vf_rate_link_speed = actual_link_speed;
6837	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6838	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6839
6840	return 0;
6841}
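/*
 * igb_ndo_set_vf_bw() backs the ndo_set_vf_tx_rate hook, e.g.
 * "ip link set eth0 vf 0 rate 300" (names and rate illustrative); the rate
 * is given in Mbps and, per the checks above, may not exceed the current
 * link speed and is only supported on 82576 parts.
 */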
6842
6843static int igb_ndo_get_vf_config(struct net_device *netdev,
6844				 int vf, struct ifla_vf_info *ivi)
6845{
6846	struct igb_adapter *adapter = netdev_priv(netdev);
6847	if (vf >= adapter->vfs_allocated_count)
6848		return -EINVAL;
6849	ivi->vf = vf;
6850	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6851	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6852	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6853	ivi->qos = adapter->vf_data[vf].pf_qos;
6854	return 0;
6855}
6856
6857static void igb_vmm_control(struct igb_adapter *adapter)
6858{
6859	struct e1000_hw *hw = &adapter->hw;
6860	u32 reg;
6861
6862	switch (hw->mac.type) {
6863	case e1000_82575:
6864	default:
6865		/* replication is not supported for 82575 */
6866		return;
6867	case e1000_82576:
6868		/* notify HW that the MAC is adding vlan tags */
6869		reg = rd32(E1000_DTXCTL);
6870		reg |= E1000_DTXCTL_VLAN_ADDED;
6871		wr32(E1000_DTXCTL, reg);
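		/* fall through */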
6872	case e1000_82580:
6873		/* enable replication vlan tag stripping */
6874		reg = rd32(E1000_RPLOLR);
6875		reg |= E1000_RPLOLR_STRVLAN;
6876		wr32(E1000_RPLOLR, reg);
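		/* fall through */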
6877	case e1000_i350:
6878		/* none of the above registers are supported by i350 */
6879		break;
6880	}
6881
6882	if (adapter->vfs_allocated_count) {
6883		igb_vmdq_set_loopback_pf(hw, true);
6884		igb_vmdq_set_replication_pf(hw, true);
6885		igb_vmdq_set_anti_spoofing_pf(hw, true,
6886						adapter->vfs_allocated_count);
6887	} else {
6888		igb_vmdq_set_loopback_pf(hw, false);
6889		igb_vmdq_set_replication_pf(hw, false);
6890	}
6891}
6892
6893/* igb_main.c */
6894