igb_main.c revision 238ac817fd23f7dd5f61a8c51b4678f8d199db57
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2011 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/bitops.h>
32#include <linux/vmalloc.h>
33#include <linux/pagemap.h>
34#include <linux/netdevice.h>
35#include <linux/ipv6.h>
36#include <linux/slab.h>
37#include <net/checksum.h>
38#include <net/ip6_checksum.h>
39#include <linux/net_tstamp.h>
40#include <linux/mii.h>
41#include <linux/ethtool.h>
42#include <linux/if.h>
43#include <linux/if_vlan.h>
44#include <linux/pci.h>
45#include <linux/pci-aspm.h>
46#include <linux/delay.h>
47#include <linux/interrupt.h>
48#include <linux/if_ether.h>
49#include <linux/aer.h>
50#include <linux/prefetch.h>
51#ifdef CONFIG_IGB_DCA
52#include <linux/dca.h>
53#endif
54#include "igb.h"
55
56#define MAJ 3
57#define MIN 0
58#define BUILD 6
59#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
60__stringify(BUILD) "-k"
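/* note: DRV_VERSION expands to "3.0.6-k" */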
61char igb_driver_name[] = "igb";
62char igb_driver_version[] = DRV_VERSION;
63static const char igb_driver_string[] =
64				"Intel(R) Gigabit Ethernet Network Driver";
65static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66
67static const struct e1000_info *igb_info_tbl[] = {
68	[board_82575] = &e1000_82575_info,
69};
70
71static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
72	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
73	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
74	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
75	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
76	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
77	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
97	/* required last entry */
98	{0, }
99};
100
101MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102
103void igb_reset(struct igb_adapter *);
104static int igb_setup_all_tx_resources(struct igb_adapter *);
105static int igb_setup_all_rx_resources(struct igb_adapter *);
106static void igb_free_all_tx_resources(struct igb_adapter *);
107static void igb_free_all_rx_resources(struct igb_adapter *);
108static void igb_setup_mrqc(struct igb_adapter *);
109static int igb_probe(struct pci_dev *, const struct pci_device_id *);
110static void __devexit igb_remove(struct pci_dev *pdev);
111static void igb_init_hw_timer(struct igb_adapter *adapter);
112static int igb_sw_init(struct igb_adapter *);
113static int igb_open(struct net_device *);
114static int igb_close(struct net_device *);
115static void igb_configure_tx(struct igb_adapter *);
116static void igb_configure_rx(struct igb_adapter *);
117static void igb_clean_all_tx_rings(struct igb_adapter *);
118static void igb_clean_all_rx_rings(struct igb_adapter *);
119static void igb_clean_tx_ring(struct igb_ring *);
120static void igb_clean_rx_ring(struct igb_ring *);
121static void igb_set_rx_mode(struct net_device *);
122static void igb_update_phy_info(unsigned long);
123static void igb_watchdog(unsigned long);
124static void igb_watchdog_task(struct work_struct *);
125static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
126static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
127						 struct rtnl_link_stats64 *stats);
128static int igb_change_mtu(struct net_device *, int);
129static int igb_set_mac(struct net_device *, void *);
130static void igb_set_uta(struct igb_adapter *adapter);
131static irqreturn_t igb_intr(int irq, void *);
132static irqreturn_t igb_intr_msi(int irq, void *);
133static irqreturn_t igb_msix_other(int irq, void *);
134static irqreturn_t igb_msix_ring(int irq, void *);
135#ifdef CONFIG_IGB_DCA
136static void igb_update_dca(struct igb_q_vector *);
137static void igb_setup_dca(struct igb_adapter *);
138#endif /* CONFIG_IGB_DCA */
139static bool igb_clean_tx_irq(struct igb_q_vector *);
140static int igb_poll(struct napi_struct *, int);
141static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
142static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
143static void igb_tx_timeout(struct net_device *);
144static void igb_reset_task(struct work_struct *);
145static void igb_vlan_mode(struct net_device *netdev, u32 features);
146static void igb_vlan_rx_add_vid(struct net_device *, u16);
147static void igb_vlan_rx_kill_vid(struct net_device *, u16);
148static void igb_restore_vlan(struct igb_adapter *);
149static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
150static void igb_ping_all_vfs(struct igb_adapter *);
151static void igb_msg_task(struct igb_adapter *);
152static void igb_vmm_control(struct igb_adapter *);
153static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
154static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
155static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
156static int igb_ndo_set_vf_vlan(struct net_device *netdev,
157			       int vf, u16 vlan, u8 qos);
158static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
159static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
160				 struct ifla_vf_info *ivi);
161static void igb_check_vf_rate_limit(struct igb_adapter *);
162
163#ifdef CONFIG_PM
164static int igb_suspend(struct pci_dev *, pm_message_t);
165static int igb_resume(struct pci_dev *);
166#endif
167static void igb_shutdown(struct pci_dev *);
168#ifdef CONFIG_IGB_DCA
169static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
170static struct notifier_block dca_notifier = {
171	.notifier_call	= igb_notify_dca,
172	.next		= NULL,
173	.priority	= 0
174};
175#endif
176#ifdef CONFIG_NET_POLL_CONTROLLER
177/* for netdump / net console */
178static void igb_netpoll(struct net_device *);
179#endif
180#ifdef CONFIG_PCI_IOV
181static unsigned int max_vfs = 0;
182module_param(max_vfs, uint, 0);
183MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
184                 "per physical function");
185#endif /* CONFIG_PCI_IOV */
186
187static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
188		     pci_channel_state_t);
189static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
190static void igb_io_resume(struct pci_dev *);
191
192static struct pci_error_handlers igb_err_handler = {
193	.error_detected = igb_io_error_detected,
194	.slot_reset = igb_io_slot_reset,
195	.resume = igb_io_resume,
196};
197
198
199static struct pci_driver igb_driver = {
200	.name     = igb_driver_name,
201	.id_table = igb_pci_tbl,
202	.probe    = igb_probe,
203	.remove   = __devexit_p(igb_remove),
204#ifdef CONFIG_PM
205	/* Power Management Hooks */
206	.suspend  = igb_suspend,
207	.resume   = igb_resume,
208#endif
209	.shutdown = igb_shutdown,
210	.err_handler = &igb_err_handler
211};
212
213MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
214MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
215MODULE_LICENSE("GPL");
216MODULE_VERSION(DRV_VERSION);
217
218struct igb_reg_info {
219	u32 ofs;
220	char *name;
221};
222
223static const struct igb_reg_info igb_reg_info_tbl[] = {
224
225	/* General Registers */
226	{E1000_CTRL, "CTRL"},
227	{E1000_STATUS, "STATUS"},
228	{E1000_CTRL_EXT, "CTRL_EXT"},
229
230	/* Interrupt Registers */
231	{E1000_ICR, "ICR"},
232
233	/* RX Registers */
234	{E1000_RCTL, "RCTL"},
235	{E1000_RDLEN(0), "RDLEN"},
236	{E1000_RDH(0), "RDH"},
237	{E1000_RDT(0), "RDT"},
238	{E1000_RXDCTL(0), "RXDCTL"},
239	{E1000_RDBAL(0), "RDBAL"},
240	{E1000_RDBAH(0), "RDBAH"},
241
242	/* TX Registers */
243	{E1000_TCTL, "TCTL"},
244	{E1000_TDBAL(0), "TDBAL"},
245	{E1000_TDBAH(0), "TDBAH"},
246	{E1000_TDLEN(0), "TDLEN"},
247	{E1000_TDH(0), "TDH"},
248	{E1000_TDT(0), "TDT"},
249	{E1000_TXDCTL(0), "TXDCTL"},
250	{E1000_TDFH, "TDFH"},
251	{E1000_TDFT, "TDFT"},
252	{E1000_TDFHS, "TDFHS"},
253	{E1000_TDFPC, "TDFPC"},
254
255	/* List Terminator */
256	{}
257};
258
259/*
260 * igb_regdump - register printout routine
261 */
262static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263{
264	int n = 0;
265	char rname[16];
266	u32 regs[8];
267
268	switch (reginfo->ofs) {
269	case E1000_RDLEN(0):
270		for (n = 0; n < 4; n++)
271			regs[n] = rd32(E1000_RDLEN(n));
272		break;
273	case E1000_RDH(0):
274		for (n = 0; n < 4; n++)
275			regs[n] = rd32(E1000_RDH(n));
276		break;
277	case E1000_RDT(0):
278		for (n = 0; n < 4; n++)
279			regs[n] = rd32(E1000_RDT(n));
280		break;
281	case E1000_RXDCTL(0):
282		for (n = 0; n < 4; n++)
283			regs[n] = rd32(E1000_RXDCTL(n));
284		break;
285	case E1000_RDBAL(0):
286		for (n = 0; n < 4; n++)
287			regs[n] = rd32(E1000_RDBAL(n));
288		break;
289	case E1000_RDBAH(0):
290		for (n = 0; n < 4; n++)
291			regs[n] = rd32(E1000_RDBAH(n));
292		break;
293	case E1000_TDBAL(0):
294		for (n = 0; n < 4; n++)
295			regs[n] = rd32(E1000_TDBAL(n));
296		break;
297	case E1000_TDBAH(0):
298		for (n = 0; n < 4; n++)
299			regs[n] = rd32(E1000_TDBAH(n));
300		break;
301	case E1000_TDLEN(0):
302		for (n = 0; n < 4; n++)
303			regs[n] = rd32(E1000_TDLEN(n));
304		break;
305	case E1000_TDH(0):
306		for (n = 0; n < 4; n++)
307			regs[n] = rd32(E1000_TDH(n));
308		break;
309	case E1000_TDT(0):
310		for (n = 0; n < 4; n++)
311			regs[n] = rd32(E1000_TDT(n));
312		break;
313	case E1000_TXDCTL(0):
314		for (n = 0; n < 4; n++)
315			regs[n] = rd32(E1000_TXDCTL(n));
316		break;
317	default:
318		printk(KERN_INFO "%-15s %08x\n",
319			reginfo->name, rd32(reginfo->ofs));
320		return;
321	}
322
323	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
324	printk(KERN_INFO "%-15s ", rname);
325	for (n = 0; n < 4; n++)
326		printk(KERN_CONT "%08x ", regs[n]);
327	printk(KERN_CONT "\n");
328}
329
330/*
331 * igb_dump - Print registers, tx-rings and rx-rings
332 */
333static void igb_dump(struct igb_adapter *adapter)
334{
335	struct net_device *netdev = adapter->netdev;
336	struct e1000_hw *hw = &adapter->hw;
337	struct igb_reg_info *reginfo;
338	int n = 0;
339	struct igb_ring *tx_ring;
340	union e1000_adv_tx_desc *tx_desc;
341	struct my_u0 { u64 a; u64 b; } *u0;
342	struct igb_buffer *buffer_info;
343	struct igb_ring *rx_ring;
344	union e1000_adv_rx_desc *rx_desc;
345	u32 staterr;
346	int i = 0;
347
348	if (!netif_msg_hw(adapter))
349		return;
350
351	/* Print netdevice Info */
352	if (netdev) {
353		dev_info(&adapter->pdev->dev, "Net device Info\n");
354		printk(KERN_INFO "Device Name     state            "
355			"trans_start      last_rx\n");
356		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
357		netdev->name,
358		netdev->state,
359		netdev->trans_start,
360		netdev->last_rx);
361	}
362
363	/* Print Registers */
364	dev_info(&adapter->pdev->dev, "Register Dump\n");
365	printk(KERN_INFO " Register Name   Value\n");
366	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
367	     reginfo->name; reginfo++) {
368		igb_regdump(hw, reginfo);
369	}
370
371	/* Print TX Ring Summary */
372	if (!netdev || !netif_running(netdev))
373		goto exit;
374
375	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
376	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
377		" leng ntw timestamp\n");
378	for (n = 0; n < adapter->num_tx_queues; n++) {
379		tx_ring = adapter->tx_ring[n];
380		buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
381		printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
382			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
383			   (u64)buffer_info->dma,
384			   buffer_info->length,
385			   buffer_info->next_to_watch,
386			   (u64)buffer_info->time_stamp);
387	}
388
389	/* Print TX Rings */
390	if (!netif_msg_tx_done(adapter))
391		goto rx_ring_summary;
392
393	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394
395	/* Transmit Descriptor Formats
396	 *
397	 * Advanced Transmit Descriptor
398	 *   +--------------------------------------------------------------+
399	 * 0 |         Buffer Address [63:0]                                |
400	 *   +--------------------------------------------------------------+
401	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
402	 *   +--------------------------------------------------------------+
403	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
404	 */
405
406	for (n = 0; n < adapter->num_tx_queues; n++) {
407		tx_ring = adapter->tx_ring[n];
408		printk(KERN_INFO "------------------------------------\n");
409		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
410		printk(KERN_INFO "------------------------------------\n");
411		printk(KERN_INFO "T [desc]     [address 63:0  ] "
412			"[PlPOCIStDDM Ln] [bi->dma       ] "
413			"leng  ntw timestamp        bi->skb\n");
414
415		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
416			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
417			buffer_info = &tx_ring->buffer_info[i];
418			u0 = (struct my_u0 *)tx_desc;
419			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
420				" %04X  %3X %016llX %p", i,
421				le64_to_cpu(u0->a),
422				le64_to_cpu(u0->b),
423				(u64)buffer_info->dma,
424				buffer_info->length,
425				buffer_info->next_to_watch,
426				(u64)buffer_info->time_stamp,
427				buffer_info->skb);
428			if (i == tx_ring->next_to_use &&
429				i == tx_ring->next_to_clean)
430				printk(KERN_CONT " NTC/U\n");
431			else if (i == tx_ring->next_to_use)
432				printk(KERN_CONT " NTU\n");
433			else if (i == tx_ring->next_to_clean)
434				printk(KERN_CONT " NTC\n");
435			else
436				printk(KERN_CONT "\n");
437
438			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
439				print_hex_dump(KERN_INFO, "",
440					DUMP_PREFIX_ADDRESS,
441					16, 1, phys_to_virt(buffer_info->dma),
442					buffer_info->length, true);
443		}
444	}
445
446	/* Print RX Rings Summary */
447rx_ring_summary:
448	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
449	printk(KERN_INFO "Queue [NTU] [NTC]\n");
450	for (n = 0; n < adapter->num_rx_queues; n++) {
451		rx_ring = adapter->rx_ring[n];
452		printk(KERN_INFO " %5d %5X %5X\n", n,
453			   rx_ring->next_to_use, rx_ring->next_to_clean);
454	}
455
456	/* Print RX Rings */
457	if (!netif_msg_rx_status(adapter))
458		goto exit;
459
460	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461
462	/* Advanced Receive Descriptor (Read) Format
463	 *    63                                           1        0
464	 *    +-----------------------------------------------------+
465	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
466	 *    +----------------------------------------------+------+
467	 *  8 |       Header Buffer Address [63:1]           |  DD  |
468	 *    +-----------------------------------------------------+
469	 *
470	 *
471	 * Advanced Receive Descriptor (Write-Back) Format
472	 *
473	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
474	 *   +------------------------------------------------------+
475	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
476	 *   | Checksum   Ident  |   |           |    | Type | Type |
477	 *   +------------------------------------------------------+
478	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
479	 *   +------------------------------------------------------+
480	 *   63       48 47    32 31            20 19               0
481	 */
482
483	for (n = 0; n < adapter->num_rx_queues; n++) {
484		rx_ring = adapter->rx_ring[n];
485		printk(KERN_INFO "------------------------------------\n");
486		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
487		printk(KERN_INFO "------------------------------------\n");
488		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
489			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
490			"<-- Adv Rx Read format\n");
491		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
492			"[vl er S cks ln] ---------------- [bi->skb] "
493			"<-- Adv Rx Write-Back format\n");
494
495		for (i = 0; i < rx_ring->count; i++) {
496			buffer_info = &rx_ring->buffer_info[i];
497			rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
498			u0 = (struct my_u0 *)rx_desc;
499			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
500			if (staterr & E1000_RXD_STAT_DD) {
501				/* Descriptor Done */
502				printk(KERN_INFO "RWB[0x%03X]     %016llX "
503					"%016llX ---------------- %p", i,
504					le64_to_cpu(u0->a),
505					le64_to_cpu(u0->b),
506					buffer_info->skb);
507			} else {
508				printk(KERN_INFO "R  [0x%03X]     %016llX "
509					"%016llX %016llX %p", i,
510					le64_to_cpu(u0->a),
511					le64_to_cpu(u0->b),
512					(u64)buffer_info->dma,
513					buffer_info->skb);
514
515				if (netif_msg_pktdata(adapter)) {
516					print_hex_dump(KERN_INFO, "",
517						DUMP_PREFIX_ADDRESS,
518						16, 1,
519						phys_to_virt(buffer_info->dma),
520						IGB_RX_HDR_LEN, true);
521					print_hex_dump(KERN_INFO, "",
522					  DUMP_PREFIX_ADDRESS,
523					  16, 1,
524					  phys_to_virt(
525					    buffer_info->page_dma +
526					    buffer_info->page_offset),
527					  PAGE_SIZE/2, true);
528				}
529			}
530
531			if (i == rx_ring->next_to_use)
532				printk(KERN_CONT " NTU\n");
533			else if (i == rx_ring->next_to_clean)
534				printk(KERN_CONT " NTC\n");
535			else
536				printk(KERN_CONT "\n");
537
538		}
539	}
540
541exit:
542	return;
543}
544
545
546/**
547 * igb_read_clock - read raw cycle counter (to be used by time counter)
548 */
549static cycle_t igb_read_clock(const struct cyclecounter *tc)
550{
551	struct igb_adapter *adapter =
552		container_of(tc, struct igb_adapter, cycles);
553	struct e1000_hw *hw = &adapter->hw;
554	u64 stamp = 0;
555	int shift = 0;
556
557	/*
558	 * The timestamp latches on lowest register read. For the 82580
559	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
560	 * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
561	 */
562	if (hw->mac.type == e1000_82580) {
563		stamp = rd32(E1000_SYSTIMR) >> 8;
564		shift = IGB_82580_TSYNC_SHIFT;
565	}
566
567	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
568	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
569	return stamp;
570}
571
572/**
573 * igb_get_hw_dev - return device
574 * used by hardware layer to print debugging information
575 **/
576struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577{
578	struct igb_adapter *adapter = hw->back;
579	return adapter->netdev;
580}
581
582/**
583 * igb_init_module - Driver Registration Routine
584 *
585 * igb_init_module is the first routine called when the driver is
586 * loaded. All it does is register with the PCI subsystem.
587 **/
588static int __init igb_init_module(void)
589{
590	int ret;
591	printk(KERN_INFO "%s - version %s\n",
592	       igb_driver_string, igb_driver_version);
593
594	printk(KERN_INFO "%s\n", igb_copyright);
595
596#ifdef CONFIG_IGB_DCA
597	dca_register_notify(&dca_notifier);
598#endif
599	ret = pci_register_driver(&igb_driver);
600	return ret;
601}
602
603module_init(igb_init_module);
604
605/**
606 * igb_exit_module - Driver Exit Cleanup Routine
607 *
608 * igb_exit_module is called just before the driver is removed
609 * from memory.
610 **/
611static void __exit igb_exit_module(void)
612{
613#ifdef CONFIG_IGB_DCA
614	dca_unregister_notify(&dca_notifier);
615#endif
616	pci_unregister_driver(&igb_driver);
617}
618
619module_exit(igb_exit_module);
620
621#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
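/* e.g. Q_IDX_82576(0)=0, Q_IDX_82576(1)=8, Q_IDX_82576(2)=1, Q_IDX_82576(3)=9 */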
622/**
623 * igb_cache_ring_register - Descriptor ring to register mapping
624 * @adapter: board private structure to initialize
625 *
626 * Once we know the feature-set enabled for the device, we'll cache
627 * the register offset the descriptor ring is assigned to.
628 **/
629static void igb_cache_ring_register(struct igb_adapter *adapter)
630{
631	int i = 0, j = 0;
632	u32 rbase_offset = adapter->vfs_allocated_count;
633
634	switch (adapter->hw.mac.type) {
635	case e1000_82576:
636		/* The queues are allocated for virtualization such that VF 0
637		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638		 * In order to avoid collision we start at the first free queue
639		 * and continue consuming queues in the same sequence
640		 */
641		if (adapter->vfs_allocated_count) {
642			for (; i < adapter->rss_queues; i++)
643				adapter->rx_ring[i]->reg_idx = rbase_offset +
644				                               Q_IDX_82576(i);
645		}
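		/* fall through - remaining rings use sequential register offsets */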
646	case e1000_82575:
647	case e1000_82580:
648	case e1000_i350:
649	default:
650		for (; i < adapter->num_rx_queues; i++)
651			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
652		for (; j < adapter->num_tx_queues; j++)
653			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
654		break;
655	}
656}
657
658static void igb_free_queues(struct igb_adapter *adapter)
659{
660	int i;
661
662	for (i = 0; i < adapter->num_tx_queues; i++) {
663		kfree(adapter->tx_ring[i]);
664		adapter->tx_ring[i] = NULL;
665	}
666	for (i = 0; i < adapter->num_rx_queues; i++) {
667		kfree(adapter->rx_ring[i]);
668		adapter->rx_ring[i] = NULL;
669	}
670	adapter->num_rx_queues = 0;
671	adapter->num_tx_queues = 0;
672}
673
674/**
675 * igb_alloc_queues - Allocate memory for all rings
676 * @adapter: board private structure to initialize
677 *
678 * We allocate one ring per queue at run-time since we don't know the
679 * number of queues at compile-time.
680 **/
681static int igb_alloc_queues(struct igb_adapter *adapter)
682{
683	struct igb_ring *ring;
684	int i;
685
686	for (i = 0; i < adapter->num_tx_queues; i++) {
687		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
688		if (!ring)
689			goto err;
690		ring->count = adapter->tx_ring_count;
691		ring->queue_index = i;
692		ring->dev = &adapter->pdev->dev;
693		ring->netdev = adapter->netdev;
694		/* For 82575, context index must be unique per ring. */
695		if (adapter->hw.mac.type == e1000_82575)
696			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
697		adapter->tx_ring[i] = ring;
698	}
699
700	for (i = 0; i < adapter->num_rx_queues; i++) {
701		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702		if (!ring)
703			goto err;
704		ring->count = adapter->rx_ring_count;
705		ring->queue_index = i;
706		ring->dev = &adapter->pdev->dev;
707		ring->netdev = adapter->netdev;
708		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
709		/* set flag indicating ring supports SCTP checksum offload */
710		if (adapter->hw.mac.type >= e1000_82576)
711			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
712		adapter->rx_ring[i] = ring;
713	}
714
715	igb_cache_ring_register(adapter);
716
717	return 0;
718
719err:
720	igb_free_queues(adapter);
721
722	return -ENOMEM;
723}
724
725#define IGB_N0_QUEUE -1
726static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
727{
728	u32 msixbm = 0;
729	struct igb_adapter *adapter = q_vector->adapter;
730	struct e1000_hw *hw = &adapter->hw;
731	u32 ivar, index;
732	int rx_queue = IGB_N0_QUEUE;
733	int tx_queue = IGB_N0_QUEUE;
734
735	if (q_vector->rx_ring)
736		rx_queue = q_vector->rx_ring->reg_idx;
737	if (q_vector->tx_ring)
738		tx_queue = q_vector->tx_ring->reg_idx;
739
740	switch (hw->mac.type) {
741	case e1000_82575:
742		/* The 82575 assigns vectors using a bitmask, which matches the
743		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
744		   or more queues to a vector, we write the appropriate bits
745		   into the MSIXBM register for that vector. */
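		/* e.g. rx queue 2 contributes the bit E1000_EICR_RX_QUEUE0 << 2 */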
746		if (rx_queue > IGB_N0_QUEUE)
747			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
748		if (tx_queue > IGB_N0_QUEUE)
749			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
750		if (!adapter->msix_entries && msix_vector == 0)
751			msixbm |= E1000_EIMS_OTHER;
752		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
753		q_vector->eims_value = msixbm;
754		break;
755	case e1000_82576:
756		/* 82576 uses a table-based method for assigning vectors.
757		   Each queue has a single entry in the table to which we write
758		   a vector number along with a "valid" bit.  Sadly, the layout
759		   of the table is somewhat counterintuitive. */
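		/* For example, rx queue 3 uses the low byte of IVAR0[3], while
		   rx queue 11 uses the third byte of that same entry. */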
760		if (rx_queue > IGB_N0_QUEUE) {
761			index = (rx_queue & 0x7);
762			ivar = array_rd32(E1000_IVAR0, index);
763			if (rx_queue < 8) {
764				/* vector goes into low byte of register */
765				ivar = ivar & 0xFFFFFF00;
766				ivar |= msix_vector | E1000_IVAR_VALID;
767			} else {
768				/* vector goes into third byte of register */
769				ivar = ivar & 0xFF00FFFF;
770				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
771			}
772			array_wr32(E1000_IVAR0, index, ivar);
773		}
774		if (tx_queue > IGB_N0_QUEUE) {
775			index = (tx_queue & 0x7);
776			ivar = array_rd32(E1000_IVAR0, index);
777			if (tx_queue < 8) {
778				/* vector goes into second byte of register */
779				ivar = ivar & 0xFFFF00FF;
780				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
781			} else {
782				/* vector goes into high byte of register */
783				ivar = ivar & 0x00FFFFFF;
784				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
785			}
786			array_wr32(E1000_IVAR0, index, ivar);
787		}
788		q_vector->eims_value = 1 << msix_vector;
789		break;
790	case e1000_82580:
791	case e1000_i350:
792		/* 82580 uses the same table-based approach as 82576 but has fewer
793		   entries; as a result, each IVAR entry serves a pair of adjacent queues. */
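		/* For example, rx queues 4 and 5 share IVAR0[2]: queue 4 uses the
		   low byte and queue 5 the third byte. */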
794		if (rx_queue > IGB_N0_QUEUE) {
795			index = (rx_queue >> 1);
796			ivar = array_rd32(E1000_IVAR0, index);
797			if (rx_queue & 0x1) {
798				/* vector goes into third byte of register */
799				ivar = ivar & 0xFF00FFFF;
800				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
801			} else {
802				/* vector goes into low byte of register */
803				ivar = ivar & 0xFFFFFF00;
804				ivar |= msix_vector | E1000_IVAR_VALID;
805			}
806			array_wr32(E1000_IVAR0, index, ivar);
807		}
808		if (tx_queue > IGB_N0_QUEUE) {
809			index = (tx_queue >> 1);
810			ivar = array_rd32(E1000_IVAR0, index);
811			if (tx_queue & 0x1) {
812				/* vector goes into high byte of register */
813				ivar = ivar & 0x00FFFFFF;
814				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
815			} else {
816				/* vector goes into second byte of register */
817				ivar = ivar & 0xFFFF00FF;
818				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
819			}
820			array_wr32(E1000_IVAR0, index, ivar);
821		}
822		q_vector->eims_value = 1 << msix_vector;
823		break;
824	default:
825		BUG();
826		break;
827	}
828
829	/* add q_vector eims value to global eims_enable_mask */
830	adapter->eims_enable_mask |= q_vector->eims_value;
831
832	/* configure q_vector to set itr on first interrupt */
833	q_vector->set_itr = 1;
834}
835
836/**
837 * igb_configure_msix - Configure MSI-X hardware
838 *
839 * igb_configure_msix sets up the hardware to properly
840 * generate MSI-X interrupts.
841 **/
842static void igb_configure_msix(struct igb_adapter *adapter)
843{
844	u32 tmp;
845	int i, vector = 0;
846	struct e1000_hw *hw = &adapter->hw;
847
848	adapter->eims_enable_mask = 0;
849
850	/* set vector for other causes, i.e. link changes */
851	switch (hw->mac.type) {
852	case e1000_82575:
853		tmp = rd32(E1000_CTRL_EXT);
854		/* enable MSI-X PBA support*/
855		tmp |= E1000_CTRL_EXT_PBA_CLR;
856
857		/* Auto-Mask interrupts upon ICR read. */
858		tmp |= E1000_CTRL_EXT_EIAME;
859		tmp |= E1000_CTRL_EXT_IRCA;
860
861		wr32(E1000_CTRL_EXT, tmp);
862
863		/* enable msix_other interrupt */
864		array_wr32(E1000_MSIXBM(0), vector++,
865		                      E1000_EIMS_OTHER);
866		adapter->eims_other = E1000_EIMS_OTHER;
867
868		break;
869
870	case e1000_82576:
871	case e1000_82580:
872	case e1000_i350:
873		/* Turn on MSI-X capability first, or our settings
874		 * won't stick.  And it will take days to debug. */
875		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
876		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
877		                E1000_GPIE_NSICR);
878
879		/* enable msix_other interrupt */
880		adapter->eims_other = 1 << vector;
881		tmp = (vector++ | E1000_IVAR_VALID) << 8;
882
883		wr32(E1000_IVAR_MISC, tmp);
884		break;
885	default:
886		/* do nothing, since nothing else supports MSI-X */
887		break;
888	} /* switch (hw->mac.type) */
889
890	adapter->eims_enable_mask |= adapter->eims_other;
891
892	for (i = 0; i < adapter->num_q_vectors; i++)
893		igb_assign_vector(adapter->q_vector[i], vector++);
894
895	wrfl();
896}
897
898/**
899 * igb_request_msix - Initialize MSI-X interrupts
900 *
901 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
902 * kernel.
903 **/
904static int igb_request_msix(struct igb_adapter *adapter)
905{
906	struct net_device *netdev = adapter->netdev;
907	struct e1000_hw *hw = &adapter->hw;
908	int i, err = 0, vector = 0;
909
910	err = request_irq(adapter->msix_entries[vector].vector,
911	                  igb_msix_other, 0, netdev->name, adapter);
912	if (err)
913		goto out;
914	vector++;
915
916	for (i = 0; i < adapter->num_q_vectors; i++) {
917		struct igb_q_vector *q_vector = adapter->q_vector[i];
918
919		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
920
921		if (q_vector->rx_ring && q_vector->tx_ring)
922			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
923			        q_vector->rx_ring->queue_index);
924		else if (q_vector->tx_ring)
925			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
926			        q_vector->tx_ring->queue_index);
927		else if (q_vector->rx_ring)
928			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
929			        q_vector->rx_ring->queue_index);
930		else
931			sprintf(q_vector->name, "%s-unused", netdev->name);
932
933		err = request_irq(adapter->msix_entries[vector].vector,
934		                  igb_msix_ring, 0, q_vector->name,
935		                  q_vector);
936		if (err)
937			goto out;
938		vector++;
939	}
940
941	igb_configure_msix(adapter);
942	return 0;
943out:
944	return err;
945}
946
947static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
948{
949	if (adapter->msix_entries) {
950		pci_disable_msix(adapter->pdev);
951		kfree(adapter->msix_entries);
952		adapter->msix_entries = NULL;
953	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
954		pci_disable_msi(adapter->pdev);
955	}
956}
957
958/**
959 * igb_free_q_vectors - Free memory allocated for interrupt vectors
960 * @adapter: board private structure to initialize
961 *
962 * This function frees the memory allocated to the q_vectors.  In addition if
963 * NAPI is enabled it will delete any references to the NAPI struct prior
964 * to freeing the q_vector.
965 **/
966static void igb_free_q_vectors(struct igb_adapter *adapter)
967{
968	int v_idx;
969
970	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
971		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
972		adapter->q_vector[v_idx] = NULL;
973		if (!q_vector)
974			continue;
975		netif_napi_del(&q_vector->napi);
976		kfree(q_vector);
977	}
978	adapter->num_q_vectors = 0;
979}
980
981/**
982 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
983 *
984 * This function resets the device so that it has 0 rx queues, tx queues, and
985 * MSI-X interrupts allocated.
986 */
987static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
988{
989	igb_free_queues(adapter);
990	igb_free_q_vectors(adapter);
991	igb_reset_interrupt_capability(adapter);
992}
993
994/**
995 * igb_set_interrupt_capability - set MSI or MSI-X if supported
996 *
997 * Attempt to configure interrupts using the best available
998 * capabilities of the hardware and kernel.
999 **/
1000static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1001{
1002	int err;
1003	int numvecs, i;
1004
1005	/* Number of supported queues. */
1006	adapter->num_rx_queues = adapter->rss_queues;
1007	if (adapter->vfs_allocated_count)
1008		adapter->num_tx_queues = 1;
1009	else
1010		adapter->num_tx_queues = adapter->rss_queues;
1011
1012	/* start with one vector for every rx queue */
1013	numvecs = adapter->num_rx_queues;
1014
1015	/* if tx handler is separate add 1 for every tx queue */
1016	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1017		numvecs += adapter->num_tx_queues;
1018
1019	/* store the number of vectors reserved for queues */
1020	adapter->num_q_vectors = numvecs;
1021
1022	/* add 1 vector for link status interrupts */
1023	numvecs++;
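	/* e.g. 4 RSS queues with queue pairing enabled yields 4 queue vectors
	 * plus this link vector, i.e. 5 MSI-X entries in total */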
1024	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1025					GFP_KERNEL);
1026	if (!adapter->msix_entries)
1027		goto msi_only;
1028
1029	for (i = 0; i < numvecs; i++)
1030		adapter->msix_entries[i].entry = i;
1031
1032	err = pci_enable_msix(adapter->pdev,
1033			      adapter->msix_entries,
1034			      numvecs);
1035	if (err == 0)
1036		goto out;
1037
1038	igb_reset_interrupt_capability(adapter);
1039
1040	/* If we can't do MSI-X, try MSI */
1041msi_only:
1042#ifdef CONFIG_PCI_IOV
1043	/* disable SR-IOV for non MSI-X configurations */
1044	if (adapter->vf_data) {
1045		struct e1000_hw *hw = &adapter->hw;
1046		/* disable iov and allow time for transactions to clear */
1047		pci_disable_sriov(adapter->pdev);
1048		msleep(500);
1049
1050		kfree(adapter->vf_data);
1051		adapter->vf_data = NULL;
1052		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1053		wrfl();
1054		msleep(100);
1055		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1056	}
1057#endif
1058	adapter->vfs_allocated_count = 0;
1059	adapter->rss_queues = 1;
1060	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1061	adapter->num_rx_queues = 1;
1062	adapter->num_tx_queues = 1;
1063	adapter->num_q_vectors = 1;
1064	if (!pci_enable_msi(adapter->pdev))
1065		adapter->flags |= IGB_FLAG_HAS_MSI;
1066out:
1067	/* Notify the stack of the (possibly) reduced queue counts. */
1068	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1069	return netif_set_real_num_rx_queues(adapter->netdev,
1070					    adapter->num_rx_queues);
1071}
1072
1073/**
1074 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1075 * @adapter: board private structure to initialize
1076 *
1077 * We allocate one q_vector per queue interrupt.  If allocation fails we
1078 * return -ENOMEM.
1079 **/
1080static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1081{
1082	struct igb_q_vector *q_vector;
1083	struct e1000_hw *hw = &adapter->hw;
1084	int v_idx;
1085
1086	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1087		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1088		if (!q_vector)
1089			goto err_out;
1090		q_vector->adapter = adapter;
1091		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1092		q_vector->itr_val = IGB_START_ITR;
1093		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1094		adapter->q_vector[v_idx] = q_vector;
1095	}
1096	return 0;
1097
1098err_out:
1099	igb_free_q_vectors(adapter);
1100	return -ENOMEM;
1101}
1102
1103static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1104                                      int ring_idx, int v_idx)
1105{
1106	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1107
1108	q_vector->rx_ring = adapter->rx_ring[ring_idx];
1109	q_vector->rx_ring->q_vector = q_vector;
1110	q_vector->itr_val = adapter->rx_itr_setting;
1111	if (q_vector->itr_val && q_vector->itr_val <= 3)
1112		q_vector->itr_val = IGB_START_ITR;
1113}
1114
1115static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1116                                      int ring_idx, int v_idx)
1117{
1118	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1119
1120	q_vector->tx_ring = adapter->tx_ring[ring_idx];
1121	q_vector->tx_ring->q_vector = q_vector;
1122	q_vector->itr_val = adapter->tx_itr_setting;
1123	if (q_vector->itr_val && q_vector->itr_val <= 3)
1124		q_vector->itr_val = IGB_START_ITR;
1125}
1126
1127/**
1128 * igb_map_ring_to_vector - maps allocated queues to vectors
1129 *
1130 * This function maps the recently allocated queues to vectors.
1131 **/
1132static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1133{
1134	int i;
1135	int v_idx = 0;
1136
1137	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1138	    (adapter->num_q_vectors < adapter->num_tx_queues))
1139		return -ENOMEM;
1140
1141	if (adapter->num_q_vectors >=
1142	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1143		for (i = 0; i < adapter->num_rx_queues; i++)
1144			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1145		for (i = 0; i < adapter->num_tx_queues; i++)
1146			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147	} else {
1148		for (i = 0; i < adapter->num_rx_queues; i++) {
1149			if (i < adapter->num_tx_queues)
1150				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1151			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1152		}
1153		for (; i < adapter->num_tx_queues; i++)
1154			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1155	}
1156	return 0;
1157}
1158
1159/**
1160 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1161 *
1162 * This function initializes the interrupts and allocates all of the queues.
1163 **/
1164static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1165{
1166	struct pci_dev *pdev = adapter->pdev;
1167	int err;
1168
1169	err = igb_set_interrupt_capability(adapter);
1170	if (err)
1171		return err;
1172
1173	err = igb_alloc_q_vectors(adapter);
1174	if (err) {
1175		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1176		goto err_alloc_q_vectors;
1177	}
1178
1179	err = igb_alloc_queues(adapter);
1180	if (err) {
1181		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1182		goto err_alloc_queues;
1183	}
1184
1185	err = igb_map_ring_to_vector(adapter);
1186	if (err) {
1187		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1188		goto err_map_queues;
1189	}
1190
1191
1192	return 0;
1193err_map_queues:
1194	igb_free_queues(adapter);
1195err_alloc_queues:
1196	igb_free_q_vectors(adapter);
1197err_alloc_q_vectors:
1198	igb_reset_interrupt_capability(adapter);
1199	return err;
1200}
1201
1202/**
1203 * igb_request_irq - initialize interrupts
1204 *
1205 * Attempts to configure interrupts using the best available
1206 * capabilities of the hardware and kernel.
1207 **/
1208static int igb_request_irq(struct igb_adapter *adapter)
1209{
1210	struct net_device *netdev = adapter->netdev;
1211	struct pci_dev *pdev = adapter->pdev;
1212	int err = 0;
1213
1214	if (adapter->msix_entries) {
1215		err = igb_request_msix(adapter);
1216		if (!err)
1217			goto request_done;
1218		/* fall back to MSI */
1219		igb_clear_interrupt_scheme(adapter);
1220		if (!pci_enable_msi(adapter->pdev))
1221			adapter->flags |= IGB_FLAG_HAS_MSI;
1222		igb_free_all_tx_resources(adapter);
1223		igb_free_all_rx_resources(adapter);
1224		adapter->num_tx_queues = 1;
1225		adapter->num_rx_queues = 1;
1226		adapter->num_q_vectors = 1;
1227		err = igb_alloc_q_vectors(adapter);
1228		if (err) {
1229			dev_err(&pdev->dev,
1230			        "Unable to allocate memory for vectors\n");
1231			goto request_done;
1232		}
1233		err = igb_alloc_queues(adapter);
1234		if (err) {
1235			dev_err(&pdev->dev,
1236			        "Unable to allocate memory for queues\n");
1237			igb_free_q_vectors(adapter);
1238			goto request_done;
1239		}
1240		igb_setup_all_tx_resources(adapter);
1241		igb_setup_all_rx_resources(adapter);
1242	} else {
1243		igb_assign_vector(adapter->q_vector[0], 0);
1244	}
1245
1246	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1247		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1248				  netdev->name, adapter);
1249		if (!err)
1250			goto request_done;
1251
1252		/* fall back to legacy interrupts */
1253		igb_reset_interrupt_capability(adapter);
1254		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1255	}
1256
1257	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1258			  netdev->name, adapter);
1259
1260	if (err)
1261		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1262			err);
1263
1264request_done:
1265	return err;
1266}
1267
1268static void igb_free_irq(struct igb_adapter *adapter)
1269{
1270	if (adapter->msix_entries) {
1271		int vector = 0, i;
1272
1273		free_irq(adapter->msix_entries[vector++].vector, adapter);
1274
1275		for (i = 0; i < adapter->num_q_vectors; i++) {
1276			struct igb_q_vector *q_vector = adapter->q_vector[i];
1277			free_irq(adapter->msix_entries[vector++].vector,
1278			         q_vector);
1279		}
1280	} else {
1281		free_irq(adapter->pdev->irq, adapter);
1282	}
1283}
1284
1285/**
1286 * igb_irq_disable - Mask off interrupt generation on the NIC
1287 * @adapter: board private structure
1288 **/
1289static void igb_irq_disable(struct igb_adapter *adapter)
1290{
1291	struct e1000_hw *hw = &adapter->hw;
1292
1293	/*
1294	 * we need to be careful when disabling interrupts.  The VFs are also
1295	 * mapped into these registers and so clearing the bits can cause
1296	 * issues on the VF drivers so we only need to clear what we set
1297	 */
1298	if (adapter->msix_entries) {
1299		u32 regval = rd32(E1000_EIAM);
1300		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1301		wr32(E1000_EIMC, adapter->eims_enable_mask);
1302		regval = rd32(E1000_EIAC);
1303		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1304	}
1305
1306	wr32(E1000_IAM, 0);
1307	wr32(E1000_IMC, ~0);
1308	wrfl();
1309	if (adapter->msix_entries) {
1310		int i;
1311		for (i = 0; i < adapter->num_q_vectors; i++)
1312			synchronize_irq(adapter->msix_entries[i].vector);
1313	} else {
1314		synchronize_irq(adapter->pdev->irq);
1315	}
1316}
1317
1318/**
1319 * igb_irq_enable - Enable default interrupt generation settings
1320 * @adapter: board private structure
1321 **/
1322static void igb_irq_enable(struct igb_adapter *adapter)
1323{
1324	struct e1000_hw *hw = &adapter->hw;
1325
1326	if (adapter->msix_entries) {
1327		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1328		u32 regval = rd32(E1000_EIAC);
1329		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1330		regval = rd32(E1000_EIAM);
1331		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1332		wr32(E1000_EIMS, adapter->eims_enable_mask);
1333		if (adapter->vfs_allocated_count) {
1334			wr32(E1000_MBVFIMR, 0xFF);
1335			ims |= E1000_IMS_VMMB;
1336		}
1337		if (adapter->hw.mac.type == e1000_82580)
1338			ims |= E1000_IMS_DRSTA;
1339
1340		wr32(E1000_IMS, ims);
1341	} else {
1342		wr32(E1000_IMS, IMS_ENABLE_MASK |
1343				E1000_IMS_DRSTA);
1344		wr32(E1000_IAM, IMS_ENABLE_MASK |
1345				E1000_IMS_DRSTA);
1346	}
1347}
1348
1349static void igb_update_mng_vlan(struct igb_adapter *adapter)
1350{
1351	struct e1000_hw *hw = &adapter->hw;
1352	u16 vid = adapter->hw.mng_cookie.vlan_id;
1353	u16 old_vid = adapter->mng_vlan_id;
1354
1355	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1356		/* add VID to filter table */
1357		igb_vfta_set(hw, vid, true);
1358		adapter->mng_vlan_id = vid;
1359	} else {
1360		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1361	}
1362
1363	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1364	    (vid != old_vid) &&
1365	    !test_bit(old_vid, adapter->active_vlans)) {
1366		/* remove VID from filter table */
1367		igb_vfta_set(hw, old_vid, false);
1368	}
1369}
1370
1371/**
1372 * igb_release_hw_control - release control of the h/w to f/w
1373 * @adapter: address of board private structure
1374 *
1375 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1376 * For ASF and Pass Through versions of f/w this means that the
1377 * driver is no longer loaded.
1378 *
1379 **/
1380static void igb_release_hw_control(struct igb_adapter *adapter)
1381{
1382	struct e1000_hw *hw = &adapter->hw;
1383	u32 ctrl_ext;
1384
1385	/* Let firmware take over control of h/w */
1386	ctrl_ext = rd32(E1000_CTRL_EXT);
1387	wr32(E1000_CTRL_EXT,
1388			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1389}
1390
1391/**
1392 * igb_get_hw_control - get control of the h/w from f/w
1393 * @adapter: address of board private structure
1394 *
1395 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1396 * For ASF and Pass Through versions of f/w this means that
1397 * the driver is loaded.
1398 *
1399 **/
1400static void igb_get_hw_control(struct igb_adapter *adapter)
1401{
1402	struct e1000_hw *hw = &adapter->hw;
1403	u32 ctrl_ext;
1404
1405	/* Let firmware know the driver has taken over */
1406	ctrl_ext = rd32(E1000_CTRL_EXT);
1407	wr32(E1000_CTRL_EXT,
1408			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1409}
1410
1411/**
1412 * igb_configure - configure the hardware for RX and TX
1413 * @adapter: private board structure
1414 **/
1415static void igb_configure(struct igb_adapter *adapter)
1416{
1417	struct net_device *netdev = adapter->netdev;
1418	int i;
1419
1420	igb_get_hw_control(adapter);
1421	igb_set_rx_mode(netdev);
1422
1423	igb_restore_vlan(adapter);
1424
1425	igb_setup_tctl(adapter);
1426	igb_setup_mrqc(adapter);
1427	igb_setup_rctl(adapter);
1428
1429	igb_configure_tx(adapter);
1430	igb_configure_rx(adapter);
1431
1432	igb_rx_fifo_flush_82575(&adapter->hw);
1433
1434	/* call igb_desc_unused which always leaves
1435	 * at least 1 descriptor unused to make sure
1436	 * next_to_use != next_to_clean */
1437	for (i = 0; i < adapter->num_rx_queues; i++) {
1438		struct igb_ring *ring = adapter->rx_ring[i];
1439		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1440	}
1441}
1442
1443/**
1444 * igb_power_up_link - Power up the phy/serdes link
1445 * @adapter: address of board private structure
1446 **/
1447void igb_power_up_link(struct igb_adapter *adapter)
1448{
1449	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1450		igb_power_up_phy_copper(&adapter->hw);
1451	else
1452		igb_power_up_serdes_link_82575(&adapter->hw);
1453}
1454
1455/**
1456 * igb_power_down_link - Power down the phy/serdes link
1457 * @adapter: address of board private structure
1458 */
1459static void igb_power_down_link(struct igb_adapter *adapter)
1460{
1461	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1462		igb_power_down_phy_copper_82575(&adapter->hw);
1463	else
1464		igb_shutdown_serdes_link_82575(&adapter->hw);
1465}
1466
1467/**
1468 * igb_up - Open the interface and prepare it to handle traffic
1469 * @adapter: board private structure
1470 **/
1471int igb_up(struct igb_adapter *adapter)
1472{
1473	struct e1000_hw *hw = &adapter->hw;
1474	int i;
1475
1476	/* hardware has been reset, we need to reload some things */
1477	igb_configure(adapter);
1478
1479	clear_bit(__IGB_DOWN, &adapter->state);
1480
1481	for (i = 0; i < adapter->num_q_vectors; i++) {
1482		struct igb_q_vector *q_vector = adapter->q_vector[i];
1483		napi_enable(&q_vector->napi);
1484	}
1485	if (adapter->msix_entries)
1486		igb_configure_msix(adapter);
1487	else
1488		igb_assign_vector(adapter->q_vector[0], 0);
1489
1490	/* Clear any pending interrupts. */
1491	rd32(E1000_ICR);
1492	igb_irq_enable(adapter);
1493
1494	/* notify VFs that reset has been completed */
1495	if (adapter->vfs_allocated_count) {
1496		u32 reg_data = rd32(E1000_CTRL_EXT);
1497		reg_data |= E1000_CTRL_EXT_PFRSTD;
1498		wr32(E1000_CTRL_EXT, reg_data);
1499	}
1500
1501	netif_tx_start_all_queues(adapter->netdev);
1502
1503	/* start the watchdog. */
1504	hw->mac.get_link_status = 1;
1505	schedule_work(&adapter->watchdog_task);
1506
1507	return 0;
1508}
1509
1510void igb_down(struct igb_adapter *adapter)
1511{
1512	struct net_device *netdev = adapter->netdev;
1513	struct e1000_hw *hw = &adapter->hw;
1514	u32 tctl, rctl;
1515	int i;
1516
1517	/* signal that we're down so the interrupt handler does not
1518	 * reschedule our watchdog timer */
1519	set_bit(__IGB_DOWN, &adapter->state);
1520
1521	/* disable receives in the hardware */
1522	rctl = rd32(E1000_RCTL);
1523	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1524	/* flush and sleep below */
1525
1526	netif_tx_stop_all_queues(netdev);
1527
1528	/* disable transmits in the hardware */
1529	tctl = rd32(E1000_TCTL);
1530	tctl &= ~E1000_TCTL_EN;
1531	wr32(E1000_TCTL, tctl);
1532	/* flush both disables and wait for them to finish */
1533	wrfl();
1534	msleep(10);
1535
1536	for (i = 0; i < adapter->num_q_vectors; i++) {
1537		struct igb_q_vector *q_vector = adapter->q_vector[i];
1538		napi_disable(&q_vector->napi);
1539	}
1540
1541	igb_irq_disable(adapter);
1542
1543	del_timer_sync(&adapter->watchdog_timer);
1544	del_timer_sync(&adapter->phy_info_timer);
1545
1546	netif_carrier_off(netdev);
1547
1548	/* record the stats before reset*/
1549	spin_lock(&adapter->stats64_lock);
1550	igb_update_stats(adapter, &adapter->stats64);
1551	spin_unlock(&adapter->stats64_lock);
1552
1553	adapter->link_speed = 0;
1554	adapter->link_duplex = 0;
1555
1556	if (!pci_channel_offline(adapter->pdev))
1557		igb_reset(adapter);
1558	igb_clean_all_tx_rings(adapter);
1559	igb_clean_all_rx_rings(adapter);
1560#ifdef CONFIG_IGB_DCA
1561
1562	/* since we reset the hardware DCA settings were cleared */
1563	igb_setup_dca(adapter);
1564#endif
1565}
1566
1567void igb_reinit_locked(struct igb_adapter *adapter)
1568{
1569	WARN_ON(in_interrupt());
1570	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1571		msleep(1);
1572	igb_down(adapter);
1573	igb_up(adapter);
1574	clear_bit(__IGB_RESETTING, &adapter->state);
1575}
1576
1577void igb_reset(struct igb_adapter *adapter)
1578{
1579	struct pci_dev *pdev = adapter->pdev;
1580	struct e1000_hw *hw = &adapter->hw;
1581	struct e1000_mac_info *mac = &hw->mac;
1582	struct e1000_fc_info *fc = &hw->fc;
1583	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1584	u16 hwm;
1585
1586	/* Repartition PBA for MTUs greater than 9k.
1587	 * CTRL.RST is required for this to take effect.
1588	 */
1589	switch (mac->type) {
1590	case e1000_i350:
1591	case e1000_82580:
1592		pba = rd32(E1000_RXPBS);
1593		pba = igb_rxpbs_adjust_82580(pba);
1594		break;
1595	case e1000_82576:
1596		pba = rd32(E1000_RXPBS);
1597		pba &= E1000_RXPBS_SIZE_MASK_82576;
1598		break;
1599	case e1000_82575:
1600	default:
1601		pba = E1000_PBA_34K;
1602		break;
1603	}
1604
1605	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1606	    (mac->type < e1000_82576)) {
1607		/* adjust PBA for jumbo frames */
1608		wr32(E1000_PBA, pba);
1609
1610		/* To maintain wire speed transmits, the Tx FIFO should be
1611		 * large enough to accommodate two full transmit packets,
1612		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1613		 * the Rx FIFO should be large enough to accommodate at least
1614		 * one full receive packet and is similarly rounded up and
1615		 * expressed in KB. */
1616		pba = rd32(E1000_PBA);
1617		/* upper 16 bits has Tx packet buffer allocation size in KB */
1618		tx_space = pba >> 16;
1619		/* lower 16 bits has Rx packet buffer allocation size in KB */
1620		pba &= 0xffff;
1621		/* the Tx FIFO also stores 16 bytes of information about the transmit,
1622		 * but doesn't include the Ethernet FCS because hardware appends it */
1623		min_tx_space = (adapter->max_frame_size +
1624				sizeof(union e1000_adv_tx_desc) -
1625				ETH_FCS_LEN) * 2;
1626		min_tx_space = ALIGN(min_tx_space, 1024);
1627		min_tx_space >>= 10;
1628		/* software strips receive CRC, so leave room for it */
1629		min_rx_space = adapter->max_frame_size;
1630		min_rx_space = ALIGN(min_rx_space, 1024);
1631		min_rx_space >>= 10;
1632
1633		/* If current Tx allocation is less than the min Tx FIFO size,
1634		 * and the min Tx FIFO size is less than the current Rx FIFO
1635		 * allocation, take space away from current Rx allocation */
1636		if (tx_space < min_tx_space &&
1637		    ((min_tx_space - tx_space) < pba)) {
1638			pba = pba - (min_tx_space - tx_space);
1639
1640			/* if short on rx space, rx wins and must trump tx
1641			 * adjustment */
1642			if (pba < min_rx_space)
1643				pba = min_rx_space;
1644		}
1645		wr32(E1000_PBA, pba);
1646	}
1647
1648	/* flow control settings */
1649	/* The high water mark must be low enough to fit one full frame
1650	 * (or the size used for early receive) above it in the Rx FIFO.
1651	 * Set it to the lower of:
1652	 * - 90% of the Rx FIFO size, or
1653	 * - the full Rx FIFO size minus two full frames */
1654	hwm = min(((pba << 10) * 9 / 10),
1655			((pba << 10) - 2 * adapter->max_frame_size));
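	/* e.g. assuming a 34 KB PBA and a 1522 byte max frame this selects
	 * min(31334, 31772) = 31334, which is rounded down to 16-byte
	 * granularity just below */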
1656
1657	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1658	fc->low_water = fc->high_water - 16;
1659	fc->pause_time = 0xFFFF;
1660	fc->send_xon = 1;
1661	fc->current_mode = fc->requested_mode;
1662
1663	/* disable receive for all VFs and wait one second */
1664	if (adapter->vfs_allocated_count) {
1665		int i;
1666		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1667			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1668
1669		/* ping all the active vfs to let them know we are going down */
1670		igb_ping_all_vfs(adapter);
1671
1672		/* disable transmits and receives */
1673		wr32(E1000_VFRE, 0);
1674		wr32(E1000_VFTE, 0);
1675	}
1676
1677	/* Allow time for pending master requests to run */
1678	hw->mac.ops.reset_hw(hw);
1679	wr32(E1000_WUC, 0);
1680
1681	if (hw->mac.ops.init_hw(hw))
1682		dev_err(&pdev->dev, "Hardware Error\n");
1683	if (hw->mac.type > e1000_82580) {
1684		if (adapter->flags & IGB_FLAG_DMAC) {
1685			u32 reg;
1686
1687			/*
1688			 * DMA Coalescing high water mark needs to be higher
1689			 * than the Rx threshold.  The Rx threshold is
1690			 * currently pba - 6, so we should use a high water
1691			 * mark of pba - 4. */
1692			hwm = (pba - 4) << 10;
1693
1694			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1695			       & E1000_DMACR_DMACTHR_MASK);
1696
1697			/* transition to L0s or L1 if available */
1698			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1699
1700			/* watchdog timer = ~1000 usec, counted in 32 usec intervals */
1701			reg |= (1000 >> 5);
1702			wr32(E1000_DMACR, reg);
1703
1704			/* no lower threshold to disable coalescing (smart FIFO)
1705			 * - UTRESH=0 */
1706			wr32(E1000_DMCRTRH, 0);
1707
1708			/* write the high water mark computed above to FCRTC */
1709			wr32(E1000_FCRTC, hwm);
1710
1711			/*
1712			 * This sets the time to wait before requesting transition
1713			 * to a low power state to the number of usecs needed to
1714			 * receive a 512 byte frame at gigabit line rate
1715			 */
1716			reg = rd32(E1000_DMCTLX);
1717			reg |= IGB_DMCTLX_DCFLUSH_DIS;
1718
1719			/* Delay 255 usec before entering Lx state. */
1720			reg |= 0xFF;
1721			wr32(E1000_DMCTLX, reg);
1722
1723			/* free space in Tx packet buffer to wake from DMAC */
1724			wr32(E1000_DMCTXTH,
1725			     (IGB_MIN_TXPBSIZE -
1726			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
1727			     >> 6);
1728
1729			/* make low power state decision controlled by DMAC */
1730			reg = rd32(E1000_PCIEMISC);
1731			reg |= E1000_PCIEMISC_LX_DECISION;
1732			wr32(E1000_PCIEMISC, reg);
1733		} /* end if IGB_FLAG_DMAC set */
1734	}
1735	if (hw->mac.type == e1000_82580) {
1736		u32 reg = rd32(E1000_PCIEMISC);
1737		wr32(E1000_PCIEMISC,
1738		                reg & ~E1000_PCIEMISC_LX_DECISION);
1739	}
1740	if (!netif_running(adapter->netdev))
1741		igb_power_down_link(adapter);
1742
1743	igb_update_mng_vlan(adapter);
1744
1745	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747
1748	igb_get_phy_info(hw);
1749}
1750
1751static u32 igb_fix_features(struct net_device *netdev, u32 features)
1752{
1753	/*
1754	 * Since there is no support for separate rx/tx vlan accel
1755	 * enable/disable make sure tx flag is always in same state as rx.
1756	 */
1757	if (features & NETIF_F_HW_VLAN_RX)
1758		features |= NETIF_F_HW_VLAN_TX;
1759	else
1760		features &= ~NETIF_F_HW_VLAN_TX;
1761
1762	return features;
1763}
1764
1765static int igb_set_features(struct net_device *netdev, u32 features)
1766{
1767	struct igb_adapter *adapter = netdev_priv(netdev);
1768	int i;
1769	u32 changed = netdev->features ^ features;
1770
1771	for (i = 0; i < adapter->num_rx_queues; i++) {
1772		if (features & NETIF_F_RXCSUM)
1773			adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1774		else
1775			adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1776	}
1777
1778	if (changed & NETIF_F_HW_VLAN_RX)
1779		igb_vlan_mode(netdev, features);
1780
1781	return 0;
1782}
1783
1784static const struct net_device_ops igb_netdev_ops = {
1785	.ndo_open		= igb_open,
1786	.ndo_stop		= igb_close,
1787	.ndo_start_xmit		= igb_xmit_frame_adv,
1788	.ndo_get_stats64	= igb_get_stats64,
1789	.ndo_set_rx_mode	= igb_set_rx_mode,
1790	.ndo_set_mac_address	= igb_set_mac,
1791	.ndo_change_mtu		= igb_change_mtu,
1792	.ndo_do_ioctl		= igb_ioctl,
1793	.ndo_tx_timeout		= igb_tx_timeout,
1794	.ndo_validate_addr	= eth_validate_addr,
1795	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1796	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1797	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1798	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1799	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1800	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1801#ifdef CONFIG_NET_POLL_CONTROLLER
1802	.ndo_poll_controller	= igb_netpoll,
1803#endif
1804	.ndo_fix_features	= igb_fix_features,
1805	.ndo_set_features	= igb_set_features,
1806};
1807
1808/**
1809 * igb_probe - Device Initialization Routine
1810 * @pdev: PCI device information struct
1811 * @ent: entry in igb_pci_tbl
1812 *
1813 * Returns 0 on success, negative on failure
1814 *
1815 * igb_probe initializes an adapter identified by a pci_dev structure.
1816 * The OS initialization, configuring of the adapter private structure,
1817 * and a hardware reset occur.
1818 **/
1819static int __devinit igb_probe(struct pci_dev *pdev,
1820			       const struct pci_device_id *ent)
1821{
1822	struct net_device *netdev;
1823	struct igb_adapter *adapter;
1824	struct e1000_hw *hw;
1825	u16 eeprom_data = 0;
1826	s32 ret_val;
1827	static int global_quad_port_a; /* global quad port a indication */
1828	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1829	unsigned long mmio_start, mmio_len;
1830	int err, pci_using_dac;
1831	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1832	u8 part_str[E1000_PBANUM_LENGTH];
1833
1834	/* Catch broken hardware that put the wrong VF device ID in
1835	 * the PCIe SR-IOV capability.
1836	 */
1837	if (pdev->is_virtfn) {
1838		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1839		     pci_name(pdev), pdev->vendor, pdev->device);
1840		return -EINVAL;
1841	}
1842
1843	err = pci_enable_device_mem(pdev);
1844	if (err)
1845		return err;
1846
1847	pci_using_dac = 0;
1848	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1849	if (!err) {
1850		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1851		if (!err)
1852			pci_using_dac = 1;
1853	} else {
1854		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1855		if (err) {
1856			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1857			if (err) {
1858				dev_err(&pdev->dev, "No usable DMA "
1859					"configuration, aborting\n");
1860				goto err_dma;
1861			}
1862		}
1863	}
1864
1865	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1866	                                   IORESOURCE_MEM),
1867	                                   igb_driver_name);
1868	if (err)
1869		goto err_pci_reg;
1870
1871	pci_enable_pcie_error_reporting(pdev);
1872
1873	pci_set_master(pdev);
1874	pci_save_state(pdev);
1875
1876	err = -ENOMEM;
1877	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1878	                           IGB_ABS_MAX_TX_QUEUES);
1879	if (!netdev)
1880		goto err_alloc_etherdev;
1881
1882	SET_NETDEV_DEV(netdev, &pdev->dev);
1883
1884	pci_set_drvdata(pdev, netdev);
1885	adapter = netdev_priv(netdev);
1886	adapter->netdev = netdev;
1887	adapter->pdev = pdev;
1888	hw = &adapter->hw;
1889	hw->back = adapter;
1890	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1891
1892	mmio_start = pci_resource_start(pdev, 0);
1893	mmio_len = pci_resource_len(pdev, 0);
1894
1895	err = -EIO;
1896	hw->hw_addr = ioremap(mmio_start, mmio_len);
1897	if (!hw->hw_addr)
1898		goto err_ioremap;
1899
1900	netdev->netdev_ops = &igb_netdev_ops;
1901	igb_set_ethtool_ops(netdev);
1902	netdev->watchdog_timeo = 5 * HZ;
1903
1904	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1905
1906	netdev->mem_start = mmio_start;
1907	netdev->mem_end = mmio_start + mmio_len;
1908
1909	/* PCI config space info */
1910	hw->vendor_id = pdev->vendor;
1911	hw->device_id = pdev->device;
1912	hw->revision_id = pdev->revision;
1913	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1914	hw->subsystem_device_id = pdev->subsystem_device;
1915
1916	/* Copy the default MAC, PHY and NVM function pointers */
1917	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1918	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1919	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1920	/* Initialize skew-specific constants */
1921	err = ei->get_invariants(hw);
1922	if (err)
1923		goto err_sw_init;
1924
1925	/* setup the private structure */
1926	err = igb_sw_init(adapter);
1927	if (err)
1928		goto err_sw_init;
1929
1930	igb_get_bus_info_pcie(hw);
1931
1932	hw->phy.autoneg_wait_to_complete = false;
1933
1934	/* Copper options */
1935	if (hw->phy.media_type == e1000_media_type_copper) {
1936		hw->phy.mdix = AUTO_ALL_MODES;
1937		hw->phy.disable_polarity_correction = false;
1938		hw->phy.ms_type = e1000_ms_hw_default;
1939	}
1940
1941	if (igb_check_reset_block(hw))
1942		dev_info(&pdev->dev,
1943			"PHY reset is blocked due to SOL/IDER session.\n");
1944
1945	netdev->hw_features = NETIF_F_SG |
1946			   NETIF_F_IP_CSUM |
1947			   NETIF_F_IPV6_CSUM |
1948			   NETIF_F_TSO |
1949			   NETIF_F_TSO6 |
1950			   NETIF_F_RXCSUM |
1951			   NETIF_F_HW_VLAN_RX;
1952
1953	netdev->features = netdev->hw_features |
1954			   NETIF_F_HW_VLAN_TX |
1955			   NETIF_F_HW_VLAN_FILTER;
1956
1957	netdev->vlan_features |= NETIF_F_TSO;
1958	netdev->vlan_features |= NETIF_F_TSO6;
1959	netdev->vlan_features |= NETIF_F_IP_CSUM;
1960	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1961	netdev->vlan_features |= NETIF_F_SG;
1962
1963	if (pci_using_dac) {
1964		netdev->features |= NETIF_F_HIGHDMA;
1965		netdev->vlan_features |= NETIF_F_HIGHDMA;
1966	}
1967
1968	if (hw->mac.type >= e1000_82576) {
1969		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1970		netdev->features |= NETIF_F_SCTP_CSUM;
1971	}
1972
1973	netdev->priv_flags |= IFF_UNICAST_FLT;
1974
1975	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1976
1977	/* before reading the NVM, reset the controller to put the device in a
1978	 * known good starting state */
1979	hw->mac.ops.reset_hw(hw);
1980
1981	/* make sure the NVM is good */
1982	if (hw->nvm.ops.validate(hw) < 0) {
1983		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1984		err = -EIO;
1985		goto err_eeprom;
1986	}
1987
1988	/* copy the MAC address out of the NVM */
1989	if (hw->mac.ops.read_mac_addr(hw))
1990		dev_err(&pdev->dev, "NVM Read Error\n");
1991
1992	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1993	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1994
1995	if (!is_valid_ether_addr(netdev->perm_addr)) {
1996		dev_err(&pdev->dev, "Invalid MAC Address\n");
1997		err = -EIO;
1998		goto err_eeprom;
1999	}
2000
2001	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2002	            (unsigned long) adapter);
2003	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2004	            (unsigned long) adapter);
2005
2006	INIT_WORK(&adapter->reset_task, igb_reset_task);
2007	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2008
2009	/* Initialize link properties that are user-changeable */
2010	adapter->fc_autoneg = true;
2011	hw->mac.autoneg = true;
2012	hw->phy.autoneg_advertised = 0x2f;
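	/* 0x2f advertises 10/100 Mb/s at half and full duplex plus
	 * 1000 Mb/s full duplex; 1000 half duplex is not advertised */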
2013
2014	hw->fc.requested_mode = e1000_fc_default;
2015	hw->fc.current_mode = e1000_fc_default;
2016
2017	igb_validate_mdi_setting(hw);
2018
2019	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2020	 * enable the ACPI Magic Packet filter
2021	 */
2022
2023	if (hw->bus.func == 0)
2024		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2025	else if (hw->mac.type >= e1000_82580)
2026		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2027		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2028		                 &eeprom_data);
2029	else if (hw->bus.func == 1)
2030		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2031
2032	if (eeprom_data & eeprom_apme_mask)
2033		adapter->eeprom_wol |= E1000_WUFC_MAG;
2034
2035	/* now that we have the eeprom settings, apply the special cases where
2036	 * the eeprom may be wrong or the board simply won't support wake on
2037	 * lan on a particular port */
2038	switch (pdev->device) {
2039	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2040		adapter->eeprom_wol = 0;
2041		break;
2042	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2043	case E1000_DEV_ID_82576_FIBER:
2044	case E1000_DEV_ID_82576_SERDES:
2045		/* Wake events only supported on port A for dual fiber
2046		 * regardless of eeprom setting */
2047		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2048			adapter->eeprom_wol = 0;
2049		break;
2050	case E1000_DEV_ID_82576_QUAD_COPPER:
2051	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2052		/* if quad port adapter, disable WoL on all but port A */
2053		if (global_quad_port_a != 0)
2054			adapter->eeprom_wol = 0;
2055		else
2056			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2057		/* Reset for multiple quad port adapters */
2058		if (++global_quad_port_a == 4)
2059			global_quad_port_a = 0;
2060		break;
2061	}
2062
2063	/* initialize the wol settings based on the eeprom settings */
2064	adapter->wol = adapter->eeprom_wol;
2065	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2066
2067	/* reset the hardware with the new settings */
2068	igb_reset(adapter);
2069
2070	/* let the f/w know that the h/w is now under the control of the
2071	 * driver. */
2072	igb_get_hw_control(adapter);
2073
2074	strcpy(netdev->name, "eth%d");
2075	err = register_netdev(netdev);
2076	if (err)
2077		goto err_register;
2078
2079	igb_vlan_mode(netdev, netdev->features);
2080
2081	/* carrier off reporting is important to ethtool even BEFORE open */
2082	netif_carrier_off(netdev);
2083
2084#ifdef CONFIG_IGB_DCA
2085	if (dca_add_requester(&pdev->dev) == 0) {
2086		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2087		dev_info(&pdev->dev, "DCA enabled\n");
2088		igb_setup_dca(adapter);
2089	}
2090
2091#endif
2092	/* do hw tstamp init after resetting */
2093	igb_init_hw_timer(adapter);
2094
2095	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2096	/* print bus type/speed/width info */
2097	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2098		 netdev->name,
2099		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2100		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2101		                                            "unknown"),
2102		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2103		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2104		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2105		   "unknown"),
2106		 netdev->dev_addr);
2107
2108	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2109	if (ret_val)
2110		strcpy(part_str, "Unknown");
2111	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2112	dev_info(&pdev->dev,
2113		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2114		adapter->msix_entries ? "MSI-X" :
2115		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2116		adapter->num_rx_queues, adapter->num_tx_queues);
2117	switch (hw->mac.type) {
2118	case e1000_i350:
2119		igb_set_eee_i350(hw);
2120		break;
2121	default:
2122		break;
2123	}
2124	return 0;
2125
2126err_register:
2127	igb_release_hw_control(adapter);
2128err_eeprom:
2129	if (!igb_check_reset_block(hw))
2130		igb_reset_phy(hw);
2131
2132	if (hw->flash_address)
2133		iounmap(hw->flash_address);
2134err_sw_init:
2135	igb_clear_interrupt_scheme(adapter);
2136	iounmap(hw->hw_addr);
2137err_ioremap:
2138	free_netdev(netdev);
2139err_alloc_etherdev:
2140	pci_release_selected_regions(pdev,
2141	                             pci_select_bars(pdev, IORESOURCE_MEM));
2142err_pci_reg:
2143err_dma:
2144	pci_disable_device(pdev);
2145	return err;
2146}
2147
2148/**
2149 * igb_remove - Device Removal Routine
2150 * @pdev: PCI device information struct
2151 *
2152 * igb_remove is called by the PCI subsystem to alert the driver
2153 * that it should release a PCI device.  This could be caused by a
2154 * Hot-Plug event, or because the driver is going to be removed from
2155 * memory.
2156 **/
2157static void __devexit igb_remove(struct pci_dev *pdev)
2158{
2159	struct net_device *netdev = pci_get_drvdata(pdev);
2160	struct igb_adapter *adapter = netdev_priv(netdev);
2161	struct e1000_hw *hw = &adapter->hw;
2162
2163	/*
2164	 * The watchdog timer may be rescheduled, so explicitly
2165	 * disable watchdog from being rescheduled.
2166	 */
2167	set_bit(__IGB_DOWN, &adapter->state);
2168	del_timer_sync(&adapter->watchdog_timer);
2169	del_timer_sync(&adapter->phy_info_timer);
2170
2171	cancel_work_sync(&adapter->reset_task);
2172	cancel_work_sync(&adapter->watchdog_task);
2173
2174#ifdef CONFIG_IGB_DCA
2175	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2176		dev_info(&pdev->dev, "DCA disabled\n");
2177		dca_remove_requester(&pdev->dev);
2178		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2179		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2180	}
2181#endif
2182
2183	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2184	 * would have already happened in close and is redundant. */
2185	igb_release_hw_control(adapter);
2186
2187	unregister_netdev(netdev);
2188
2189	igb_clear_interrupt_scheme(adapter);
2190
2191#ifdef CONFIG_PCI_IOV
2192	/* reclaim resources allocated to VFs */
2193	if (adapter->vf_data) {
2194		/* disable iov and allow time for transactions to clear */
2195		pci_disable_sriov(pdev);
2196		msleep(500);
2197
2198		kfree(adapter->vf_data);
2199		adapter->vf_data = NULL;
2200		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2201		wrfl();
2202		msleep(100);
2203		dev_info(&pdev->dev, "IOV Disabled\n");
2204	}
2205#endif
2206
2207	iounmap(hw->hw_addr);
2208	if (hw->flash_address)
2209		iounmap(hw->flash_address);
2210	pci_release_selected_regions(pdev,
2211	                             pci_select_bars(pdev, IORESOURCE_MEM));
2212
2213	free_netdev(netdev);
2214
2215	pci_disable_pcie_error_reporting(pdev);
2216
2217	pci_disable_device(pdev);
2218}
2219
2220/**
2221 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2222 * @adapter: board private structure to initialize
2223 *
2224 * This function initializes the vf specific data storage and then attempts to
2225 * allocate the VFs.  The reason for ordering it this way is because it is much
2226 * allocate the VFs.  The reason for this ordering is that it is much
2227 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2228 **/
2229static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2230{
2231#ifdef CONFIG_PCI_IOV
2232	struct pci_dev *pdev = adapter->pdev;
2233
2234	if (adapter->vfs_allocated_count) {
2235		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2236		                           sizeof(struct vf_data_storage),
2237		                           GFP_KERNEL);
2238		/* if allocation failed then we do not support SR-IOV */
2239		if (!adapter->vf_data) {
2240			adapter->vfs_allocated_count = 0;
2241			dev_err(&pdev->dev, "Unable to allocate memory for VF "
2242			        "Data Storage\n");
2243		}
2244	}
2245
2246	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2247		kfree(adapter->vf_data);
2248		adapter->vf_data = NULL;
2249#endif /* CONFIG_PCI_IOV */
2250		adapter->vfs_allocated_count = 0;
2251#ifdef CONFIG_PCI_IOV
2252	} else {
2253		unsigned char mac_addr[ETH_ALEN];
2254		int i;
2255		dev_info(&pdev->dev, "%d vfs allocated\n",
2256		         adapter->vfs_allocated_count);
2257		for (i = 0; i < adapter->vfs_allocated_count; i++) {
2258			random_ether_addr(mac_addr);
2259			igb_set_vf_mac(adapter, i, mac_addr);
2260		}
2261		/* DMA Coalescing is not supported in IOV mode. */
2262		if (adapter->flags & IGB_FLAG_DMAC)
2263			adapter->flags &= ~IGB_FLAG_DMAC;
2264	}
2265#endif /* CONFIG_PCI_IOV */
2266}
2267
2268
2269/**
2270 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2271 * @adapter: board private structure to initialize
2272 *
2273 * igb_init_hw_timer initializes the function pointer and values for the hw
2274 * timer found in hardware.
2275 **/
2276static void igb_init_hw_timer(struct igb_adapter *adapter)
2277{
2278	struct e1000_hw *hw = &adapter->hw;
2279
2280	switch (hw->mac.type) {
2281	case e1000_i350:
2282	case e1000_82580:
2283		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2284		adapter->cycles.read = igb_read_clock;
2285		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2286		adapter->cycles.mult = 1;
2287		/*
2288		 * The 82580 timesync updates the system timer in 8ns increments,
2289		 * and the value cannot be shifted.  Instead we need to shift
2290		 * the registers to generate a 64bit timer value.  As a result
2291		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2292		 * 24 in order to generate a larger value for synchronization.
2293		 */
2294		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2295		/* disable system timer temporarily by setting bit 31 */
2296		wr32(E1000_TSAUXC, 0x80000000);
2297		wrfl();
2298
2299		/* Set registers so that rollover occurs soon to test this. */
2300		wr32(E1000_SYSTIMR, 0x00000000);
2301		wr32(E1000_SYSTIML, 0x80000000);
2302		wr32(E1000_SYSTIMH, 0x000000FF);
2303		wrfl();
2304
2305		/* enable system timer by clearing bit 31 */
2306		wr32(E1000_TSAUXC, 0x0);
2307		wrfl();
2308
2309		timecounter_init(&adapter->clock,
2310				 &adapter->cycles,
2311				 ktime_to_ns(ktime_get_real()));
2312		/*
2313		 * Synchronize our NIC clock against system wall clock. NIC
2314		 * time stamp reading requires ~3us per sample, each sample
2315		 * was pretty stable even under load => only require 10
2316		 * samples for each offset comparison.
2317		 */
2318		memset(&adapter->compare, 0, sizeof(adapter->compare));
2319		adapter->compare.source = &adapter->clock;
2320		adapter->compare.target = ktime_get_real;
2321		adapter->compare.num_samples = 10;
2322		timecompare_update(&adapter->compare, 0);
2323		break;
2324	case e1000_82576:
2325		/*
2326		 * Initialize hardware timer: we keep it running just in case
2327		 * that some program needs it later on.
2328		 */
2329		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2330		adapter->cycles.read = igb_read_clock;
2331		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2332		adapter->cycles.mult = 1;
2333		/**
2334		 * Scale the NIC clock cycle by a large factor so that
2335		 * relatively small clock corrections can be added or
2336		 * subtracted at each clock tick. The drawbacks of a large
2337		 * factor are a) that the clock register overflows more quickly
2338		 * (not such a big deal) and b) that the increment per tick has
2339		 * to fit into 24 bits.  As a result we need to use a shift of
2340		 * 19 so we can fit a value of 16 into the TIMINCA register.
2341		 */
2342		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2343		wr32(E1000_TIMINCA,
2344		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2345		                (16 << IGB_82576_TSYNC_SHIFT));
2346
2347		/* Set registers so that rollover occurs soon to test this. */
2348		wr32(E1000_SYSTIML, 0x00000000);
2349		wr32(E1000_SYSTIMH, 0xFF800000);
2350		wrfl();
2351
2352		timecounter_init(&adapter->clock,
2353				 &adapter->cycles,
2354				 ktime_to_ns(ktime_get_real()));
2355		/*
2356		 * Synchronize our NIC clock against system wall clock. NIC
2357		 * time stamp reading requires ~3us per sample, each sample
2358		 * was pretty stable even under load => only require 10
2359		 * samples for each offset comparison.
2360		 */
2361		memset(&adapter->compare, 0, sizeof(adapter->compare));
2362		adapter->compare.source = &adapter->clock;
2363		adapter->compare.target = ktime_get_real;
2364		adapter->compare.num_samples = 10;
2365		timecompare_update(&adapter->compare, 0);
2366		break;
2367	case e1000_82575:
2368		/* 82575 does not support timesync */
2369	default:
2370		break;
2371	}
2372
2373}
2374
2375/**
2376 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2377 * @adapter: board private structure to initialize
2378 *
2379 * igb_sw_init initializes the Adapter private data structure.
2380 * Fields are initialized based on PCI device information and
2381 * OS network device settings (MTU size).
2382 **/
2383static int __devinit igb_sw_init(struct igb_adapter *adapter)
2384{
2385	struct e1000_hw *hw = &adapter->hw;
2386	struct net_device *netdev = adapter->netdev;
2387	struct pci_dev *pdev = adapter->pdev;
2388
2389	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2390
2391	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2392	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2393	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2394	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2395
2396	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2397				  VLAN_HLEN;
2398	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
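	/* For the default 1500 byte MTU this works out to a max_frame_size of
	 * 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522 bytes
	 * and a min_frame_size of 60 (ETH_ZLEN) + 4 (ETH_FCS_LEN) = 64 bytes. */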
2399
2400	spin_lock_init(&adapter->stats64_lock);
2401#ifdef CONFIG_PCI_IOV
2402	switch (hw->mac.type) {
2403	case e1000_82576:
2404	case e1000_i350:
2405		if (max_vfs > 7) {
2406			dev_warn(&pdev->dev,
2407				 "Maximum of 7 VFs per PF, using max\n");
2408			adapter->vfs_allocated_count = 7;
2409		} else
2410			adapter->vfs_allocated_count = max_vfs;
2411		break;
2412	default:
2413		break;
2414	}
2415#endif /* CONFIG_PCI_IOV */
2416	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2417	/* i350 cannot do RSS and SR-IOV at the same time */
2418	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2419		adapter->rss_queues = 1;
2420
2421	/*
2422	 * If rss_queues > 4, or if more than one RSS queue is in use while
2423	 * more than six VFs are allocated, combine the queues into queue
2424	 * pairs in order to conserve the limited supply of interrupts
2425	 */
2426	if ((adapter->rss_queues > 4) ||
2427	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2428		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2429
2430	/* This call may decrease the number of queues */
2431	if (igb_init_interrupt_scheme(adapter)) {
2432		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2433		return -ENOMEM;
2434	}
2435
2436	igb_probe_vfs(adapter);
2437
2438	/* Explicitly disable IRQ since the NIC can be in any state. */
2439	igb_irq_disable(adapter);
2440
2441	if (hw->mac.type == e1000_i350)
2442		adapter->flags &= ~IGB_FLAG_DMAC;
2443
2444	set_bit(__IGB_DOWN, &adapter->state);
2445	return 0;
2446}
2447
2448/**
2449 * igb_open - Called when a network interface is made active
2450 * @netdev: network interface device structure
2451 *
2452 * Returns 0 on success, negative value on failure
2453 *
2454 * The open entry point is called when a network interface is made
2455 * active by the system (IFF_UP).  At this point all resources needed
2456 * for transmit and receive operations are allocated, the interrupt
2457 * handler is registered with the OS, the watchdog timer is started,
2458 * and the stack is notified that the interface is ready.
2459 **/
2460static int igb_open(struct net_device *netdev)
2461{
2462	struct igb_adapter *adapter = netdev_priv(netdev);
2463	struct e1000_hw *hw = &adapter->hw;
2464	int err;
2465	int i;
2466
2467	/* disallow open during test */
2468	if (test_bit(__IGB_TESTING, &adapter->state))
2469		return -EBUSY;
2470
2471	netif_carrier_off(netdev);
2472
2473	/* allocate transmit descriptors */
2474	err = igb_setup_all_tx_resources(adapter);
2475	if (err)
2476		goto err_setup_tx;
2477
2478	/* allocate receive descriptors */
2479	err = igb_setup_all_rx_resources(adapter);
2480	if (err)
2481		goto err_setup_rx;
2482
2483	igb_power_up_link(adapter);
2484
2485	/* before we allocate an interrupt, we must be ready to handle it.
2486	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2487	 * as soon as we call pci_request_irq, so we have to set up our
2488	 * clean_rx handler before we do so.  */
2489	igb_configure(adapter);
2490
2491	err = igb_request_irq(adapter);
2492	if (err)
2493		goto err_req_irq;
2494
2495	/* From here on the code is the same as igb_up() */
2496	clear_bit(__IGB_DOWN, &adapter->state);
2497
2498	for (i = 0; i < adapter->num_q_vectors; i++) {
2499		struct igb_q_vector *q_vector = adapter->q_vector[i];
2500		napi_enable(&q_vector->napi);
2501	}
2502
2503	/* Clear any pending interrupts. */
2504	rd32(E1000_ICR);
2505
2506	igb_irq_enable(adapter);
2507
2508	/* notify VFs that reset has been completed */
2509	if (adapter->vfs_allocated_count) {
2510		u32 reg_data = rd32(E1000_CTRL_EXT);
2511		reg_data |= E1000_CTRL_EXT_PFRSTD;
2512		wr32(E1000_CTRL_EXT, reg_data);
2513	}
2514
2515	netif_tx_start_all_queues(netdev);
2516
2517	/* start the watchdog. */
2518	hw->mac.get_link_status = 1;
2519	schedule_work(&adapter->watchdog_task);
2520
2521	return 0;
2522
2523err_req_irq:
2524	igb_release_hw_control(adapter);
2525	igb_power_down_link(adapter);
2526	igb_free_all_rx_resources(adapter);
2527err_setup_rx:
2528	igb_free_all_tx_resources(adapter);
2529err_setup_tx:
2530	igb_reset(adapter);
2531
2532	return err;
2533}
2534
2535/**
2536 * igb_close - Disables a network interface
2537 * @netdev: network interface device structure
2538 *
2539 * Returns 0, this is not allowed to fail
2540 *
2541 * The close entry point is called when an interface is de-activated
2542 * by the OS.  The hardware is still under the driver's control, but
2543 * needs to be disabled.  A global MAC reset is issued to stop the
2544 * hardware, and all transmit and receive resources are freed.
2545 **/
2546static int igb_close(struct net_device *netdev)
2547{
2548	struct igb_adapter *adapter = netdev_priv(netdev);
2549
2550	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2551	igb_down(adapter);
2552
2553	igb_free_irq(adapter);
2554
2555	igb_free_all_tx_resources(adapter);
2556	igb_free_all_rx_resources(adapter);
2557
2558	return 0;
2559}
2560
2561/**
2562 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2563 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2564 *
2565 * Return 0 on success, negative on failure
2566 **/
2567int igb_setup_tx_resources(struct igb_ring *tx_ring)
2568{
2569	struct device *dev = tx_ring->dev;
2570	int size;
2571
2572	size = sizeof(struct igb_buffer) * tx_ring->count;
2573	tx_ring->buffer_info = vzalloc(size);
2574	if (!tx_ring->buffer_info)
2575		goto err;
2576
2577	/* round up to nearest 4K */
2578	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2579	tx_ring->size = ALIGN(tx_ring->size, 4096);
2580
2581	tx_ring->desc = dma_alloc_coherent(dev,
2582					   tx_ring->size,
2583					   &tx_ring->dma,
2584					   GFP_KERNEL);
2585
2586	if (!tx_ring->desc)
2587		goto err;
2588
2589	tx_ring->next_to_use = 0;
2590	tx_ring->next_to_clean = 0;
2591	return 0;
2592
2593err:
2594	vfree(tx_ring->buffer_info);
2595	dev_err(dev,
2596		"Unable to allocate memory for the transmit descriptor ring\n");
2597	return -ENOMEM;
2598}
2599
2600/**
2601 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2602 *				  (Descriptors) for all queues
2603 * @adapter: board private structure
2604 *
2605 * Return 0 on success, negative on failure
2606 **/
2607static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2608{
2609	struct pci_dev *pdev = adapter->pdev;
2610	int i, err = 0;
2611
2612	for (i = 0; i < adapter->num_tx_queues; i++) {
2613		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2614		if (err) {
2615			dev_err(&pdev->dev,
2616				"Allocation for Tx Queue %u failed\n", i);
2617			for (i--; i >= 0; i--)
2618				igb_free_tx_resources(adapter->tx_ring[i]);
2619			break;
2620		}
2621	}
2622
2623	for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2624		int r_idx = i % adapter->num_tx_queues;
2625		adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2626	}
2627	return err;
2628}
2629
2630/**
2631 * igb_setup_tctl - configure the transmit control registers
2632 * @adapter: Board private structure
2633 **/
2634void igb_setup_tctl(struct igb_adapter *adapter)
2635{
2636	struct e1000_hw *hw = &adapter->hw;
2637	u32 tctl;
2638
2639	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2640	wr32(E1000_TXDCTL(0), 0);
2641
2642	/* Program the Transmit Control Register */
2643	tctl = rd32(E1000_TCTL);
2644	tctl &= ~E1000_TCTL_CT;
2645	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2646		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2647
2648	igb_config_collision_dist(hw);
2649
2650	/* Enable transmits */
2651	tctl |= E1000_TCTL_EN;
2652
2653	wr32(E1000_TCTL, tctl);
2654}
2655
2656/**
2657 * igb_configure_tx_ring - Configure transmit ring after Reset
2658 * @adapter: board private structure
2659 * @ring: tx ring to configure
2660 *
2661 * Configure a transmit ring after a reset.
2662 **/
2663void igb_configure_tx_ring(struct igb_adapter *adapter,
2664                           struct igb_ring *ring)
2665{
2666	struct e1000_hw *hw = &adapter->hw;
2667	u32 txdctl = 0;
2668	u64 tdba = ring->dma;
2669	int reg_idx = ring->reg_idx;
2670
2671	/* disable the queue */
2672	wr32(E1000_TXDCTL(reg_idx), 0);
2673	wrfl();
2674	mdelay(10);
2675
2676	wr32(E1000_TDLEN(reg_idx),
2677	                ring->count * sizeof(union e1000_adv_tx_desc));
2678	wr32(E1000_TDBAL(reg_idx),
2679	                tdba & 0x00000000ffffffffULL);
2680	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2681
2682	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2683	wr32(E1000_TDH(reg_idx), 0);
2684	writel(0, ring->tail);
2685
2686	txdctl |= IGB_TX_PTHRESH;
2687	txdctl |= IGB_TX_HTHRESH << 8;
2688	txdctl |= IGB_TX_WTHRESH << 16;
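	/* The prefetch (PTHRESH), host (HTHRESH) and write-back (WTHRESH)
	 * thresholds occupy byte-aligned fields of TXDCTL at bit offsets
	 * 0, 8 and 16 respectively. */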
2689
2690	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2691	wr32(E1000_TXDCTL(reg_idx), txdctl);
2692}
2693
2694/**
2695 * igb_configure_tx - Configure transmit Unit after Reset
2696 * @adapter: board private structure
2697 *
2698 * Configure the Tx unit of the MAC after a reset.
2699 **/
2700static void igb_configure_tx(struct igb_adapter *adapter)
2701{
2702	int i;
2703
2704	for (i = 0; i < adapter->num_tx_queues; i++)
2705		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2706}
2707
2708/**
2709 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2710 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2711 *
2712 * Returns 0 on success, negative on failure
2713 **/
2714int igb_setup_rx_resources(struct igb_ring *rx_ring)
2715{
2716	struct device *dev = rx_ring->dev;
2717	int size, desc_len;
2718
2719	size = sizeof(struct igb_buffer) * rx_ring->count;
2720	rx_ring->buffer_info = vzalloc(size);
2721	if (!rx_ring->buffer_info)
2722		goto err;
2723
2724	desc_len = sizeof(union e1000_adv_rx_desc);
2725
2726	/* Round up to nearest 4K */
2727	rx_ring->size = rx_ring->count * desc_len;
2728	rx_ring->size = ALIGN(rx_ring->size, 4096);
2729
2730	rx_ring->desc = dma_alloc_coherent(dev,
2731					   rx_ring->size,
2732					   &rx_ring->dma,
2733					   GFP_KERNEL);
2734
2735	if (!rx_ring->desc)
2736		goto err;
2737
2738	rx_ring->next_to_clean = 0;
2739	rx_ring->next_to_use = 0;
2740
2741	return 0;
2742
2743err:
2744	vfree(rx_ring->buffer_info);
2745	rx_ring->buffer_info = NULL;
2746	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2747		" ring\n");
2748	return -ENOMEM;
2749}
2750
2751/**
2752 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2753 *				  (Descriptors) for all queues
2754 * @adapter: board private structure
2755 *
2756 * Return 0 on success, negative on failure
2757 **/
2758static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2759{
2760	struct pci_dev *pdev = adapter->pdev;
2761	int i, err = 0;
2762
2763	for (i = 0; i < adapter->num_rx_queues; i++) {
2764		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2765		if (err) {
2766			dev_err(&pdev->dev,
2767				"Allocation for Rx Queue %u failed\n", i);
2768			for (i--; i >= 0; i--)
2769				igb_free_rx_resources(adapter->rx_ring[i]);
2770			break;
2771		}
2772	}
2773
2774	return err;
2775}
2776
2777/**
2778 * igb_setup_mrqc - configure the multiple receive queue control registers
2779 * @adapter: Board private structure
2780 **/
2781static void igb_setup_mrqc(struct igb_adapter *adapter)
2782{
2783	struct e1000_hw *hw = &adapter->hw;
2784	u32 mrqc, rxcsum;
2785	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2786	union e1000_reta {
2787		u32 dword;
2788		u8  bytes[4];
2789	} reta;
2790	static const u8 rsshash[40] = {
2791		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2792		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2793		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2794		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2795
2796	/* Fill out hash function seeds */
2797	for (j = 0; j < 10; j++) {
2798		u32 rsskey = rsshash[(j * 4)];
2799		rsskey |= rsshash[(j * 4) + 1] << 8;
2800		rsskey |= rsshash[(j * 4) + 2] << 16;
2801		rsskey |= rsshash[(j * 4) + 3] << 24;
2802		array_wr32(E1000_RSSRK(0), j, rsskey);
2803	}
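	/* The loop above packs the 40 byte RSS hash key into the ten 32-bit
	 * RSSRK registers, four key bytes per register, least significant
	 * byte first. */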
2804
2805	num_rx_queues = adapter->rss_queues;
2806
2807	if (adapter->vfs_allocated_count) {
2808		/* 82575 and 82576 support 2 RSS queues for VMDq */
2809		switch (hw->mac.type) {
2810		case e1000_i350:
2811		case e1000_82580:
2812			num_rx_queues = 1;
2813			shift = 0;
2814			break;
2815		case e1000_82576:
2816			shift = 3;
2817			num_rx_queues = 2;
2818			break;
2819		case e1000_82575:
2820			shift = 2;
2821			shift2 = 6;
2822		default:
2823			break;
2824		}
2825	} else {
2826		if (hw->mac.type == e1000_82575)
2827			shift = 6;
2828	}
2829
2830	for (j = 0; j < (32 * 4); j++) {
2831		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2832		if (shift2)
2833			reta.bytes[j & 3] |= num_rx_queues << shift2;
2834		if ((j & 3) == 3)
2835			wr32(E1000_RETA(j >> 2), reta.dword);
2836	}
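	/* The loop above fills the 128 entry redirection table (RETA), four
	 * entries per 32-bit register, spreading hash results round-robin
	 * across num_rx_queues; the shift values chosen per MAC type above
	 * place the queue index in the bit position the hardware expects. */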
2837
2838	/*
2839	 * Disable raw packet checksumming so that RSS hash is placed in
2840	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2841	 * offloads as they are enabled by default
2842	 */
2843	rxcsum = rd32(E1000_RXCSUM);
2844	rxcsum |= E1000_RXCSUM_PCSD;
2845
2846	if (adapter->hw.mac.type >= e1000_82576)
2847		/* Enable Receive Checksum Offload for SCTP */
2848		rxcsum |= E1000_RXCSUM_CRCOFL;
2849
2850	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2851	wr32(E1000_RXCSUM, rxcsum);
2852
2853	/* If VMDq is enabled then we set the appropriate mode for that, else
2854	 * we default to RSS so that an RSS hash is calculated per packet even
2855	 * if we are only using one queue */
2856	if (adapter->vfs_allocated_count) {
2857		if (hw->mac.type > e1000_82575) {
2858			/* Set the default pool for the PF's first queue */
2859			u32 vtctl = rd32(E1000_VT_CTL);
2860			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2861				   E1000_VT_CTL_DISABLE_DEF_POOL);
2862			vtctl |= adapter->vfs_allocated_count <<
2863				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2864			wr32(E1000_VT_CTL, vtctl);
2865		}
2866		if (adapter->rss_queues > 1)
2867			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2868		else
2869			mrqc = E1000_MRQC_ENABLE_VMDQ;
2870	} else {
2871		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2872	}
2873	igb_vmm_control(adapter);
2874
2875	/*
2876	 * Generate RSS hash based on TCP port numbers and/or
2877	 * IPv4/v6 src and dst addresses since UDP cannot be
2878	 * hashed reliably due to IP fragmentation
2879	 */
2880	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2881		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2882		E1000_MRQC_RSS_FIELD_IPV6 |
2883		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2884		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2885
2886	wr32(E1000_MRQC, mrqc);
2887}
2888
2889/**
2890 * igb_setup_rctl - configure the receive control registers
2891 * @adapter: Board private structure
2892 **/
2893void igb_setup_rctl(struct igb_adapter *adapter)
2894{
2895	struct e1000_hw *hw = &adapter->hw;
2896	u32 rctl;
2897
2898	rctl = rd32(E1000_RCTL);
2899
2900	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2901	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2902
2903	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2904		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2905
2906	/*
2907	 * enable stripping of CRC. It's unlikely this will break BMC
2908	 * redirection as it did with e1000. Newer features require
2909	 * that the HW strips the CRC.
2910	 */
2911	rctl |= E1000_RCTL_SECRC;
2912
2913	/* disable store bad packets and clear size bits. */
2914	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2915
2916	/* enable LPE to prevent packets larger than max_frame_size */
2917	rctl |= E1000_RCTL_LPE;
2918
2919	/* disable queue 0 to prevent tail write w/o re-config */
2920	wr32(E1000_RXDCTL(0), 0);
2921
2922	/* Attention!!!  For SR-IOV PF driver operations you must enable
2923	 * queue drop for all VF and PF queues to prevent head of line blocking
2924	 * if an un-trusted VF does not provide descriptors to hardware.
2925	 */
2926	if (adapter->vfs_allocated_count) {
2927		/* set all queue drop enable bits */
2928		wr32(E1000_QDE, ALL_QUEUES);
2929	}
2930
2931	wr32(E1000_RCTL, rctl);
2932}
2933
2934static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2935                                   int vfn)
2936{
2937	struct e1000_hw *hw = &adapter->hw;
2938	u32 vmolr;
2939
2940	/* if this is a VF (not the PF) and it has VLANs enabled,
2941	 * increase the size to allow for VLAN tags */
2942	if (vfn < adapter->vfs_allocated_count &&
2943	    adapter->vf_data[vfn].vlans_enabled)
2944		size += VLAN_TAG_SIZE;
2945
2946	vmolr = rd32(E1000_VMOLR(vfn));
2947	vmolr &= ~E1000_VMOLR_RLPML_MASK;
2948	vmolr |= size | E1000_VMOLR_LPE;
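	/* The low bits of VMOLR hold the per-pool receive long packet maximum
	 * length (RLPML); setting LPE allows frames up to that size through
	 * this pool. */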
2949	wr32(E1000_VMOLR(vfn), vmolr);
2950
2951	return 0;
2952}
2953
2954/**
2955 * igb_rlpml_set - set maximum receive packet size
2956 * @adapter: board private structure
2957 *
2958 * Configure maximum receivable packet size.
2959 **/
2960static void igb_rlpml_set(struct igb_adapter *adapter)
2961{
2962	u32 max_frame_size = adapter->max_frame_size;
2963	struct e1000_hw *hw = &adapter->hw;
2964	u16 pf_id = adapter->vfs_allocated_count;
2965
2966	if (pf_id) {
2967		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2968		/*
2969		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
2970		 * to our max jumbo frame size, in case we need to enable
2971		 * jumbo frames on one of the rings later.
2972		 * This will not pass over-length frames into the default
2973		 * queue because it's gated by the VMOLR.RLPML.
2974		 */
2975		max_frame_size = MAX_JUMBO_FRAME_SIZE;
2976	}
2977
2978	wr32(E1000_RLPML, max_frame_size);
2979}
2980
2981static inline void igb_set_vmolr(struct igb_adapter *adapter,
2982				 int vfn, bool aupe)
2983{
2984	struct e1000_hw *hw = &adapter->hw;
2985	u32 vmolr;
2986
2987	/*
2988	 * This register only exists on 82576 and newer, so on older
2989	 * hardware we simply exit and do nothing
2990	 */
2991	if (hw->mac.type < e1000_82576)
2992		return;
2993
2994	vmolr = rd32(E1000_VMOLR(vfn));
2995	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2996	if (aupe)
2997		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2998	else
2999		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3000
3001	/* clear all bits that might not be set */
3002	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3003
3004	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3005		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3006	/*
3007	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3008	 * multicast packets
3009	 */
3010	if (vfn <= adapter->vfs_allocated_count)
3011		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3012
3013	wr32(E1000_VMOLR(vfn), vmolr);
3014}
3015
3016/**
3017 * igb_configure_rx_ring - Configure a receive ring after Reset
3018 * @adapter: board private structure
3019 * @ring: receive ring to be configured
3020 *
3021 * Configure the Rx unit of the MAC after a reset.
3022 **/
3023void igb_configure_rx_ring(struct igb_adapter *adapter,
3024                           struct igb_ring *ring)
3025{
3026	struct e1000_hw *hw = &adapter->hw;
3027	u64 rdba = ring->dma;
3028	int reg_idx = ring->reg_idx;
3029	u32 srrctl = 0, rxdctl = 0;
3030
3031	/* disable the queue */
3032	wr32(E1000_RXDCTL(reg_idx), 0);
3033
3034	/* Set DMA base address registers */
3035	wr32(E1000_RDBAL(reg_idx),
3036	     rdba & 0x00000000ffffffffULL);
3037	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3038	wr32(E1000_RDLEN(reg_idx),
3039	               ring->count * sizeof(union e1000_adv_rx_desc));
3040
3041	/* initialize head and tail */
3042	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3043	wr32(E1000_RDH(reg_idx), 0);
3044	writel(0, ring->tail);
3045
3046	/* set descriptor configuration */
3047	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3048#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3049	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3050#else
3051	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3052#endif
3053	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3054	if (hw->mac.type == e1000_82580)
3055		srrctl |= E1000_SRRCTL_TIMESTAMP;
3056	/* Only set Drop Enable if we are supporting multiple queues */
3057	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3058		srrctl |= E1000_SRRCTL_DROP_EN;
3059
3060	wr32(E1000_SRRCTL(reg_idx), srrctl);
3061
3062	/* set filtering for VMDQ pools */
3063	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3064
3065	rxdctl |= IGB_RX_PTHRESH;
3066	rxdctl |= IGB_RX_HTHRESH << 8;
3067	rxdctl |= IGB_RX_WTHRESH << 16;
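	/* As with TXDCTL, the prefetch, host and write-back thresholds occupy
	 * byte-aligned fields of RXDCTL at bit offsets 0, 8 and 16. */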
3068
3069	/* enable receive descriptor fetching */
3070	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3071	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3072}
3073
3074/**
3075 * igb_configure_rx - Configure receive Unit after Reset
3076 * @adapter: board private structure
3077 *
3078 * Configure the Rx unit of the MAC after a reset.
3079 **/
3080static void igb_configure_rx(struct igb_adapter *adapter)
3081{
3082	int i;
3083
3084	/* set UTA to appropriate mode */
3085	igb_set_uta(adapter);
3086
3087	/* set the correct pool for the PF default MAC address in entry 0 */
3088	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3089	                 adapter->vfs_allocated_count);
3090
3091	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3092	 * the Base and Length of the Rx Descriptor Ring */
3093	for (i = 0; i < adapter->num_rx_queues; i++)
3094		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3095}
3096
3097/**
3098 * igb_free_tx_resources - Free Tx Resources per Queue
3099 * @tx_ring: Tx descriptor ring for a specific queue
3100 *
3101 * Free all transmit software resources
3102 **/
3103void igb_free_tx_resources(struct igb_ring *tx_ring)
3104{
3105	igb_clean_tx_ring(tx_ring);
3106
3107	vfree(tx_ring->buffer_info);
3108	tx_ring->buffer_info = NULL;
3109
3110	/* if not set, then don't free */
3111	if (!tx_ring->desc)
3112		return;
3113
3114	dma_free_coherent(tx_ring->dev, tx_ring->size,
3115			  tx_ring->desc, tx_ring->dma);
3116
3117	tx_ring->desc = NULL;
3118}
3119
3120/**
3121 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3122 * @adapter: board private structure
3123 *
3124 * Free all transmit software resources
3125 **/
3126static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3127{
3128	int i;
3129
3130	for (i = 0; i < adapter->num_tx_queues; i++)
3131		igb_free_tx_resources(adapter->tx_ring[i]);
3132}
3133
3134void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3135				    struct igb_buffer *buffer_info)
3136{
3137	if (buffer_info->dma) {
3138		if (buffer_info->mapped_as_page)
3139			dma_unmap_page(tx_ring->dev,
3140					buffer_info->dma,
3141					buffer_info->length,
3142					DMA_TO_DEVICE);
3143		else
3144			dma_unmap_single(tx_ring->dev,
3145					buffer_info->dma,
3146					buffer_info->length,
3147					DMA_TO_DEVICE);
3148		buffer_info->dma = 0;
3149	}
3150	if (buffer_info->skb) {
3151		dev_kfree_skb_any(buffer_info->skb);
3152		buffer_info->skb = NULL;
3153	}
3154	buffer_info->time_stamp = 0;
3155	buffer_info->length = 0;
3156	buffer_info->next_to_watch = 0;
3157	buffer_info->mapped_as_page = false;
3158}
3159
3160/**
3161 * igb_clean_tx_ring - Free Tx Buffers
3162 * @tx_ring: ring to be cleaned
3163 **/
3164static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3165{
3166	struct igb_buffer *buffer_info;
3167	unsigned long size;
3168	unsigned int i;
3169
3170	if (!tx_ring->buffer_info)
3171		return;
3172	/* Free all the Tx ring sk_buffs */
3173
3174	for (i = 0; i < tx_ring->count; i++) {
3175		buffer_info = &tx_ring->buffer_info[i];
3176		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3177	}
3178
3179	size = sizeof(struct igb_buffer) * tx_ring->count;
3180	memset(tx_ring->buffer_info, 0, size);
3181
3182	/* Zero out the descriptor ring */
3183	memset(tx_ring->desc, 0, tx_ring->size);
3184
3185	tx_ring->next_to_use = 0;
3186	tx_ring->next_to_clean = 0;
3187}
3188
3189/**
3190 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3191 * @adapter: board private structure
3192 **/
3193static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3194{
3195	int i;
3196
3197	for (i = 0; i < adapter->num_tx_queues; i++)
3198		igb_clean_tx_ring(adapter->tx_ring[i]);
3199}
3200
3201/**
3202 * igb_free_rx_resources - Free Rx Resources
3203 * @rx_ring: ring to clean the resources from
3204 *
3205 * Free all receive software resources
3206 **/
3207void igb_free_rx_resources(struct igb_ring *rx_ring)
3208{
3209	igb_clean_rx_ring(rx_ring);
3210
3211	vfree(rx_ring->buffer_info);
3212	rx_ring->buffer_info = NULL;
3213
3214	/* if not set, then don't free */
3215	if (!rx_ring->desc)
3216		return;
3217
3218	dma_free_coherent(rx_ring->dev, rx_ring->size,
3219			  rx_ring->desc, rx_ring->dma);
3220
3221	rx_ring->desc = NULL;
3222}
3223
3224/**
3225 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3226 * @adapter: board private structure
3227 *
3228 * Free all receive software resources
3229 **/
3230static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3231{
3232	int i;
3233
3234	for (i = 0; i < adapter->num_rx_queues; i++)
3235		igb_free_rx_resources(adapter->rx_ring[i]);
3236}
3237
3238/**
3239 * igb_clean_rx_ring - Free Rx Buffers per Queue
3240 * @rx_ring: ring to free buffers from
3241 **/
3242static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3243{
3244	unsigned long size;
3245	u16 i;
3246
3247	if (!rx_ring->buffer_info)
3248		return;
3249
3250	/* Free all the Rx ring sk_buffs */
3251	for (i = 0; i < rx_ring->count; i++) {
3252		struct igb_buffer *buffer_info = &rx_ring->buffer_info[i];
3253		if (buffer_info->dma) {
3254			dma_unmap_single(rx_ring->dev,
3255			                 buffer_info->dma,
3256					 IGB_RX_HDR_LEN,
3257					 DMA_FROM_DEVICE);
3258			buffer_info->dma = 0;
3259		}
3260
3261		if (buffer_info->skb) {
3262			dev_kfree_skb(buffer_info->skb);
3263			buffer_info->skb = NULL;
3264		}
3265		if (buffer_info->page_dma) {
3266			dma_unmap_page(rx_ring->dev,
3267			               buffer_info->page_dma,
3268				       PAGE_SIZE / 2,
3269				       DMA_FROM_DEVICE);
3270			buffer_info->page_dma = 0;
3271		}
3272		if (buffer_info->page) {
3273			put_page(buffer_info->page);
3274			buffer_info->page = NULL;
3275			buffer_info->page_offset = 0;
3276		}
3277	}
3278
3279	size = sizeof(struct igb_buffer) * rx_ring->count;
3280	memset(rx_ring->buffer_info, 0, size);
3281
3282	/* Zero out the descriptor ring */
3283	memset(rx_ring->desc, 0, rx_ring->size);
3284
3285	rx_ring->next_to_clean = 0;
3286	rx_ring->next_to_use = 0;
3287}
3288
3289/**
3290 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3291 * @adapter: board private structure
3292 **/
3293static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3294{
3295	int i;
3296
3297	for (i = 0; i < adapter->num_rx_queues; i++)
3298		igb_clean_rx_ring(adapter->rx_ring[i]);
3299}
3300
3301/**
3302 * igb_set_mac - Change the Ethernet Address of the NIC
3303 * @netdev: network interface device structure
3304 * @p: pointer to an address structure
3305 *
3306 * Returns 0 on success, negative on failure
3307 **/
3308static int igb_set_mac(struct net_device *netdev, void *p)
3309{
3310	struct igb_adapter *adapter = netdev_priv(netdev);
3311	struct e1000_hw *hw = &adapter->hw;
3312	struct sockaddr *addr = p;
3313
3314	if (!is_valid_ether_addr(addr->sa_data))
3315		return -EADDRNOTAVAIL;
3316
3317	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3318	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3319
3320	/* set the correct pool for the new PF MAC address in entry 0 */
3321	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3322	                 adapter->vfs_allocated_count);
3323
3324	return 0;
3325}
3326
3327/**
3328 * igb_write_mc_addr_list - write multicast addresses to MTA
3329 * @netdev: network interface device structure
3330 *
3331 * Writes multicast address list to the MTA hash table.
3332 * Returns: -ENOMEM on failure
3333 *                0 on no addresses written
3334 *                X on writing X addresses to MTA
3335 **/
3336static int igb_write_mc_addr_list(struct net_device *netdev)
3337{
3338	struct igb_adapter *adapter = netdev_priv(netdev);
3339	struct e1000_hw *hw = &adapter->hw;
3340	struct netdev_hw_addr *ha;
3341	u8  *mta_list;
3342	int i;
3343
3344	if (netdev_mc_empty(netdev)) {
3345		/* nothing to program, so clear mc list */
3346		igb_update_mc_addr_list(hw, NULL, 0);
3347		igb_restore_vf_multicasts(adapter);
3348		return 0;
3349	}
3350
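	/* 6 bytes per list entry is one ETH_ALEN sized MAC address */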
3351	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3352	if (!mta_list)
3353		return -ENOMEM;
3354
3355	/* The shared function expects a packed array of only addresses. */
3356	i = 0;
3357	netdev_for_each_mc_addr(ha, netdev)
3358		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3359
3360	igb_update_mc_addr_list(hw, mta_list, i);
3361	kfree(mta_list);
3362
3363	return netdev_mc_count(netdev);
3364}
3365
3366/**
3367 * igb_write_uc_addr_list - write unicast addresses to RAR table
3368 * @netdev: network interface device structure
3369 *
3370 * Writes unicast address list to the RAR table.
3371 * Returns: -ENOMEM on failure/insufficient address space
3372 *                0 on no addresses written
3373 *                X on writing X addresses to the RAR table
3374 **/
3375static int igb_write_uc_addr_list(struct net_device *netdev)
3376{
3377	struct igb_adapter *adapter = netdev_priv(netdev);
3378	struct e1000_hw *hw = &adapter->hw;
3379	unsigned int vfn = adapter->vfs_allocated_count;
3380	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3381	int count = 0;
3382
3383	/* return -ENOMEM to indicate there are not enough free RAR entries */
3384	if (netdev_uc_count(netdev) > rar_entries)
3385		return -ENOMEM;
3386
3387	if (!netdev_uc_empty(netdev) && rar_entries) {
3388		struct netdev_hw_addr *ha;
3389
3390		netdev_for_each_uc_addr(ha, netdev) {
3391			if (!rar_entries)
3392				break;
3393			igb_rar_set_qsel(adapter, ha->addr,
3394			                 rar_entries--,
3395			                 vfn);
3396			count++;
3397		}
3398	}
3399	/* clear the unused RAR entries, in reverse order to avoid write combining */
3400	for (; rar_entries > 0 ; rar_entries--) {
3401		wr32(E1000_RAH(rar_entries), 0);
3402		wr32(E1000_RAL(rar_entries), 0);
3403	}
3404	wrfl();
3405
3406	return count;
3407}
3408
3409/**
3410 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3411 * @netdev: network interface device structure
3412 *
3413 * The set_rx_mode entry point is called whenever the unicast or multicast
3414 * address lists or the network interface flags are updated.  This routine is
3415 * responsible for configuring the hardware for proper unicast, multicast,
3416 * promiscuous mode, and all-multi behavior.
3417 **/
3418static void igb_set_rx_mode(struct net_device *netdev)
3419{
3420	struct igb_adapter *adapter = netdev_priv(netdev);
3421	struct e1000_hw *hw = &adapter->hw;
3422	unsigned int vfn = adapter->vfs_allocated_count;
3423	u32 rctl, vmolr = 0;
3424	int count;
3425
3426	/* Check for Promiscuous and All Multicast modes */
3427	rctl = rd32(E1000_RCTL);
3428
3429	/* clear the affected bits */
3430	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3431
3432	if (netdev->flags & IFF_PROMISC) {
3433		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3434		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3435	} else {
3436		if (netdev->flags & IFF_ALLMULTI) {
3437			rctl |= E1000_RCTL_MPE;
3438			vmolr |= E1000_VMOLR_MPME;
3439		} else {
3440			/*
3441			 * Write addresses to the MTA, if the attempt fails
3442			 * then we should just turn on promiscuous mode so
3443			 * that we can at least receive multicast traffic
3444			 */
3445			count = igb_write_mc_addr_list(netdev);
3446			if (count < 0) {
3447				rctl |= E1000_RCTL_MPE;
3448				vmolr |= E1000_VMOLR_MPME;
3449			} else if (count) {
3450				vmolr |= E1000_VMOLR_ROMPE;
3451			}
3452		}
3453		/*
3454		 * Write addresses to available RAR registers, if there is not
3455		 * sufficient space to store all the addresses then enable
3456		 * unicast promiscuous mode
3457		 */
3458		count = igb_write_uc_addr_list(netdev);
3459		if (count < 0) {
3460			rctl |= E1000_RCTL_UPE;
3461			vmolr |= E1000_VMOLR_ROPE;
3462		}
3463		rctl |= E1000_RCTL_VFE;
3464	}
3465	wr32(E1000_RCTL, rctl);
3466
3467	/*
3468	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3469	 * the VMOLR to enable the appropriate modes.  Without this workaround
3470	 * we will have issues with VLAN tag stripping not being done for frames
3471	 * that are only arriving because we are the default pool
3472	 */
3473	if (hw->mac.type < e1000_82576)
3474		return;
3475
3476	vmolr |= rd32(E1000_VMOLR(vfn)) &
3477	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3478	wr32(E1000_VMOLR(vfn), vmolr);
3479	igb_restore_vf_multicasts(adapter);
3480}
3481
3482static void igb_check_wvbr(struct igb_adapter *adapter)
3483{
3484	struct e1000_hw *hw = &adapter->hw;
3485	u32 wvbr = 0;
3486
3487	switch (hw->mac.type) {
3488	case e1000_82576:
3489	case e1000_i350:
3490		if (!(wvbr = rd32(E1000_WVBR)))
3491			return;
3492		break;
3493	default:
3494		break;
3495	}
3496
3497	adapter->wvbr |= wvbr;
3498}
3499
3500#define IGB_STAGGERED_QUEUE_OFFSET 8
3501
3502static void igb_spoof_check(struct igb_adapter *adapter)
3503{
3504	int j;
3505
3506	if (!adapter->wvbr)
3507		return;
3508
3509	for(j = 0; j < adapter->vfs_allocated_count; j++) {
3510		if (adapter->wvbr & (1 << j) ||
3511		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3512			dev_warn(&adapter->pdev->dev,
3513				"Spoof event(s) detected on VF %d\n", j);
3514			adapter->wvbr &=
3515				~((1 << j) |
3516				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3517		}
3518	}
3519}
3520
3521/* Need to wait a few seconds after link up to get diagnostic information from
3522 * the phy */
3523static void igb_update_phy_info(unsigned long data)
3524{
3525	struct igb_adapter *adapter = (struct igb_adapter *) data;
3526	igb_get_phy_info(&adapter->hw);
3527}
3528
3529/**
3530 * igb_has_link - check shared code for link and determine up/down
3531 * @adapter: pointer to driver private info
3532 **/
3533bool igb_has_link(struct igb_adapter *adapter)
3534{
3535	struct e1000_hw *hw = &adapter->hw;
3536	bool link_active = false;
3537	s32 ret_val = 0;
3538
3539	/* get_link_status is set on LSC (link status) interrupt or
3540	 * rx sequence error interrupt.  get_link_status will stay
3541	 * set until e1000_check_for_link establishes link
3542	 * for copper adapters ONLY
3543	 */
3544	switch (hw->phy.media_type) {
3545	case e1000_media_type_copper:
3546		if (hw->mac.get_link_status) {
3547			ret_val = hw->mac.ops.check_for_link(hw);
3548			link_active = !hw->mac.get_link_status;
3549		} else {
3550			link_active = true;
3551		}
3552		break;
3553	case e1000_media_type_internal_serdes:
3554		ret_val = hw->mac.ops.check_for_link(hw);
3555		link_active = hw->mac.serdes_has_link;
3556		break;
3557	default:
3558	case e1000_media_type_unknown:
3559		break;
3560	}
3561
3562	return link_active;
3563}
3564
3565static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3566{
3567	bool ret = false;
3568	u32 ctrl_ext, thstat;
3569
3570	/* check for thermal sensor event on i350, copper only */
3571	if (hw->mac.type == e1000_i350) {
3572		thstat = rd32(E1000_THSTAT);
3573		ctrl_ext = rd32(E1000_CTRL_EXT);
3574
3575		if ((hw->phy.media_type == e1000_media_type_copper) &&
3576		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3577			ret = !!(thstat & event);
3578		}
3579	}
3580
3581	return ret;
3582}
3583
3584/**
3585 * igb_watchdog - Timer Call-back
3586 * @data: pointer to adapter cast into an unsigned long
3587 **/
3588static void igb_watchdog(unsigned long data)
3589{
3590	struct igb_adapter *adapter = (struct igb_adapter *)data;
3591	/* Do the rest outside of interrupt context */
3592	schedule_work(&adapter->watchdog_task);
3593}
3594
3595static void igb_watchdog_task(struct work_struct *work)
3596{
3597	struct igb_adapter *adapter = container_of(work,
3598	                                           struct igb_adapter,
3599                                                   watchdog_task);
3600	struct e1000_hw *hw = &adapter->hw;
3601	struct net_device *netdev = adapter->netdev;
3602	u32 link;
3603	int i;
3604
3605	link = igb_has_link(adapter);
3606	if (link) {
3607		if (!netif_carrier_ok(netdev)) {
3608			u32 ctrl;
3609			hw->mac.ops.get_speed_and_duplex(hw,
3610			                                 &adapter->link_speed,
3611			                                 &adapter->link_duplex);
3612
3613			ctrl = rd32(E1000_CTRL);
3614			/* Link status message must follow this format */
3615			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3616				 "Flow Control: %s\n",
3617			       netdev->name,
3618			       adapter->link_speed,
3619			       adapter->link_duplex == FULL_DUPLEX ?
3620				 "Full Duplex" : "Half Duplex",
3621			       ((ctrl & E1000_CTRL_TFCE) &&
3622			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3623			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3624			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3625
3626			/* check for thermal sensor event */
3627			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3628				printk(KERN_INFO "igb: %s The network adapter "
3629						 "link speed was downshifted "
3630						 "because it overheated.\n",
3631						 netdev->name);
3632			}
3633
3634			/* adjust timeout factor according to speed/duplex */
3635			adapter->tx_timeout_factor = 1;
3636			switch (adapter->link_speed) {
3637			case SPEED_10:
3638				adapter->tx_timeout_factor = 14;
3639				break;
3640			case SPEED_100:
3641				/* maybe add some timeout factor ? */
3642				break;
3643			}
3644
3645			netif_carrier_on(netdev);
3646
3647			igb_ping_all_vfs(adapter);
3648			igb_check_vf_rate_limit(adapter);
3649
3650			/* link state has changed, schedule phy info update */
3651			if (!test_bit(__IGB_DOWN, &adapter->state))
3652				mod_timer(&adapter->phy_info_timer,
3653					  round_jiffies(jiffies + 2 * HZ));
3654		}
3655	} else {
3656		if (netif_carrier_ok(netdev)) {
3657			adapter->link_speed = 0;
3658			adapter->link_duplex = 0;
3659
3660			/* check for thermal sensor event */
3661			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3662				printk(KERN_ERR "igb: %s The network adapter "
3663						"was stopped because it "
3664						"overheated.\n",
3665						netdev->name);
3666			}
3667
3668			/* Link status message must follow this format */
3669			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3670			       netdev->name);
3671			netif_carrier_off(netdev);
3672
3673			igb_ping_all_vfs(adapter);
3674
3675			/* link state has changed, schedule phy info update */
3676			if (!test_bit(__IGB_DOWN, &adapter->state))
3677				mod_timer(&adapter->phy_info_timer,
3678					  round_jiffies(jiffies + 2 * HZ));
3679		}
3680	}
3681
3682	spin_lock(&adapter->stats64_lock);
3683	igb_update_stats(adapter, &adapter->stats64);
3684	spin_unlock(&adapter->stats64_lock);
3685
3686	for (i = 0; i < adapter->num_tx_queues; i++) {
3687		struct igb_ring *tx_ring = adapter->tx_ring[i];
3688		if (!netif_carrier_ok(netdev)) {
3689			/* We've lost link, so the controller stops DMA,
3690			 * but we've got queued Tx work that's never going
3691			 * to get done, so reset controller to flush Tx.
3692			 * (Do the reset outside of interrupt context). */
3693			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3694				adapter->tx_timeout_count++;
3695				schedule_work(&adapter->reset_task);
3696				/* return immediately since reset is imminent */
3697				return;
3698			}
3699		}
3700
3701		/* Force detection of hung controller every watchdog period */
3702		tx_ring->detect_tx_hung = true;
3703	}
3704
3705	/* Cause software interrupt to ensure rx ring is cleaned */
3706	if (adapter->msix_entries) {
3707		u32 eics = 0;
3708		for (i = 0; i < adapter->num_q_vectors; i++) {
3709			struct igb_q_vector *q_vector = adapter->q_vector[i];
3710			eics |= q_vector->eims_value;
3711		}
3712		wr32(E1000_EICS, eics);
3713	} else {
3714		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3715	}
3716
3717	igb_spoof_check(adapter);
3718
3719	/* Reset the timer */
3720	if (!test_bit(__IGB_DOWN, &adapter->state))
3721		mod_timer(&adapter->watchdog_timer,
3722			  round_jiffies(jiffies + 2 * HZ));
3723}
3724
3725enum latency_range {
3726	lowest_latency = 0,
3727	low_latency = 1,
3728	bulk_latency = 2,
3729	latency_invalid = 255
3730};
3731
3732/**
3733 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3734 *
3735 *      Stores a new ITR value based strictly on packet size.  This
3736 *      algorithm is less sophisticated than that used in igb_update_itr,
3737 *      due to the difficulty of synchronizing statistics across multiple
3738 *      receive rings.  The divisors and thresholds used by this function
3739 *      were determined based on theoretical maximum wire speed and testing
3740 *      data, in order to minimize response time while increasing bulk
3741 *      throughput.
3742 *      This functionality is controlled by the InterruptThrottleRate module
3743 *      parameter (see igb_param.c)
3744 *      NOTE:  This function is called only when operating in a multiqueue
3745 *             receive environment.
3746 * @q_vector: pointer to q_vector
3747 **/
3748static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3749{
3750	int new_val = q_vector->itr_val;
3751	int avg_wire_size = 0;
3752	struct igb_adapter *adapter = q_vector->adapter;
3753	struct igb_ring *ring;
3754	unsigned int packets;
3755
3756	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3757	 * ints/sec (an ITR value of 976).
3758	 */
3759	if (adapter->link_speed != SPEED_1000) {
3760		new_val = 976;
3761		goto set_itr_val;
3762	}
3763
3764	ring = q_vector->rx_ring;
3765	if (ring) {
3766		packets = ACCESS_ONCE(ring->total_packets);
3767
3768		if (packets)
3769			avg_wire_size = ring->total_bytes / packets;
3770	}
3771
3772	ring = q_vector->tx_ring;
3773	if (ring) {
3774		packets = ACCESS_ONCE(ring->total_packets);
3775
3776		if (packets)
3777			avg_wire_size = max_t(u32, avg_wire_size,
3778			                      ring->total_bytes / packets);
3779	}
3780
3781	/* if avg_wire_size isn't set no work was done */
3782	if (!avg_wire_size)
3783		goto clear_counts;
3784
3785	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3786	avg_wire_size += 24;
3787
3788	/* Don't starve jumbo frames */
3789	avg_wire_size = min(avg_wire_size, 3000);
3790
3791	/* Give a little boost to mid-size frames */
3792	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3793		new_val = avg_wire_size / 3;
3794	else
3795		new_val = avg_wire_size / 2;
3796
3797	/* when in itr mode 3 do not exceed 20K ints/sec */
3798	if (adapter->rx_itr_setting == 3 && new_val < 196)
3799		new_val = 196;
3800
3801set_itr_val:
3802	if (new_val != q_vector->itr_val) {
3803		q_vector->itr_val = new_val;
3804		q_vector->set_itr = 1;
3805	}
3806clear_counts:
3807	if (q_vector->rx_ring) {
3808		q_vector->rx_ring->total_bytes = 0;
3809		q_vector->rx_ring->total_packets = 0;
3810	}
3811	if (q_vector->tx_ring) {
3812		q_vector->tx_ring->total_bytes = 0;
3813		q_vector->tx_ring->total_packets = 0;
3814	}
3815}
3816
3817/**
3818 * igb_update_itr - update the dynamic ITR value based on statistics
3819 *      Stores a new ITR value based on packets and byte
3820 *      counts during the last interrupt.  The advantage of per interrupt
3821 *      computation is faster updates and more accurate ITR for the current
3822 *      traffic pattern.  Constants in this function were computed
3823 *      based on theoretical maximum wire speed and thresholds were set based
3824 *      on testing data as well as attempting to minimize response time
3825 *      while increasing bulk throughput.
3826 *      This functionality is controlled by the InterruptThrottleRate module
3827 *      parameter (see igb_param.c)
3828 *      NOTE:  These calculations are only valid when operating in a single-
3829 *             queue environment.
3830 * @adapter: pointer to adapter
3831 * @itr_setting: current q_vector->itr_val
3832 * @packets: the number of packets during this measurement interval
3833 * @bytes: the number of bytes during this measurement interval
3834 **/
3835static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3836				   int packets, int bytes)
3837{
3838	unsigned int retval = itr_setting;
3839
3840	if (packets == 0)
3841		goto update_itr_done;
3842
3843	switch (itr_setting) {
3844	case lowest_latency:
3845		/* handle TSO and jumbo frames */
3846		if (bytes/packets > 8000)
3847			retval = bulk_latency;
3848		else if ((packets < 5) && (bytes > 512))
3849			retval = low_latency;
3850		break;
3851	case low_latency:  /* 50 usec aka 20000 ints/s */
3852		if (bytes > 10000) {
3853			/* this if handles the TSO accounting */
3854			if (bytes/packets > 8000) {
3855				retval = bulk_latency;
3856			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3857				retval = bulk_latency;
3858			} else if (packets > 35) {
3859				retval = lowest_latency;
3860			}
3861		} else if (bytes/packets > 2000) {
3862			retval = bulk_latency;
3863		} else if (packets <= 2 && bytes < 512) {
3864			retval = lowest_latency;
3865		}
3866		break;
3867	case bulk_latency: /* 250 usec aka 4000 ints/s */
3868		if (bytes > 25000) {
3869			if (packets > 35)
3870				retval = low_latency;
3871		} else if (bytes < 1500) {
3872			retval = low_latency;
3873		}
3874		break;
3875	}
3876
3877update_itr_done:
3878	return retval;
3879}
3880
3881static void igb_set_itr(struct igb_adapter *adapter)
3882{
3883	struct igb_q_vector *q_vector = adapter->q_vector[0];
3884	u16 current_itr;
3885	u32 new_itr = q_vector->itr_val;
3886
3887	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3888	if (adapter->link_speed != SPEED_1000) {
3889		current_itr = 0;
3890		new_itr = 4000;
3891		goto set_itr_now;
3892	}
3893
3894	adapter->rx_itr = igb_update_itr(adapter,
3895				    adapter->rx_itr,
3896				    q_vector->rx_ring->total_packets,
3897				    q_vector->rx_ring->total_bytes);
3898
3899	adapter->tx_itr = igb_update_itr(adapter,
3900				    adapter->tx_itr,
3901				    q_vector->tx_ring->total_packets,
3902				    q_vector->tx_ring->total_bytes);
3903	current_itr = max(adapter->rx_itr, adapter->tx_itr);
3904
3905	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3906	if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3907		current_itr = low_latency;
3908
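	/* itr_val is the interval the next interrupt write will program; the
	 * values below correspond to intervals of roughly 14, 50 and 250 us,
	 * hence the interrupt rates noted in the comments */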
3909	switch (current_itr) {
3910	/* counts and packets in update_itr are dependent on these numbers */
3911	case lowest_latency:
3912		new_itr = 56;  /* aka 70,000 ints/sec */
3913		break;
3914	case low_latency:
3915		new_itr = 196; /* aka 20,000 ints/sec */
3916		break;
3917	case bulk_latency:
3918		new_itr = 980; /* aka 4,000 ints/sec */
3919		break;
3920	default:
3921		break;
3922	}
3923
3924set_itr_now:
3925	q_vector->rx_ring->total_bytes = 0;
3926	q_vector->rx_ring->total_packets = 0;
3927	q_vector->tx_ring->total_bytes = 0;
3928	q_vector->tx_ring->total_packets = 0;
3929
3930	if (new_itr != q_vector->itr_val) {
3931		/* this attempts to bias the interrupt rate towards Bulk
3932		 * by adding intermediate steps when interrupt rate is
3933		 * increasing */
3934		new_itr = new_itr > q_vector->itr_val ?
3935		             max((new_itr * q_vector->itr_val) /
3936		                 (new_itr + (q_vector->itr_val >> 2)),
3937		                 new_itr) :
3938			     new_itr;
3939		/* Don't write the value here; it resets the adapter's
3940		 * internal timer, and causes us to delay far longer than
3941		 * we should between interrupts.  Instead, we write the ITR
3942		 * value at the beginning of the next interrupt so the timing
3943		 * ends up being correct.
3944		 */
3945		q_vector->itr_val = new_itr;
3946		q_vector->set_itr = 1;
3947	}
3948}
3949
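/*
 * tx_flags layout: the low bits carry the per-packet feature flags defined
 * below, while the upper 16 bits (IGB_TX_FLAGS_VLAN_MASK) hold the VLAN tag
 * taken from the skb, shifted up by IGB_TX_FLAGS_VLAN_SHIFT.
 */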
3950#define IGB_TX_FLAGS_CSUM		0x00000001
3951#define IGB_TX_FLAGS_VLAN		0x00000002
3952#define IGB_TX_FLAGS_TSO		0x00000004
3953#define IGB_TX_FLAGS_IPV4		0x00000008
3954#define IGB_TX_FLAGS_TSTAMP		0x00000010
3955#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
3956#define IGB_TX_FLAGS_VLAN_SHIFT		        16
3957
3958static inline int igb_tso_adv(struct igb_ring *tx_ring,
3959			      struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3960{
3961	struct e1000_adv_tx_context_desc *context_desc;
3962	unsigned int i;
3963	int err;
3964	struct igb_buffer *buffer_info;
3965	u32 info = 0, tu_cmd = 0;
3966	u32 mss_l4len_idx;
3967	u8 l4len;
3968
3969	if (skb_header_cloned(skb)) {
3970		err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3971		if (err)
3972			return err;
3973	}
3974
3975	l4len = tcp_hdrlen(skb);
3976	*hdr_len += l4len;
3977
3978	if (skb->protocol == htons(ETH_P_IP)) {
3979		struct iphdr *iph = ip_hdr(skb);
3980		iph->tot_len = 0;
3981		iph->check = 0;
3982		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3983							 iph->daddr, 0,
3984							 IPPROTO_TCP,
3985							 0);
3986	} else if (skb_is_gso_v6(skb)) {
3987		ipv6_hdr(skb)->payload_len = 0;
3988		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3989						       &ipv6_hdr(skb)->daddr,
3990						       0, IPPROTO_TCP, 0);
3991	}
3992
3993	i = tx_ring->next_to_use;
3994
3995	buffer_info = &tx_ring->buffer_info[i];
3996	context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3997	/* VLAN MACLEN IPLEN */
3998	if (tx_flags & IGB_TX_FLAGS_VLAN)
3999		info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4000	info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4001	*hdr_len += skb_network_offset(skb);
4002	info |= skb_network_header_len(skb);
4003	*hdr_len += skb_network_header_len(skb);
4004	context_desc->vlan_macip_lens = cpu_to_le32(info);
4005
4006	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4007	tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4008
4009	if (skb->protocol == htons(ETH_P_IP))
4010		tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4011	tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4012
4013	context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4014
4015	/* MSS L4LEN IDX */
4016	mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4017	mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4018
4019	/* For 82575, context index must be unique per ring. */
4020	if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4021		mss_l4len_idx |= tx_ring->reg_idx << 4;
4022
4023	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4024	context_desc->seqnum_seed = 0;
4025
4026	buffer_info->time_stamp = jiffies;
4027	buffer_info->next_to_watch = i;
4028	buffer_info->dma = 0;
4029	i++;
4030	if (i == tx_ring->count)
4031		i = 0;
4032
4033	tx_ring->next_to_use = i;
4034
4035	return true;
4036}
4037
4038static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4039				   struct sk_buff *skb, u32 tx_flags)
4040{
4041	struct e1000_adv_tx_context_desc *context_desc;
4042	struct device *dev = tx_ring->dev;
4043	struct igb_buffer *buffer_info;
4044	u32 info = 0, tu_cmd = 0;
4045	unsigned int i;
4046
4047	if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4048	    (tx_flags & IGB_TX_FLAGS_VLAN)) {
4049		i = tx_ring->next_to_use;
4050		buffer_info = &tx_ring->buffer_info[i];
4051		context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4052
4053		if (tx_flags & IGB_TX_FLAGS_VLAN)
4054			info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4055
4056		info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4057		if (skb->ip_summed == CHECKSUM_PARTIAL)
4058			info |= skb_network_header_len(skb);
4059
4060		context_desc->vlan_macip_lens = cpu_to_le32(info);
4061
4062		tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4063
4064		if (skb->ip_summed == CHECKSUM_PARTIAL) {
4065			__be16 protocol;
4066
4067			if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4068				const struct vlan_ethhdr *vhdr =
4069				          (const struct vlan_ethhdr*)skb->data;
4070
4071				protocol = vhdr->h_vlan_encapsulated_proto;
4072			} else {
4073				protocol = skb->protocol;
4074			}
4075
4076			switch (protocol) {
4077			case cpu_to_be16(ETH_P_IP):
4078				tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4079				if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4080					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4081				else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4082					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4083				break;
4084			case cpu_to_be16(ETH_P_IPV6):
4085				/* XXX what about other V6 headers?? */
4086				if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4087					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4088				else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4089					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4090				break;
4091			default:
4092				if (unlikely(net_ratelimit()))
4093					dev_warn(dev,
4094					    "partial checksum but proto=%x!\n",
4095					    skb->protocol);
4096				break;
4097			}
4098		}
4099
4100		context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4101		context_desc->seqnum_seed = 0;
4102		if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4103			context_desc->mss_l4len_idx =
4104				cpu_to_le32(tx_ring->reg_idx << 4);
4105
4106		buffer_info->time_stamp = jiffies;
4107		buffer_info->next_to_watch = i;
4108		buffer_info->dma = 0;
4109
4110		i++;
4111		if (i == tx_ring->count)
4112			i = 0;
4113		tx_ring->next_to_use = i;
4114
4115		return true;
4116	}
4117	return false;
4118}
4119
4120#define IGB_MAX_TXD_PWR	16
4121#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
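/*
 * A single advanced Tx data descriptor can carry just under 64K
 * (1 << IGB_MAX_TXD_PWR) bytes, which is why igb_tx_map_adv() BUG()s on any
 * head or fragment length that reaches that limit.
 */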
4122
4123static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4124				 unsigned int first)
4125{
4126	struct igb_buffer *buffer_info;
4127	struct device *dev = tx_ring->dev;
4128	unsigned int hlen = skb_headlen(skb);
4129	unsigned int count = 0, i;
4130	unsigned int f;
4131	u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4132
4133	i = tx_ring->next_to_use;
4134
4135	buffer_info = &tx_ring->buffer_info[i];
4136	BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4137	buffer_info->length = hlen;
4138	/* set time_stamp *before* dma to help avoid a possible race */
4139	buffer_info->time_stamp = jiffies;
4140	buffer_info->next_to_watch = i;
4141	buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4142					  DMA_TO_DEVICE);
4143	if (dma_mapping_error(dev, buffer_info->dma))
4144		goto dma_error;
4145
4146	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4147		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4148		unsigned int len = frag->size;
4149
4150		count++;
4151		i++;
4152		if (i == tx_ring->count)
4153			i = 0;
4154
4155		buffer_info = &tx_ring->buffer_info[i];
4156		BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4157		buffer_info->length = len;
4158		buffer_info->time_stamp = jiffies;
4159		buffer_info->next_to_watch = i;
4160		buffer_info->mapped_as_page = true;
4161		buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len,
4162						DMA_TO_DEVICE);
4163		if (dma_mapping_error(dev, buffer_info->dma))
4164			goto dma_error;
4165
4166	}
4167
4168	tx_ring->buffer_info[i].skb = skb;
4169	tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4170	/* multiply data chunks by size of headers */
4171	tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4172	tx_ring->buffer_info[i].gso_segs = gso_segs;
4173	tx_ring->buffer_info[first].next_to_watch = i;
4174
4175	return ++count;
4176
4177dma_error:
4178	dev_err(dev, "TX DMA map failed\n");
4179
4180	/* clear timestamp and dma mappings for failed buffer_info mapping */
4181	buffer_info->dma = 0;
4182	buffer_info->time_stamp = 0;
4183	buffer_info->length = 0;
4184	buffer_info->next_to_watch = 0;
4185	buffer_info->mapped_as_page = false;
4186
4187	/* clear timestamp and dma mappings for remaining portion of packet */
4188	while (count--) {
4189		if (i == 0)
4190			i = tx_ring->count;
4191		i--;
4192		buffer_info = &tx_ring->buffer_info[i];
4193		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4194	}
4195
4196	return 0;
4197}
4198
4199static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4200				    u32 tx_flags, int count, u32 paylen,
4201				    u8 hdr_len)
4202{
4203	union e1000_adv_tx_desc *tx_desc;
4204	struct igb_buffer *buffer_info;
4205	u32 olinfo_status = 0, cmd_type_len;
4206	unsigned int i = tx_ring->next_to_use;
4207
4208	cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4209			E1000_ADVTXD_DCMD_DEXT);
4210
4211	if (tx_flags & IGB_TX_FLAGS_VLAN)
4212		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4213
4214	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4215		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4216
4217	if (tx_flags & IGB_TX_FLAGS_TSO) {
4218		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4219
4220		/* insert tcp checksum */
4221		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4222
4223		/* insert ip checksum */
4224		if (tx_flags & IGB_TX_FLAGS_IPV4)
4225			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4226
4227	} else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4228		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4229	}
4230
4231	if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4232	    (tx_flags & (IGB_TX_FLAGS_CSUM |
4233	                 IGB_TX_FLAGS_TSO |
4234			 IGB_TX_FLAGS_VLAN)))
4235		olinfo_status |= tx_ring->reg_idx << 4;
4236
4237	olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4238
4239	do {
4240		buffer_info = &tx_ring->buffer_info[i];
4241		tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4242		tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4243		tx_desc->read.cmd_type_len =
4244			cpu_to_le32(cmd_type_len | buffer_info->length);
4245		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4246		count--;
4247		i++;
4248		if (i == tx_ring->count)
4249			i = 0;
4250	} while (count > 0);
4251
4252	tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4253	/* Force memory writes to complete before letting h/w
4254	 * know there are new descriptors to fetch.  (Only
4255	 * applicable for weak-ordered memory model archs,
4256	 * such as IA-64). */
4257	wmb();
4258
4259	tx_ring->next_to_use = i;
4260	writel(i, tx_ring->tail);
4261	/* we need this if more than one processor can write to our tail
4262	 * at a time; it synchronizes IO on IA64/Altix systems */
4263	mmiowb();
4264}
4265
4266static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4267{
4268	struct net_device *netdev = tx_ring->netdev;
4269
4270	netif_stop_subqueue(netdev, tx_ring->queue_index);
4271
4272	/* Herbert's original patch had:
4273	 *  smp_mb__after_netif_stop_queue();
4274	 * but since that doesn't exist yet, just open code it. */
4275	smp_mb();
4276
4277	/* We need to check again in case another CPU has just
4278	 * made room available. */
4279	if (igb_desc_unused(tx_ring) < size)
4280		return -EBUSY;
4281
4282	/* A reprieve! */
4283	netif_wake_subqueue(netdev, tx_ring->queue_index);
4284
4285	u64_stats_update_begin(&tx_ring->tx_syncp2);
4286	tx_ring->tx_stats.restart_queue2++;
4287	u64_stats_update_end(&tx_ring->tx_syncp2);
4288
4289	return 0;
4290}
4291
4292static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4293{
4294	if (igb_desc_unused(tx_ring) >= size)
4295		return 0;
4296	return __igb_maybe_stop_tx(tx_ring, size);
4297}
4298
4299netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4300				    struct igb_ring *tx_ring)
4301{
4302	int tso = 0, count;
4303	u32 tx_flags = 0;
4304	u16 first;
4305	u8 hdr_len = 0;
4306
4307	/* need: 1 descriptor per page,
4308	 *       + 2 desc gap to keep tail from touching head,
4309	 *       + 1 desc for skb->data,
4310	 *       + 1 desc for context descriptor,
4311	 * otherwise try next time */
4312	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4313		/* this is a hard error */
4314		return NETDEV_TX_BUSY;
4315	}
4316
4317	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4318		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4319		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4320	}
4321
4322	if (vlan_tx_tag_present(skb)) {
4323		tx_flags |= IGB_TX_FLAGS_VLAN;
4324		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4325	}
4326
4327	if (skb->protocol == htons(ETH_P_IP))
4328		tx_flags |= IGB_TX_FLAGS_IPV4;
4329
4330	first = tx_ring->next_to_use;
4331	if (skb_is_gso(skb)) {
4332		tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4333
4334		if (tso < 0) {
4335			dev_kfree_skb_any(skb);
4336			return NETDEV_TX_OK;
4337		}
4338	}
4339
4340	if (tso)
4341		tx_flags |= IGB_TX_FLAGS_TSO;
4342	else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4343	         (skb->ip_summed == CHECKSUM_PARTIAL))
4344		tx_flags |= IGB_TX_FLAGS_CSUM;
4345
4346	/*
4347	 * count reflects descriptors mapped, if 0 or less then mapping error
4348	 * has occurred and we need to rewind the descriptor queue
4349	 */
4350	count = igb_tx_map_adv(tx_ring, skb, first);
4351	if (!count) {
4352		dev_kfree_skb_any(skb);
4353		tx_ring->buffer_info[first].time_stamp = 0;
4354		tx_ring->next_to_use = first;
4355		return NETDEV_TX_OK;
4356	}
4357
4358	igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4359
4360	/* Make sure there is space in the ring for the next send. */
4361	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4362
4363	return NETDEV_TX_OK;
4364}
4365
4366static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4367				      struct net_device *netdev)
4368{
4369	struct igb_adapter *adapter = netdev_priv(netdev);
4370	struct igb_ring *tx_ring;
4371	int r_idx = 0;
4372
4373	if (test_bit(__IGB_DOWN, &adapter->state)) {
4374		dev_kfree_skb_any(skb);
4375		return NETDEV_TX_OK;
4376	}
4377
4378	if (skb->len <= 0) {
4379		dev_kfree_skb_any(skb);
4380		return NETDEV_TX_OK;
4381	}
4382
4383	r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4384	tx_ring = adapter->multi_tx_table[r_idx];
4385
4386	/* This goes back to the question of how to logically map a tx queue
4387	 * to a flow.  Right now, performance is impacted slightly negatively
4388	 * if using multiple tx queues.  If the stack breaks away from a
4389	 * single qdisc implementation, we can look at this again. */
4390	return igb_xmit_frame_ring_adv(skb, tx_ring);
4391}
4392
4393/**
4394 * igb_tx_timeout - Respond to a Tx Hang
4395 * @netdev: network interface device structure
4396 **/
4397static void igb_tx_timeout(struct net_device *netdev)
4398{
4399	struct igb_adapter *adapter = netdev_priv(netdev);
4400	struct e1000_hw *hw = &adapter->hw;
4401
4402	/* Do the reset outside of interrupt context */
4403	adapter->tx_timeout_count++;
4404
4405	if (hw->mac.type == e1000_82580)
4406		hw->dev_spec._82575.global_device_reset = true;
4407
4408	schedule_work(&adapter->reset_task);
4409	wr32(E1000_EICS,
4410	     (adapter->eims_enable_mask & ~adapter->eims_other));
4411}
4412
4413static void igb_reset_task(struct work_struct *work)
4414{
4415	struct igb_adapter *adapter;
4416	adapter = container_of(work, struct igb_adapter, reset_task);
4417
4418	igb_dump(adapter);
4419	netdev_err(adapter->netdev, "Reset adapter\n");
4420	igb_reinit_locked(adapter);
4421}
4422
4423/**
4424 * igb_get_stats64 - Get System Network Statistics
4425 * @netdev: network interface device structure
4426 * @stats: rtnl_link_stats64 pointer
4427 *
4428 **/
4429static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4430						 struct rtnl_link_stats64 *stats)
4431{
4432	struct igb_adapter *adapter = netdev_priv(netdev);
4433
4434	spin_lock(&adapter->stats64_lock);
4435	igb_update_stats(adapter, &adapter->stats64);
4436	memcpy(stats, &adapter->stats64, sizeof(*stats));
4437	spin_unlock(&adapter->stats64_lock);
4438
4439	return stats;
4440}
4441
4442/**
4443 * igb_change_mtu - Change the Maximum Transfer Unit
4444 * @netdev: network interface device structure
4445 * @new_mtu: new value for maximum frame size
4446 *
4447 * Returns 0 on success, negative on failure
4448 **/
4449static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4450{
4451	struct igb_adapter *adapter = netdev_priv(netdev);
4452	struct pci_dev *pdev = adapter->pdev;
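	/* max_frame accounts for the Ethernet header (14 bytes), FCS (4) and
	 * one VLAN tag (4) on top of the requested MTU */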
4453	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4454
4455	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4456		dev_err(&pdev->dev, "Invalid MTU setting\n");
4457		return -EINVAL;
4458	}
4459
4460#define MAX_STD_JUMBO_FRAME_SIZE 9238
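	/* 9238 = an MTU of 9216 plus the 22 bytes of header/FCS/VLAN overhead
	 * included in max_frame above, hence the "MTU > 9216" wording below */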
4461	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4462		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4463		return -EINVAL;
4464	}
4465
4466	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4467		msleep(1);
4468
4469	/* igb_down has a dependency on max_frame_size */
4470	adapter->max_frame_size = max_frame;
4471
4472	if (netif_running(netdev))
4473		igb_down(adapter);
4474
4475	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4476		 netdev->mtu, new_mtu);
4477	netdev->mtu = new_mtu;
4478
4479	if (netif_running(netdev))
4480		igb_up(adapter);
4481	else
4482		igb_reset(adapter);
4483
4484	clear_bit(__IGB_RESETTING, &adapter->state);
4485
4486	return 0;
4487}
4488
4489/**
4490 * igb_update_stats - Update the board statistics counters
4491 * @adapter: board private structure
4492 **/
4493
4494void igb_update_stats(struct igb_adapter *adapter,
4495		      struct rtnl_link_stats64 *net_stats)
4496{
4497	struct e1000_hw *hw = &adapter->hw;
4498	struct pci_dev *pdev = adapter->pdev;
4499	u32 reg, mpc;
4500	u16 phy_tmp;
4501	int i;
4502	u64 bytes, packets;
4503	unsigned int start;
4504	u64 _bytes, _packets;
4505
4506#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4507
4508	/*
4509	 * Prevent stats update while adapter is being reset, or if the pci
4510	 * connection is down.
4511	 */
4512	if (adapter->link_speed == 0)
4513		return;
4514	if (pci_channel_offline(pdev))
4515		return;
4516
4517	bytes = 0;
4518	packets = 0;
4519	for (i = 0; i < adapter->num_rx_queues; i++) {
4520		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4521		struct igb_ring *ring = adapter->rx_ring[i];
4522
4523		ring->rx_stats.drops += rqdpc_tmp;
4524		net_stats->rx_fifo_errors += rqdpc_tmp;
4525
4526		do {
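		/* u64_stats seqcount loop: retry until a consistent 64-bit
		 * byte/packet snapshot is read (matters on 32-bit hosts) */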
4527			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4528			_bytes = ring->rx_stats.bytes;
4529			_packets = ring->rx_stats.packets;
4530		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4531		bytes += _bytes;
4532		packets += _packets;
4533	}
4534
4535	net_stats->rx_bytes = bytes;
4536	net_stats->rx_packets = packets;
4537
4538	bytes = 0;
4539	packets = 0;
4540	for (i = 0; i < adapter->num_tx_queues; i++) {
4541		struct igb_ring *ring = adapter->tx_ring[i];
4542		do {
4543			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4544			_bytes = ring->tx_stats.bytes;
4545			_packets = ring->tx_stats.packets;
4546		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4547		bytes += _bytes;
4548		packets += _packets;
4549	}
4550	net_stats->tx_bytes = bytes;
4551	net_stats->tx_packets = packets;
4552
4553	/* read stats registers */
4554	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4555	adapter->stats.gprc += rd32(E1000_GPRC);
4556	adapter->stats.gorc += rd32(E1000_GORCL);
4557	rd32(E1000_GORCH); /* clear GORCL */
4558	adapter->stats.bprc += rd32(E1000_BPRC);
4559	adapter->stats.mprc += rd32(E1000_MPRC);
4560	adapter->stats.roc += rd32(E1000_ROC);
4561
4562	adapter->stats.prc64 += rd32(E1000_PRC64);
4563	adapter->stats.prc127 += rd32(E1000_PRC127);
4564	adapter->stats.prc255 += rd32(E1000_PRC255);
4565	adapter->stats.prc511 += rd32(E1000_PRC511);
4566	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4567	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4568	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4569	adapter->stats.sec += rd32(E1000_SEC);
4570
4571	mpc = rd32(E1000_MPC);
4572	adapter->stats.mpc += mpc;
4573	net_stats->rx_fifo_errors += mpc;
4574	adapter->stats.scc += rd32(E1000_SCC);
4575	adapter->stats.ecol += rd32(E1000_ECOL);
4576	adapter->stats.mcc += rd32(E1000_MCC);
4577	adapter->stats.latecol += rd32(E1000_LATECOL);
4578	adapter->stats.dc += rd32(E1000_DC);
4579	adapter->stats.rlec += rd32(E1000_RLEC);
4580	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4581	adapter->stats.xontxc += rd32(E1000_XONTXC);
4582	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4583	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4584	adapter->stats.fcruc += rd32(E1000_FCRUC);
4585	adapter->stats.gptc += rd32(E1000_GPTC);
4586	adapter->stats.gotc += rd32(E1000_GOTCL);
4587	rd32(E1000_GOTCH); /* clear GOTCL */
4588	adapter->stats.rnbc += rd32(E1000_RNBC);
4589	adapter->stats.ruc += rd32(E1000_RUC);
4590	adapter->stats.rfc += rd32(E1000_RFC);
4591	adapter->stats.rjc += rd32(E1000_RJC);
4592	adapter->stats.tor += rd32(E1000_TORH);
4593	adapter->stats.tot += rd32(E1000_TOTH);
4594	adapter->stats.tpr += rd32(E1000_TPR);
4595
4596	adapter->stats.ptc64 += rd32(E1000_PTC64);
4597	adapter->stats.ptc127 += rd32(E1000_PTC127);
4598	adapter->stats.ptc255 += rd32(E1000_PTC255);
4599	adapter->stats.ptc511 += rd32(E1000_PTC511);
4600	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4601	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4602
4603	adapter->stats.mptc += rd32(E1000_MPTC);
4604	adapter->stats.bptc += rd32(E1000_BPTC);
4605
4606	adapter->stats.tpt += rd32(E1000_TPT);
4607	adapter->stats.colc += rd32(E1000_COLC);
4608
4609	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4610	/* read internal phy specific stats */
4611	reg = rd32(E1000_CTRL_EXT);
4612	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4613		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4614		adapter->stats.tncrs += rd32(E1000_TNCRS);
4615	}
4616
4617	adapter->stats.tsctc += rd32(E1000_TSCTC);
4618	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4619
4620	adapter->stats.iac += rd32(E1000_IAC);
4621	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4622	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4623	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4624	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4625	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4626	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4627	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4628	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4629
4630	/* Fill out the OS statistics structure */
4631	net_stats->multicast = adapter->stats.mprc;
4632	net_stats->collisions = adapter->stats.colc;
4633
4634	/* Rx Errors */
4635
4636	/* RLEC on some newer hardware can be incorrect so build
4637	 * our own version based on RUC and ROC */
4638	net_stats->rx_errors = adapter->stats.rxerrc +
4639		adapter->stats.crcerrs + adapter->stats.algnerrc +
4640		adapter->stats.ruc + adapter->stats.roc +
4641		adapter->stats.cexterr;
4642	net_stats->rx_length_errors = adapter->stats.ruc +
4643				      adapter->stats.roc;
4644	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4645	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4646	net_stats->rx_missed_errors = adapter->stats.mpc;
4647
4648	/* Tx Errors */
4649	net_stats->tx_errors = adapter->stats.ecol +
4650			       adapter->stats.latecol;
4651	net_stats->tx_aborted_errors = adapter->stats.ecol;
4652	net_stats->tx_window_errors = adapter->stats.latecol;
4653	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4654
4655	/* Tx Dropped needs to be maintained elsewhere */
4656
4657	/* Phy Stats */
4658	if (hw->phy.media_type == e1000_media_type_copper) {
4659		if ((adapter->link_speed == SPEED_1000) &&
4660		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4661			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4662			adapter->phy_stats.idle_errors += phy_tmp;
4663		}
4664	}
4665
4666	/* Management Stats */
4667	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4668	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4669	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4670
4671	/* OS2BMC Stats */
4672	reg = rd32(E1000_MANC);
4673	if (reg & E1000_MANC_EN_BMC2OS) {
4674		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4675		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4676		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4677		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4678	}
4679}
4680
4681static irqreturn_t igb_msix_other(int irq, void *data)
4682{
4683	struct igb_adapter *adapter = data;
4684	struct e1000_hw *hw = &adapter->hw;
4685	u32 icr = rd32(E1000_ICR);
4686	/* reading ICR causes bit 31 of EICR to be cleared */
4687
4688	if (icr & E1000_ICR_DRSTA)
4689		schedule_work(&adapter->reset_task);
4690
4691	if (icr & E1000_ICR_DOUTSYNC) {
4692		/* HW is reporting DMA is out of sync */
4693		adapter->stats.doosync++;
4694		/* The DMA Out of Sync is also indication of a spoof event
4695		 * in IOV mode. Check the Wrong VM Behavior register to
4696		 * see if it is really a spoof event. */
4697		igb_check_wvbr(adapter);
4698	}
4699
4700	/* Check for a mailbox event */
4701	if (icr & E1000_ICR_VMMB)
4702		igb_msg_task(adapter);
4703
4704	if (icr & E1000_ICR_LSC) {
4705		hw->mac.get_link_status = 1;
4706		/* guard against interrupt when we're going down */
4707		if (!test_bit(__IGB_DOWN, &adapter->state))
4708			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4709	}
4710
4711	if (adapter->vfs_allocated_count)
4712		wr32(E1000_IMS, E1000_IMS_LSC |
4713				E1000_IMS_VMMB |
4714				E1000_IMS_DOUTSYNC);
4715	else
4716		wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4717	wr32(E1000_EIMS, adapter->eims_other);
4718
4719	return IRQ_HANDLED;
4720}
4721
4722static void igb_write_itr(struct igb_q_vector *q_vector)
4723{
4724	struct igb_adapter *adapter = q_vector->adapter;
4725	u32 itr_val = q_vector->itr_val & 0x7FFC;
4726
4727	if (!q_vector->set_itr)
4728		return;
4729
4730	if (!itr_val)
4731		itr_val = 0x4;
4732
4733	if (adapter->hw.mac.type == e1000_82575)
4734		itr_val |= itr_val << 16;
4735	else
4736		itr_val |= 0x8000000;
4737
4738	writel(itr_val, q_vector->itr_register);
4739	q_vector->set_itr = 0;
4740}
4741
4742static irqreturn_t igb_msix_ring(int irq, void *data)
4743{
4744	struct igb_q_vector *q_vector = data;
4745
4746	/* Write the ITR value calculated from the previous interrupt. */
4747	igb_write_itr(q_vector);
4748
4749	napi_schedule(&q_vector->napi);
4750
4751	return IRQ_HANDLED;
4752}
4753
4754#ifdef CONFIG_IGB_DCA
4755static void igb_update_dca(struct igb_q_vector *q_vector)
4756{
4757	struct igb_adapter *adapter = q_vector->adapter;
4758	struct e1000_hw *hw = &adapter->hw;
4759	int cpu = get_cpu();
4760
4761	if (q_vector->cpu == cpu)
4762		goto out_no_update;
4763
4764	if (q_vector->tx_ring) {
4765		int q = q_vector->tx_ring->reg_idx;
4766		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4767		if (hw->mac.type == e1000_82575) {
4768			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4769			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4770		} else {
4771			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4772			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4773			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4774		}
4775		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4776		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4777	}
4778	if (q_vector->rx_ring) {
4779		int q = q_vector->rx_ring->reg_idx;
4780		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4781		if (hw->mac.type == e1000_82575) {
4782			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4783			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4784		} else {
4785			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4786			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4787			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4788		}
4789		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4790		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4791		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4792		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4793	}
4794	q_vector->cpu = cpu;
4795out_no_update:
4796	put_cpu();
4797}
4798
4799static void igb_setup_dca(struct igb_adapter *adapter)
4800{
4801	struct e1000_hw *hw = &adapter->hw;
4802	int i;
4803
4804	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4805		return;
4806
4807	/* Always use CB2 mode, difference is masked in the CB driver. */
4808	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4809
4810	for (i = 0; i < adapter->num_q_vectors; i++) {
4811		adapter->q_vector[i]->cpu = -1;
4812		igb_update_dca(adapter->q_vector[i]);
4813	}
4814}
4815
4816static int __igb_notify_dca(struct device *dev, void *data)
4817{
4818	struct net_device *netdev = dev_get_drvdata(dev);
4819	struct igb_adapter *adapter = netdev_priv(netdev);
4820	struct pci_dev *pdev = adapter->pdev;
4821	struct e1000_hw *hw = &adapter->hw;
4822	unsigned long event = *(unsigned long *)data;
4823
4824	switch (event) {
4825	case DCA_PROVIDER_ADD:
4826		/* if already enabled, don't do it again */
4827		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4828			break;
4829		if (dca_add_requester(dev) == 0) {
4830			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4831			dev_info(&pdev->dev, "DCA enabled\n");
4832			igb_setup_dca(adapter);
4833			break;
4834		}
4835		/* Fall Through since DCA is disabled. */
4836	case DCA_PROVIDER_REMOVE:
4837		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4838			/* without this a class_device is left
4839			 * hanging around in the sysfs model */
4840			dca_remove_requester(dev);
4841			dev_info(&pdev->dev, "DCA disabled\n");
4842			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4843			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4844		}
4845		break;
4846	}
4847
4848	return 0;
4849}
4850
4851static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4852                          void *p)
4853{
4854	int ret_val;
4855
4856	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4857	                                 __igb_notify_dca);
4858
4859	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4860}
4861#endif /* CONFIG_IGB_DCA */
4862
4863static void igb_ping_all_vfs(struct igb_adapter *adapter)
4864{
4865	struct e1000_hw *hw = &adapter->hw;
4866	u32 ping;
4867	int i;
4868
4869	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4870		ping = E1000_PF_CONTROL_MSG;
4871		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4872			ping |= E1000_VT_MSGTYPE_CTS;
4873		igb_write_mbx(hw, &ping, 1, i);
4874	}
4875}
4876
4877static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4878{
4879	struct e1000_hw *hw = &adapter->hw;
4880	u32 vmolr = rd32(E1000_VMOLR(vf));
4881	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4882
4883	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4884	                    IGB_VF_FLAG_MULTI_PROMISC);
4885	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4886
4887	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4888		vmolr |= E1000_VMOLR_MPME;
4889		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4890		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4891	} else {
4892		/*
4893		 * if we have hashes and we are clearing a multicast promisc
4894		 * flag we need to write the hashes to the MTA as this step
4895		 * was previously skipped
4896		 */
4897		if (vf_data->num_vf_mc_hashes > 30) {
4898			vmolr |= E1000_VMOLR_MPME;
4899		} else if (vf_data->num_vf_mc_hashes) {
4900			int j;
4901			vmolr |= E1000_VMOLR_ROMPE;
4902			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4903				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4904		}
4905	}
4906
4907	wr32(E1000_VMOLR(vf), vmolr);
4908
4909	/* there are flags left unprocessed, likely not supported */
4910	if (*msgbuf & E1000_VT_MSGINFO_MASK)
4911		return -EINVAL;
4912
4913	return 0;
4914
4915}
4916
4917static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4918				  u32 *msgbuf, u32 vf)
4919{
4920	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4921	u16 *hash_list = (u16 *)&msgbuf[1];
4922	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4923	int i;
4924
4925	/* salt away the number of multicast addresses assigned
4926	 * to this VF for later use to restore when the PF multicast
4927	 * list changes
4928	 */
4929	vf_data->num_vf_mc_hashes = n;
4930
4931	/* only up to 30 hash values supported */
4932	if (n > 30)
4933		n = 30;
4934
4935	/* store the hashes for later use */
4936	for (i = 0; i < n; i++)
4937		vf_data->vf_mc_hashes[i] = hash_list[i];
4938
4939	/* Flush and reset the mta with the new values */
4940	igb_set_rx_mode(adapter->netdev);
4941
4942	return 0;
4943}
4944
4945static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4946{
4947	struct e1000_hw *hw = &adapter->hw;
4948	struct vf_data_storage *vf_data;
4949	int i, j;
4950
4951	for (i = 0; i < adapter->vfs_allocated_count; i++) {
4952		u32 vmolr = rd32(E1000_VMOLR(i));
4953		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4954
4955		vf_data = &adapter->vf_data[i];
4956
4957		if ((vf_data->num_vf_mc_hashes > 30) ||
4958		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4959			vmolr |= E1000_VMOLR_MPME;
4960		} else if (vf_data->num_vf_mc_hashes) {
4961			vmolr |= E1000_VMOLR_ROMPE;
4962			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4963				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4964		}
4965		wr32(E1000_VMOLR(i), vmolr);
4966	}
4967}
4968
4969static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4970{
4971	struct e1000_hw *hw = &adapter->hw;
4972	u32 pool_mask, reg, vid;
4973	int i;
4974
4975	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4976
4977	/* Find the vlan filter for this id */
4978	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4979		reg = rd32(E1000_VLVF(i));
4980
4981		/* remove the vf from the pool */
4982		reg &= ~pool_mask;
4983
4984		/* if pool is empty then remove entry from vfta */
4985		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4986		    (reg & E1000_VLVF_VLANID_ENABLE)) {
4987			reg = 0;
4988			vid = reg & E1000_VLVF_VLANID_MASK;
4989			igb_vfta_set(hw, vid, false);
4990		}
4991
4992		wr32(E1000_VLVF(i), reg);
4993	}
4994
4995	adapter->vf_data[vf].vlans_enabled = 0;
4996}
4997
4998static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4999{
5000	struct e1000_hw *hw = &adapter->hw;
5001	u32 reg, i;
5002
5003	/* The vlvf table only exists on 82576 hardware and newer */
5004	if (hw->mac.type < e1000_82576)
5005		return -1;
5006
5007	/* we only need to do this if VMDq is enabled */
5008	if (!adapter->vfs_allocated_count)
5009		return -1;
5010
5011	/* Find the vlan filter for this id */
5012	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5013		reg = rd32(E1000_VLVF(i));
5014		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5015		    vid == (reg & E1000_VLVF_VLANID_MASK))
5016			break;
5017	}
5018
5019	if (add) {
5020		if (i == E1000_VLVF_ARRAY_SIZE) {
5021			/* Did not find a matching VLAN ID entry that was
5022			 * enabled.  Search for a free filter entry, i.e.
5023			 * one without the enable bit set
5024			 */
5025			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5026				reg = rd32(E1000_VLVF(i));
5027				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5028					break;
5029			}
5030		}
5031		if (i < E1000_VLVF_ARRAY_SIZE) {
5032			/* Found an enabled/available entry */
5033			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5034
5035			/* if !enabled we need to set this up in vfta */
5036			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5037				/* add VID to filter table */
5038				igb_vfta_set(hw, vid, true);
5039				reg |= E1000_VLVF_VLANID_ENABLE;
5040			}
5041			reg &= ~E1000_VLVF_VLANID_MASK;
5042			reg |= vid;
5043			wr32(E1000_VLVF(i), reg);
5044
5045			/* do not modify RLPML for PF devices */
5046			if (vf >= adapter->vfs_allocated_count)
5047				return 0;
5048
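			/* first VLAN enabled for this VF: grow its maximum
			 * accepted frame size (the VMOLR RLPML field) by 4
			 * bytes to make room for the 802.1Q tag */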
5049			if (!adapter->vf_data[vf].vlans_enabled) {
5050				u32 size;
5051				reg = rd32(E1000_VMOLR(vf));
5052				size = reg & E1000_VMOLR_RLPML_MASK;
5053				size += 4;
5054				reg &= ~E1000_VMOLR_RLPML_MASK;
5055				reg |= size;
5056				wr32(E1000_VMOLR(vf), reg);
5057			}
5058
5059			adapter->vf_data[vf].vlans_enabled++;
5060			return 0;
5061		}
5062	} else {
5063		if (i < E1000_VLVF_ARRAY_SIZE) {
5064			/* remove vf from the pool */
5065			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5066			/* if pool is empty then remove entry from vfta */
5067			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5068				reg = 0;
5069				igb_vfta_set(hw, vid, false);
5070			}
5071			wr32(E1000_VLVF(i), reg);
5072
5073			/* do not modify RLPML for PF devices */
5074			if (vf >= adapter->vfs_allocated_count)
5075				return 0;
5076
5077			adapter->vf_data[vf].vlans_enabled--;
5078			if (!adapter->vf_data[vf].vlans_enabled) {
5079				u32 size;
5080				reg = rd32(E1000_VMOLR(vf));
5081				size = reg & E1000_VMOLR_RLPML_MASK;
5082				size -= 4;
5083				reg &= ~E1000_VMOLR_RLPML_MASK;
5084				reg |= size;
5085				wr32(E1000_VMOLR(vf), reg);
5086			}
5087		}
5088	}
5089	return 0;
5090}
5091
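/*
 * igb_set_vmvir programs the per-VF VLAN insert register: a non-zero vid
 * with VLANA_DEFAULT tells the hardware to insert that tag (the VF's port
 * VLAN) on transmit, while writing 0 disables default tag insertion.
 */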
5092static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5093{
5094	struct e1000_hw *hw = &adapter->hw;
5095
5096	if (vid)
5097		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5098	else
5099		wr32(E1000_VMVIR(vf), 0);
5100}
5101
5102static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5103			       int vf, u16 vlan, u8 qos)
5104{
5105	int err = 0;
5106	struct igb_adapter *adapter = netdev_priv(netdev);
5107
5108	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5109		return -EINVAL;
5110	if (vlan || qos) {
5111		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5112		if (err)
5113			goto out;
5114		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5115		igb_set_vmolr(adapter, vf, !vlan);
5116		adapter->vf_data[vf].pf_vlan = vlan;
5117		adapter->vf_data[vf].pf_qos = qos;
5118		dev_info(&adapter->pdev->dev,
5119			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5120		if (test_bit(__IGB_DOWN, &adapter->state)) {
5121			dev_warn(&adapter->pdev->dev,
5122				 "The VF VLAN has been set,"
5123				 " but the PF device is not up.\n");
5124			dev_warn(&adapter->pdev->dev,
5125				 "Bring the PF device up before"
5126				 " attempting to use the VF device.\n");
5127		}
5128	} else {
5129		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5130				   false, vf);
5131		igb_set_vmvir(adapter, vlan, vf);
5132		igb_set_vmolr(adapter, vf, true);
5133		adapter->vf_data[vf].pf_vlan = 0;
5134		adapter->vf_data[vf].pf_qos = 0;
5135	}
5136out:
5137	return err;
5138}
5139
5140static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5141{
5142	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5143	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5144
5145	return igb_vlvf_set(adapter, vid, add, vf);
5146}
5147
5148static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5149{
5150	/* clear flags - except flag that indicates PF has set the MAC */
5151	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5152	adapter->vf_data[vf].last_nack = jiffies;
5153
5154	/* reset offloads to defaults */
5155	igb_set_vmolr(adapter, vf, true);
5156
5157	/* reset vlans for device */
5158	igb_clear_vf_vfta(adapter, vf);
5159	if (adapter->vf_data[vf].pf_vlan)
5160		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5161				    adapter->vf_data[vf].pf_vlan,
5162				    adapter->vf_data[vf].pf_qos);
5163	else
5164		igb_clear_vf_vfta(adapter, vf);
5165
5166	/* reset multicast table array for vf */
5167	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5168
5169	/* Flush and reset the mta with the new values */
5170	igb_set_rx_mode(adapter->netdev);
5171}
5172
5173static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5174{
5175	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5176
5177	/* generate a new mac address as we were hotplug removed/added */
5178	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5179		random_ether_addr(vf_mac);
5180
5181	/* process remaining reset events */
5182	igb_vf_reset(adapter, vf);
5183}
5184
5185static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5186{
5187	struct e1000_hw *hw = &adapter->hw;
5188	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5189	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5190	u32 reg, msgbuf[3];
5191	u8 *addr = (u8 *)(&msgbuf[1]);
5192
5193	/* process all the same items cleared in a function level reset */
5194	igb_vf_reset(adapter, vf);
5195
5196	/* set vf mac address */
5197	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5198
5199	/* enable transmit and receive for vf */
5200	reg = rd32(E1000_VFTE);
5201	wr32(E1000_VFTE, reg | (1 << vf));
5202	reg = rd32(E1000_VFRE);
5203	wr32(E1000_VFRE, reg | (1 << vf));
5204
5205	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5206
5207	/* reply to reset with ack and vf mac address */
5208	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5209	memcpy(addr, vf_mac, 6);
5210	igb_write_mbx(hw, msgbuf, 3, vf);
5211}
5212
5213static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5214{
5215	/*
5216	 * The VF MAC Address is stored in a packed array of bytes
5217	 * starting at the second 32 bit word of the msg array
5218	 */
5219	unsigned char *addr = (unsigned char *)&msg[1];
5220	int err = -1;
5221
5222	if (is_valid_ether_addr(addr))
5223		err = igb_set_vf_mac(adapter, vf, addr);
5224
5225	return err;
5226}
5227
5228static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5229{
5230	struct e1000_hw *hw = &adapter->hw;
5231	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5232	u32 msg = E1000_VT_MSGTYPE_NACK;
5233
5234	/* if device isn't clear to send it shouldn't be reading either */
5235	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5236	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5237		igb_write_mbx(hw, &msg, 1, vf);
5238		vf_data->last_nack = jiffies;
5239	}
5240}
5241
5242static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5243{
5244	struct pci_dev *pdev = adapter->pdev;
5245	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5246	struct e1000_hw *hw = &adapter->hw;
5247	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5248	s32 retval;
5249
5250	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5251
5252	if (retval) {
5253		/* if receive failed, revoke the VF's CTS status and restart init */
5254		dev_err(&pdev->dev, "Error receiving message from VF\n");
5255		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5256		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5257			return;
5258		goto out;
5259	}
5260
5261	/* this is a message we already processed, do nothing */
5262	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5263		return;
5264
5265	/*
5266	 * until the vf completes a reset it should not be
5267	 * allowed to start any configuration.
5268	 */
5269
5270	if (msgbuf[0] == E1000_VF_RESET) {
5271		igb_vf_reset_msg(adapter, vf);
5272		return;
5273	}
5274
5275	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5276		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5277			return;
5278		retval = -1;
5279		goto out;
5280	}
5281
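	/* the low 16 bits of word 0 identify the request; the MSGINFO bits
	 * above them carry a small argument such as the multicast hash count
	 * used by E1000_VF_SET_MULTICAST */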
5282	switch (msgbuf[0] & 0xFFFF) {
5283	case E1000_VF_SET_MAC_ADDR:
5284		retval = -EINVAL;
5285		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5286			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5287		else
5288			dev_warn(&pdev->dev,
5289				 "VF %d attempted to override administratively "
5290				 "set MAC address\nReload the VF driver to "
5291				 "resume operations\n", vf);
5292		break;
5293	case E1000_VF_SET_PROMISC:
5294		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5295		break;
5296	case E1000_VF_SET_MULTICAST:
5297		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5298		break;
5299	case E1000_VF_SET_LPE:
5300		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5301		break;
5302	case E1000_VF_SET_VLAN:
5303		retval = -1;
5304		if (vf_data->pf_vlan)
5305			dev_warn(&pdev->dev,
5306				 "VF %d attempted to override administratively "
5307				 "set VLAN tag\nReload the VF driver to "
5308				 "resume operations\n", vf);
5309		else
5310			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5311		break;
5312	default:
5313		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5314		retval = -1;
5315		break;
5316	}
5317
5318	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5319out:
5320	/* notify the VF of the results of what it sent us */
5321	if (retval)
5322		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5323	else
5324		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5325
5326	igb_write_mbx(hw, msgbuf, 1, vf);
5327}
5328
5329static void igb_msg_task(struct igb_adapter *adapter)
5330{
5331	struct e1000_hw *hw = &adapter->hw;
5332	u32 vf;
5333
5334	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5335		/* process any reset requests */
5336		if (!igb_check_for_rst(hw, vf))
5337			igb_vf_reset_event(adapter, vf);
5338
5339		/* process any messages pending */
5340		if (!igb_check_for_msg(hw, vf))
5341			igb_rcv_msg_from_vf(adapter, vf);
5342
5343		/* process any acks */
5344		if (!igb_check_for_ack(hw, vf))
5345			igb_rcv_ack_from_vf(adapter, vf);
5346	}
5347}
5348
5349/**
5350 *  igb_set_uta - Set unicast filter table address
5351 *  @adapter: board private structure
5352 *
5353 *  The unicast table address is a register array of 32-bit registers.
5354 *  The table is meant to be used in a way similar to how the MTA is used;
5355 *  however, due to certain limitations in the hardware it is necessary to
5356 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5357 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5358 **/
5359static void igb_set_uta(struct igb_adapter *adapter)
5360{
5361	struct e1000_hw *hw = &adapter->hw;
5362	int i;
5363
5364	/* The UTA table only exists on 82576 hardware and newer */
5365	if (hw->mac.type < e1000_82576)
5366		return;
5367
5368	/* we only need to do this if VMDq is enabled */
5369	if (!adapter->vfs_allocated_count)
5370		return;
5371
5372	for (i = 0; i < hw->mac.uta_reg_count; i++)
5373		array_wr32(E1000_UTA, i, ~0);
5374}
5375
5376/**
5377 * igb_intr_msi - Interrupt Handler
5378 * @irq: interrupt number
5379 * @data: pointer to our adapter structure
5380 **/
5381static irqreturn_t igb_intr_msi(int irq, void *data)
5382{
5383	struct igb_adapter *adapter = data;
5384	struct igb_q_vector *q_vector = adapter->q_vector[0];
5385	struct e1000_hw *hw = &adapter->hw;
5386	/* read ICR disables interrupts using IAM */
5387	u32 icr = rd32(E1000_ICR);
5388
5389	igb_write_itr(q_vector);
5390
5391	if (icr & E1000_ICR_DRSTA)
5392		schedule_work(&adapter->reset_task);
5393
5394	if (icr & E1000_ICR_DOUTSYNC) {
5395		/* HW is reporting DMA is out of sync */
5396		adapter->stats.doosync++;
5397	}
5398
5399	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5400		hw->mac.get_link_status = 1;
5401		if (!test_bit(__IGB_DOWN, &adapter->state))
5402			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5403	}
5404
5405	napi_schedule(&q_vector->napi);
5406
5407	return IRQ_HANDLED;
5408}
5409
5410/**
5411 * igb_intr - Legacy Interrupt Handler
5412 * @irq: interrupt number
5413 * @data: pointer to our adapter structure
5414 **/
5415static irqreturn_t igb_intr(int irq, void *data)
5416{
5417	struct igb_adapter *adapter = data;
5418	struct igb_q_vector *q_vector = adapter->q_vector[0];
5419	struct e1000_hw *hw = &adapter->hw;
5420	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5421	 * need for the IMC write */
5422	u32 icr = rd32(E1000_ICR);
5423	if (!icr)
5424		return IRQ_NONE;  /* Not our interrupt */
5425
5426	igb_write_itr(q_vector);
5427
5428	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5429	 * not set, then the adapter didn't send an interrupt */
5430	if (!(icr & E1000_ICR_INT_ASSERTED))
5431		return IRQ_NONE;
5432
5433	if (icr & E1000_ICR_DRSTA)
5434		schedule_work(&adapter->reset_task);
5435
5436	if (icr & E1000_ICR_DOUTSYNC) {
5437		/* HW is reporting DMA is out of sync */
5438		adapter->stats.doosync++;
5439	}
5440
5441	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5442		hw->mac.get_link_status = 1;
5443		/* guard against interrupt when we're going down */
5444		if (!test_bit(__IGB_DOWN, &adapter->state))
5445			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5446	}
5447
5448	napi_schedule(&q_vector->napi);
5449
5450	return IRQ_HANDLED;
5451}
5452
5453static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5454{
5455	struct igb_adapter *adapter = q_vector->adapter;
5456	struct e1000_hw *hw = &adapter->hw;
5457
5458	if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5459	    (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5460		if (!adapter->msix_entries)
5461			igb_set_itr(adapter);
5462		else
5463			igb_update_ring_itr(q_vector);
5464	}
5465
5466	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5467		if (adapter->msix_entries)
5468			wr32(E1000_EIMS, q_vector->eims_value);
5469		else
5470			igb_irq_enable(adapter);
5471	}
5472}
5473
5474/**
5475 * igb_poll - NAPI Rx polling callback
5476 * @napi: napi polling structure
5477 * @budget: count of how many packets we should handle
5478 **/
5479static int igb_poll(struct napi_struct *napi, int budget)
5480{
5481	struct igb_q_vector *q_vector = container_of(napi,
5482	                                             struct igb_q_vector,
5483	                                             napi);
5484	int tx_clean_complete = 1, work_done = 0;
5485
5486#ifdef CONFIG_IGB_DCA
5487	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5488		igb_update_dca(q_vector);
5489#endif
5490	if (q_vector->tx_ring)
5491		tx_clean_complete = igb_clean_tx_irq(q_vector);
5492
5493	if (q_vector->rx_ring)
5494		igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5495
5496	if (!tx_clean_complete)
5497		work_done = budget;
5498
5499	/* If not enough Rx work done, exit the polling mode */
5500	if (work_done < budget) {
5501		napi_complete(napi);
5502		igb_ring_irq_enable(q_vector);
5503	}
5504
5505	return work_done;
5506}
5507
5508/**
5509 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5510 * @adapter: board private structure
5511 * @shhwtstamps: timestamp structure to update
5512 * @regval: unsigned 64bit system time value.
5513 *
5514 * We need to convert the system time value stored in the RX/TXSTMP registers
5515 * into a hwtstamp which can be used by the upper level timestamping functions
5516 */
5517static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5518                                   struct skb_shared_hwtstamps *shhwtstamps,
5519                                   u64 regval)
5520{
5521	u64 ns;
5522
5523	/*
5524	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5525	 * bit 24 to match the clock shift we set up earlier.
5526	 */
5527	if (adapter->hw.mac.type == e1000_82580)
5528		regval <<= IGB_82580_TSYNC_SHIFT;
5529
5530	ns = timecounter_cyc2time(&adapter->clock, regval);
5531	timecompare_update(&adapter->compare, ns);
5532	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5533	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5534	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5535}
5536
5537/**
5538 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5539 * @q_vector: pointer to q_vector containing needed info
5540 * @buffer: pointer to igb_buffer structure
5541 *
5542 * If we were asked to do hardware stamping and such a time stamp is
5543 * available, then it must have been for this skb here because we
5544 * allow only one such packet into the queue.
5545 */
5546static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5547{
5548	struct igb_adapter *adapter = q_vector->adapter;
5549	struct e1000_hw *hw = &adapter->hw;
5550	struct skb_shared_hwtstamps shhwtstamps;
5551	u64 regval;
5552
5553	/* if skb does not support hw timestamp or TX stamp not valid exit */
5554	if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5555	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5556		return;
5557
5558	regval = rd32(E1000_TXSTMPL);
5559	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5560
5561	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5562	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5563}
5564
5565/**
5566 * igb_clean_tx_irq - Reclaim resources after transmit completes
5567 * @q_vector: pointer to q_vector containing needed info
5568 * returns true if ring is completely cleaned
5569 **/
5570static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5571{
5572	struct igb_adapter *adapter = q_vector->adapter;
5573	struct igb_ring *tx_ring = q_vector->tx_ring;
5574	struct net_device *netdev = tx_ring->netdev;
5575	struct e1000_hw *hw = &adapter->hw;
5576	struct igb_buffer *buffer_info;
5577	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5578	unsigned int total_bytes = 0, total_packets = 0;
5579	unsigned int i, eop, count = 0;
5580	bool cleaned = false;
5581
5582	i = tx_ring->next_to_clean;
5583	eop = tx_ring->buffer_info[i].next_to_watch;
5584	eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5585
5586	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5587	       (count < tx_ring->count)) {
5588		rmb();	/* read buffer_info after eop_desc status */
5589		for (cleaned = false; !cleaned; count++) {
5590			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5591			buffer_info = &tx_ring->buffer_info[i];
5592			cleaned = (i == eop);
5593
5594			if (buffer_info->skb) {
5595				total_bytes += buffer_info->bytecount;
5596				/* gso_segs is currently only valid for tcp */
5597				total_packets += buffer_info->gso_segs;
5598				igb_tx_hwtstamp(q_vector, buffer_info);
5599			}
5600
5601			igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5602			tx_desc->wb.status = 0;
5603
5604			i++;
5605			if (i == tx_ring->count)
5606				i = 0;
5607		}
5608		eop = tx_ring->buffer_info[i].next_to_watch;
5609		eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5610	}
5611
5612	tx_ring->next_to_clean = i;
5613
5614	if (unlikely(count &&
5615		     netif_carrier_ok(netdev) &&
5616		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5617		/* Make sure that anybody stopping the queue after this
5618		 * sees the new next_to_clean.
5619		 */
5620		smp_mb();
5621		if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5622		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5623			netif_wake_subqueue(netdev, tx_ring->queue_index);
5624
5625			u64_stats_update_begin(&tx_ring->tx_syncp);
5626			tx_ring->tx_stats.restart_queue++;
5627			u64_stats_update_end(&tx_ring->tx_syncp);
5628		}
5629	}
5630
5631	if (tx_ring->detect_tx_hung) {
5632		/* Detect a transmit hang in hardware, this serializes the
5633		 * check with the clearing of time_stamp and movement of i */
5634		tx_ring->detect_tx_hung = false;
5635		if (tx_ring->buffer_info[i].time_stamp &&
5636		    time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5637			       (adapter->tx_timeout_factor * HZ)) &&
5638		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5639
5640			/* detected Tx unit hang */
5641			dev_err(tx_ring->dev,
5642				"Detected Tx Unit Hang\n"
5643				"  Tx Queue             <%d>\n"
5644				"  TDH                  <%x>\n"
5645				"  TDT                  <%x>\n"
5646				"  next_to_use          <%x>\n"
5647				"  next_to_clean        <%x>\n"
5648				"buffer_info[next_to_clean]\n"
5649				"  time_stamp           <%lx>\n"
5650				"  next_to_watch        <%x>\n"
5651				"  jiffies              <%lx>\n"
5652				"  desc.status          <%x>\n",
5653				tx_ring->queue_index,
5654				rd32(E1000_TDH(tx_ring->reg_idx)),
5655				readl(tx_ring->tail),
5656				tx_ring->next_to_use,
5657				tx_ring->next_to_clean,
5658				tx_ring->buffer_info[eop].time_stamp,
5659				eop,
5660				jiffies,
5661				eop_desc->wb.status);
5662			netif_stop_subqueue(netdev, tx_ring->queue_index);
5663		}
5664	}
5665	tx_ring->total_bytes += total_bytes;
5666	tx_ring->total_packets += total_packets;
5667	u64_stats_update_begin(&tx_ring->tx_syncp);
5668	tx_ring->tx_stats.bytes += total_bytes;
5669	tx_ring->tx_stats.packets += total_packets;
5670	u64_stats_update_end(&tx_ring->tx_syncp);
5671	return count < tx_ring->count;
5672}
5673
5674static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5675				       u32 status_err, struct sk_buff *skb)
5676{
5677	skb_checksum_none_assert(skb);
5678
5679	/* Ignore Checksum bit is set or checksum is disabled through ethtool */
5680	if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5681	     (status_err & E1000_RXD_STAT_IXSM))
5682		return;
5683
5684	/* TCP/UDP checksum error bit is set */
5685	if (status_err &
5686	    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5687		/*
5688		 * work around errata with sctp packets where the TCPE aka
5689		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5690		 * packets (i.e. let the stack check the crc32c)
5691		 */
5692		if ((skb->len == 60) &&
5693		    (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5694			u64_stats_update_begin(&ring->rx_syncp);
5695			ring->rx_stats.csum_err++;
5696			u64_stats_update_end(&ring->rx_syncp);
5697		}
5698		/* let the stack verify checksum errors */
5699		return;
5700	}
5701	/* It must be a TCP or UDP packet with a valid checksum */
5702	if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5703		skb->ip_summed = CHECKSUM_UNNECESSARY;
5704
5705	dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5706}
5707
5708static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5709                                   struct sk_buff *skb)
5710{
5711	struct igb_adapter *adapter = q_vector->adapter;
5712	struct e1000_hw *hw = &adapter->hw;
5713	u64 regval;
5714
5715	/*
5716	 * If this bit is set, then the RX registers contain the time stamp. No
5717	 * other packet will be time stamped until we read these registers, so
5718	 * read the registers to make them available again. Because only one
5719	 * packet can be time stamped at a time, we know that the register
5720	 * values must belong to this one here and therefore we don't need to
5721	 * compare any of the additional attributes stored for it.
5722	 *
5723	 * If nothing went wrong, then it should have a shared tx_flags that we
5724	 * can turn into a skb_shared_hwtstamps.
5725	 */
5726	if (staterr & E1000_RXDADV_STAT_TSIP) {
5727		u32 *stamp = (u32 *)skb->data;
5728		regval = le32_to_cpu(*(stamp + 2));
5729		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5730		skb_pull(skb, IGB_TS_HDR_LEN);
5731	} else {
5732		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5733			return;
5734
5735		regval = rd32(E1000_RXSTMPL);
5736		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5737	}
5738
5739	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5740}
5741static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5742{
5743	/* HW will not DMA in data larger than the given buffer, even if it
5744	 * parses the (NFS, of course) header to be larger.  In that case, it
5745	 * fills the header buffer and spills the rest into the page.
5746	 */
5747	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5748	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5749	if (hlen > IGB_RX_HDR_LEN)
5750		hlen = IGB_RX_HDR_LEN;
5751	return hlen;
5752}
5753
5754static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5755                                 int *work_done, int budget)
5756{
5757	struct igb_ring *rx_ring = q_vector->rx_ring;
5758	struct net_device *netdev = rx_ring->netdev;
5759	struct device *dev = rx_ring->dev;
5760	union e1000_adv_rx_desc *rx_desc, *next_rxd;
5761	struct igb_buffer *buffer_info, *next_buffer;
5762	struct sk_buff *skb;
5763	bool cleaned = false;
5764	u16 cleaned_count = igb_desc_unused(rx_ring);
5765	int current_node = numa_node_id();
5766	unsigned int total_bytes = 0, total_packets = 0;
5767	unsigned int i;
5768	u32 staterr;
5769	u16 length;
5770
5771	i = rx_ring->next_to_clean;
5772	buffer_info = &rx_ring->buffer_info[i];
5773	rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5774	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5775
5776	while (staterr & E1000_RXD_STAT_DD) {
5777		if (*work_done >= budget)
5778			break;
5779		(*work_done)++;
5780		rmb(); /* read descriptor and rx_buffer_info after status DD */
5781
5782		skb = buffer_info->skb;
5783		prefetch(skb->data - NET_IP_ALIGN);
5784		buffer_info->skb = NULL;
5785
5786		i++;
5787		if (i == rx_ring->count)
5788			i = 0;
5789
5790		next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5791		prefetch(next_rxd);
5792		next_buffer = &rx_ring->buffer_info[i];
5793
5794		length = le16_to_cpu(rx_desc->wb.upper.length);
5795		cleaned = true;
5796		cleaned_count++;
5797
5798		if (buffer_info->dma) {
5799			dma_unmap_single(dev, buffer_info->dma,
5800					 IGB_RX_HDR_LEN,
5801					 DMA_FROM_DEVICE);
5802			buffer_info->dma = 0;
5803			skb_put(skb, igb_get_hlen(rx_desc));
5804		}
5805
5806		if (length) {
5807			dma_unmap_page(dev, buffer_info->page_dma,
5808				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
5809			buffer_info->page_dma = 0;
5810
5811			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5812						buffer_info->page,
5813						buffer_info->page_offset,
5814						length);
5815
5816			if ((page_count(buffer_info->page) != 1) ||
5817			    (page_to_nid(buffer_info->page) != current_node))
5818				buffer_info->page = NULL;
5819			else
5820				get_page(buffer_info->page);
5821
5822			skb->len += length;
5823			skb->data_len += length;
5824			skb->truesize += length;
5825		}
5826
5827		if (!(staterr & E1000_RXD_STAT_EOP)) {
5828			buffer_info->skb = next_buffer->skb;
5829			buffer_info->dma = next_buffer->dma;
5830			next_buffer->skb = skb;
5831			next_buffer->dma = 0;
5832			goto next_desc;
5833		}
5834
5835		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5836			dev_kfree_skb_irq(skb);
5837			goto next_desc;
5838		}
5839
5840		if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5841			igb_rx_hwtstamp(q_vector, staterr, skb);
5842		total_bytes += skb->len;
5843		total_packets++;
5844
5845		igb_rx_checksum_adv(rx_ring, staterr, skb);
5846
5847		skb->protocol = eth_type_trans(skb, netdev);
5848
5849		if (staterr & E1000_RXD_STAT_VP) {
5850			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5851
5852			__vlan_hwaccel_put_tag(skb, vid);
5853		}
5854		napi_gro_receive(&q_vector->napi, skb);
5855
5856next_desc:
5857		/* return some buffers to hardware, one at a time is too slow */
5858		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5859			igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5860			cleaned_count = 0;
5861		}
5862
5863		/* use prefetched values */
5864		rx_desc = next_rxd;
5865		buffer_info = next_buffer;
5866		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5867	}
5868
5869	rx_ring->next_to_clean = i;
5870	u64_stats_update_begin(&rx_ring->rx_syncp);
5871	rx_ring->rx_stats.packets += total_packets;
5872	rx_ring->rx_stats.bytes += total_bytes;
5873	u64_stats_update_end(&rx_ring->rx_syncp);
5874	rx_ring->total_packets += total_packets;
5875	rx_ring->total_bytes += total_bytes;
5876
5877	if (cleaned_count)
5878		igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5879
5880	return cleaned;
5881}
5882
5883static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
5884				 struct igb_buffer *bi)
5885{
5886	struct sk_buff *skb = bi->skb;
5887	dma_addr_t dma = bi->dma;
5888
5889	if (dma)
5890		return true;
5891
5892	if (likely(!skb)) {
5893		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
5894						IGB_RX_HDR_LEN);
5895		bi->skb = skb;
5896		if (!skb) {
5897			rx_ring->rx_stats.alloc_failed++;
5898			return false;
5899		}
5900
5901		/* initialize skb for ring */
5902		skb_record_rx_queue(skb, rx_ring->queue_index);
5903	}
5904
5905	dma = dma_map_single(rx_ring->dev, skb->data,
5906			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
5907
5908	if (dma_mapping_error(rx_ring->dev, dma)) {
5909		rx_ring->rx_stats.alloc_failed++;
5910		return false;
5911	}
5912
5913	bi->dma = dma;
5914	return true;
5915}
5916
5917static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
5918				  struct igb_buffer *bi)
5919{
5920	struct page *page = bi->page;
5921	dma_addr_t page_dma = bi->page_dma;
5922	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
5923
5924	if (page_dma)
5925		return true;
5926
5927	if (!page) {
5928		page = netdev_alloc_page(rx_ring->netdev);
5929		bi->page = page;
5930		if (unlikely(!page)) {
5931			rx_ring->rx_stats.alloc_failed++;
5932			return false;
5933		}
5934	}
5935
5936	page_dma = dma_map_page(rx_ring->dev, page,
5937				page_offset, PAGE_SIZE / 2,
5938				DMA_FROM_DEVICE);
5939
5940	if (dma_mapping_error(rx_ring->dev, page_dma)) {
5941		rx_ring->rx_stats.alloc_failed++;
5942		return false;
5943	}
5944
5945	bi->page_dma = page_dma;
5946	bi->page_offset = page_offset;
5947	return true;
5948}
5949
5950/**
5951 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5952 * @rx_ring: rx descriptor ring to place new buffers on
 * @cleaned_count: number of buffers to replace
5953 **/
5954void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, u16 cleaned_count)
5955{
5956	union e1000_adv_rx_desc *rx_desc;
5957	struct igb_buffer *bi;
5958	u16 i = rx_ring->next_to_use;
5959
5960	rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5961	bi = &rx_ring->buffer_info[i];
5962	i -= rx_ring->count;
5963
5964	while (cleaned_count--) {
5965		if (!igb_alloc_mapped_skb(rx_ring, bi))
5966			break;
5967
5968		/* Refresh the desc even if buffer_addrs didn't change
5969		 * because each write-back erases this info. */
5970		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
5971
5972		if (!igb_alloc_mapped_page(rx_ring, bi))
5973			break;
5974
5975		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
5976
5977		rx_desc++;
5978		bi++;
5979		i++;
5980		if (unlikely(!i)) {
5981			rx_desc = E1000_RX_DESC_ADV(*rx_ring, 0);
5982			bi = rx_ring->buffer_info;
5983			i -= rx_ring->count;
5984		}
5985
5986		/* clear the hdr_addr for the next_to_use descriptor */
5987		rx_desc->read.hdr_addr = 0;
5988	}
5989
5990	i += rx_ring->count;
5991
5992	if (rx_ring->next_to_use != i) {
5993		rx_ring->next_to_use = i;
5994
5995		/* Force memory writes to complete before letting h/w
5996		 * know there are new descriptors to fetch.  (Only
5997		 * applicable for weak-ordered memory model archs,
5998		 * such as IA-64). */
5999		wmb();
6000		writel(i, rx_ring->tail);
6001	}
6002}
6003
6004/**
6005 * igb_mii_ioctl - handle MII ioctls (SIOCGMIIPHY/SIOCGMIIREG)
6006 * @netdev: network interface device structure
6007 * @ifr: interface request structure carrying the mii_ioctl_data
6008 * @cmd: ioctl command
6009 **/
6010static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6011{
6012	struct igb_adapter *adapter = netdev_priv(netdev);
6013	struct mii_ioctl_data *data = if_mii(ifr);
6014
6015	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6016		return -EOPNOTSUPP;
6017
6018	switch (cmd) {
6019	case SIOCGMIIPHY:
6020		data->phy_id = adapter->hw.phy.addr;
6021		break;
6022	case SIOCGMIIREG:
6023		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6024		                     &data->val_out))
6025			return -EIO;
6026		break;
6027	case SIOCSMIIREG:
6028	default:
6029		return -EOPNOTSUPP;
6030	}
6031	return 0;
6032}
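
/*
 * Illustrative sketch (not part of the driver): user space reaches these
 * MII ioctls through a struct mii_ioctl_data overlaid on the ifreq, e.g.
 * (the interface name and socket type below are placeholders):
 *
 *	struct ifreq ifr = { 0 };
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCGMIIPHY, &ifr);	(fills mii->phy_id)
 *	mii->reg_num = MII_BMSR;
 *	ioctl(fd, SIOCGMIIREG, &ifr);	(PHY status lands in mii->val_out)
 *
 * Only the read side is implemented above; SIOCSMIIREG returns -EOPNOTSUPP.
 */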
6033
6034/**
6035 * igb_hwtstamp_ioctl - control hardware time stamping
6036 * @netdev: network interface device structure
6037 * @ifr: interface request structure carrying the hwtstamp_config
6038 * @cmd: ioctl command (SIOCSHWTSTAMP)
6039 *
6040 * Outgoing time stamping can be enabled and disabled. Play nice and
6041 * disable it when requested, although it shouldn't cause any overhead
6042 * when no packet needs it. At most one packet in the queue may be
6043 * marked for time stamping, otherwise it would be impossible to tell
6044 * for sure to which packet the hardware time stamp belongs.
6045 *
6046 * Incoming time stamping has to be configured via the hardware
6047 * filters. Not all combinations are supported, in particular event
6048 * type has to be specified. Matching the kind of event packet is
6049 * not supported, with the exception of "all V2 events regardless of
6050 * layer 2 or 4".
6051 *
6052 **/
6053static int igb_hwtstamp_ioctl(struct net_device *netdev,
6054			      struct ifreq *ifr, int cmd)
6055{
6056	struct igb_adapter *adapter = netdev_priv(netdev);
6057	struct e1000_hw *hw = &adapter->hw;
6058	struct hwtstamp_config config;
6059	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6060	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6061	u32 tsync_rx_cfg = 0;
6062	bool is_l4 = false;
6063	bool is_l2 = false;
6064	u32 regval;
6065
6066	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6067		return -EFAULT;
6068
6069	/* reserved for future extensions */
6070	if (config.flags)
6071		return -EINVAL;
6072
6073	switch (config.tx_type) {
6074	case HWTSTAMP_TX_OFF:
6075		tsync_tx_ctl = 0;
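		/* fall through */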
6076	case HWTSTAMP_TX_ON:
6077		break;
6078	default:
6079		return -ERANGE;
6080	}
6081
6082	switch (config.rx_filter) {
6083	case HWTSTAMP_FILTER_NONE:
6084		tsync_rx_ctl = 0;
6085		break;
6086	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6087	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6088	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6089	case HWTSTAMP_FILTER_ALL:
6090		/*
6091		 * register TSYNCRXCFG must be set, therefore it is not
6092		 * possible to time stamp both Sync and Delay_Req messages
6093		 * => fall back to time stamping all packets
6094		 */
6095		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6096		config.rx_filter = HWTSTAMP_FILTER_ALL;
6097		break;
6098	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6099		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6100		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6101		is_l4 = true;
6102		break;
6103	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6104		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6105		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6106		is_l4 = true;
6107		break;
6108	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6109	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6110		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6111		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6112		is_l2 = true;
6113		is_l4 = true;
6114		config.rx_filter = HWTSTAMP_FILTER_SOME;
6115		break;
6116	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6117	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6118		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6119		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6120		is_l2 = true;
6121		is_l4 = true;
6122		config.rx_filter = HWTSTAMP_FILTER_SOME;
6123		break;
6124	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6125	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6126	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6127		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6128		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6129		is_l2 = true;
6130		break;
6131	default:
6132		return -ERANGE;
6133	}
6134
6135	if (hw->mac.type == e1000_82575) {
6136		if (tsync_rx_ctl | tsync_tx_ctl)
6137			return -EINVAL;
6138		return 0;
6139	}
6140
6141	/*
6142	 * Per-packet timestamping only works if all packets are
6143	 * timestamped, so enable timestamping in all packets as
6144	 * long as one rx filter was configured.
6145	 */
6146	if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6147		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6148		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6149	}
6150
6151	/* enable/disable TX */
6152	regval = rd32(E1000_TSYNCTXCTL);
6153	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6154	regval |= tsync_tx_ctl;
6155	wr32(E1000_TSYNCTXCTL, regval);
6156
6157	/* enable/disable RX */
6158	regval = rd32(E1000_TSYNCRXCTL);
6159	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6160	regval |= tsync_rx_ctl;
6161	wr32(E1000_TSYNCRXCTL, regval);
6162
6163	/* define which PTP packets are time stamped */
6164	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6165
6166	/* define ethertype filter for timestamped packets */
6167	if (is_l2)
6168		wr32(E1000_ETQF(3),
6169		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6170		                 E1000_ETQF_1588 | /* enable timestamping */
6171		                 ETH_P_1588));     /* 1588 eth protocol type */
6172	else
6173		wr32(E1000_ETQF(3), 0);
6174
6175#define PTP_PORT 319
6176	/* L4 Queue Filter[3]: filter by destination port and protocol */
6177	if (is_l4) {
6178		u32 ftqf = (IPPROTO_UDP /* UDP */
6179			| E1000_FTQF_VF_BP /* VF not compared */
6180			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6181			| E1000_FTQF_MASK); /* mask all inputs */
6182		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6183
6184		wr32(E1000_IMIR(3), htons(PTP_PORT));
6185		wr32(E1000_IMIREXT(3),
6186		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6187		if (hw->mac.type == e1000_82576) {
6188			/* enable source port check */
6189			wr32(E1000_SPQF(3), htons(PTP_PORT));
6190			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6191		}
6192		wr32(E1000_FTQF(3), ftqf);
6193	} else {
6194		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6195	}
6196	wrfl();
6197
6198	adapter->hwtstamp_config = config;
6199
6200	/* clear TX/RX time stamp registers, just to be sure */
6201	regval = rd32(E1000_TXSTMPH);
6202	regval = rd32(E1000_RXSTMPH);
6203
6204	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6205		-EFAULT : 0;
6206}
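
/*
 * Illustrative sketch (not part of the driver) of driving this ioctl from
 * user space, assuming <linux/net_tstamp.h> and a UDP socket; the
 * interface name is a placeholder:
 *
 *	struct hwtstamp_config cfg = { 0 };
 *	struct ifreq ifr = { 0 };
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	cfg.tx_type = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter reports what was actually enabled, which may be
 * broader than requested (see the fall back to HWTSTAMP_FILTER_ALL above).
 */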
6207
6208/**
6209 * igb_ioctl - dispatch device ioctls
6210 * @netdev: network interface device structure
6211 * @ifr: interface request structure
6212 * @cmd: ioctl command
6213 **/
6214static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6215{
6216	switch (cmd) {
6217	case SIOCGMIIPHY:
6218	case SIOCGMIIREG:
6219	case SIOCSMIIREG:
6220		return igb_mii_ioctl(netdev, ifr, cmd);
6221	case SIOCSHWTSTAMP:
6222		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6223	default:
6224		return -EOPNOTSUPP;
6225	}
6226}
6227
6228s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6229{
6230	struct igb_adapter *adapter = hw->back;
6231	u16 cap_offset;
6232
6233	cap_offset = adapter->pdev->pcie_cap;
6234	if (!cap_offset)
6235		return -E1000_ERR_CONFIG;
6236
6237	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6238
6239	return 0;
6240}
6241
6242s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6243{
6244	struct igb_adapter *adapter = hw->back;
6245	u16 cap_offset;
6246
6247	cap_offset = adapter->pdev->pcie_cap;
6248	if (!cap_offset)
6249		return -E1000_ERR_CONFIG;
6250
6251	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6252
6253	return 0;
6254}
6255
6256static void igb_vlan_mode(struct net_device *netdev, u32 features)
6257{
6258	struct igb_adapter *adapter = netdev_priv(netdev);
6259	struct e1000_hw *hw = &adapter->hw;
6260	u32 ctrl, rctl;
6261
6262	igb_irq_disable(adapter);
6263
6264	if (features & NETIF_F_HW_VLAN_RX) {
6265		/* enable VLAN tag insert/strip */
6266		ctrl = rd32(E1000_CTRL);
6267		ctrl |= E1000_CTRL_VME;
6268		wr32(E1000_CTRL, ctrl);
6269
6270		/* Disable CFI check */
6271		rctl = rd32(E1000_RCTL);
6272		rctl &= ~E1000_RCTL_CFIEN;
6273		wr32(E1000_RCTL, rctl);
6274	} else {
6275		/* disable VLAN tag insert/strip */
6276		ctrl = rd32(E1000_CTRL);
6277		ctrl &= ~E1000_CTRL_VME;
6278		wr32(E1000_CTRL, ctrl);
6279	}
6280
6281	igb_rlpml_set(adapter);
6282
6283	if (!test_bit(__IGB_DOWN, &adapter->state))
6284		igb_irq_enable(adapter);
6285}
6286
6287static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6288{
6289	struct igb_adapter *adapter = netdev_priv(netdev);
6290	struct e1000_hw *hw = &adapter->hw;
6291	int pf_id = adapter->vfs_allocated_count;
6292
6293	/* attempt to add filter to vlvf array */
6294	igb_vlvf_set(adapter, vid, true, pf_id);
6295
6296	/* add the filter since PF can receive vlans w/o entry in vlvf */
6297	igb_vfta_set(hw, vid, true);
6298
6299	set_bit(vid, adapter->active_vlans);
6300}
6301
6302static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6303{
6304	struct igb_adapter *adapter = netdev_priv(netdev);
6305	struct e1000_hw *hw = &adapter->hw;
6306	int pf_id = adapter->vfs_allocated_count;
6307	s32 err;
6308
6309	igb_irq_disable(adapter);
6310
6311	if (!test_bit(__IGB_DOWN, &adapter->state))
6312		igb_irq_enable(adapter);
6313
6314	/* remove vlan from VLVF table array */
6315	err = igb_vlvf_set(adapter, vid, false, pf_id);
6316
6317	/* if vid was not present in VLVF just remove it from table */
6318	if (err)
6319		igb_vfta_set(hw, vid, false);
6320
6321	clear_bit(vid, adapter->active_vlans);
6322}
6323
6324static void igb_restore_vlan(struct igb_adapter *adapter)
6325{
6326	u16 vid;
6327
6328	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6329		igb_vlan_rx_add_vid(adapter->netdev, vid);
6330}
6331
6332int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6333{
6334	struct pci_dev *pdev = adapter->pdev;
6335	struct e1000_mac_info *mac = &adapter->hw.mac;
6336
6337	mac->autoneg = 0;
6338
6339	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6340	 * for the switch() below to work */
6341	if ((spd & 1) || (dplx & ~1))
6342		goto err_inval;
6343
6344	/* Fiber NICs only allow 1000 Mbps full duplex */
6345	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6346	    spd != SPEED_1000 &&
6347	    dplx != DUPLEX_FULL)
6348		goto err_inval;
6349
6350	switch (spd + dplx) {
6351	case SPEED_10 + DUPLEX_HALF:
6352		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6353		break;
6354	case SPEED_10 + DUPLEX_FULL:
6355		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6356		break;
6357	case SPEED_100 + DUPLEX_HALF:
6358		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6359		break;
6360	case SPEED_100 + DUPLEX_FULL:
6361		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6362		break;
6363	case SPEED_1000 + DUPLEX_FULL:
6364		mac->autoneg = 1;
6365		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6366		break;
6367	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6368	default:
6369		goto err_inval;
6370	}
6371	return 0;
6372
6373err_inval:
6374	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6375	return -EINVAL;
6376}
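
/*
 * Illustrative note: igb_set_spd_dplx() is reached through the ethtool
 * set_settings path, e.g. (interface name is a placeholder):
 *
 *	ethtool -s eth0 speed 100 duplex full autoneg off
 *
 * forces 100/full, while requesting 1000/full re-enables autoneg as
 * required for gigabit copper links.
 */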
6377
6378static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6379{
6380	struct net_device *netdev = pci_get_drvdata(pdev);
6381	struct igb_adapter *adapter = netdev_priv(netdev);
6382	struct e1000_hw *hw = &adapter->hw;
6383	u32 ctrl, rctl, status;
6384	u32 wufc = adapter->wol;
6385#ifdef CONFIG_PM
6386	int retval = 0;
6387#endif
6388
6389	netif_device_detach(netdev);
6390
6391	if (netif_running(netdev))
6392		igb_close(netdev);
6393
6394	igb_clear_interrupt_scheme(adapter);
6395
6396#ifdef CONFIG_PM
6397	retval = pci_save_state(pdev);
6398	if (retval)
6399		return retval;
6400#endif
6401
6402	status = rd32(E1000_STATUS);
6403	if (status & E1000_STATUS_LU)
6404		wufc &= ~E1000_WUFC_LNKC;
6405
6406	if (wufc) {
6407		igb_setup_rctl(adapter);
6408		igb_set_rx_mode(netdev);
6409
6410		/* turn on all-multi mode if wake on multicast is enabled */
6411		if (wufc & E1000_WUFC_MC) {
6412			rctl = rd32(E1000_RCTL);
6413			rctl |= E1000_RCTL_MPE;
6414			wr32(E1000_RCTL, rctl);
6415		}
6416
6417		ctrl = rd32(E1000_CTRL);
6418		/* advertise wake from D3Cold */
6419		#define E1000_CTRL_ADVD3WUC 0x00100000
6420		/* phy power management enable */
6421		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6422		ctrl |= E1000_CTRL_ADVD3WUC;
6423		wr32(E1000_CTRL, ctrl);
6424
6425		/* Allow time for pending master requests to run */
6426		igb_disable_pcie_master(hw);
6427
6428		wr32(E1000_WUC, E1000_WUC_PME_EN);
6429		wr32(E1000_WUFC, wufc);
6430	} else {
6431		wr32(E1000_WUC, 0);
6432		wr32(E1000_WUFC, 0);
6433	}
6434
6435	*enable_wake = wufc || adapter->en_mng_pt;
6436	if (!*enable_wake)
6437		igb_power_down_link(adapter);
6438	else
6439		igb_power_up_link(adapter);
6440
6441	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6442	 * would have already happened in close and is redundant. */
6443	igb_release_hw_control(adapter);
6444
6445	pci_disable_device(pdev);
6446
6447	return 0;
6448}
6449
6450#ifdef CONFIG_PM
6451static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6452{
6453	int retval;
6454	bool wake;
6455
6456	retval = __igb_shutdown(pdev, &wake);
6457	if (retval)
6458		return retval;
6459
6460	if (wake) {
6461		pci_prepare_to_sleep(pdev);
6462	} else {
6463		pci_wake_from_d3(pdev, false);
6464		pci_set_power_state(pdev, PCI_D3hot);
6465	}
6466
6467	return 0;
6468}
6469
6470static int igb_resume(struct pci_dev *pdev)
6471{
6472	struct net_device *netdev = pci_get_drvdata(pdev);
6473	struct igb_adapter *adapter = netdev_priv(netdev);
6474	struct e1000_hw *hw = &adapter->hw;
6475	u32 err;
6476
6477	pci_set_power_state(pdev, PCI_D0);
6478	pci_restore_state(pdev);
6479	pci_save_state(pdev);
6480
6481	err = pci_enable_device_mem(pdev);
6482	if (err) {
6483		dev_err(&pdev->dev,
6484			"igb: Cannot enable PCI device from suspend\n");
6485		return err;
6486	}
6487	pci_set_master(pdev);
6488
6489	pci_enable_wake(pdev, PCI_D3hot, 0);
6490	pci_enable_wake(pdev, PCI_D3cold, 0);
6491
6492	if (igb_init_interrupt_scheme(adapter)) {
6493		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6494		return -ENOMEM;
6495	}
6496
6497	igb_reset(adapter);
6498
6499	/* let the f/w know that the h/w is now under the control of the
6500	 * driver. */
6501	igb_get_hw_control(adapter);
6502
6503	wr32(E1000_WUS, ~0);
6504
6505	if (netif_running(netdev)) {
6506		err = igb_open(netdev);
6507		if (err)
6508			return err;
6509	}
6510
6511	netif_device_attach(netdev);
6512
6513	return 0;
6514}
6515#endif
6516
6517static void igb_shutdown(struct pci_dev *pdev)
6518{
6519	bool wake;
6520
6521	__igb_shutdown(pdev, &wake);
6522
6523	if (system_state == SYSTEM_POWER_OFF) {
6524		pci_wake_from_d3(pdev, wake);
6525		pci_set_power_state(pdev, PCI_D3hot);
6526	}
6527}
6528
6529#ifdef CONFIG_NET_POLL_CONTROLLER
6530/*
6531 * Polling 'interrupt' - used by things like netconsole to send skbs
6532 * without having to re-enable interrupts. It's not called while
6533 * the interrupt routine is executing.
6534 */
6535static void igb_netpoll(struct net_device *netdev)
6536{
6537	struct igb_adapter *adapter = netdev_priv(netdev);
6538	struct e1000_hw *hw = &adapter->hw;
6539	int i;
6540
6541	if (!adapter->msix_entries) {
6542		struct igb_q_vector *q_vector = adapter->q_vector[0];
6543		igb_irq_disable(adapter);
6544		napi_schedule(&q_vector->napi);
6545		return;
6546	}
6547
6548	for (i = 0; i < adapter->num_q_vectors; i++) {
6549		struct igb_q_vector *q_vector = adapter->q_vector[i];
6550		wr32(E1000_EIMC, q_vector->eims_value);
6551		napi_schedule(&q_vector->napi);
6552	}
6553}
6554#endif /* CONFIG_NET_POLL_CONTROLLER */
6555
6556/**
6557 * igb_io_error_detected - called when PCI error is detected
6558 * @pdev: Pointer to PCI device
6559 * @state: The current pci connection state
6560 *
6561 * This function is called after a PCI bus error affecting
6562 * this device has been detected.
6563 */
6564static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6565					      pci_channel_state_t state)
6566{
6567	struct net_device *netdev = pci_get_drvdata(pdev);
6568	struct igb_adapter *adapter = netdev_priv(netdev);
6569
6570	netif_device_detach(netdev);
6571
6572	if (state == pci_channel_io_perm_failure)
6573		return PCI_ERS_RESULT_DISCONNECT;
6574
6575	if (netif_running(netdev))
6576		igb_down(adapter);
6577	pci_disable_device(pdev);
6578
6579	/* Request a slot reset. */
6580	return PCI_ERS_RESULT_NEED_RESET;
6581}
6582
6583/**
6584 * igb_io_slot_reset - called after the pci bus has been reset.
6585 * @pdev: Pointer to PCI device
6586 *
6587 * Restart the card from scratch, as if from a cold-boot. Implementation
6588 * resembles the first-half of the igb_resume routine.
6589 */
6590static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6591{
6592	struct net_device *netdev = pci_get_drvdata(pdev);
6593	struct igb_adapter *adapter = netdev_priv(netdev);
6594	struct e1000_hw *hw = &adapter->hw;
6595	pci_ers_result_t result;
6596	int err;
6597
6598	if (pci_enable_device_mem(pdev)) {
6599		dev_err(&pdev->dev,
6600			"Cannot re-enable PCI device after reset.\n");
6601		result = PCI_ERS_RESULT_DISCONNECT;
6602	} else {
6603		pci_set_master(pdev);
6604		pci_restore_state(pdev);
6605		pci_save_state(pdev);
6606
6607		pci_enable_wake(pdev, PCI_D3hot, 0);
6608		pci_enable_wake(pdev, PCI_D3cold, 0);
6609
6610		igb_reset(adapter);
6611		wr32(E1000_WUS, ~0);
6612		result = PCI_ERS_RESULT_RECOVERED;
6613	}
6614
6615	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6616	if (err) {
6617		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6618		        "failed 0x%x\n", err);
6619		/* non-fatal, continue */
6620	}
6621
6622	return result;
6623}
6624
6625/**
6626 * igb_io_resume - called when traffic can start flowing again.
6627 * @pdev: Pointer to PCI device
6628 *
6629 * This callback is called when the error recovery driver tells us that
6630 * it's OK to resume normal operation. Implementation resembles the
6631 * second-half of the igb_resume routine.
6632 */
6633static void igb_io_resume(struct pci_dev *pdev)
6634{
6635	struct net_device *netdev = pci_get_drvdata(pdev);
6636	struct igb_adapter *adapter = netdev_priv(netdev);
6637
6638	if (netif_running(netdev)) {
6639		if (igb_up(adapter)) {
6640			dev_err(&pdev->dev, "igb_up failed after reset\n");
6641			return;
6642		}
6643	}
6644
6645	netif_device_attach(netdev);
6646
6647	/* let the f/w know that the h/w is now under the control of the
6648	 * driver. */
6649	igb_get_hw_control(adapter);
6650}
6651
6652static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6653                             u8 qsel)
6654{
6655	u32 rar_low, rar_high;
6656	struct e1000_hw *hw = &adapter->hw;
6657
6658	/* HW expects these in little endian so we reverse the byte order
6659	 * from network order (big endian) to little endian
6660	 */
6661	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6662	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6663	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6664
6665	/* Indicate to hardware the Address is Valid. */
6666	rar_high |= E1000_RAH_AV;
6667
6668	if (hw->mac.type == e1000_82575)
6669		rar_high |= E1000_RAH_POOL_1 * qsel;
6670	else
6671		rar_high |= E1000_RAH_POOL_1 << qsel;
6672
6673	wr32(E1000_RAL(index), rar_low);
6674	wrfl();
6675	wr32(E1000_RAH(index), rar_high);
6676	wrfl();
6677}
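
/*
 * Worked example of the layout above: for the (example) MAC address
 * 00:1b:21:aa:bb:cc the bytes land as
 *
 *	rar_low  = 0xaa211b00	(addr[3]..addr[0])
 *	rar_high = 0x0000ccbb	(addr[5], addr[4])
 *
 * before the AV bit and the pool/queue select bits are OR'ed in.
 */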
6678
6679static int igb_set_vf_mac(struct igb_adapter *adapter,
6680                          int vf, unsigned char *mac_addr)
6681{
6682	struct e1000_hw *hw = &adapter->hw;
6683	/* VF MAC addresses start at the end of the receive addresses and move
6684	 * towards the first; as a result a collision should not be possible */
6685	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6686
6687	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6688
6689	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6690
6691	return 0;
6692}
6693
6694static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6695{
6696	struct igb_adapter *adapter = netdev_priv(netdev);
6697	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6698		return -EINVAL;
6699	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6700	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6701	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6702				      " change effective.\n");
6703	if (test_bit(__IGB_DOWN, &adapter->state)) {
6704		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6705			 " but the PF device is not up.\n");
6706		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6707			 " attempting to use the VF device.\n");
6708	}
6709	return igb_set_vf_mac(adapter, vf, mac);
6710}
6711
6712static int igb_link_mbps(int internal_link_speed)
6713{
6714	switch (internal_link_speed) {
6715	case SPEED_100:
6716		return 100;
6717	case SPEED_1000:
6718		return 1000;
6719	default:
6720		return 0;
6721	}
6722}
6723
6724static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6725				  int link_speed)
6726{
6727	int rf_dec, rf_int;
6728	u32 bcnrc_val;
6729
6730	if (tx_rate != 0) {
6731		/* Calculate the rate factor values to set */
6732		rf_int = link_speed / tx_rate;
6733		rf_dec = (link_speed - (rf_int * tx_rate));
6734		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6735
6736		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6737		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6738		               E1000_RTTBCNRC_RF_INT_MASK);
6739		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6740	} else {
6741		bcnrc_val = 0;
6742	}
6743
6744	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6745	wr32(E1000_RTTBCNRC, bcnrc_val);
6746}
6747
6748static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6749{
6750	int actual_link_speed, i;
6751	bool reset_rate = false;
6752
6753	/* VF TX rate limit was not set or not supported */
6754	if ((adapter->vf_rate_link_speed == 0) ||
6755	    (adapter->hw.mac.type != e1000_82576))
6756		return;
6757
6758	actual_link_speed = igb_link_mbps(adapter->link_speed);
6759	if (actual_link_speed != adapter->vf_rate_link_speed) {
6760		reset_rate = true;
6761		adapter->vf_rate_link_speed = 0;
6762		dev_info(&adapter->pdev->dev,
6763		         "Link speed has been changed. VF Transmit "
6764		         "rate is disabled\n");
6765	}
6766
6767	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6768		if (reset_rate)
6769			adapter->vf_data[i].tx_rate = 0;
6770
6771		igb_set_vf_rate_limit(&adapter->hw, i,
6772		                      adapter->vf_data[i].tx_rate,
6773		                      actual_link_speed);
6774	}
6775}
6776
6777static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6778{
6779	struct igb_adapter *adapter = netdev_priv(netdev);
6780	struct e1000_hw *hw = &adapter->hw;
6781	int actual_link_speed;
6782
6783	if (hw->mac.type != e1000_82576)
6784		return -EOPNOTSUPP;
6785
6786	actual_link_speed = igb_link_mbps(adapter->link_speed);
6787	if ((vf >= adapter->vfs_allocated_count) ||
6788	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6789	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6790		return -EINVAL;
6791
6792	adapter->vf_rate_link_speed = actual_link_speed;
6793	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6794	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6795
6796	return 0;
6797}
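
/*
 * Illustrative note: the per-VF limit is normally set with iproute2,
 * e.g. (device name, VF index and rate in Mbps are placeholders):
 *
 *	ip link set eth0 vf 0 rate 300
 *
 * which only takes effect on 82576 parts with link up, per the checks
 * above.
 */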
6798
6799static int igb_ndo_get_vf_config(struct net_device *netdev,
6800				 int vf, struct ifla_vf_info *ivi)
6801{
6802	struct igb_adapter *adapter = netdev_priv(netdev);
6803	if (vf >= adapter->vfs_allocated_count)
6804		return -EINVAL;
6805	ivi->vf = vf;
6806	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6807	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6808	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6809	ivi->qos = adapter->vf_data[vf].pf_qos;
6810	return 0;
6811}
6812
6813static void igb_vmm_control(struct igb_adapter *adapter)
6814{
6815	struct e1000_hw *hw = &adapter->hw;
6816	u32 reg;
6817
6818	switch (hw->mac.type) {
6819	case e1000_82575:
6820	default:
6821		/* replication is not supported for 82575 */
6822		return;
6823	case e1000_82576:
6824		/* notify HW that the MAC is adding vlan tags */
6825		reg = rd32(E1000_DTXCTL);
6826		reg |= E1000_DTXCTL_VLAN_ADDED;
6827		wr32(E1000_DTXCTL, reg);
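		/* fall through */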
6828	case e1000_82580:
6829		/* enable replication vlan tag stripping */
6830		reg = rd32(E1000_RPLOLR);
6831		reg |= E1000_RPLOLR_STRVLAN;
6832		wr32(E1000_RPLOLR, reg);
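		/* fall through */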
6833	case e1000_i350:
6834		/* none of the above registers are supported by i350 */
6835		break;
6836	}
6837
6838	if (adapter->vfs_allocated_count) {
6839		igb_vmdq_set_loopback_pf(hw, true);
6840		igb_vmdq_set_replication_pf(hw, true);
6841		igb_vmdq_set_anti_spoofing_pf(hw, true,
6842						adapter->vfs_allocated_count);
6843	} else {
6844		igb_vmdq_set_loopback_pf(hw, false);
6845		igb_vmdq_set_replication_pf(hw, false);
6846	}
6847}
6848
6849/* igb_main.c */
6850